From a3b4a501db10a3d457ddaf7dae8aa440f366201f Mon Sep 17 00:00:00 2001 From: n08i40k Date: Fri, 10 Oct 2025 01:37:52 +0400 Subject: [PATCH] feat(parser): improve names regex to exclude some non-standard cases MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For example: "Название ФАмилия. И.О.". In that case the regex would grab "Название ФА" instead of "Амилия. И.О." (we can't include the leading 'Ф', because that would make the regex checks far more complex). Now the regex ignores "Название ФА" if it is followed by either a lowercase or an uppercase letter. Previously only a following lowercase letter was excluded, so the check did not fully reject "Название ФА": the regex backtracked and grabbed "Название Ф", because the character after 'Ф' is uppercase. --- providers/provider-engels-polytechnic/src/parser/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/providers/provider-engels-polytechnic/src/parser/mod.rs b/providers/provider-engels-polytechnic/src/parser/mod.rs index 472cf88..e096891 100644 --- a/providers/provider-engels-polytechnic/src/parser/mod.rs +++ b/providers/provider-engels-polytechnic/src/parser/mod.rs @@ -377,7 +377,7 @@ fn parse_name_and_subgroups(text: &str, row: u32, column: u32) -> Result = LazyLock::new(|| { fancy_regex::Regex::new( - r"([А-Я][а-я]+(?:[\s.]*[А-Я]){1,2})(?=[^а-я])[.\s]*(?:\(?(\d)[\sа-я]*\)?)?", + r"([А-Я][а-я]+(?:[\s.]*[А-Я]){1,2})(?=[^А-Яа-я])[.\s]*(?:\(?(\d)[\sа-я]*\)?)?", ) .unwrap() });