2 Commits

4 changed files with 150 additions and 125 deletions

46
Cargo.lock generated
View File

@@ -652,6 +652,21 @@ dependencies = [
"serde", "serde",
] ]
[[package]]
name = "bit-set"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "08807e080ed7f9d5433fa9b275196cfc35414f66a0c79d864dc51a0d825231a3"
dependencies = [
"bit-vec",
]
[[package]]
name = "bit-vec"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7"
[[package]] [[package]]
name = "bitflags" name = "bitflags"
version = "2.9.4" version = "2.9.4"
@@ -825,9 +840,9 @@ dependencies = [
[[package]] [[package]]
name = "calamine" name = "calamine"
version = "0.30.1" version = "0.31.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b1a9acfeb1555aa8def91fe8ff208aadaea850c109968ec35ac965edbe7d210b" checksum = "da56b262e8a827c6b12c3dde4ea4622e0ff542bd2e9ea5855e4cb523481d77b7"
dependencies = [ dependencies = [
"atoi_simd", "atoi_simd",
"byteorder 1.5.0", "byteorder 1.5.0",
@@ -835,7 +850,7 @@ dependencies = [
"encoding_rs", "encoding_rs",
"fast-float2", "fast-float2",
"log", "log",
"quick-xml 0.37.5", "quick-xml",
"serde", "serde",
"zip", "zip",
] ]
@@ -1463,6 +1478,17 @@ dependencies = [
"pin-project-lite", "pin-project-lite",
] ]
[[package]]
name = "fancy-regex"
version = "0.16.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "998b056554fbe42e03ae0e152895cd1a7e1002aec800fdc6635d20270260c46f"
dependencies = [
"bit-set",
"regex-automata",
"regex-syntax",
]
[[package]] [[package]]
name = "fast-float2" name = "fast-float2"
version = "0.2.3" version = "0.2.3"
@@ -2923,7 +2949,7 @@ checksum = "740ebea15c5d1428f910cd1a5f52cebf8d25006245ed8ade92702f4943d91e07"
dependencies = [ dependencies = [
"base64 0.22.1", "base64 0.22.1",
"indexmap 2.11.4", "indexmap 2.11.4",
"quick-xml 0.38.3", "quick-xml",
"serde", "serde",
"time 0.3.44", "time 0.3.44",
] ]
@@ -3075,6 +3101,7 @@ dependencies = [
"calamine", "calamine",
"chrono", "chrono",
"derive_more", "derive_more",
"fancy-regex",
"log", "log",
"regex", "regex",
"reqwest", "reqwest",
@@ -3121,22 +3148,13 @@ version = "1.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0" checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0"
[[package]]
name = "quick-xml"
version = "0.37.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "331e97a1af0bf59823e6eadffe373d7b27f485be8748f71471c662c1f269b7fb"
dependencies = [
"encoding_rs",
"memchr",
]
[[package]] [[package]]
name = "quick-xml" name = "quick-xml"
version = "0.38.3" version = "0.38.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "42a232e7487fc2ef313d96dde7948e7a3c05101870d8985e4fd8d26aedd27b89" checksum = "42a232e7487fc2ef313d96dde7948e7a3c05101870d8985e4fd8d26aedd27b89"
dependencies = [ dependencies = [
"encoding_rs",
"memchr", "memchr",
] ]

View File

@@ -20,7 +20,7 @@ derive_more = { version = "2.0.1", features = ["error", "display"] }
utoipa = { version = "5.4.0", features = ["macros", "chrono"] } utoipa = { version = "5.4.0", features = ["macros", "chrono"] }
calamine = "0.30" calamine = "0.31"
async-trait = "0.1.89" async-trait = "0.1.89"
reqwest = "0.12.23" reqwest = "0.12.23"
@@ -29,4 +29,5 @@ regex = "1.11.2"
strsim = "0.11.1" strsim = "0.11.1"
log = "0.4.27" log = "0.4.27"
sentry = "0.43.0" sentry = "0.43.0"
fancy-regex = "0.16.2"

View File

@@ -65,7 +65,11 @@ impl ScheduleProvider for Wrapper {
this.snapshot = Arc::new(snapshot); this.snapshot = Arc::new(snapshot);
}, },
Err(updater::error::Error::QueryUrlFailed(updater::error::QueryUrlError::UriFetchFailed)) => {},
Err(err) => { Err(err) => {
sentry::capture_error(&err);
cancellation_token.cancel(); cancellation_token.cancel();
return Err(err.into()); return Err(err.into());
} }

View File

@@ -233,6 +233,7 @@ enum LessonParseResult {
fn guess_lesson_type(text: &str) -> Option<LessonType> { fn guess_lesson_type(text: &str) -> Option<LessonType> {
static MAP: LazyLock<HashMap<&str, LessonType>> = LazyLock::new(|| { static MAP: LazyLock<HashMap<&str, LessonType>> = LazyLock::new(|| {
HashMap::from([ HashMap::from([
("о важном", LessonType::Additional),
("консультация", LessonType::Consultation), ("консультация", LessonType::Consultation),
("самостоятельная работа", LessonType::IndependentWork), ("самостоятельная работа", LessonType::IndependentWork),
("зачет", LessonType::Exam), ("зачет", LessonType::Exam),
@@ -427,102 +428,108 @@ fn parse_name_and_subgroups(text: &str) -> Result<ParsedLessonName, Error> {
// 3. "Модификатор" (чаще всего). // 3. "Модификатор" (чаще всего).
// //
// Регулярное выражение для получения ФИО преподавателей и номеров подгрупп (aka. второй части). // Регулярное выражение для получения ФИО преподавателей и номеров подгрупп (aka. второй части).
// (?:[А-Я][а-я]+\s?(?:[А-Я][\s.]*){2}(?:\(\d\s?[а-я]+\))?(?:, )?)+[\s.]* static NAME_RE: LazyLock<fancy_regex::Regex> = LazyLock::new(|| {
// fancy_regex::Regex::new(
// Подробнее: r"([А-Я][а-я]+(?:[\s.]*[А-Я]){1,2})(?=[^а-я])[.\s]*(?:\(?(\d)[\sа-я]*\)?)?",
// (?:
// [А-Я][а-я]+ - Фамилия.
// \s? - Кто знает, будет ли там пробел.
// (?:[А-Я][\s.]*){2} - Имя и отчество с учётом случайных пробелов и точек.
// (?:
// \( - Открытие подгруппы.
// \s? - Кто знает, будет ли там пробел.
// \d - Номер подгруппы.
// \s? - Кто знает, будет ли там пробел.
// [а-я\s]+ - Слово "подгруппа" с учётов ошибок.
// \) - Закрытие подгруппы.
// )? - Явное указание подгруппы может отсутствовать по понятным причинам.
// (?:, )? - Разделители между отдельными частями.
// )+
// [\s.]* - Забираем с собой всякий мусор, что бы не передать его в третью часть.
static NAMES_REGEX: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(
r"(?:[А-Я][а-я]+\s?(?:[А-Я][\s.]*){2}(?:\(?\s*\d\s*[а-я\s]+\)?)?(?:[\s,.]+)?){1,2}+[\s.,]*",
) )
.unwrap() .unwrap()
}); });
// Отчистка let text = text
static CLEAN_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"[\s\n\t]+").unwrap()); .chars()
.filter(|c: &char| {
c.is_whitespace()
|| c.is_ascii_digit()
|| (*c >= 'а' && *c <= 'я')
|| (*c >= 'А' && *c <= 'Я')
|| *c == '.'
|| *c == '-'
})
.collect::<String>()
.replace(r"\s+", " ");
let text = CLEAN_RE let mut lesson_name: Option<&str> = None;
.replace(&text.replace([' ', '\t', '\n'], " ").replace(",", ""), " ") let mut extra: Option<&str> = None;
.to_string();
let (lesson_name, subgroups, lesson_type) = match NAMES_REGEX.captures(&text) {
Some(captures) => {
let capture = captures.get(0).unwrap();
let subgroups: Vec<Option<LessonSubGroup>> = {
let src = capture.as_str().replace([' ', '.'], "");
let mut shared_subgroup = false; let mut shared_subgroup = false;
let mut subgroups: [Option<LessonSubGroup>; 2] = [None, None]; let mut subgroups: [Option<LessonSubGroup>; 2] = [None, None];
for name in src.split(',') { for capture in NAME_RE.captures_iter(&text) {
let digit_index = name.find(|c: char| c.is_ascii_digit()); let capture = capture.unwrap();
let number: u8 = if lesson_name.is_none() {
digit_index.map_or(0, |index| name[(index)..(index + 1)].parse().unwrap()); lesson_name = Some(&text[..capture.get(0).unwrap().start()]);
}
extra = Some(&text[capture.get(0).unwrap().end()..]);
let teacher_name = { let teacher_name = {
let name_end = name let clean = capture
.find(|c: char| !c.is_alphabetic()) .get(1)
.unwrap_or(name.len()); .unwrap()
.as_str()
.chars()
.filter(|c| c.is_alphabetic())
.collect::<Vec<char>>();
// Я ебал. Как же я долго до этого доходил. if clean.get(clean.len() - 2).is_some_and(|c| c.is_uppercase()) {
let (name, remaining) = clean.split_at(clean.len() - 2);
format!( format!(
"{} {}.{}.", "{} {}.{}.",
name.get(..name_end - 4).unwrap(), name.iter().collect::<String>(),
name.get(name_end - 4..name_end - 2).unwrap(), remaining[0],
name.get(name_end - 2..name_end).unwrap(), remaining[1]
) )
} else {
let (remaining, name) = clean.split_last().unwrap();
format!("{} {}.", name.iter().collect::<String>(), remaining)
}
}; };
let lesson = Some(LessonSubGroup { let subgroup_index = capture
.get(2)
.and_then(|m| Some(m.as_str().parse::<u32>().unwrap()));
let subgroup = Some(LessonSubGroup {
cabinet: None, cabinet: None,
teacher: Some(teacher_name), teacher: Some(teacher_name),
}); });
match number { match subgroup_index {
0 => { None => {
subgroups[0] = lesson; subgroups[0] = subgroup;
subgroups[1] = None; subgroups[1] = None;
shared_subgroup = true; shared_subgroup = true;
break; break;
} }
num => { Some(num) => {
// 1 - 1 = 0 | 2 - 1 = 1 | 3 - 1 = 2 (schedule index to array index) // 1 - 1 = 0 | 2 - 1 = 1 | 3 - 1 = 2 (schedule index to array index)
// 0 % 2 = 0 | 1 % 2 = 1 | 2 % 2 = 0 (clamp) // 0 % 2 = 0 | 1 % 2 = 1 | 2 % 2 = 0 (clamp)
let normalised = (num - 1) % 2; let normalised = (num - 1) % 2;
subgroups[normalised as usize] = lesson; subgroups[normalised as usize] = subgroup;
} }
} }
} }
if shared_subgroup { let subgroups = if lesson_name.is_none() {
Vec::new()
} else if shared_subgroup {
Vec::from([subgroups[0].take()]) Vec::from([subgroups[0].take()])
} else { } else {
Vec::from(subgroups) Vec::from(subgroups)
}
}; };
let name = text[..capture.start()].trim().to_string(); if extra.is_none() {
let extra = text[capture.end()..].trim().to_string(); extra = text
.rfind(" ")
.and_then(|i| text[..i].rfind(" "))
.map(|i| &text[i + 1..]);
}
let lesson_type = if extra.len() > 4 { let lesson_type = if let Some(extra) = extra
&& extra.len() > 4
{
let result = guess_lesson_type(&extra); let result = guess_lesson_type(&extra);
if result.is_none() { if result.is_none() {
@@ -541,13 +548,8 @@ fn parse_name_and_subgroups(text: &str) -> Result<ParsedLessonName, Error> {
None None
}; };
(name, subgroups, lesson_type)
}
None => (text, Vec::new(), None),
};
Ok(ParsedLessonName { Ok(ParsedLessonName {
name: lesson_name, name: lesson_name.unwrap_or(&text).to_string(),
subgroups, subgroups,
r#type: lesson_type, r#type: lesson_type,
}) })