mirror of
https://github.com/n08i40k/schedule-parser-rusted.git
synced 2025-12-06 09:47:50 +03:00
feat(parser)!: rework of subgroups parsing
This commit is contained in:
@@ -16,6 +16,7 @@ serde_repr = "0.1.20"
|
||||
regex = "1.11.1"
|
||||
utoipa = { version = "5", features = ["chrono"] }
|
||||
strsim = "0.11.1"
|
||||
log = "0.4.26"
|
||||
|
||||
[dev-dependencies]
|
||||
criterion = "0.6"
|
||||
|
||||
@@ -180,55 +180,54 @@ fn parse_lesson(
|
||||
group_column + 1,
|
||||
);
|
||||
|
||||
match cabinets.len() {
|
||||
// Если кабинетов нет, но есть подгруппы, назначаем им кабинет "??"
|
||||
0 => {
|
||||
for subgroup in &mut subgroups {
|
||||
subgroup.cabinet = Some("??".to_string());
|
||||
}
|
||||
}
|
||||
// Назначаем этот кабинет всем подгруппам
|
||||
1 => {
|
||||
for subgroup in &mut subgroups {
|
||||
subgroup.cabinet =
|
||||
Some(cabinets.get(0).or(Some(&String::new())).unwrap().clone())
|
||||
}
|
||||
}
|
||||
len => {
|
||||
// Если количество кабинетов совпадает с количеством подгрупп, назначаем кабинеты по порядку
|
||||
if len == subgroups.len() {
|
||||
for subgroup in &mut subgroups {
|
||||
subgroup.cabinet = Some(
|
||||
cabinets
|
||||
.get((subgroup.number - 1) as usize)
|
||||
.unwrap()
|
||||
.clone(),
|
||||
);
|
||||
}
|
||||
// Если количество кабинетов больше количества подгрупп, делаем ещё одну подгруппу.
|
||||
} else if len > subgroups.len() {
|
||||
for index in 0..subgroups.len() {
|
||||
subgroups[index].cabinet = Some(cabinets[index].clone());
|
||||
}
|
||||
let cab_count = cabinets.len();
|
||||
|
||||
while cabinets.len() > subgroups.len() {
|
||||
subgroups.push(LessonSubGroup {
|
||||
number: (subgroups.len() + 1) as u8,
|
||||
cabinet: Some(cabinets[subgroups.len()].clone()),
|
||||
teacher: "Ошибка в расписании".to_string(),
|
||||
if cab_count == 1 {
|
||||
// Назначаем этот кабинет всем подгруппам
|
||||
let cab = Some(cabinets.get(0).unwrap().clone());
|
||||
|
||||
for subgroup in &mut subgroups {
|
||||
if let Some(subgroup) = subgroup {
|
||||
subgroup.cabinet = cab.clone()
|
||||
}
|
||||
}
|
||||
} else if cab_count == 2 {
|
||||
while subgroups.len() < cab_count {
|
||||
subgroups.push(subgroups.last().unwrap_or(&None).clone());
|
||||
}
|
||||
|
||||
for i in 0..cab_count {
|
||||
let subgroup = subgroups.get_mut(i).unwrap();
|
||||
let cabinet = Some(cabinets.get(i).unwrap().clone());
|
||||
|
||||
match subgroup {
|
||||
None => {
|
||||
let _ = subgroup.insert(LessonSubGroup {
|
||||
teacher: None,
|
||||
cabinet,
|
||||
});
|
||||
}
|
||||
Some(subgroup) => {
|
||||
subgroup.cabinet = cabinet;
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
};
|
||||
|
||||
let lesson = Lesson {
|
||||
lesson_type: lesson_type.unwrap_or(lesson_boundaries.lesson_type.clone()),
|
||||
default_range,
|
||||
range: default_range,
|
||||
name: Some(name),
|
||||
time: lesson_time,
|
||||
subgroups: Some(subgroups),
|
||||
subgroups: if subgroups.len() == 2
|
||||
&& subgroups.get(0).unwrap().is_none()
|
||||
&& subgroups.get(1).unwrap().is_none()
|
||||
{
|
||||
None
|
||||
} else {
|
||||
Some(subgroups)
|
||||
},
|
||||
group: None,
|
||||
};
|
||||
|
||||
@@ -241,7 +240,7 @@ fn parse_lesson(
|
||||
Ok(Lessons(Vec::from([
|
||||
Lesson {
|
||||
lesson_type: Break,
|
||||
default_range: None,
|
||||
range: None,
|
||||
name: None,
|
||||
time: LessonBoundaries {
|
||||
start: prev_lesson.time.end,
|
||||
@@ -264,11 +263,11 @@ fn parse_cabinets(worksheet: &WorkSheet, row_range: (u32, u32), column: u32) ->
|
||||
let clean = raw.replace("\n", " ");
|
||||
let parts: Vec<&str> = clean.split(" ").collect();
|
||||
|
||||
for part in parts {
|
||||
parts.iter().take(2).for_each(|part| {
|
||||
let clean_part = part.to_string().trim().to_string();
|
||||
|
||||
cabinets.push(clean_part);
|
||||
}
|
||||
});
|
||||
|
||||
break;
|
||||
}
|
||||
@@ -280,7 +279,7 @@ fn parse_cabinets(worksheet: &WorkSheet, row_range: (u32, u32), column: u32) ->
|
||||
/// Getting the "pure" name of the lesson and list of teachers from the text of the lesson cell.
|
||||
fn parse_name_and_subgroups(
|
||||
text: &String,
|
||||
) -> Result<(String, Vec<LessonSubGroup>, Option<LessonType>), ParseError> {
|
||||
) -> Result<(String, Vec<Option<LessonSubGroup>>, Option<LessonType>), ParseError> {
|
||||
// Части названия пары:
|
||||
// 1. Само название.
|
||||
// 2. Список преподавателей и подгрупп.
|
||||
@@ -308,9 +307,9 @@ fn parse_name_and_subgroups(
|
||||
|
||||
static NAMES_REGEX: LazyLock<Regex> = LazyLock::new(|| {
|
||||
Regex::new(
|
||||
r"(?:[А-Я][а-я]+\s?(?:[А-Я][\s.]*){2}(?:\(\s*\d\s*[а-я\s]+\))?(?:[\s,]+)?)+[\s.]*",
|
||||
r"(?:[А-Я][а-я]+\s?(?:[А-Я][\s.]*){2}(?:\(\s*\d\s*[а-я\s]+\))?(?:[\s,]+)?){1,2}+[\s.,]*",
|
||||
)
|
||||
.unwrap()
|
||||
.unwrap()
|
||||
});
|
||||
|
||||
// Отчистка
|
||||
@@ -320,40 +319,61 @@ fn parse_name_and_subgroups(
|
||||
.replace(&text.replace(&[' ', '\t', '\n'], " "), " ")
|
||||
.to_string();
|
||||
|
||||
let (lesson_name, mut subgroups, lesson_type) = match NAMES_REGEX.captures(&text) {
|
||||
let (lesson_name, subgroups, lesson_type) = match NAMES_REGEX.captures(&text) {
|
||||
Some(captures) => {
|
||||
let capture = captures.get(0).unwrap();
|
||||
|
||||
let subgroups: Vec<LessonSubGroup> = {
|
||||
let subgroups: Vec<Option<LessonSubGroup>> = {
|
||||
let src = capture.as_str().replace(&[' ', '.'], "");
|
||||
|
||||
src.split(',')
|
||||
.map(|name| {
|
||||
let open_bracket_index = name.find('(');
|
||||
let mut shared_subgroup = false;
|
||||
let mut subgroups: [Option<LessonSubGroup>; 2] = [None, None];
|
||||
|
||||
let subgroup_number = open_bracket_index.map_or(0, |index| {
|
||||
name[(index + 1)..(index + 2)].parse::<u8>().unwrap()
|
||||
});
|
||||
for name in src.split(',') {
|
||||
let open_bracket_index = name.find('(');
|
||||
|
||||
let teacher_name = {
|
||||
let name_end = open_bracket_index.unwrap_or_else(|| name.len());
|
||||
let number: u8 = open_bracket_index
|
||||
.map_or(0, |index| name[(index + 1)..(index + 2)].parse().unwrap());
|
||||
|
||||
// Я ебал. Как же я долго до этого доходил.
|
||||
format!(
|
||||
"{} {}.{}.",
|
||||
name.get(..name_end - 4).unwrap(),
|
||||
name.get(name_end - 4..name_end - 2).unwrap(),
|
||||
name.get(name_end - 2..name_end).unwrap(),
|
||||
)
|
||||
};
|
||||
let teacher_name = {
|
||||
let name_end = open_bracket_index.unwrap_or_else(|| name.len());
|
||||
|
||||
LessonSubGroup {
|
||||
number: subgroup_number,
|
||||
cabinet: None,
|
||||
teacher: teacher_name,
|
||||
// Я ебал. Как же я долго до этого доходил.
|
||||
format!(
|
||||
"{} {}.{}.",
|
||||
name.get(..name_end - 4).unwrap(),
|
||||
name.get(name_end - 4..name_end - 2).unwrap(),
|
||||
name.get(name_end - 2..name_end).unwrap(),
|
||||
)
|
||||
};
|
||||
|
||||
let lesson = Some(LessonSubGroup {
|
||||
cabinet: None,
|
||||
teacher: Some(teacher_name),
|
||||
});
|
||||
|
||||
match number {
|
||||
0 => {
|
||||
subgroups[0] = lesson;
|
||||
subgroups[1] = None;
|
||||
shared_subgroup = true;
|
||||
break;
|
||||
}
|
||||
})
|
||||
.collect()
|
||||
num => {
|
||||
// 1 - 1 = 0 | 2 - 1 = 1 | 3 - 1 = 2 (schedule index to array index)
|
||||
// 0 % 2 = 0 | 1 % 2 = 1 | 2 % 2 = 0 (clamp)
|
||||
let normalised = (num - 1) % 2;
|
||||
|
||||
subgroups[normalised as usize] = lesson;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if shared_subgroup {
|
||||
Vec::from([subgroups[0].take()])
|
||||
} else {
|
||||
Vec::from(subgroups)
|
||||
}
|
||||
};
|
||||
|
||||
let name = text[..capture.start()].trim().to_string();
|
||||
@@ -362,12 +382,15 @@ fn parse_name_and_subgroups(
|
||||
let lesson_type = if extra.len() > 4 {
|
||||
let result = guess_lesson_type(&extra);
|
||||
|
||||
#[cfg(not(debug_assertions))]
|
||||
if result.is_none() {
|
||||
#[cfg(not(debug_assertions))]
|
||||
sentry::capture_message(
|
||||
&*format!("Не удалось угадать тип пары '{}'!", extra),
|
||||
sentry::Level::Warning,
|
||||
);
|
||||
|
||||
#[cfg(debug_assertions)]
|
||||
log::warn!("Не удалось угадать тип пары '{}'!", extra);
|
||||
}
|
||||
|
||||
result
|
||||
@@ -380,40 +403,6 @@ fn parse_name_and_subgroups(
|
||||
None => (text, Vec::new(), None),
|
||||
};
|
||||
|
||||
// фикс, если у кого-то отсутствует индекс подгруппы
|
||||
|
||||
if subgroups.len() == 1 {
|
||||
let index = subgroups[0].number;
|
||||
|
||||
if index == 0 {
|
||||
subgroups[0].number = 1u8;
|
||||
} else {
|
||||
subgroups.push(LessonSubGroup {
|
||||
number: if index == 1 { 2 } else { 1 },
|
||||
cabinet: None,
|
||||
teacher: "Только у другой".to_string(),
|
||||
});
|
||||
}
|
||||
} else if subgroups.len() == 2 {
|
||||
// если индексы отсутствуют у обоих, ставим поочерёдно
|
||||
if subgroups[0].number == 0 && subgroups[1].number == 0 {
|
||||
subgroups[0].number = 1;
|
||||
subgroups[1].number = 2;
|
||||
}
|
||||
// если индекс отсутствует у первого, ставим 2, если у второго индекс 1 и наоборот
|
||||
else if subgroups[0].number == 0 {
|
||||
subgroups[0].number = if subgroups[1].number == 1 { 2 } else { 1 };
|
||||
}
|
||||
// если индекс отсутствует у второго, ставим 2, если у первого индекс 1 и наоборот
|
||||
else if subgroups[1].number == 0 {
|
||||
subgroups[1].number = if subgroups[0].number == 1 { 2 } else { 1 };
|
||||
}
|
||||
}
|
||||
|
||||
if subgroups.len() == 2 && subgroups[0].number == 2 && subgroups[1].number == 1 {
|
||||
subgroups.reverse()
|
||||
}
|
||||
|
||||
Ok((lesson_name, subgroups, lesson_type))
|
||||
}
|
||||
|
||||
@@ -586,22 +575,30 @@ fn convert_groups_to_teachers(
|
||||
let subgroups = group_lesson.subgroups.as_ref().unwrap();
|
||||
|
||||
for subgroup in subgroups {
|
||||
if subgroup.teacher == "Ошибка в расписании" {
|
||||
let teacher = match subgroup {
|
||||
None => continue,
|
||||
Some(subgroup) => match &subgroup.teacher {
|
||||
None => continue,
|
||||
Some(teacher) => teacher,
|
||||
},
|
||||
};
|
||||
|
||||
if teacher == "Ошибка в расписании" {
|
||||
continue;
|
||||
}
|
||||
|
||||
if !teachers.contains_key(&subgroup.teacher) {
|
||||
if !teachers.contains_key(teacher) {
|
||||
teachers.insert(
|
||||
subgroup.teacher.clone(),
|
||||
teacher.clone(),
|
||||
ScheduleEntry {
|
||||
name: subgroup.teacher.clone(),
|
||||
name: teacher.clone(),
|
||||
days: empty_days.to_vec(),
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
let teacher_day = teachers
|
||||
.get_mut(&subgroup.teacher)
|
||||
.get_mut(teacher)
|
||||
.unwrap()
|
||||
.days
|
||||
.get_mut(index)
|
||||
@@ -620,9 +617,8 @@ fn convert_groups_to_teachers(
|
||||
|
||||
teachers.iter_mut().for_each(|(_, teacher)| {
|
||||
teacher.days.iter_mut().for_each(|day| {
|
||||
day.lessons.sort_by(|a, b| {
|
||||
a.default_range.as_ref().unwrap()[1].cmp(&b.default_range.as_ref().unwrap()[1])
|
||||
})
|
||||
day.lessons
|
||||
.sort_by(|a, b| a.range.as_ref().unwrap()[1].cmp(&b.range.as_ref().unwrap()[1]))
|
||||
})
|
||||
});
|
||||
|
||||
@@ -754,12 +750,20 @@ pub mod tests {
|
||||
assert_eq!(thursday.lessons.len(), 1);
|
||||
|
||||
let lesson = &thursday.lessons[0];
|
||||
assert_eq!(lesson.default_range.unwrap()[1], 3);
|
||||
assert_eq!(lesson.range.unwrap()[1], 3);
|
||||
assert!(lesson.subgroups.is_some());
|
||||
|
||||
let subgroups = lesson.subgroups.as_ref().unwrap();
|
||||
assert_eq!(subgroups.len(), 2);
|
||||
assert_eq!(subgroups[0].cabinet, Some("44".to_string()));
|
||||
assert_eq!(subgroups[1].cabinet, Some("43".to_string()));
|
||||
|
||||
assert_eq!(
|
||||
subgroups[0].as_ref().unwrap().cabinet,
|
||||
Some("44".to_string())
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
subgroups[1].as_ref().unwrap().cabinet,
|
||||
Some("43".to_string())
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,25 +1,21 @@
|
||||
#[macro_export]
|
||||
macro_rules! or_continue {
|
||||
( $e:expr ) => {
|
||||
{
|
||||
if let Some(x) = $e {
|
||||
x
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
( $e:expr ) => {{
|
||||
if let Some(x) = $e {
|
||||
x
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}};
|
||||
}
|
||||
|
||||
#[macro_export]
|
||||
macro_rules! or_break {
|
||||
( $e:expr ) => {
|
||||
{
|
||||
if let Some(x) = $e {
|
||||
x
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
( $e:expr ) => {{
|
||||
if let Some(x) = $e {
|
||||
x
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}};
|
||||
}
|
||||
|
||||
@@ -88,24 +88,21 @@ pub enum LessonType {
|
||||
|
||||
/// Экзамен.
|
||||
ExamDefault,
|
||||
|
||||
|
||||
/// Курсовой проект.
|
||||
CourseProject,
|
||||
|
||||
|
||||
/// Защита курсового проекта.
|
||||
CourseProjectDefense,
|
||||
}
|
||||
|
||||
#[derive(Clone, Hash, Debug, Serialize, Deserialize, ToSchema)]
|
||||
pub struct LessonSubGroup {
|
||||
/// Index of subgroup.
|
||||
pub number: u8,
|
||||
|
||||
/// Cabinet, if present.
|
||||
pub cabinet: Option<String>,
|
||||
|
||||
/// Full name of the teacher.
|
||||
pub teacher: String,
|
||||
pub teacher: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Hash, Debug, Serialize, Deserialize, ToSchema)]
|
||||
@@ -116,7 +113,7 @@ pub struct Lesson {
|
||||
pub lesson_type: LessonType,
|
||||
|
||||
/// Lesson indexes, if present.
|
||||
pub default_range: Option<[u8; 2]>,
|
||||
pub range: Option<[u8; 2]>,
|
||||
|
||||
/// Name.
|
||||
pub name: Option<String>,
|
||||
@@ -125,8 +122,8 @@ pub struct Lesson {
|
||||
pub time: LessonBoundaries,
|
||||
|
||||
/// List of subgroups.
|
||||
#[serde(rename = "subGroups")]
|
||||
pub subgroups: Option<Vec<LessonSubGroup>>,
|
||||
#[serde(rename = "subgroups")]
|
||||
pub subgroups: Option<Vec<Option<LessonSubGroup>>>,
|
||||
|
||||
/// Group name, if this is a schedule for teachers.
|
||||
pub group: Option<String>,
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
use regex::Regex;
|
||||
use std::ops::Deref;
|
||||
use std::sync::LazyLock;
|
||||
use regex::Regex;
|
||||
|
||||
/// XLS WorkSheet data.
|
||||
pub struct WorkSheet {
|
||||
|
||||
Reference in New Issue
Block a user