mirror of
https://github.com/n08i40k/schedule-parser-rusted.git
synced 2025-12-06 17:57:47 +03:00
feat(parser)!: rework of subgroups parsing
This commit is contained in:
@@ -16,6 +16,7 @@ serde_repr = "0.1.20"
|
|||||||
regex = "1.11.1"
|
regex = "1.11.1"
|
||||||
utoipa = { version = "5", features = ["chrono"] }
|
utoipa = { version = "5", features = ["chrono"] }
|
||||||
strsim = "0.11.1"
|
strsim = "0.11.1"
|
||||||
|
log = "0.4.26"
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
criterion = "0.6"
|
criterion = "0.6"
|
||||||
|
|||||||
@@ -180,55 +180,54 @@ fn parse_lesson(
|
|||||||
group_column + 1,
|
group_column + 1,
|
||||||
);
|
);
|
||||||
|
|
||||||
match cabinets.len() {
|
let cab_count = cabinets.len();
|
||||||
// Если кабинетов нет, но есть подгруппы, назначаем им кабинет "??"
|
|
||||||
0 => {
|
if cab_count == 1 {
|
||||||
for subgroup in &mut subgroups {
|
|
||||||
subgroup.cabinet = Some("??".to_string());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// Назначаем этот кабинет всем подгруппам
|
// Назначаем этот кабинет всем подгруппам
|
||||||
1 => {
|
let cab = Some(cabinets.get(0).unwrap().clone());
|
||||||
|
|
||||||
for subgroup in &mut subgroups {
|
for subgroup in &mut subgroups {
|
||||||
subgroup.cabinet =
|
if let Some(subgroup) = subgroup {
|
||||||
Some(cabinets.get(0).or(Some(&String::new())).unwrap().clone())
|
subgroup.cabinet = cab.clone()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
len => {
|
} else if cab_count == 2 {
|
||||||
// Если количество кабинетов совпадает с количеством подгрупп, назначаем кабинеты по порядку
|
while subgroups.len() < cab_count {
|
||||||
if len == subgroups.len() {
|
subgroups.push(subgroups.last().unwrap_or(&None).clone());
|
||||||
for subgroup in &mut subgroups {
|
|
||||||
subgroup.cabinet = Some(
|
|
||||||
cabinets
|
|
||||||
.get((subgroup.number - 1) as usize)
|
|
||||||
.unwrap()
|
|
||||||
.clone(),
|
|
||||||
);
|
|
||||||
}
|
|
||||||
// Если количество кабинетов больше количества подгрупп, делаем ещё одну подгруппу.
|
|
||||||
} else if len > subgroups.len() {
|
|
||||||
for index in 0..subgroups.len() {
|
|
||||||
subgroups[index].cabinet = Some(cabinets[index].clone());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
while cabinets.len() > subgroups.len() {
|
for i in 0..cab_count {
|
||||||
subgroups.push(LessonSubGroup {
|
let subgroup = subgroups.get_mut(i).unwrap();
|
||||||
number: (subgroups.len() + 1) as u8,
|
let cabinet = Some(cabinets.get(i).unwrap().clone());
|
||||||
cabinet: Some(cabinets[subgroups.len()].clone()),
|
|
||||||
teacher: "Ошибка в расписании".to_string(),
|
match subgroup {
|
||||||
|
None => {
|
||||||
|
let _ = subgroup.insert(LessonSubGroup {
|
||||||
|
teacher: None,
|
||||||
|
cabinet,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
Some(subgroup) => {
|
||||||
|
subgroup.cabinet = cabinet;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
|
||||||
};
|
};
|
||||||
|
|
||||||
let lesson = Lesson {
|
let lesson = Lesson {
|
||||||
lesson_type: lesson_type.unwrap_or(lesson_boundaries.lesson_type.clone()),
|
lesson_type: lesson_type.unwrap_or(lesson_boundaries.lesson_type.clone()),
|
||||||
default_range,
|
range: default_range,
|
||||||
name: Some(name),
|
name: Some(name),
|
||||||
time: lesson_time,
|
time: lesson_time,
|
||||||
subgroups: Some(subgroups),
|
subgroups: if subgroups.len() == 2
|
||||||
|
&& subgroups.get(0).unwrap().is_none()
|
||||||
|
&& subgroups.get(1).unwrap().is_none()
|
||||||
|
{
|
||||||
|
None
|
||||||
|
} else {
|
||||||
|
Some(subgroups)
|
||||||
|
},
|
||||||
group: None,
|
group: None,
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -241,7 +240,7 @@ fn parse_lesson(
|
|||||||
Ok(Lessons(Vec::from([
|
Ok(Lessons(Vec::from([
|
||||||
Lesson {
|
Lesson {
|
||||||
lesson_type: Break,
|
lesson_type: Break,
|
||||||
default_range: None,
|
range: None,
|
||||||
name: None,
|
name: None,
|
||||||
time: LessonBoundaries {
|
time: LessonBoundaries {
|
||||||
start: prev_lesson.time.end,
|
start: prev_lesson.time.end,
|
||||||
@@ -264,11 +263,11 @@ fn parse_cabinets(worksheet: &WorkSheet, row_range: (u32, u32), column: u32) ->
|
|||||||
let clean = raw.replace("\n", " ");
|
let clean = raw.replace("\n", " ");
|
||||||
let parts: Vec<&str> = clean.split(" ").collect();
|
let parts: Vec<&str> = clean.split(" ").collect();
|
||||||
|
|
||||||
for part in parts {
|
parts.iter().take(2).for_each(|part| {
|
||||||
let clean_part = part.to_string().trim().to_string();
|
let clean_part = part.to_string().trim().to_string();
|
||||||
|
|
||||||
cabinets.push(clean_part);
|
cabinets.push(clean_part);
|
||||||
}
|
});
|
||||||
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@@ -280,7 +279,7 @@ fn parse_cabinets(worksheet: &WorkSheet, row_range: (u32, u32), column: u32) ->
|
|||||||
/// Getting the "pure" name of the lesson and list of teachers from the text of the lesson cell.
|
/// Getting the "pure" name of the lesson and list of teachers from the text of the lesson cell.
|
||||||
fn parse_name_and_subgroups(
|
fn parse_name_and_subgroups(
|
||||||
text: &String,
|
text: &String,
|
||||||
) -> Result<(String, Vec<LessonSubGroup>, Option<LessonType>), ParseError> {
|
) -> Result<(String, Vec<Option<LessonSubGroup>>, Option<LessonType>), ParseError> {
|
||||||
// Части названия пары:
|
// Части названия пары:
|
||||||
// 1. Само название.
|
// 1. Само название.
|
||||||
// 2. Список преподавателей и подгрупп.
|
// 2. Список преподавателей и подгрупп.
|
||||||
@@ -308,7 +307,7 @@ fn parse_name_and_subgroups(
|
|||||||
|
|
||||||
static NAMES_REGEX: LazyLock<Regex> = LazyLock::new(|| {
|
static NAMES_REGEX: LazyLock<Regex> = LazyLock::new(|| {
|
||||||
Regex::new(
|
Regex::new(
|
||||||
r"(?:[А-Я][а-я]+\s?(?:[А-Я][\s.]*){2}(?:\(\s*\d\s*[а-я\s]+\))?(?:[\s,]+)?)+[\s.]*",
|
r"(?:[А-Я][а-я]+\s?(?:[А-Я][\s.]*){2}(?:\(\s*\d\s*[а-я\s]+\))?(?:[\s,]+)?){1,2}+[\s.,]*",
|
||||||
)
|
)
|
||||||
.unwrap()
|
.unwrap()
|
||||||
});
|
});
|
||||||
@@ -320,20 +319,21 @@ fn parse_name_and_subgroups(
|
|||||||
.replace(&text.replace(&[' ', '\t', '\n'], " "), " ")
|
.replace(&text.replace(&[' ', '\t', '\n'], " "), " ")
|
||||||
.to_string();
|
.to_string();
|
||||||
|
|
||||||
let (lesson_name, mut subgroups, lesson_type) = match NAMES_REGEX.captures(&text) {
|
let (lesson_name, subgroups, lesson_type) = match NAMES_REGEX.captures(&text) {
|
||||||
Some(captures) => {
|
Some(captures) => {
|
||||||
let capture = captures.get(0).unwrap();
|
let capture = captures.get(0).unwrap();
|
||||||
|
|
||||||
let subgroups: Vec<LessonSubGroup> = {
|
let subgroups: Vec<Option<LessonSubGroup>> = {
|
||||||
let src = capture.as_str().replace(&[' ', '.'], "");
|
let src = capture.as_str().replace(&[' ', '.'], "");
|
||||||
|
|
||||||
src.split(',')
|
let mut shared_subgroup = false;
|
||||||
.map(|name| {
|
let mut subgroups: [Option<LessonSubGroup>; 2] = [None, None];
|
||||||
|
|
||||||
|
for name in src.split(',') {
|
||||||
let open_bracket_index = name.find('(');
|
let open_bracket_index = name.find('(');
|
||||||
|
|
||||||
let subgroup_number = open_bracket_index.map_or(0, |index| {
|
let number: u8 = open_bracket_index
|
||||||
name[(index + 1)..(index + 2)].parse::<u8>().unwrap()
|
.map_or(0, |index| name[(index + 1)..(index + 2)].parse().unwrap());
|
||||||
});
|
|
||||||
|
|
||||||
let teacher_name = {
|
let teacher_name = {
|
||||||
let name_end = open_bracket_index.unwrap_or_else(|| name.len());
|
let name_end = open_bracket_index.unwrap_or_else(|| name.len());
|
||||||
@@ -347,13 +347,33 @@ fn parse_name_and_subgroups(
|
|||||||
)
|
)
|
||||||
};
|
};
|
||||||
|
|
||||||
LessonSubGroup {
|
let lesson = Some(LessonSubGroup {
|
||||||
number: subgroup_number,
|
|
||||||
cabinet: None,
|
cabinet: None,
|
||||||
teacher: teacher_name,
|
teacher: Some(teacher_name),
|
||||||
|
});
|
||||||
|
|
||||||
|
match number {
|
||||||
|
0 => {
|
||||||
|
subgroups[0] = lesson;
|
||||||
|
subgroups[1] = None;
|
||||||
|
shared_subgroup = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
num => {
|
||||||
|
// 1 - 1 = 0 | 2 - 1 = 1 | 3 - 1 = 2 (schedule index to array index)
|
||||||
|
// 0 % 2 = 0 | 1 % 2 = 1 | 2 % 2 = 0 (clamp)
|
||||||
|
let normalised = (num - 1) % 2;
|
||||||
|
|
||||||
|
subgroups[normalised as usize] = lesson;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if shared_subgroup {
|
||||||
|
Vec::from([subgroups[0].take()])
|
||||||
|
} else {
|
||||||
|
Vec::from(subgroups)
|
||||||
}
|
}
|
||||||
})
|
|
||||||
.collect()
|
|
||||||
};
|
};
|
||||||
|
|
||||||
let name = text[..capture.start()].trim().to_string();
|
let name = text[..capture.start()].trim().to_string();
|
||||||
@@ -362,12 +382,15 @@ fn parse_name_and_subgroups(
|
|||||||
let lesson_type = if extra.len() > 4 {
|
let lesson_type = if extra.len() > 4 {
|
||||||
let result = guess_lesson_type(&extra);
|
let result = guess_lesson_type(&extra);
|
||||||
|
|
||||||
#[cfg(not(debug_assertions))]
|
|
||||||
if result.is_none() {
|
if result.is_none() {
|
||||||
|
#[cfg(not(debug_assertions))]
|
||||||
sentry::capture_message(
|
sentry::capture_message(
|
||||||
&*format!("Не удалось угадать тип пары '{}'!", extra),
|
&*format!("Не удалось угадать тип пары '{}'!", extra),
|
||||||
sentry::Level::Warning,
|
sentry::Level::Warning,
|
||||||
);
|
);
|
||||||
|
|
||||||
|
#[cfg(debug_assertions)]
|
||||||
|
log::warn!("Не удалось угадать тип пары '{}'!", extra);
|
||||||
}
|
}
|
||||||
|
|
||||||
result
|
result
|
||||||
@@ -380,40 +403,6 @@ fn parse_name_and_subgroups(
|
|||||||
None => (text, Vec::new(), None),
|
None => (text, Vec::new(), None),
|
||||||
};
|
};
|
||||||
|
|
||||||
// фикс, если у кого-то отсутствует индекс подгруппы
|
|
||||||
|
|
||||||
if subgroups.len() == 1 {
|
|
||||||
let index = subgroups[0].number;
|
|
||||||
|
|
||||||
if index == 0 {
|
|
||||||
subgroups[0].number = 1u8;
|
|
||||||
} else {
|
|
||||||
subgroups.push(LessonSubGroup {
|
|
||||||
number: if index == 1 { 2 } else { 1 },
|
|
||||||
cabinet: None,
|
|
||||||
teacher: "Только у другой".to_string(),
|
|
||||||
});
|
|
||||||
}
|
|
||||||
} else if subgroups.len() == 2 {
|
|
||||||
// если индексы отсутствуют у обоих, ставим поочерёдно
|
|
||||||
if subgroups[0].number == 0 && subgroups[1].number == 0 {
|
|
||||||
subgroups[0].number = 1;
|
|
||||||
subgroups[1].number = 2;
|
|
||||||
}
|
|
||||||
// если индекс отсутствует у первого, ставим 2, если у второго индекс 1 и наоборот
|
|
||||||
else if subgroups[0].number == 0 {
|
|
||||||
subgroups[0].number = if subgroups[1].number == 1 { 2 } else { 1 };
|
|
||||||
}
|
|
||||||
// если индекс отсутствует у второго, ставим 2, если у первого индекс 1 и наоборот
|
|
||||||
else if subgroups[1].number == 0 {
|
|
||||||
subgroups[1].number = if subgroups[0].number == 1 { 2 } else { 1 };
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if subgroups.len() == 2 && subgroups[0].number == 2 && subgroups[1].number == 1 {
|
|
||||||
subgroups.reverse()
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok((lesson_name, subgroups, lesson_type))
|
Ok((lesson_name, subgroups, lesson_type))
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -586,22 +575,30 @@ fn convert_groups_to_teachers(
|
|||||||
let subgroups = group_lesson.subgroups.as_ref().unwrap();
|
let subgroups = group_lesson.subgroups.as_ref().unwrap();
|
||||||
|
|
||||||
for subgroup in subgroups {
|
for subgroup in subgroups {
|
||||||
if subgroup.teacher == "Ошибка в расписании" {
|
let teacher = match subgroup {
|
||||||
|
None => continue,
|
||||||
|
Some(subgroup) => match &subgroup.teacher {
|
||||||
|
None => continue,
|
||||||
|
Some(teacher) => teacher,
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
||||||
|
if teacher == "Ошибка в расписании" {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if !teachers.contains_key(&subgroup.teacher) {
|
if !teachers.contains_key(teacher) {
|
||||||
teachers.insert(
|
teachers.insert(
|
||||||
subgroup.teacher.clone(),
|
teacher.clone(),
|
||||||
ScheduleEntry {
|
ScheduleEntry {
|
||||||
name: subgroup.teacher.clone(),
|
name: teacher.clone(),
|
||||||
days: empty_days.to_vec(),
|
days: empty_days.to_vec(),
|
||||||
},
|
},
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
let teacher_day = teachers
|
let teacher_day = teachers
|
||||||
.get_mut(&subgroup.teacher)
|
.get_mut(teacher)
|
||||||
.unwrap()
|
.unwrap()
|
||||||
.days
|
.days
|
||||||
.get_mut(index)
|
.get_mut(index)
|
||||||
@@ -620,9 +617,8 @@ fn convert_groups_to_teachers(
|
|||||||
|
|
||||||
teachers.iter_mut().for_each(|(_, teacher)| {
|
teachers.iter_mut().for_each(|(_, teacher)| {
|
||||||
teacher.days.iter_mut().for_each(|day| {
|
teacher.days.iter_mut().for_each(|day| {
|
||||||
day.lessons.sort_by(|a, b| {
|
day.lessons
|
||||||
a.default_range.as_ref().unwrap()[1].cmp(&b.default_range.as_ref().unwrap()[1])
|
.sort_by(|a, b| a.range.as_ref().unwrap()[1].cmp(&b.range.as_ref().unwrap()[1]))
|
||||||
})
|
|
||||||
})
|
})
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -754,12 +750,20 @@ pub mod tests {
|
|||||||
assert_eq!(thursday.lessons.len(), 1);
|
assert_eq!(thursday.lessons.len(), 1);
|
||||||
|
|
||||||
let lesson = &thursday.lessons[0];
|
let lesson = &thursday.lessons[0];
|
||||||
assert_eq!(lesson.default_range.unwrap()[1], 3);
|
assert_eq!(lesson.range.unwrap()[1], 3);
|
||||||
assert!(lesson.subgroups.is_some());
|
assert!(lesson.subgroups.is_some());
|
||||||
|
|
||||||
let subgroups = lesson.subgroups.as_ref().unwrap();
|
let subgroups = lesson.subgroups.as_ref().unwrap();
|
||||||
assert_eq!(subgroups.len(), 2);
|
assert_eq!(subgroups.len(), 2);
|
||||||
assert_eq!(subgroups[0].cabinet, Some("44".to_string()));
|
|
||||||
assert_eq!(subgroups[1].cabinet, Some("43".to_string()));
|
assert_eq!(
|
||||||
|
subgroups[0].as_ref().unwrap().cabinet,
|
||||||
|
Some("44".to_string())
|
||||||
|
);
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
subgroups[1].as_ref().unwrap().cabinet,
|
||||||
|
Some("43".to_string())
|
||||||
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,25 +1,21 @@
|
|||||||
#[macro_export]
|
#[macro_export]
|
||||||
macro_rules! or_continue {
|
macro_rules! or_continue {
|
||||||
( $e:expr ) => {
|
( $e:expr ) => {{
|
||||||
{
|
|
||||||
if let Some(x) = $e {
|
if let Some(x) = $e {
|
||||||
x
|
x
|
||||||
} else {
|
} else {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
}
|
}};
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[macro_export]
|
#[macro_export]
|
||||||
macro_rules! or_break {
|
macro_rules! or_break {
|
||||||
( $e:expr ) => {
|
( $e:expr ) => {{
|
||||||
{
|
|
||||||
if let Some(x) = $e {
|
if let Some(x) = $e {
|
||||||
x
|
x
|
||||||
} else {
|
} else {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}};
|
||||||
}
|
|
||||||
}
|
}
|
||||||
@@ -98,14 +98,11 @@ pub enum LessonType {
|
|||||||
|
|
||||||
#[derive(Clone, Hash, Debug, Serialize, Deserialize, ToSchema)]
|
#[derive(Clone, Hash, Debug, Serialize, Deserialize, ToSchema)]
|
||||||
pub struct LessonSubGroup {
|
pub struct LessonSubGroup {
|
||||||
/// Index of subgroup.
|
|
||||||
pub number: u8,
|
|
||||||
|
|
||||||
/// Cabinet, if present.
|
/// Cabinet, if present.
|
||||||
pub cabinet: Option<String>,
|
pub cabinet: Option<String>,
|
||||||
|
|
||||||
/// Full name of the teacher.
|
/// Full name of the teacher.
|
||||||
pub teacher: String,
|
pub teacher: Option<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone, Hash, Debug, Serialize, Deserialize, ToSchema)]
|
#[derive(Clone, Hash, Debug, Serialize, Deserialize, ToSchema)]
|
||||||
@@ -116,7 +113,7 @@ pub struct Lesson {
|
|||||||
pub lesson_type: LessonType,
|
pub lesson_type: LessonType,
|
||||||
|
|
||||||
/// Lesson indexes, if present.
|
/// Lesson indexes, if present.
|
||||||
pub default_range: Option<[u8; 2]>,
|
pub range: Option<[u8; 2]>,
|
||||||
|
|
||||||
/// Name.
|
/// Name.
|
||||||
pub name: Option<String>,
|
pub name: Option<String>,
|
||||||
@@ -125,8 +122,8 @@ pub struct Lesson {
|
|||||||
pub time: LessonBoundaries,
|
pub time: LessonBoundaries,
|
||||||
|
|
||||||
/// List of subgroups.
|
/// List of subgroups.
|
||||||
#[serde(rename = "subGroups")]
|
#[serde(rename = "subgroups")]
|
||||||
pub subgroups: Option<Vec<LessonSubGroup>>,
|
pub subgroups: Option<Vec<Option<LessonSubGroup>>>,
|
||||||
|
|
||||||
/// Group name, if this is a schedule for teachers.
|
/// Group name, if this is a schedule for teachers.
|
||||||
pub group: Option<String>,
|
pub group: Option<String>,
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
|
use regex::Regex;
|
||||||
use std::ops::Deref;
|
use std::ops::Deref;
|
||||||
use std::sync::LazyLock;
|
use std::sync::LazyLock;
|
||||||
use regex::Regex;
|
|
||||||
|
|
||||||
/// XLS WorkSheet data.
|
/// XLS WorkSheet data.
|
||||||
pub struct WorkSheet {
|
pub struct WorkSheet {
|
||||||
|
|||||||
Reference in New Issue
Block a user