mirror of
https://github.com/n08i40k/schedule-parser-rusted.git
synced 2025-12-06 17:57:47 +03:00
Compare commits
3 Commits
release/v1
...
d23092a32a
| Author | SHA1 | Date | |
|---|---|---|---|
|
d23092a32a
|
|||
|
01bfa38969
|
|||
|
851ec9225f
|
22
Cargo.lock
generated
22
Cargo.lock
generated
@@ -1258,15 +1258,6 @@ dependencies = [
|
||||
"slab",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fuzzy-matcher"
|
||||
version = "0.3.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "54614a3312934d066701a80f20f15fa3b56d67ac7722b39eea5b4c9dd1d66c94"
|
||||
dependencies = [
|
||||
"thread_local",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "gcc"
|
||||
version = "0.3.55"
|
||||
@@ -2881,10 +2872,11 @@ dependencies = [
|
||||
"chrono",
|
||||
"criterion",
|
||||
"derive_more",
|
||||
"fuzzy-matcher",
|
||||
"regex",
|
||||
"sentry",
|
||||
"serde",
|
||||
"serde_repr",
|
||||
"strsim",
|
||||
"utoipa",
|
||||
]
|
||||
|
||||
@@ -3383,16 +3375,6 @@ dependencies = [
|
||||
"syn 2.0.100",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "thread_local"
|
||||
version = "1.1.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8b9ef9bad013ada3808854ceac7b46812a6465ba368859a37e2100283d2d719c"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"once_cell",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "time"
|
||||
version = "0.1.45"
|
||||
|
||||
@@ -10,11 +10,12 @@ test-utils = []
|
||||
calamine = "0.26"
|
||||
chrono = { version = "0.4", features = ["serde"] }
|
||||
derive_more = { version = "2", features = ["full"] }
|
||||
sentry = "0.38"
|
||||
serde = { version = "1.0.219", features = ["derive"] }
|
||||
serde_repr = "0.1.20"
|
||||
fuzzy-matcher = "0.3.7"
|
||||
regex = "1.11.1"
|
||||
utoipa = { version = "5", features = ["chrono"] }
|
||||
strsim = "0.11.1"
|
||||
|
||||
[dev-dependencies]
|
||||
criterion = "0.6"
|
||||
|
||||
@@ -1,112 +1,21 @@
|
||||
use crate::LessonParseResult::{Lessons, Street};
|
||||
use crate::schema::LessonType::Break;
|
||||
use crate::schema::internal::{BoundariesCellInfo, DayCellInfo, GroupCellInfo};
|
||||
use crate::schema::{
|
||||
Day, ErrorCell, ErrorCellPos, Lesson, LessonBoundaries, LessonSubGroup, LessonType, ParseError,
|
||||
ParseResult, ScheduleEntry,
|
||||
};
|
||||
use crate::worksheet::WorkSheet;
|
||||
use calamine::{Reader, Xls, open_workbook_from_rs};
|
||||
use chrono::{DateTime, Duration, NaiveDateTime, Utc};
|
||||
use fuzzy_matcher::FuzzyMatcher;
|
||||
use fuzzy_matcher::skim::SkimMatcherV2;
|
||||
use chrono::{DateTime, Duration, NaiveDate, NaiveTime, Utc};
|
||||
use regex::Regex;
|
||||
use std::collections::HashMap;
|
||||
use std::io::Cursor;
|
||||
use std::ops::Deref;
|
||||
use std::sync::LazyLock;
|
||||
|
||||
mod macros;
|
||||
pub mod schema;
|
||||
|
||||
/// Header cell that carries a group name.
struct GroupCellInfo {
    /// Index of the column in which the header cell was found.
    column: u32,

    /// Text read from the cell, i.e. the group name.
    name: String,
}
|
||||
|
||||
/// Data cell storing the line.
|
||||
struct DayCellInfo {
|
||||
/// Line index.
|
||||
row: u32,
|
||||
|
||||
/// Column index.
|
||||
column: u32,
|
||||
|
||||
/// Day name.
|
||||
name: String,
|
||||
|
||||
/// Date of the day.
|
||||
date: DateTime<Utc>,
|
||||
}
|
||||
|
||||
/// Data on the time of lessons from the second column of the schedule.
|
||||
struct BoundariesCellInfo {
|
||||
/// Temporary segment of the lesson.
|
||||
time_range: LessonBoundaries,
|
||||
|
||||
/// Type of lesson.
|
||||
lesson_type: LessonType,
|
||||
|
||||
/// The lesson index.
|
||||
default_index: Option<u32>,
|
||||
|
||||
/// The frame of the cell.
|
||||
xls_range: ((u32, u32), (u32, u32)),
|
||||
}
|
||||
|
||||
struct WorkSheet {
|
||||
pub data: calamine::Range<calamine::Data>,
|
||||
pub merges: Vec<calamine::Dimensions>,
|
||||
}
|
||||
|
||||
impl Deref for WorkSheet {
|
||||
type Target = calamine::Range<calamine::Data>;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.data
|
||||
}
|
||||
}
|
||||
|
||||
/// Getting a line from the required cell.
|
||||
fn get_string_from_cell(worksheet: &WorkSheet, row: u32, col: u32) -> Option<String> {
|
||||
let cell_data = if let Some(data) = worksheet.get((row as usize, col as usize)) {
|
||||
data.to_string()
|
||||
} else {
|
||||
return None;
|
||||
};
|
||||
|
||||
if cell_data.trim().is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
static NL_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"[\n\r]+").unwrap());
|
||||
static SP_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\s+").unwrap());
|
||||
|
||||
let trimmed_data = SP_RE
|
||||
.replace_all(&NL_RE.replace_all(&cell_data, " "), " ")
|
||||
.trim()
|
||||
.to_string();
|
||||
|
||||
if trimmed_data.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(trimmed_data)
|
||||
}
|
||||
}
|
||||
|
||||
/// Obtaining the boundaries of the cell along its upper left coordinate.
|
||||
fn get_merge_from_start(worksheet: &WorkSheet, row: u32, column: u32) -> ((u32, u32), (u32, u32)) {
|
||||
return match worksheet
|
||||
.merges
|
||||
.iter()
|
||||
.find(|merge| merge.start.0 == row && merge.start.1 == column)
|
||||
{
|
||||
Some(merge) => (merge.start, (merge.end.0 + 1, merge.end.1 + 1)),
|
||||
None => ((row, column), (row + 1, column + 1))
|
||||
};
|
||||
}
|
||||
mod worksheet;
|
||||
|
||||
/// Obtaining a "skeleton" schedule from the working sheet.
|
||||
fn parse_skeleton(
|
||||
@@ -123,7 +32,7 @@ fn parse_skeleton(
|
||||
while row < worksheet_end.0 {
|
||||
row += 1;
|
||||
|
||||
let day_full_name = or_continue!(get_string_from_cell(&worksheet, row, 0));
|
||||
let day_full_name = or_continue!(worksheet.get_string_from_cell(row, 0));
|
||||
|
||||
// parse groups row when days column will found
|
||||
if groups.is_empty() {
|
||||
@@ -133,7 +42,7 @@ fn parse_skeleton(
|
||||
for column in (worksheet_start.1 + 2)..=worksheet_end.1 {
|
||||
groups.push(GroupCellInfo {
|
||||
column,
|
||||
name: or_continue!(get_string_from_cell(&worksheet, row, column)),
|
||||
name: or_continue!(worksheet.get_string_from_cell(row, column)),
|
||||
});
|
||||
}
|
||||
|
||||
@@ -146,13 +55,12 @@ fn parse_skeleton(
|
||||
|
||||
let name = day_full_name[..space_index].to_string();
|
||||
|
||||
let date_raw = day_full_name[space_index + 1..].to_string();
|
||||
let date_add = format!("{} 00:00:00", date_raw);
|
||||
let date_slice = &day_full_name[space_index + 1..];
|
||||
let date = or_break!(NaiveDate::parse_from_str(date_slice, "%d.%m.%Y").ok())
|
||||
.and_time(NaiveTime::default())
|
||||
.and_utc();
|
||||
|
||||
let date =
|
||||
or_break!(NaiveDateTime::parse_from_str(&*date_add, "%d.%m.%Y %H:%M:%S").ok());
|
||||
|
||||
(name, date.and_utc())
|
||||
(name, date)
|
||||
};
|
||||
|
||||
days.push(DayCellInfo {
|
||||
@@ -178,103 +86,75 @@ enum LessonParseResult {
|
||||
Street(String),
|
||||
}
|
||||
|
||||
trait StringInnerSlice {
    /// Returns a copy of the string with the characters at positions `from..to` removed.
    ///
    /// Positions count characters (`char`s), not bytes. An empty or reversed range
    /// (`from >= to`) removes nothing.
    fn inner_slice(&self, from: usize, to: usize) -> Self;
}

impl StringInnerSlice for String {
    fn inner_slice(&self, from: usize, to: usize) -> Self {
        // A single filtered pass: unlike chaining `take(from)` with `skip(to)`, a reversed
        // range cannot emit the characters between `to` and `from` twice.
        self.chars()
            .enumerate()
            .filter(|&(index, _)| index < from || index >= to)
            .map(|(_, ch)| ch)
            .collect()
    }
}
|
||||
|
||||
// noinspection GrazieInspection
|
||||
/// Obtaining a non-standard type of lesson by name.
|
||||
fn guess_lesson_type(name: &String) -> Option<(String, LessonType)> {
|
||||
let map: HashMap<String, LessonType> = HashMap::from([
|
||||
("(консультация)".to_string(), LessonType::Consultation),
|
||||
(
|
||||
"самостоятельная работа".to_string(),
|
||||
LessonType::IndependentWork,
|
||||
),
|
||||
("зачет".to_string(), LessonType::Exam),
|
||||
("зачет с оценкой".to_string(), LessonType::ExamWithGrade),
|
||||
("экзамен".to_string(), LessonType::ExamDefault),
|
||||
]);
|
||||
fn guess_lesson_type(text: &String) -> Option<LessonType> {
|
||||
static MAP: LazyLock<HashMap<&str, LessonType>> = LazyLock::new(|| {
|
||||
HashMap::from([
|
||||
("консультация", LessonType::Consultation),
|
||||
("самостоятельная работа", LessonType::IndependentWork),
|
||||
("зачет", LessonType::Exam),
|
||||
("зачет с оценкой", LessonType::ExamWithGrade),
|
||||
("экзамен", LessonType::ExamDefault),
|
||||
("курсовой проект", LessonType::CourseProject),
|
||||
("защита курсового проекта", LessonType::CourseProjectDefense),
|
||||
])
|
||||
});
|
||||
|
||||
let matcher = SkimMatcherV2::default();
|
||||
let name_lower = name.to_lowercase();
|
||||
let name_lower = text.to_lowercase();
|
||||
|
||||
type SearchResult<'a> = (&'a LessonType, i64, Vec<usize>);
|
||||
|
||||
let mut search_results: Vec<SearchResult> = map
|
||||
match MAP
|
||||
.iter()
|
||||
.map(|entry| -> SearchResult {
|
||||
if let Some((score, indices)) = matcher.fuzzy_indices(&*name_lower, entry.0) {
|
||||
return (entry.1, score, indices);
|
||||
}
|
||||
|
||||
(entry.1, 0, Vec::new())
|
||||
})
|
||||
.collect();
|
||||
search_results.sort_by(|a, b| b.1.cmp(&a.1));
|
||||
|
||||
let guessed_type = search_results.first().unwrap();
|
||||
|
||||
if guessed_type.1 > 80 {
|
||||
Some((
|
||||
name.inner_slice(guessed_type.2[0], guessed_type.2[guessed_type.2.len() - 1]),
|
||||
guessed_type.0.clone(),
|
||||
))
|
||||
} else {
|
||||
None
|
||||
.map(|(text, lesson_type)| (lesson_type, strsim::levenshtein(text, &*name_lower)))
|
||||
.filter(|x| x.1 <= 4)
|
||||
.min_by_key(|(_, score)| *score)
|
||||
{
|
||||
None => None,
|
||||
Some(v) => Some(v.0.clone()),
|
||||
}
|
||||
}
|
||||
|
||||
/// Getting a pair or street from a cell.
|
||||
fn parse_lesson(
|
||||
worksheet: &WorkSheet,
|
||||
day: &mut Day,
|
||||
day: &Day,
|
||||
day_boundaries: &Vec<BoundariesCellInfo>,
|
||||
lesson_boundaries: &BoundariesCellInfo,
|
||||
column: u32,
|
||||
group_column: u32,
|
||||
) -> Result<LessonParseResult, ParseError> {
|
||||
let row = lesson_boundaries.xls_range.0.0;
|
||||
|
||||
let (name, lesson_type) = {
|
||||
let full_name = match get_string_from_cell(&worksheet, row, column) {
|
||||
let name = {
|
||||
let cell_data = match worksheet.get_string_from_cell(row, group_column) {
|
||||
Some(x) => x,
|
||||
None => return Ok(Lessons(Vec::new())),
|
||||
};
|
||||
|
||||
static OTHER_STREET_RE: LazyLock<Regex> =
|
||||
LazyLock::new(|| Regex::new(r"^[А-Я][а-я]+,?\s?[0-9]+$").unwrap());
|
||||
LazyLock::new(|| Regex::new(r"^[А-Я][а-я]+[,\s]\d+$").unwrap());
|
||||
|
||||
if OTHER_STREET_RE.is_match(&full_name) {
|
||||
return Ok(Street(full_name));
|
||||
if OTHER_STREET_RE.is_match(&cell_data) {
|
||||
return Ok(Street(cell_data));
|
||||
}
|
||||
|
||||
match guess_lesson_type(&full_name) {
|
||||
Some(x) => x,
|
||||
None => (full_name, lesson_boundaries.lesson_type.clone()),
|
||||
}
|
||||
cell_data
|
||||
};
|
||||
|
||||
let cell_range = worksheet.get_merge_from_start(row, group_column);
|
||||
|
||||
let (default_range, lesson_time) = {
|
||||
let cell_range = get_merge_from_start(worksheet, row, column);
|
||||
|
||||
let end_time_arr = day_boundaries
|
||||
.iter()
|
||||
.filter(|time| time.xls_range.1.0 == cell_range.1.0)
|
||||
.collect::<Vec<&BoundariesCellInfo>>();
|
||||
|
||||
let end_time = end_time_arr
|
||||
.first()
|
||||
.ok_or(ParseError::LessonTimeNotFound(ErrorCellPos { row, column }))?;
|
||||
let end_time =
|
||||
end_time_arr
|
||||
.first()
|
||||
.ok_or(ParseError::LessonTimeNotFound(ErrorCellPos {
|
||||
row,
|
||||
column: group_column,
|
||||
}))?;
|
||||
|
||||
let range: Option<[u8; 2]> = if lesson_boundaries.default_index != None {
|
||||
let default = lesson_boundaries.default_index.unwrap() as u8;
|
||||
@@ -291,10 +171,14 @@ fn parse_lesson(
|
||||
Ok((range, time))
|
||||
}?;
|
||||
|
||||
let (name, mut subgroups) = parse_name_and_subgroups(&name)?;
|
||||
let (name, mut subgroups, lesson_type) = parse_name_and_subgroups(&name)?;
|
||||
|
||||
{
|
||||
let cabinets: Vec<String> = parse_cabinets(worksheet, row, column + 1);
|
||||
let cabinets: Vec<String> = parse_cabinets(
|
||||
worksheet,
|
||||
(cell_range.0.0, cell_range.1.0),
|
||||
group_column + 1,
|
||||
);
|
||||
|
||||
match cabinets.len() {
|
||||
// Если кабинетов нет, но есть подгруппы, назначаем им кабинет "??"
|
||||
@@ -340,7 +224,7 @@ fn parse_lesson(
|
||||
};
|
||||
|
||||
let lesson = Lesson {
|
||||
lesson_type,
|
||||
lesson_type: lesson_type.unwrap_or(lesson_boundaries.lesson_type.clone()),
|
||||
default_range,
|
||||
name: Some(name),
|
||||
time: lesson_time,
|
||||
@@ -371,10 +255,12 @@ fn parse_lesson(
|
||||
}
|
||||
|
||||
/// Obtaining a list of cabinets to the right of the lesson cell.
|
||||
fn parse_cabinets(worksheet: &WorkSheet, row: u32, column: u32) -> Vec<String> {
|
||||
fn parse_cabinets(worksheet: &WorkSheet, row_range: (u32, u32), column: u32) -> Vec<String> {
|
||||
let mut cabinets: Vec<String> = Vec::new();
|
||||
|
||||
if let Some(raw) = get_string_from_cell(&worksheet, row, column) {
|
||||
for row in row_range.0..row_range.1 {
|
||||
let raw = or_continue!(worksheet.get_string_from_cell(row, column));
|
||||
|
||||
let clean = raw.replace("\n", " ");
|
||||
let parts: Vec<&str> = clean.split(" ").collect();
|
||||
|
||||
@@ -383,59 +269,117 @@ fn parse_cabinets(worksheet: &WorkSheet, row: u32, column: u32) -> Vec<String> {
|
||||
|
||||
cabinets.push(clean_part);
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
cabinets
|
||||
}
|
||||
|
||||
//noinspection GrazieInspection
|
||||
/// Getting the "pure" name of the lesson and list of teachers from the text of the lesson cell.
|
||||
fn parse_name_and_subgroups(name: &String) -> Result<(String, Vec<LessonSubGroup>), ParseError> {
|
||||
static LESSON_RE: LazyLock<Regex> =
|
||||
LazyLock::new(|| Regex::new(r"(?:[А-Я][а-я]+[А-Я]{2}(?:\([0-9][а-я]+\))?)+$").unwrap());
|
||||
static TEACHER_RE: LazyLock<Regex> =
|
||||
LazyLock::new(|| Regex::new(r"([А-Я][а-я]+)([А-Я])([А-Я])(?:\(([0-9])[а-я]+\))?").unwrap());
|
||||
static CLEAN_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"[\s.,]+").unwrap());
|
||||
static END_CLEAN_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"[.\s]+$").unwrap());
|
||||
fn parse_name_and_subgroups(
|
||||
text: &String,
|
||||
) -> Result<(String, Vec<LessonSubGroup>, Option<LessonType>), ParseError> {
|
||||
// Части названия пары:
|
||||
// 1. Само название.
|
||||
// 2. Список преподавателей и подгрупп.
|
||||
// 3. "Модификатор" (чаще всего).
|
||||
//
|
||||
// Регулярное выражение для получения ФИО преподавателей и номеров подгрупп (aka. второй части).
|
||||
// (?:[А-Я][а-я]+\s?(?:[А-Я][\s.]*){2}(?:\(\d\s?[а-я]+\))?(?:, )?)+[\s.]*
|
||||
//
|
||||
// Подробнее:
|
||||
// (?:
|
||||
// [А-Я][а-я]+ - Фамилия.
|
||||
// \s? - Кто знает, будет ли там пробел.
|
||||
// (?:[А-Я][\s.]*){2} - Имя и отчество с учётом случайных пробелов и точек.
|
||||
// (?:
|
||||
// \( - Открытие подгруппы.
|
||||
// \s? - Кто знает, будет ли там пробел.
|
||||
// \d - Номер подгруппы.
|
||||
// \s? - Кто знает, будет ли там пробел.
|
||||
// [а-я\s]+ - Слово "подгруппа" с учётов ошибок.
|
||||
// \) - Закрытие подгруппы.
|
||||
// )? - Явное указание подгруппы может отсутствовать по понятным причинам.
|
||||
// (?:, )? - Разделители между отдельными частями.
|
||||
// )+
|
||||
// [\s.]* - Забираем с собой всякий мусор, что бы не передать его в третью часть.
|
||||
|
||||
let (teachers, lesson_name) = {
|
||||
let clean_name = CLEAN_RE.replace_all(&name, "").to_string();
|
||||
static NAMES_REGEX: LazyLock<Regex> = LazyLock::new(|| {
|
||||
Regex::new(
|
||||
r"(?:[А-Я][а-я]+\s?(?:[А-Я][\s.]*){2}(?:\(\s*\d\s*[а-я\s]+\))?(?:[\s,]+)?)+[\s.]*",
|
||||
)
|
||||
.unwrap()
|
||||
});
|
||||
|
||||
if let Some(captures) = LESSON_RE.captures(&clean_name) {
|
||||
// Отчистка
|
||||
static CLEAN_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"[\s\n\t]+").unwrap());
|
||||
|
||||
let text = CLEAN_RE
|
||||
.replace(&text.replace(&[' ', '\t', '\n'], " "), " ")
|
||||
.to_string();
|
||||
|
||||
let (lesson_name, mut subgroups, lesson_type) = match NAMES_REGEX.captures(&text) {
|
||||
Some(captures) => {
|
||||
let capture = captures.get(0).unwrap();
|
||||
let capture_str = capture.as_str().to_string();
|
||||
let capture_name: String = capture_str.chars().take(5).collect();
|
||||
|
||||
(
|
||||
END_CLEAN_RE.replace(&capture_str, "").to_string(),
|
||||
END_CLEAN_RE
|
||||
.replace(&name[0..name.find(&*capture_name).unwrap()], "")
|
||||
.to_string(),
|
||||
)
|
||||
} else {
|
||||
return Ok((END_CLEAN_RE.replace(&name, "").to_string(), Vec::new()));
|
||||
let subgroups: Vec<LessonSubGroup> = {
|
||||
let src = capture.as_str().replace(&[' ', '.'], "");
|
||||
|
||||
src.split(',')
|
||||
.map(|name| {
|
||||
let open_bracket_index = name.find('(');
|
||||
|
||||
let subgroup_number = open_bracket_index.map_or(0, |index| {
|
||||
name[(index + 1)..(index + 2)].parse::<u8>().unwrap()
|
||||
});
|
||||
|
||||
let teacher_name = {
|
||||
let name_end = open_bracket_index.unwrap_or_else(|| name.len());
|
||||
|
||||
// Я ебал. Как же я долго до этого доходил.
|
||||
format!(
|
||||
"{} {}.{}.",
|
||||
name.get(..name_end - 4).unwrap(),
|
||||
name.get(name_end - 4..name_end - 2).unwrap(),
|
||||
name.get(name_end - 2..name_end).unwrap(),
|
||||
)
|
||||
};
|
||||
|
||||
LessonSubGroup {
|
||||
number: subgroup_number,
|
||||
cabinet: None,
|
||||
teacher: teacher_name,
|
||||
}
|
||||
})
|
||||
.collect()
|
||||
};
|
||||
|
||||
let name = text[..capture.start()].trim().to_string();
|
||||
let extra = text[capture.end()..].trim().to_string();
|
||||
|
||||
let lesson_type = if extra.len() > 4 {
|
||||
let result = guess_lesson_type(&extra);
|
||||
|
||||
#[cfg(not(debug_assertions))]
|
||||
if result.is_none() {
|
||||
sentry::capture_message(
|
||||
&*format!("Не удалось угадать тип пары '{}'!", extra),
|
||||
sentry::Level::Warning,
|
||||
);
|
||||
}
|
||||
|
||||
result
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
(name, subgroups, lesson_type)
|
||||
}
|
||||
None => (text, Vec::new(), None),
|
||||
};
|
||||
|
||||
let mut subgroups: Vec<LessonSubGroup> = Vec::new();
|
||||
|
||||
let teacher_it = TEACHER_RE.captures_iter(&teachers);
|
||||
|
||||
for captures in teacher_it {
|
||||
subgroups.push(LessonSubGroup {
|
||||
number: match captures.get(4) {
|
||||
Some(capture) => capture.as_str().to_string().parse::<u8>().unwrap(),
|
||||
None => 0,
|
||||
},
|
||||
cabinet: None,
|
||||
teacher: format!(
|
||||
"{} {}.{}.",
|
||||
captures.get(1).unwrap().as_str().to_string(),
|
||||
captures.get(2).unwrap().as_str().to_string(),
|
||||
captures.get(3).unwrap().as_str().to_string()
|
||||
),
|
||||
});
|
||||
}
|
||||
|
||||
// фикс, если у кого-то отсутствует индекс подгруппы
|
||||
|
||||
if subgroups.len() == 1 {
|
||||
@@ -470,9 +414,15 @@ fn parse_name_and_subgroups(name: &String) -> Result<(String, Vec<LessonSubGroup
|
||||
subgroups.reverse()
|
||||
}
|
||||
|
||||
Ok((lesson_name, subgroups))
|
||||
Ok((lesson_name, subgroups, lesson_type))
|
||||
}
|
||||
|
||||
/// Getting the start and end of a pair from a cell in the first column of a document.
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `cell_data`: text in cell.
|
||||
/// * `date`: date of the current day.
|
||||
fn parse_lesson_boundaries_cell(
|
||||
cell_data: &String,
|
||||
date: DateTime<Utc>,
|
||||
@@ -503,28 +453,31 @@ fn parse_lesson_boundaries_cell(
|
||||
})
|
||||
}
|
||||
|
||||
fn parse_day_boundaries_column(
|
||||
/// Parse the column of the document to obtain a list of day's lesson boundaries.
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `worksheet`: document.
|
||||
/// * `date`: date of the current day.
|
||||
/// * `row_range`: row boundaries of the current day.
|
||||
/// * `column`: column with the required data.
|
||||
fn parse_day_boundaries(
|
||||
worksheet: &WorkSheet,
|
||||
day_markup: &DayCellInfo,
|
||||
lesson_time_column: u32,
|
||||
row_distance: u32,
|
||||
date: DateTime<Utc>,
|
||||
row_range: (u32, u32),
|
||||
column: u32,
|
||||
) -> Result<Vec<BoundariesCellInfo>, ParseError> {
|
||||
let mut day_times: Vec<BoundariesCellInfo> = Vec::new();
|
||||
|
||||
for row in day_markup.row..(day_markup.row + row_distance) {
|
||||
let time_cell = if let Some(str) = get_string_from_cell(&worksheet, row, lesson_time_column)
|
||||
{
|
||||
for row in row_range.0..row_range.1 {
|
||||
let time_cell = if let Some(str) = worksheet.get_string_from_cell(row, column) {
|
||||
str
|
||||
} else {
|
||||
continue;
|
||||
};
|
||||
|
||||
let lesson_time = parse_lesson_boundaries_cell(&time_cell, day_markup.date.clone()).ok_or(
|
||||
ParseError::LessonBoundaries(ErrorCell::new(
|
||||
row,
|
||||
lesson_time_column,
|
||||
time_cell.clone(),
|
||||
)),
|
||||
let lesson_time = parse_lesson_boundaries_cell(&time_cell, date.clone()).ok_or(
|
||||
ParseError::LessonBoundaries(ErrorCell::new(row, column, time_cell.clone())),
|
||||
)?;
|
||||
|
||||
// type
|
||||
@@ -553,14 +506,20 @@ fn parse_day_boundaries_column(
|
||||
time_range: lesson_time,
|
||||
lesson_type,
|
||||
default_index,
|
||||
xls_range: get_merge_from_start(&worksheet, row, lesson_time_column),
|
||||
xls_range: worksheet.get_merge_from_start(row, column),
|
||||
});
|
||||
}
|
||||
|
||||
return Ok(day_times);
|
||||
Ok(day_times)
|
||||
}
|
||||
|
||||
fn parse_week_boundaries_column(
|
||||
/// Parse the column of the document to obtain a list of week's lesson boundaries.
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `worksheet`: document.
|
||||
/// * `week_markup`: markup of the current week.
|
||||
fn parse_week_boundaries(
|
||||
worksheet: &WorkSheet,
|
||||
week_markup: &Vec<DayCellInfo>,
|
||||
) -> Result<Vec<Vec<BoundariesCellInfo>>, ParseError> {
|
||||
@@ -572,16 +531,20 @@ fn parse_week_boundaries_column(
|
||||
for day_index in 0..week_markup.len() {
|
||||
let day_markup = &week_markup[day_index];
|
||||
|
||||
// Если текущий день не последнему, то индекс строки следующего дня минус индекс строки текущего дня.
|
||||
// Если текущий день - последний, то индекс последней строки документа минус индекс строки текущего дня.
|
||||
let row_distance = if day_index != week_markup.len() - 1 {
|
||||
// Если текущий день не последнему, то индекс строки следующего дня.
|
||||
// Если текущий день - последний, то индекс последней строки документа.
|
||||
let end_row = if day_index != week_markup.len() - 1 {
|
||||
week_markup[day_index + 1].row
|
||||
} else {
|
||||
worksheet_end_row
|
||||
} - day_markup.row;
|
||||
};
|
||||
|
||||
let day_boundaries =
|
||||
parse_day_boundaries_column(&worksheet, day_markup, lesson_time_column, row_distance)?;
|
||||
let day_boundaries = parse_day_boundaries(
|
||||
&worksheet,
|
||||
day_markup.date.clone(),
|
||||
(day_markup.row, end_row),
|
||||
lesson_time_column,
|
||||
)?;
|
||||
|
||||
result.push(day_boundaries);
|
||||
}
|
||||
@@ -709,7 +672,7 @@ pub fn parse_xls(buffer: &Vec<u8>) -> Result<ParseResult, ParseError> {
|
||||
};
|
||||
|
||||
let (week_markup, groups_markup) = parse_skeleton(&worksheet)?;
|
||||
let week_boundaries = parse_week_boundaries_column(&worksheet, &week_markup)?;
|
||||
let week_boundaries = parse_week_boundaries(&worksheet, &week_markup)?;
|
||||
|
||||
let mut groups: HashMap<String, ScheduleEntry> = HashMap::new();
|
||||
|
||||
@@ -734,7 +697,7 @@ pub fn parse_xls(buffer: &Vec<u8>) -> Result<ParseResult, ParseError> {
|
||||
for lesson_boundaries in day_boundaries {
|
||||
match &mut parse_lesson(
|
||||
&worksheet,
|
||||
&mut day,
|
||||
&day,
|
||||
&day_boundaries,
|
||||
&lesson_boundaries,
|
||||
group_markup.column,
|
||||
@@ -786,9 +749,17 @@ pub mod tests {
|
||||
assert!(result.groups.contains_key("ИС-214/23"));
|
||||
|
||||
let group = result.groups.get("ИС-214/23").unwrap();
|
||||
let thursday = group.days.get(3).unwrap();
|
||||
|
||||
let thursday = group.days.get(3).unwrap();
|
||||
assert_eq!(thursday.lessons.len(), 1);
|
||||
assert_eq!(thursday.lessons[0].default_range.unwrap()[1], 3);
|
||||
|
||||
let lesson = &thursday.lessons[0];
|
||||
assert_eq!(lesson.default_range.unwrap()[1], 3);
|
||||
assert!(lesson.subgroups.is_some());
|
||||
|
||||
let subgroups = lesson.subgroups.as_ref().unwrap();
|
||||
assert_eq!(subgroups.len(), 2);
|
||||
assert_eq!(subgroups[0].cabinet, Some("44".to_string()));
|
||||
assert_eq!(subgroups[1].cabinet, Some("43".to_string()));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -6,6 +6,50 @@ use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
use utoipa::ToSchema;
|
||||
|
||||
pub(crate) mod internal {
|
||||
use crate::schema::{LessonBoundaries, LessonType};
|
||||
use chrono::{DateTime, Utc};
|
||||
|
||||
/// Data cell storing the group name.
|
||||
pub struct GroupCellInfo {
|
||||
/// Column index.
|
||||
pub column: u32,
|
||||
|
||||
/// Text in the cell.
|
||||
pub name: String,
|
||||
}
|
||||
|
||||
/// Data cell storing the line.
|
||||
pub struct DayCellInfo {
|
||||
/// Line index.
|
||||
pub row: u32,
|
||||
|
||||
/// Column index.
|
||||
pub column: u32,
|
||||
|
||||
/// Day name.
|
||||
pub name: String,
|
||||
|
||||
/// Date of the day.
|
||||
pub date: DateTime<Utc>,
|
||||
}
|
||||
|
||||
/// Data on the time of lessons from the second column of the schedule.
|
||||
pub struct BoundariesCellInfo {
|
||||
/// Temporary segment of the lesson.
|
||||
pub time_range: LessonBoundaries,
|
||||
|
||||
/// Type of lesson.
|
||||
pub lesson_type: LessonType,
|
||||
|
||||
/// The lesson index.
|
||||
pub default_index: Option<u32>,
|
||||
|
||||
/// The frame of the cell.
|
||||
pub xls_range: ((u32, u32), (u32, u32)),
|
||||
}
|
||||
}
|
||||
|
||||
/// The beginning and end of the lesson.
|
||||
#[derive(Clone, Hash, Debug, Serialize, Deserialize, ToSchema)]
|
||||
pub struct LessonBoundaries {
|
||||
@@ -44,6 +88,12 @@ pub enum LessonType {
|
||||
|
||||
/// Экзамен.
|
||||
ExamDefault,
|
||||
|
||||
/// Курсовой проект.
|
||||
CourseProject,
|
||||
|
||||
/// Защита курсового проекта.
|
||||
CourseProjectDefense,
|
||||
}
|
||||
|
||||
#[derive(Clone, Hash, Debug, Serialize, Deserialize, ToSchema)]
|
||||
|
||||
58
schedule-parser/src/worksheet.rs
Normal file
58
schedule-parser/src/worksheet.rs
Normal file
@@ -0,0 +1,58 @@
|
||||
use std::ops::Deref;
|
||||
use std::sync::LazyLock;
|
||||
use regex::Regex;
|
||||
|
||||
/// XLS WorkSheet data.
|
||||
pub struct WorkSheet {
|
||||
pub data: calamine::Range<calamine::Data>,
|
||||
pub merges: Vec<calamine::Dimensions>,
|
||||
}
|
||||
|
||||
impl Deref for WorkSheet {
|
||||
type Target = calamine::Range<calamine::Data>;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.data
|
||||
}
|
||||
}
|
||||
|
||||
impl WorkSheet {
|
||||
/// Getting a line from the required cell.
|
||||
pub fn get_string_from_cell(&self, row: u32, col: u32) -> Option<String> {
|
||||
let cell_data = if let Some(data) = self.get((row as usize, col as usize)) {
|
||||
data.to_string()
|
||||
} else {
|
||||
return None;
|
||||
};
|
||||
|
||||
if cell_data.trim().is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
static NL_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"[\n\r]+").unwrap());
|
||||
static SP_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\s+").unwrap());
|
||||
|
||||
let trimmed_data = SP_RE
|
||||
.replace_all(&NL_RE.replace_all(&cell_data, " "), " ")
|
||||
.trim()
|
||||
.to_string();
|
||||
|
||||
if trimmed_data.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(trimmed_data)
|
||||
}
|
||||
}
|
||||
|
||||
/// Obtaining the boundaries of the cell along its upper left coordinate.
|
||||
pub fn get_merge_from_start(&self, row: u32, column: u32) -> ((u32, u32), (u32, u32)) {
|
||||
match self
|
||||
.merges
|
||||
.iter()
|
||||
.find(|merge| merge.start.0 == row && merge.start.1 == column)
|
||||
{
|
||||
Some(merge) => (merge.start, (merge.end.0 + 1, merge.end.1 + 1)),
|
||||
None => ((row, column), (row + 1, column + 1)),
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user