mirror of
https://github.com/n08i40k/schedule-parser-rusted.git
synced 2025-12-06 09:47:50 +03:00
Compare commits
8 Commits
f121a04f1b
...
cdc89b5bcd
| Author | SHA1 | Date | |
|---|---|---|---|
|
cdc89b5bcd
|
|||
|
ad86f6cd64
|
|||
|
a3b4a501db
|
|||
|
df0e99a4d0
|
|||
|
a8cf8fb0f5
|
|||
|
7ed866138e
|
|||
|
7bac48f8fc
|
|||
|
191ec36fef
|
@@ -100,9 +100,9 @@ pub enum LessonType {
|
||||
|
||||
/// Защита курсового проекта.
|
||||
CourseProjectDefense,
|
||||
|
||||
|
||||
/// Практическое занятие.
|
||||
Practice
|
||||
Practice,
|
||||
}
|
||||
|
||||
#[derive(Clone, Hash, Debug, Serialize, Deserialize, ToSchema)]
|
||||
@@ -212,70 +212,6 @@ impl ScheduleSnapshot {
|
||||
}
|
||||
}
|
||||
|
||||
// #[derive(Clone, Debug, Display, Error, ToSchema)]
|
||||
// #[display("row {row}, column {column}")]
|
||||
// pub struct ErrorCellPos {
|
||||
// pub row: u32,
|
||||
// pub column: u32,
|
||||
// }
|
||||
//
|
||||
// #[derive(Clone, Debug, Display, Error, ToSchema)]
|
||||
// #[display("'{data}' at {pos}")]
|
||||
// pub struct ErrorCell {
|
||||
// pub pos: ErrorCellPos,
|
||||
// pub data: String,
|
||||
// }
|
||||
//
|
||||
// impl ErrorCell {
|
||||
// pub fn new(row: u32, column: u32, data: String) -> Self {
|
||||
// Self {
|
||||
// pos: ErrorCellPos { row, column },
|
||||
// data,
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
|
||||
// #[derive(Clone, Debug, Display, Error, ToSchema)]
|
||||
// pub enum ParseError {
|
||||
// /// Errors related to reading XLS file.
|
||||
// #[display("{_0:?}: Failed to read XLS file.")]
|
||||
// #[schema(value_type = String)]
|
||||
// BadXLS(Arc<calamine::XlsError>),
|
||||
//
|
||||
// /// Not a single sheet was found.
|
||||
// #[display("No work sheets found.")]
|
||||
// NoWorkSheets,
|
||||
//
|
||||
// /// There are no data on the boundaries of the sheet.
|
||||
// #[display("There is no data on work sheet boundaries.")]
|
||||
// UnknownWorkSheetRange,
|
||||
//
|
||||
// /// Failed to read the beginning and end of the lesson from the cell
|
||||
// #[display("Failed to read lesson start and end from {_0}.")]
|
||||
// LessonBoundaries(ErrorCell),
|
||||
//
|
||||
// /// Not found the beginning and the end corresponding to the lesson.
|
||||
// #[display("No start and end times matching the lesson (at {_0}) was found.")]
|
||||
// LessonTimeNotFound(ErrorCellPos),
|
||||
// }
|
||||
//
|
||||
// impl Serialize for ParseError {
|
||||
// fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
|
||||
// where
|
||||
// S: Serializer,
|
||||
// {
|
||||
// match self {
|
||||
// ParseError::BadXLS(_) => serializer.serialize_str("BAD_XLS"),
|
||||
// ParseError::NoWorkSheets => serializer.serialize_str("NO_WORK_SHEETS"),
|
||||
// ParseError::UnknownWorkSheetRange => {
|
||||
// serializer.serialize_str("UNKNOWN_WORK_SHEET_RANGE")
|
||||
// }
|
||||
// ParseError::LessonBoundaries(_) => serializer.serialize_str("GLOBAL_TIME"),
|
||||
// ParseError::LessonTimeNotFound(_) => serializer.serialize_str("LESSON_TIME_NOT_FOUND"),
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
|
||||
#[async_trait]
|
||||
pub trait ScheduleProvider
|
||||
where
|
||||
|
||||
@@ -1,21 +1,5 @@
|
||||
use derive_more::{Display, Error, From};
|
||||
use crate::parser::worksheet::CellPos;
|
||||
|
||||
#[derive(Clone, Debug, Display, Error)]
|
||||
#[display("'{data}' at {pos}")]
|
||||
pub struct ErrorCell {
|
||||
pub pos: CellPos,
|
||||
pub data: String,
|
||||
}
|
||||
|
||||
impl ErrorCell {
|
||||
pub fn new(row: u32, column: u32, data: &str) -> Self {
|
||||
Self {
|
||||
pos: CellPos { row, column },
|
||||
data: data.to_string(),
|
||||
}
|
||||
}
|
||||
}
|
||||
use derive_more::{Display, Error, From};
|
||||
|
||||
#[derive(Debug, Display, Error, From)]
|
||||
pub enum Error {
|
||||
@@ -28,11 +12,14 @@ pub enum Error {
|
||||
#[display("There is no data on work sheet boundaries.")]
|
||||
UnknownWorkSheetRange,
|
||||
|
||||
#[display("Failed to read lesson start and end from {_0}.")]
|
||||
NoLessonBoundaries(ErrorCell),
|
||||
#[display("Failed to read lesson start and end of lesson at {_0}.")]
|
||||
NoLessonBoundaries(CellPos),
|
||||
|
||||
#[display("No start and end times matching the lesson (at {_0}) was found.")]
|
||||
LessonTimeNotFound(CellPos),
|
||||
|
||||
#[display("Unknown lesson type `{type}` at {pos}")]
|
||||
UnknownLessonType { pos: CellPos, r#type: String },
|
||||
}
|
||||
|
||||
pub type Result<T> = core::result::Result<T, Error>;
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
pub use self::error::{Error, Result};
|
||||
use crate::or_continue;
|
||||
use crate::parser::error::ErrorCell;
|
||||
use crate::parser::worksheet::{CellPos, CellRange, WorkSheet};
|
||||
use crate::parser::LessonParseResult::{Lessons, Street};
|
||||
use base::LessonType::Break;
|
||||
@@ -217,7 +216,7 @@ fn parse_lesson(
|
||||
};
|
||||
|
||||
static OTHER_STREET_RE: LazyLock<Regex> =
|
||||
LazyLock::new(|| Regex::new(r"^[А-Я][а-я]+[,\s]\d+$").unwrap());
|
||||
LazyLock::new(|| Regex::new(r"^[А-Я][а-я]+[,\s]+д\.\s\d+$").unwrap());
|
||||
|
||||
if OTHER_STREET_RE.is_match(&cell_data) {
|
||||
return Ok(Street(cell_data));
|
||||
@@ -226,12 +225,17 @@ fn parse_lesson(
|
||||
cell_data
|
||||
};
|
||||
|
||||
let cell_range = worksheet.get_merge_from_start(row, group_column);
|
||||
let lesson_cell_range = worksheet.get_merge_from_start(row, group_column);
|
||||
|
||||
let (default_range, lesson_time) = {
|
||||
let end_time_arr = day_boundaries
|
||||
.iter()
|
||||
.filter(|time| time.range.end.row == cell_range.end.row)
|
||||
.filter(
|
||||
|BoundariesData {
|
||||
range: CellRange { end, .. },
|
||||
..
|
||||
}| { lesson_cell_range.end.row <= end.row },
|
||||
)
|
||||
.collect::<Vec<&BoundariesData>>();
|
||||
|
||||
let end_time = end_time_arr
|
||||
@@ -257,12 +261,12 @@ fn parse_lesson(
|
||||
name,
|
||||
mut subgroups,
|
||||
r#type: lesson_type,
|
||||
} = parse_name_and_subgroups(&name)?;
|
||||
} = parse_name_and_subgroups(&name, row, group_column)?;
|
||||
|
||||
{
|
||||
let cabinets: Vec<String> = parse_cabinets(
|
||||
worksheet,
|
||||
(cell_range.start.row, cell_range.end.row),
|
||||
(lesson_cell_range.start.row, lesson_cell_range.end.row),
|
||||
group_column + 1,
|
||||
);
|
||||
|
||||
@@ -364,7 +368,7 @@ struct ParsedLessonName {
|
||||
|
||||
//noinspection GrazieInspection
|
||||
/// Getting the "pure" name of the lesson and list of teachers from the text of the lesson cell.
|
||||
fn parse_name_and_subgroups(text: &str) -> Result<ParsedLessonName> {
|
||||
fn parse_name_and_subgroups(text: &str, row: u32, column: u32) -> Result<ParsedLessonName> {
|
||||
// Части названия пары:
|
||||
// 1. Само название.
|
||||
// 2. Список преподавателей и подгрупп.
|
||||
@@ -373,7 +377,7 @@ fn parse_name_and_subgroups(text: &str) -> Result<ParsedLessonName> {
|
||||
// Регулярное выражение для получения ФИО преподавателей и номеров подгрупп (aka. второй части).
|
||||
static NAME_RE: LazyLock<fancy_regex::Regex> = LazyLock::new(|| {
|
||||
fancy_regex::Regex::new(
|
||||
r"([А-Я][а-я]+(?:[\s.]*[А-Я]){1,2})(?=[^а-я])[.\s]*(?:\(?(\d)[\sа-я]*\)?)?",
|
||||
r"([А-Я][а-я]+(?:[\s.]*[А-Я]){1,2})(?=[^А-Яа-я])[.\s]*(?:\(?(\d)[\sа-я]*\)?)?",
|
||||
)
|
||||
.unwrap()
|
||||
});
|
||||
@@ -394,10 +398,10 @@ fn parse_name_and_subgroups(text: &str) -> Result<ParsedLessonName> {
|
||||
let mut lesson_name: Option<&str> = None;
|
||||
let mut extra: Option<&str> = None;
|
||||
|
||||
let mut shared_subgroup = false;
|
||||
let mut shared_subgroup = true;
|
||||
let mut subgroups: [Option<LessonSubGroup>; 2] = [None, None];
|
||||
|
||||
for capture in NAME_RE.captures_iter(&text) {
|
||||
for capture in NAME_RE.captures_iter(&text).take(2) {
|
||||
let capture = capture.unwrap();
|
||||
|
||||
if lesson_name.is_none() {
|
||||
@@ -438,17 +442,23 @@ fn parse_name_and_subgroups(text: &str) -> Result<ParsedLessonName> {
|
||||
|
||||
match subgroup_index {
|
||||
None => {
|
||||
subgroups[0] = subgroup;
|
||||
subgroups[1] = None;
|
||||
shared_subgroup = true;
|
||||
break;
|
||||
// at most 2 matches are taken, so there is always a free slot here and never more than 2 subgroups
|
||||
*subgroups.iter_mut().find(|x| x.is_none()).unwrap() = subgroup;
|
||||
}
|
||||
Some(num) => {
|
||||
// bc we have indexed subgroup
|
||||
shared_subgroup = false;
|
||||
|
||||
// 1 - 1 = 0 | 2 - 1 = 1 | 3 - 1 = 2 (schedule index to array index)
|
||||
// 0 % 2 = 0 | 1 % 2 = 1 | 2 % 2 = 0 (clamp)
|
||||
let normalised = (num - 1) % 2;
|
||||
let subgroup_index = ((num - 1) % 2) as usize;
|
||||
|
||||
subgroups[normalised as usize] = subgroup;
|
||||
// if that slot is already occupied (probably by a non-indexed subgroup), move the occupant to the other slot
|
||||
if subgroups[subgroup_index].is_some() {
|
||||
subgroups.swap(0, 1);
|
||||
}
|
||||
|
||||
subgroups[subgroup_index] = subgroup;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -456,7 +466,7 @@ fn parse_name_and_subgroups(text: &str) -> Result<ParsedLessonName> {
|
||||
let subgroups = if lesson_name.is_none() {
|
||||
Vec::new()
|
||||
} else if shared_subgroup {
|
||||
Vec::from([subgroups[0].take()])
|
||||
Vec::from([subgroups.into_iter().next().unwrap()])
|
||||
} else {
|
||||
Vec::from(subgroups)
|
||||
};
|
||||
@@ -475,13 +485,19 @@ fn parse_name_and_subgroups(text: &str) -> Result<ParsedLessonName> {
|
||||
|
||||
if result.is_none() {
|
||||
#[cfg(not(debug_assertions))]
|
||||
sentry::capture_message(
|
||||
&format!("Не удалось угадать тип пары '{}'!", extra),
|
||||
sentry::Level::Warning,
|
||||
);
|
||||
sentry::capture_error(&Error::UnknownLessonType {
|
||||
r#type: extra.to_string(),
|
||||
pos: CellPos::new(row, column),
|
||||
});
|
||||
|
||||
#[cfg(debug_assertions)]
|
||||
log::warn!("Не удалось угадать тип пары '{}'!", extra);
|
||||
log::warn!(
|
||||
"{}",
|
||||
Error::UnknownLessonType {
|
||||
r#type: extra.to_string(),
|
||||
pos: CellPos::new(row, column),
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
result
|
||||
@@ -548,9 +564,8 @@ fn parse_day_boundaries(
|
||||
continue;
|
||||
};
|
||||
|
||||
let lesson_time = parse_lesson_boundaries_cell(&time_cell, date).ok_or(
|
||||
Error::NoLessonBoundaries(ErrorCell::new(row, column, &time_cell)),
|
||||
)?;
|
||||
let lesson_time = parse_lesson_boundaries_cell(&time_cell, date)
|
||||
.ok_or(Error::NoLessonBoundaries(CellPos::new(row, column)))?;
|
||||
|
||||
// type
|
||||
let lesson_type = if time_cell.contains("пара") {
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
use derive_more::Display;
|
||||
use regex::Regex;
|
||||
use std::fmt::{Display, Formatter};
|
||||
use std::ops::Deref;
|
||||
use std::sync::LazyLock;
|
||||
|
||||
@@ -9,13 +9,35 @@ pub struct WorkSheet {
|
||||
pub merges: Vec<calamine::Dimensions>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Display, derive_more::Error)]
|
||||
#[display("row {row}, column {column}")]
|
||||
#[derive(Clone, Debug, derive_more::Error)]
|
||||
pub struct CellPos {
|
||||
pub row: u32,
|
||||
pub column: u32,
|
||||
}
|
||||
|
||||
/// Converts a zero-based column index into its letter form.
///
/// The mapping is bijective base-26: `0 -> "A"`, `25 -> "Z"`, `26 -> "AA"`,
/// `701 -> "ZZ"`, `702 -> "AAA"`, and so on.
///
/// See <https://stackoverflow.com/a/297214> for the underlying algorithm.
fn format_column_index(index: u32) -> String {
    // Letters are produced least-significant first and reversed at the end.
    let mut letters: Vec<char> = Vec::new();
    let mut remaining = index;

    loop {
        // `remaining % 26` is always in 0..26, so the narrowing cast cannot truncate.
        letters.push(char::from(b'A' + (remaining % 26) as u8));

        let quotient = remaining / 26;
        if quotient == 0 {
            break;
        }

        // There is no "zero" digit in this numbering, hence the `- 1` shift
        // before processing the next (more significant) letter.
        remaining = quotient - 1;
    }

    letters.iter().rev().collect()
}
|
||||
|
||||
impl Display for CellPos {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
||||
f.write_fmt(format_args!(
|
||||
"column {}, row {}",
|
||||
format_column_index(self.column),
|
||||
self.row + 1,
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
pub struct CellRange {
|
||||
pub start: CellPos,
|
||||
pub end: CellPos,
|
||||
|
||||
Reference in New Issue
Block a user