mirror of
https://github.com/n08i40k/schedule-parser-rusted.git
synced 2025-12-06 09:47:50 +03:00
Compare commits
8 Commits
f121a04f1b
...
cdc89b5bcd
| Author | SHA1 | Date | |
|---|---|---|---|
|
cdc89b5bcd
|
|||
|
ad86f6cd64
|
|||
|
a3b4a501db
|
|||
|
df0e99a4d0
|
|||
|
a8cf8fb0f5
|
|||
|
7ed866138e
|
|||
|
7bac48f8fc
|
|||
|
191ec36fef
|
@@ -100,9 +100,9 @@ pub enum LessonType {
|
|||||||
|
|
||||||
/// Защита курсового проекта.
|
/// Защита курсового проекта.
|
||||||
CourseProjectDefense,
|
CourseProjectDefense,
|
||||||
|
|
||||||
/// Практическое занятие.
|
/// Практическое занятие.
|
||||||
Practice
|
Practice,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone, Hash, Debug, Serialize, Deserialize, ToSchema)]
|
#[derive(Clone, Hash, Debug, Serialize, Deserialize, ToSchema)]
|
||||||
@@ -212,70 +212,6 @@ impl ScheduleSnapshot {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// #[derive(Clone, Debug, Display, Error, ToSchema)]
|
|
||||||
// #[display("row {row}, column {column}")]
|
|
||||||
// pub struct ErrorCellPos {
|
|
||||||
// pub row: u32,
|
|
||||||
// pub column: u32,
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// #[derive(Clone, Debug, Display, Error, ToSchema)]
|
|
||||||
// #[display("'{data}' at {pos}")]
|
|
||||||
// pub struct ErrorCell {
|
|
||||||
// pub pos: ErrorCellPos,
|
|
||||||
// pub data: String,
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// impl ErrorCell {
|
|
||||||
// pub fn new(row: u32, column: u32, data: String) -> Self {
|
|
||||||
// Self {
|
|
||||||
// pos: ErrorCellPos { row, column },
|
|
||||||
// data,
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
|
|
||||||
// #[derive(Clone, Debug, Display, Error, ToSchema)]
|
|
||||||
// pub enum ParseError {
|
|
||||||
// /// Errors related to reading XLS file.
|
|
||||||
// #[display("{_0:?}: Failed to read XLS file.")]
|
|
||||||
// #[schema(value_type = String)]
|
|
||||||
// BadXLS(Arc<calamine::XlsError>),
|
|
||||||
//
|
|
||||||
// /// Not a single sheet was found.
|
|
||||||
// #[display("No work sheets found.")]
|
|
||||||
// NoWorkSheets,
|
|
||||||
//
|
|
||||||
// /// There are no data on the boundaries of the sheet.
|
|
||||||
// #[display("There is no data on work sheet boundaries.")]
|
|
||||||
// UnknownWorkSheetRange,
|
|
||||||
//
|
|
||||||
// /// Failed to read the beginning and end of the lesson from the cell
|
|
||||||
// #[display("Failed to read lesson start and end from {_0}.")]
|
|
||||||
// LessonBoundaries(ErrorCell),
|
|
||||||
//
|
|
||||||
// /// Not found the beginning and the end corresponding to the lesson.
|
|
||||||
// #[display("No start and end times matching the lesson (at {_0}) was found.")]
|
|
||||||
// LessonTimeNotFound(ErrorCellPos),
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// impl Serialize for ParseError {
|
|
||||||
// fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
|
|
||||||
// where
|
|
||||||
// S: Serializer,
|
|
||||||
// {
|
|
||||||
// match self {
|
|
||||||
// ParseError::BadXLS(_) => serializer.serialize_str("BAD_XLS"),
|
|
||||||
// ParseError::NoWorkSheets => serializer.serialize_str("NO_WORK_SHEETS"),
|
|
||||||
// ParseError::UnknownWorkSheetRange => {
|
|
||||||
// serializer.serialize_str("UNKNOWN_WORK_SHEET_RANGE")
|
|
||||||
// }
|
|
||||||
// ParseError::LessonBoundaries(_) => serializer.serialize_str("GLOBAL_TIME"),
|
|
||||||
// ParseError::LessonTimeNotFound(_) => serializer.serialize_str("LESSON_TIME_NOT_FOUND"),
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
|
|
||||||
#[async_trait]
|
#[async_trait]
|
||||||
pub trait ScheduleProvider
|
pub trait ScheduleProvider
|
||||||
where
|
where
|
||||||
|
|||||||
@@ -1,21 +1,5 @@
|
|||||||
use derive_more::{Display, Error, From};
|
|
||||||
use crate::parser::worksheet::CellPos;
|
use crate::parser::worksheet::CellPos;
|
||||||
|
use derive_more::{Display, Error, From};
|
||||||
#[derive(Clone, Debug, Display, Error)]
|
|
||||||
#[display("'{data}' at {pos}")]
|
|
||||||
pub struct ErrorCell {
|
|
||||||
pub pos: CellPos,
|
|
||||||
pub data: String,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl ErrorCell {
|
|
||||||
pub fn new(row: u32, column: u32, data: &str) -> Self {
|
|
||||||
Self {
|
|
||||||
pos: CellPos { row, column },
|
|
||||||
data: data.to_string(),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, Display, Error, From)]
|
#[derive(Debug, Display, Error, From)]
|
||||||
pub enum Error {
|
pub enum Error {
|
||||||
@@ -28,11 +12,14 @@ pub enum Error {
|
|||||||
#[display("There is no data on work sheet boundaries.")]
|
#[display("There is no data on work sheet boundaries.")]
|
||||||
UnknownWorkSheetRange,
|
UnknownWorkSheetRange,
|
||||||
|
|
||||||
#[display("Failed to read lesson start and end from {_0}.")]
|
#[display("Failed to read lesson start and end of lesson at {_0}.")]
|
||||||
NoLessonBoundaries(ErrorCell),
|
NoLessonBoundaries(CellPos),
|
||||||
|
|
||||||
#[display("No start and end times matching the lesson (at {_0}) was found.")]
|
#[display("No start and end times matching the lesson (at {_0}) was found.")]
|
||||||
LessonTimeNotFound(CellPos),
|
LessonTimeNotFound(CellPos),
|
||||||
|
|
||||||
|
#[display("Unknown lesson type `{type}` at {pos}")]
|
||||||
|
UnknownLessonType { pos: CellPos, r#type: String },
|
||||||
}
|
}
|
||||||
|
|
||||||
pub type Result<T> = core::result::Result<T, Error>;
|
pub type Result<T> = core::result::Result<T, Error>;
|
||||||
|
|||||||
@@ -1,6 +1,5 @@
|
|||||||
pub use self::error::{Error, Result};
|
pub use self::error::{Error, Result};
|
||||||
use crate::or_continue;
|
use crate::or_continue;
|
||||||
use crate::parser::error::ErrorCell;
|
|
||||||
use crate::parser::worksheet::{CellPos, CellRange, WorkSheet};
|
use crate::parser::worksheet::{CellPos, CellRange, WorkSheet};
|
||||||
use crate::parser::LessonParseResult::{Lessons, Street};
|
use crate::parser::LessonParseResult::{Lessons, Street};
|
||||||
use base::LessonType::Break;
|
use base::LessonType::Break;
|
||||||
@@ -217,7 +216,7 @@ fn parse_lesson(
|
|||||||
};
|
};
|
||||||
|
|
||||||
static OTHER_STREET_RE: LazyLock<Regex> =
|
static OTHER_STREET_RE: LazyLock<Regex> =
|
||||||
LazyLock::new(|| Regex::new(r"^[А-Я][а-я]+[,\s]\d+$").unwrap());
|
LazyLock::new(|| Regex::new(r"^[А-Я][а-я]+[,\s]+д\.\s\d+$").unwrap());
|
||||||
|
|
||||||
if OTHER_STREET_RE.is_match(&cell_data) {
|
if OTHER_STREET_RE.is_match(&cell_data) {
|
||||||
return Ok(Street(cell_data));
|
return Ok(Street(cell_data));
|
||||||
@@ -226,12 +225,17 @@ fn parse_lesson(
|
|||||||
cell_data
|
cell_data
|
||||||
};
|
};
|
||||||
|
|
||||||
let cell_range = worksheet.get_merge_from_start(row, group_column);
|
let lesson_cell_range = worksheet.get_merge_from_start(row, group_column);
|
||||||
|
|
||||||
let (default_range, lesson_time) = {
|
let (default_range, lesson_time) = {
|
||||||
let end_time_arr = day_boundaries
|
let end_time_arr = day_boundaries
|
||||||
.iter()
|
.iter()
|
||||||
.filter(|time| time.range.end.row == cell_range.end.row)
|
.filter(
|
||||||
|
|BoundariesData {
|
||||||
|
range: CellRange { end, .. },
|
||||||
|
..
|
||||||
|
}| { lesson_cell_range.end.row <= end.row },
|
||||||
|
)
|
||||||
.collect::<Vec<&BoundariesData>>();
|
.collect::<Vec<&BoundariesData>>();
|
||||||
|
|
||||||
let end_time = end_time_arr
|
let end_time = end_time_arr
|
||||||
@@ -257,12 +261,12 @@ fn parse_lesson(
|
|||||||
name,
|
name,
|
||||||
mut subgroups,
|
mut subgroups,
|
||||||
r#type: lesson_type,
|
r#type: lesson_type,
|
||||||
} = parse_name_and_subgroups(&name)?;
|
} = parse_name_and_subgroups(&name, row, group_column)?;
|
||||||
|
|
||||||
{
|
{
|
||||||
let cabinets: Vec<String> = parse_cabinets(
|
let cabinets: Vec<String> = parse_cabinets(
|
||||||
worksheet,
|
worksheet,
|
||||||
(cell_range.start.row, cell_range.end.row),
|
(lesson_cell_range.start.row, lesson_cell_range.end.row),
|
||||||
group_column + 1,
|
group_column + 1,
|
||||||
);
|
);
|
||||||
|
|
||||||
@@ -364,7 +368,7 @@ struct ParsedLessonName {
|
|||||||
|
|
||||||
//noinspection GrazieInspection
|
//noinspection GrazieInspection
|
||||||
/// Getting the "pure" name of the lesson and list of teachers from the text of the lesson cell.
|
/// Getting the "pure" name of the lesson and list of teachers from the text of the lesson cell.
|
||||||
fn parse_name_and_subgroups(text: &str) -> Result<ParsedLessonName> {
|
fn parse_name_and_subgroups(text: &str, row: u32, column: u32) -> Result<ParsedLessonName> {
|
||||||
// Части названия пары:
|
// Части названия пары:
|
||||||
// 1. Само название.
|
// 1. Само название.
|
||||||
// 2. Список преподавателей и подгрупп.
|
// 2. Список преподавателей и подгрупп.
|
||||||
@@ -373,7 +377,7 @@ fn parse_name_and_subgroups(text: &str) -> Result<ParsedLessonName> {
|
|||||||
// Регулярное выражение для получения ФИО преподавателей и номеров подгрупп (aka. второй части).
|
// Регулярное выражение для получения ФИО преподавателей и номеров подгрупп (aka. второй части).
|
||||||
static NAME_RE: LazyLock<fancy_regex::Regex> = LazyLock::new(|| {
|
static NAME_RE: LazyLock<fancy_regex::Regex> = LazyLock::new(|| {
|
||||||
fancy_regex::Regex::new(
|
fancy_regex::Regex::new(
|
||||||
r"([А-Я][а-я]+(?:[\s.]*[А-Я]){1,2})(?=[^а-я])[.\s]*(?:\(?(\d)[\sа-я]*\)?)?",
|
r"([А-Я][а-я]+(?:[\s.]*[А-Я]){1,2})(?=[^А-Яа-я])[.\s]*(?:\(?(\d)[\sа-я]*\)?)?",
|
||||||
)
|
)
|
||||||
.unwrap()
|
.unwrap()
|
||||||
});
|
});
|
||||||
@@ -394,10 +398,10 @@ fn parse_name_and_subgroups(text: &str) -> Result<ParsedLessonName> {
|
|||||||
let mut lesson_name: Option<&str> = None;
|
let mut lesson_name: Option<&str> = None;
|
||||||
let mut extra: Option<&str> = None;
|
let mut extra: Option<&str> = None;
|
||||||
|
|
||||||
let mut shared_subgroup = false;
|
let mut shared_subgroup = true;
|
||||||
let mut subgroups: [Option<LessonSubGroup>; 2] = [None, None];
|
let mut subgroups: [Option<LessonSubGroup>; 2] = [None, None];
|
||||||
|
|
||||||
for capture in NAME_RE.captures_iter(&text) {
|
for capture in NAME_RE.captures_iter(&text).take(2) {
|
||||||
let capture = capture.unwrap();
|
let capture = capture.unwrap();
|
||||||
|
|
||||||
if lesson_name.is_none() {
|
if lesson_name.is_none() {
|
||||||
@@ -438,17 +442,23 @@ fn parse_name_and_subgroups(text: &str) -> Result<ParsedLessonName> {
|
|||||||
|
|
||||||
match subgroup_index {
|
match subgroup_index {
|
||||||
None => {
|
None => {
|
||||||
subgroups[0] = subgroup;
|
// we have only 2 matches max so more than 2 subgroups we cant have 100%
|
||||||
subgroups[1] = None;
|
*subgroups.iter_mut().find(|x| x.is_none()).unwrap() = subgroup;
|
||||||
shared_subgroup = true;
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
Some(num) => {
|
Some(num) => {
|
||||||
|
// bc we have indexed subgroup
|
||||||
|
shared_subgroup = false;
|
||||||
|
|
||||||
// 1 - 1 = 0 | 2 - 1 = 1 | 3 - 1 = 2 (schedule index to array index)
|
// 1 - 1 = 0 | 2 - 1 = 1 | 3 - 1 = 2 (schedule index to array index)
|
||||||
// 0 % 2 = 0 | 1 % 2 = 1 | 2 % 2 = 0 (clamp)
|
// 0 % 2 = 0 | 1 % 2 = 1 | 2 % 2 = 0 (clamp)
|
||||||
let normalised = (num - 1) % 2;
|
let subgroup_index = ((num - 1) % 2) as usize;
|
||||||
|
|
||||||
subgroups[normalised as usize] = subgroup;
|
// if we have subgroup in that index (probably non-indexed, we change it index to free)
|
||||||
|
if subgroups[subgroup_index].is_some() {
|
||||||
|
subgroups.swap(0, 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
subgroups[subgroup_index] = subgroup;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -456,7 +466,7 @@ fn parse_name_and_subgroups(text: &str) -> Result<ParsedLessonName> {
|
|||||||
let subgroups = if lesson_name.is_none() {
|
let subgroups = if lesson_name.is_none() {
|
||||||
Vec::new()
|
Vec::new()
|
||||||
} else if shared_subgroup {
|
} else if shared_subgroup {
|
||||||
Vec::from([subgroups[0].take()])
|
Vec::from([subgroups.into_iter().next().unwrap()])
|
||||||
} else {
|
} else {
|
||||||
Vec::from(subgroups)
|
Vec::from(subgroups)
|
||||||
};
|
};
|
||||||
@@ -475,13 +485,19 @@ fn parse_name_and_subgroups(text: &str) -> Result<ParsedLessonName> {
|
|||||||
|
|
||||||
if result.is_none() {
|
if result.is_none() {
|
||||||
#[cfg(not(debug_assertions))]
|
#[cfg(not(debug_assertions))]
|
||||||
sentry::capture_message(
|
sentry::capture_error(&Error::UnknownLessonType {
|
||||||
&format!("Не удалось угадать тип пары '{}'!", extra),
|
r#type: extra.to_string(),
|
||||||
sentry::Level::Warning,
|
pos: CellPos::new(row, column),
|
||||||
);
|
});
|
||||||
|
|
||||||
#[cfg(debug_assertions)]
|
#[cfg(debug_assertions)]
|
||||||
log::warn!("Не удалось угадать тип пары '{}'!", extra);
|
log::warn!(
|
||||||
|
"{}",
|
||||||
|
Error::UnknownLessonType {
|
||||||
|
r#type: extra.to_string(),
|
||||||
|
pos: CellPos::new(row, column),
|
||||||
|
}
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
result
|
result
|
||||||
@@ -548,9 +564,8 @@ fn parse_day_boundaries(
|
|||||||
continue;
|
continue;
|
||||||
};
|
};
|
||||||
|
|
||||||
let lesson_time = parse_lesson_boundaries_cell(&time_cell, date).ok_or(
|
let lesson_time = parse_lesson_boundaries_cell(&time_cell, date)
|
||||||
Error::NoLessonBoundaries(ErrorCell::new(row, column, &time_cell)),
|
.ok_or(Error::NoLessonBoundaries(CellPos::new(row, column)))?;
|
||||||
)?;
|
|
||||||
|
|
||||||
// type
|
// type
|
||||||
let lesson_type = if time_cell.contains("пара") {
|
let lesson_type = if time_cell.contains("пара") {
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
use derive_more::Display;
|
|
||||||
use regex::Regex;
|
use regex::Regex;
|
||||||
|
use std::fmt::{Display, Formatter};
|
||||||
use std::ops::Deref;
|
use std::ops::Deref;
|
||||||
use std::sync::LazyLock;
|
use std::sync::LazyLock;
|
||||||
|
|
||||||
@@ -9,13 +9,35 @@ pub struct WorkSheet {
|
|||||||
pub merges: Vec<calamine::Dimensions>,
|
pub merges: Vec<calamine::Dimensions>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone, Debug, Display, derive_more::Error)]
|
#[derive(Clone, Debug, derive_more::Error)]
|
||||||
#[display("row {row}, column {column}")]
|
|
||||||
pub struct CellPos {
|
pub struct CellPos {
|
||||||
pub row: u32,
|
pub row: u32,
|
||||||
pub column: u32,
|
pub column: u32,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Converts a zero-based column index into spreadsheet-style column letters.
///
/// The mapping is bijective base-26: `0 -> "A"`, `25 -> "Z"`, `26 -> "AA"`,
/// `701 -> "ZZ"`, `702 -> "AAA"`.
fn format_column_index(index: u32) -> String {
    // https://stackoverflow.com/a/297214
    // Letters are produced from the least significant one, so they are
    // collected first and reversed at the end.
    let mut letters: Vec<char> = Vec::new();
    let mut remaining = index;

    loop {
        // `remaining % 26` is always below 26, so the narrowing cast cannot truncate.
        letters.push(char::from(b'A' + (remaining % 26) as u8));

        let quotient = remaining / 26;

        if quotient == 0 {
            break;
        }

        // There is no "zero" letter in this numbering, hence the shift by one
        // before moving on to the next, more significant, letter.
        remaining = quotient - 1;
    }

    letters.into_iter().rev().collect()
}
|
||||||
|
|
||||||
|
impl Display for CellPos {
|
||||||
|
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
||||||
|
f.write_fmt(format_args!(
|
||||||
|
"column {}, row {}",
|
||||||
|
format_column_index(self.column),
|
||||||
|
self.row + 1,
|
||||||
|
))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub struct CellRange {
|
pub struct CellRange {
|
||||||
pub start: CellPos,
|
pub start: CellPos,
|
||||||
pub end: CellPos,
|
pub end: CellPos,
|
||||||
|
|||||||
Reference in New Issue
Block a user