mirror of
https://github.com/n08i40k/schedule-parser-rusted.git
synced 2025-12-06 17:57:47 +03:00
feat(schedule)!: move schedule parser, downloader, and updater to external library
This can be used to support more schedule formats in the future.
This commit is contained in:
85
providers/provider-engels-polytechnic/src/lib.rs
Normal file
85
providers/provider-engels-polytechnic/src/lib.rs
Normal file
@@ -0,0 +1,85 @@
|
||||
use crate::updater::Updater;
|
||||
use async_trait::async_trait;
|
||||
use base::{ScheduleProvider, ScheduleSnapshot};
|
||||
use std::ops::DerefMut;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
use tokio::sync::RwLock;
|
||||
use tokio::time::interval;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
|
||||
pub use crate::updater::UpdateSource;
|
||||
|
||||
mod parser;
|
||||
mod updater;
|
||||
mod xls_downloader;
|
||||
|
||||
#[cfg(feature = "test")]
pub mod test_utils {
    //! Helpers exposed only when the `test` feature is enabled, so dependent
    //! crates can reuse the parser's reference fixture in their own tests.
    pub use crate::parser::test_utils::test_result;
}
|
||||
|
||||
/// Schedule provider for the Engels Polytechnic.
///
/// Owns the updater that refreshes the schedule and the most recently parsed
/// snapshot, which is shared with readers via a cheap `Arc` clone.
pub struct EngelsPolytechnicProvider {
    // Fetches and re-parses the schedule on demand.
    updater: Updater,
    // Latest successfully parsed schedule snapshot.
    snapshot: Arc<ScheduleSnapshot>,
}
|
||||
|
||||
impl EngelsPolytechnicProvider {
|
||||
pub async fn new(
|
||||
update_source: UpdateSource,
|
||||
) -> Result<Arc<dyn ScheduleProvider>, crate::updater::error::Error> {
|
||||
let (updater, snapshot) = Updater::new(update_source).await?;
|
||||
|
||||
Ok(Arc::new(Wrapper {
|
||||
inner: RwLock::new(Self {
|
||||
updater,
|
||||
snapshot: Arc::new(snapshot),
|
||||
}),
|
||||
}))
|
||||
}
|
||||
}
|
||||
|
||||
/// Interior-mutability wrapper so the provider can be updated in place while
/// being shared as an `Arc<dyn ScheduleProvider>`.
pub struct Wrapper {
    // Writers hold the lock only for the duration of a schedule update.
    inner: RwLock<EngelsPolytechnicProvider>,
}
|
||||
|
||||
#[async_trait]
|
||||
impl ScheduleProvider for Wrapper {
|
||||
async fn start_auto_update_task(
|
||||
&self,
|
||||
cancellation_token: CancellationToken,
|
||||
) -> Result<(), Box<dyn std::error::Error + Send + Sync + 'static>> {
|
||||
let mut ticker = interval(Duration::from_secs(60 * 30));
|
||||
ticker.tick().await; // bc we already have the latest schedule, when instantiating provider
|
||||
|
||||
loop {
|
||||
tokio::select! {
|
||||
_ = ticker.tick() => {
|
||||
let mut lock = self.inner.write().await;
|
||||
let this= lock.deref_mut();
|
||||
|
||||
log::info!("Updating schedule...");
|
||||
|
||||
match this.updater.update(&mut this.snapshot).await {
|
||||
Ok(snapshot) => {
|
||||
this.snapshot = Arc::new(snapshot);
|
||||
},
|
||||
|
||||
Err(err) => {
|
||||
cancellation_token.cancel();
|
||||
return Err(err.into());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
_ = cancellation_token.cancelled() => {
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn get_schedule(&self) -> Arc<ScheduleSnapshot> {
|
||||
self.inner.read().await.snapshot.clone()
|
||||
}
|
||||
}
|
||||
21
providers/provider-engels-polytechnic/src/parser/macros.rs
Normal file
21
providers/provider-engels-polytechnic/src/parser/macros.rs
Normal file
@@ -0,0 +1,21 @@
|
||||
/// Unwraps an `Option`, executing `continue` in the enclosing loop on `None`.
#[macro_export]
macro_rules! or_continue {
    ( $e:expr ) => {
        match $e {
            Some(value) => value,
            None => continue,
        }
    };
}
|
||||
|
||||
/// Unwraps an `Option`, executing `break` in the enclosing loop on `None`.
#[macro_export]
macro_rules! or_break {
    ( $e:expr ) => {
        match $e {
            Some(value) => value,
            None => break,
        }
    };
}
|
||||
912
providers/provider-engels-polytechnic/src/parser/mod.rs
Normal file
912
providers/provider-engels-polytechnic/src/parser/mod.rs
Normal file
@@ -0,0 +1,912 @@
|
||||
use crate::or_continue;
|
||||
use crate::parser::error::{ErrorCell, ErrorCellPos};
|
||||
use crate::parser::worksheet::WorkSheet;
|
||||
use crate::parser::LessonParseResult::{Lessons, Street};
|
||||
use base::LessonType::Break;
|
||||
use base::{
|
||||
Day, Lesson, LessonBoundaries, LessonSubGroup, LessonType, ParsedSchedule, ScheduleEntry,
|
||||
};
|
||||
use calamine::{open_workbook_from_rs, Reader, Xls};
|
||||
use chrono::{DateTime, Duration, NaiveDate, NaiveTime, Utc};
|
||||
use regex::Regex;
|
||||
use std::collections::HashMap;
|
||||
use std::io::Cursor;
|
||||
use std::sync::LazyLock;
|
||||
|
||||
mod macros;
|
||||
mod worksheet;
|
||||
|
||||
pub mod error {
|
||||
use derive_more::{Display, Error};
|
||||
use serde::{Serialize, Serializer};
|
||||
use std::sync::Arc;
|
||||
use utoipa::ToSchema;
|
||||
|
||||
#[derive(Clone, Debug, Display, Error, ToSchema)]
|
||||
#[display("row {row}, column {column}")]
|
||||
pub struct ErrorCellPos {
|
||||
pub row: u32,
|
||||
pub column: u32,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Display, Error, ToSchema)]
|
||||
#[display("'{data}' at {pos}")]
|
||||
pub struct ErrorCell {
|
||||
pub pos: ErrorCellPos,
|
||||
pub data: String,
|
||||
}
|
||||
|
||||
impl ErrorCell {
|
||||
pub fn new(row: u32, column: u32, data: String) -> Self {
|
||||
Self {
|
||||
pos: ErrorCellPos { row, column },
|
||||
data,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Display, Error, ToSchema)]
|
||||
pub enum Error {
|
||||
/// Errors related to reading XLS file.
|
||||
#[display("{_0:?}: Failed to read XLS file.")]
|
||||
#[schema(value_type = String)]
|
||||
BadXLS(Arc<calamine::XlsError>),
|
||||
|
||||
/// Not a single sheet was found.
|
||||
#[display("No work sheets found.")]
|
||||
NoWorkSheets,
|
||||
|
||||
/// There are no data on the boundaries of the sheet.
|
||||
#[display("There is no data on work sheet boundaries.")]
|
||||
UnknownWorkSheetRange,
|
||||
|
||||
/// Failed to read the beginning and end of the lesson from the cell
|
||||
#[display("Failed to read lesson start and end from {_0}.")]
|
||||
LessonBoundaries(ErrorCell),
|
||||
|
||||
/// Not found the beginning and the end corresponding to the lesson.
|
||||
#[display("No start and end times matching the lesson (at {_0}) was found.")]
|
||||
LessonTimeNotFound(ErrorCellPos),
|
||||
}
|
||||
|
||||
impl Serialize for Error {
|
||||
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
|
||||
where
|
||||
S: Serializer,
|
||||
{
|
||||
match self {
|
||||
Error::BadXLS(_) => serializer.serialize_str("BAD_XLS"),
|
||||
Error::NoWorkSheets => serializer.serialize_str("NO_WORK_SHEETS"),
|
||||
Error::UnknownWorkSheetRange => {
|
||||
serializer.serialize_str("UNKNOWN_WORK_SHEET_RANGE")
|
||||
}
|
||||
Error::LessonBoundaries(_) => serializer.serialize_str("GLOBAL_TIME"),
|
||||
Error::LessonTimeNotFound(_) => serializer.serialize_str("LESSON_TIME_NOT_FOUND"),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Data cell storing the group name (one per group column in the header row).
pub struct GroupCellInfo {
    /// Column index of the group in the worksheet.
    pub column: u32,

    /// Group name as written in the cell (whitespace stripped by the caller).
    pub name: String,
}
|
||||
|
||||
/// Data cell marking the start of a day's section in the schedule sheet.
pub struct DayCellInfo {
    /// Row index where this day's section starts.
    pub row: u32,

    /// Column index of the day cell (always the first column).
    pub column: u32,

    /// Day name (text before the date in the cell).
    pub name: String,

    /// Date of the day, at midnight UTC.
    pub date: DateTime<Utc>,
}
|
||||
|
||||
/// Data on the time of lessons from the second column of the schedule.
pub struct BoundariesCellInfo {
    /// Start/end timestamps of the lesson slot.
    pub time_range: LessonBoundaries,

    /// Type of lesson (regular "пара" vs. additional slot).
    pub lesson_type: LessonType,

    /// One-based lesson index, present only for regular lessons.
    pub default_index: Option<u32>,

    /// The (possibly merged) cell bounds: ((start_row, start_col), (end_row, end_col)),
    /// end-exclusive.
    pub xls_range: ((u32, u32), (u32, u32)),
}
|
||||
/// Obtaining a "skeleton" schedule from the working sheet.
|
||||
fn parse_skeleton(
|
||||
worksheet: &WorkSheet,
|
||||
) -> Result<(Vec<DayCellInfo>, Vec<GroupCellInfo>), crate::parser::error::Error> {
|
||||
let mut groups: Vec<GroupCellInfo> = Vec::new();
|
||||
let mut days: Vec<(u32, String, Option<DateTime<Utc>>)> = Vec::new();
|
||||
|
||||
let worksheet_start = worksheet
|
||||
.start()
|
||||
.ok_or(error::Error::UnknownWorkSheetRange)?;
|
||||
let worksheet_end = worksheet.end().ok_or(error::Error::UnknownWorkSheetRange)?;
|
||||
|
||||
let mut row = worksheet_start.0;
|
||||
|
||||
while row < worksheet_end.0 {
|
||||
row += 1;
|
||||
|
||||
let day_full_name = or_continue!(worksheet.get_string_from_cell(row, 0));
|
||||
|
||||
// parse groups row when days column will found
|
||||
if groups.is_empty() {
|
||||
// переход на предыдущую строку
|
||||
row -= 1;
|
||||
|
||||
for column in (worksheet_start.1 + 2)..=worksheet_end.1 {
|
||||
groups.push(GroupCellInfo {
|
||||
column,
|
||||
name: or_continue!(worksheet.get_string_from_cell(row, column))
|
||||
.replace(" ", ""),
|
||||
});
|
||||
}
|
||||
|
||||
// возврат на текущую строку
|
||||
row += 1;
|
||||
}
|
||||
|
||||
let (day_name, day_date) = {
|
||||
let space_index = match day_full_name.find(' ') {
|
||||
Some(index) => {
|
||||
if index < 10 {
|
||||
break;
|
||||
} else {
|
||||
index
|
||||
}
|
||||
}
|
||||
None => break,
|
||||
};
|
||||
|
||||
let name = day_full_name[..space_index].to_string();
|
||||
|
||||
let date_slice = &day_full_name[space_index + 1..];
|
||||
let date = NaiveDate::parse_from_str(date_slice, "%d.%m.%Y")
|
||||
.map(|date| date.and_time(NaiveTime::default()).and_utc())
|
||||
.ok();
|
||||
|
||||
(name, date)
|
||||
};
|
||||
|
||||
days.push((row, day_name, day_date));
|
||||
}
|
||||
|
||||
// fix unparsable day dates
|
||||
let days_max = days.len().min(5);
|
||||
|
||||
for i in 0..days_max {
|
||||
if days[i].2.is_none() && days[i + 1].2.is_some() {
|
||||
days[i].2 = Some(days[i + 1].2.unwrap() - Duration::days(1));
|
||||
}
|
||||
}
|
||||
|
||||
for i in 0..days_max {
|
||||
let i = days_max - i;
|
||||
|
||||
if days[i - 1].2.is_none() && days[i].2.is_some() {
|
||||
days[i - 1].2 = Some(days[i].2.unwrap() - Duration::days(1));
|
||||
}
|
||||
}
|
||||
|
||||
let days = days
|
||||
.into_iter()
|
||||
.map(|day| DayCellInfo {
|
||||
row: day.0,
|
||||
column: 0,
|
||||
name: day.1,
|
||||
date: day.2.unwrap(),
|
||||
})
|
||||
.collect();
|
||||
|
||||
Ok((days, groups))
|
||||
}
|
||||
|
||||
/// The result of obtaining a lesson from the cell.
enum LessonParseResult {
    /// List of lessons long from one to two.
    ///
    /// The number of lessons will be equal to one if the couple is the first in the day,
    /// otherwise the list from the change template and the lesson itself will be returned.
    Lessons(Vec<Lesson>),

    /// Street on which the Polytechnic Corps is located.
    Street(String),
}
|
||||
|
||||
// noinspection GrazieInspection
|
||||
/// Obtaining a non-standard type of lesson by name.
|
||||
fn guess_lesson_type(text: &String) -> Option<LessonType> {
|
||||
static MAP: LazyLock<HashMap<&str, LessonType>> = LazyLock::new(|| {
|
||||
HashMap::from([
|
||||
("консультация", LessonType::Consultation),
|
||||
("самостоятельная работа", LessonType::IndependentWork),
|
||||
("зачет", LessonType::Exam),
|
||||
("зачет с оценкой", LessonType::ExamWithGrade),
|
||||
("экзамен", LessonType::ExamDefault),
|
||||
("курсовой проект", LessonType::CourseProject),
|
||||
("защита курсового проекта", LessonType::CourseProjectDefense),
|
||||
])
|
||||
});
|
||||
|
||||
let name_lower = text.to_lowercase();
|
||||
|
||||
match MAP
|
||||
.iter()
|
||||
.map(|(text, lesson_type)| (lesson_type, strsim::levenshtein(text, &*name_lower)))
|
||||
.filter(|x| x.1 <= 4)
|
||||
.min_by_key(|(_, score)| *score)
|
||||
{
|
||||
None => None,
|
||||
Some(v) => Some(v.0.clone()),
|
||||
}
|
||||
}
|
||||
|
||||
/// Getting a pair or street from a cell.
///
/// Reads the cell at the intersection of the lesson-boundaries row and the
/// group column. Returns:
/// * `Lessons(vec![])` when the cell is empty;
/// * `Street(..)` when the cell matches the "street + house number" pattern;
/// * `Lessons(vec![lesson])` for the first lesson of the day;
/// * `Lessons(vec![break, lesson])` otherwise, where `break` fills the gap
///   since the previous lesson.
fn parse_lesson(
    worksheet: &WorkSheet,
    day: &Day,
    day_boundaries: &Vec<BoundariesCellInfo>,
    lesson_boundaries: &BoundariesCellInfo,
    group_column: u32,
) -> Result<LessonParseResult, crate::parser::error::Error> {
    let row = lesson_boundaries.xls_range.0.0;

    let name = {
        let cell_data = match worksheet.get_string_from_cell(row, group_column) {
            Some(x) => x,
            // Empty cell — no lesson in this slot.
            None => return Ok(Lessons(Vec::new())),
        };

        // Matches e.g. "Пушкина, 5" — a building address rather than a lesson.
        static OTHER_STREET_RE: LazyLock<Regex> =
            LazyLock::new(|| Regex::new(r"^[А-Я][а-я]+[,\s]\d+$").unwrap());

        if OTHER_STREET_RE.is_match(&cell_data) {
            return Ok(Street(cell_data));
        }

        cell_data
    };

    // The lesson cell may be merged across several boundary rows.
    let cell_range = worksheet.get_merge_from_start(row, group_column);

    let (default_range, lesson_time) = {
        // Find the boundaries row whose merged range ends where the lesson
        // cell ends — that row supplies the lesson's end time.
        let end_time_arr = day_boundaries
            .iter()
            .filter(|time| time.xls_range.1.0 == cell_range.1.0)
            .collect::<Vec<&BoundariesCellInfo>>();

        let end_time = end_time_arr
            .first()
            .ok_or(error::Error::LessonTimeNotFound(ErrorCellPos {
                row,
                column: group_column,
            }))?;

        // Lesson index pair [first, last], present only for regular lessons.
        let range: Option<[u8; 2]> = if lesson_boundaries.default_index != None {
            let default = lesson_boundaries.default_index.unwrap() as u8;
            Some([default, end_time.default_index.unwrap() as u8])
        } else {
            None
        };

        let time = LessonBoundaries {
            start: lesson_boundaries.time_range.start,
            end: end_time.time_range.end,
        };

        Ok((range, time))
    }?;

    let (name, mut subgroups, lesson_type) = parse_name_and_subgroups(&name)?;

    {
        // Cabinets are listed in the column just right of the lesson cell.
        let cabinets: Vec<String> = parse_cabinets(
            worksheet,
            (cell_range.0.0, cell_range.1.0),
            group_column + 1,
        );

        let cab_count = cabinets.len();

        if cab_count == 1 {
            // A single cabinet applies to every subgroup.
            let cab = Some(cabinets.get(0).unwrap().clone());

            for subgroup in &mut subgroups {
                if let Some(subgroup) = subgroup {
                    subgroup.cabinet = cab.clone()
                }
            }
        } else if cab_count == 2 {
            // Two cabinets: pad the subgroup list to two entries, then pair
            // cabinets with subgroups positionally, creating teacher-less
            // subgroups where needed.
            while subgroups.len() < cab_count {
                subgroups.push(subgroups.last().unwrap_or(&None).clone());
            }

            for i in 0..cab_count {
                let subgroup = subgroups.get_mut(i).unwrap();
                let cabinet = Some(cabinets.get(i).unwrap().clone());

                match subgroup {
                    None => {
                        let _ = subgroup.insert(LessonSubGroup {
                            teacher: None,
                            cabinet,
                        });
                    }
                    Some(subgroup) => {
                        subgroup.cabinet = cabinet;
                    }
                }
            }
        }
    };

    let lesson = Lesson {
        // Fall back to the slot's own type when the name didn't reveal one.
        lesson_type: lesson_type.unwrap_or(lesson_boundaries.lesson_type.clone()),
        range: default_range,
        name: Some(name),
        time: lesson_time,
        // Two explicitly-absent subgroups collapse to "no subgroup info".
        subgroups: if subgroups.len() == 2
            && subgroups.get(0).unwrap().is_none()
            && subgroups.get(1).unwrap().is_none()
        {
            None
        } else {
            Some(subgroups)
        },
        group: None,
    };

    let prev_lesson = if day.lessons.is_empty() {
        // First lesson of the day — no break to synthesize.
        return Ok(Lessons(Vec::from([lesson])));
    } else {
        &day.lessons[day.lessons.len() - 1]
    };

    // Emit a synthetic Break spanning the gap before this lesson.
    Ok(Lessons(Vec::from([
        Lesson {
            lesson_type: Break,
            range: None,
            name: None,
            time: LessonBoundaries {
                start: prev_lesson.time.end,
                end: lesson.time.start,
            },
            subgroups: Some(Vec::new()),
            group: None,
        },
        lesson,
    ])))
}
|
||||
|
||||
/// Obtaining a list of cabinets to the right of the lesson cell.
|
||||
fn parse_cabinets(worksheet: &WorkSheet, row_range: (u32, u32), column: u32) -> Vec<String> {
|
||||
let mut cabinets: Vec<String> = Vec::new();
|
||||
|
||||
for row in row_range.0..row_range.1 {
|
||||
let raw = or_continue!(worksheet.get_string_from_cell(row, column));
|
||||
|
||||
let clean = raw.replace("\n", " ");
|
||||
let parts: Vec<&str> = clean.split(" ").collect();
|
||||
|
||||
parts.iter().take(2).for_each(|part| {
|
||||
let clean_part = part.to_string().trim().to_string();
|
||||
|
||||
cabinets.push(clean_part);
|
||||
});
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
cabinets
|
||||
}
|
||||
|
||||
//noinspection GrazieInspection
/// Getting the "pure" name of the lesson and list of teachers from the text of the lesson cell.
///
/// Returns `(lesson name, subgroups, guessed lesson type)`. The subgroup list
/// is empty when no teacher names are found, holds one entry when a teacher
/// covers the whole group, and two entries (either possibly `None`) when the
/// group is split into numbered subgroups.
fn parse_name_and_subgroups(
    text: &String,
) -> Result<(String, Vec<Option<LessonSubGroup>>, Option<LessonType>), crate::parser::error::Error>
{
    // A lesson cell consists of up to three parts:
    // 1. The lesson name itself.
    // 2. The list of teachers and subgroup numbers.
    // 3. A "modifier" (most of the time), e.g. an exam marker.
    //
    // Regex for extracting teacher full names and subgroup numbers (part 2):
    // (?:[А-Я][а-я]+\s?(?:[А-Я][\s.]*){2}(?:\(\d\s?[а-я]+\))?(?:, )?)+[\s.]*
    //
    // Breakdown:
    // (?:
    //   [А-Я][а-я]+        - Surname.
    //   \s?                - Optional stray space.
    //   (?:[А-Я][\s.]*){2} - Initials, tolerating stray spaces and dots.
    //   (?:
    //     \(               - Subgroup opening bracket.
    //     \s?              - Optional stray space.
    //     \d               - Subgroup number.
    //     \s?              - Optional stray space.
    //     [а-я\s]+         - The word "подгруппа", tolerating typos.
    //     \)               - Subgroup closing bracket.
    //   )?                 - The explicit subgroup marker may be absent.
    //   (?:, )?            - Separator between entries.
    // )+
    // [\s.]*               - Swallow trailing junk so it doesn't leak into part 3.

    static NAMES_REGEX: LazyLock<Regex> = LazyLock::new(|| {
        Regex::new(
            r"(?:[А-Я][а-я]+\s?(?:[А-Я][\s.]*){2}(?:\(\s*\d\s*[а-я\s]+\))?(?:[\s,]+)?){1,2}+[\s.,]*",
        )
        .unwrap()
    });

    // Whitespace normalization.
    static CLEAN_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"[\s\n\t]+").unwrap());

    let text = CLEAN_RE
        .replace(&text.replace(&[' ', '\t', '\n'], " "), " ")
        .to_string();

    let (lesson_name, subgroups, lesson_type) = match NAMES_REGEX.captures(&text) {
        Some(captures) => {
            let capture = captures.get(0).unwrap();

            let subgroups: Vec<Option<LessonSubGroup>> = {
                // Strip spaces and dots so names become "SurnameAB(1...)" chunks.
                let src = capture.as_str().replace(&[' ', '.'], "");

                let mut shared_subgroup = false;
                let mut subgroups: [Option<LessonSubGroup>; 2] = [None, None];

                for name in src.split(',') {
                    let open_bracket_index = name.find('(');

                    // Subgroup number; 0 means "whole group" (no bracket).
                    let number: u8 = open_bracket_index
                        .map_or(0, |index| name[(index + 1)..(index + 2)].parse().unwrap());

                    let teacher_name = {
                        let name_end = open_bracket_index.unwrap_or_else(|| name.len());

                        // Rebuild "Surname A.B." from the compacted chunk:
                        // the last four chars are the two initials.
                        // (This offset math took embarrassingly long to get right.)
                        format!(
                            "{} {}.{}.",
                            name.get(..name_end - 4).unwrap(),
                            name.get(name_end - 4..name_end - 2).unwrap(),
                            name.get(name_end - 2..name_end).unwrap(),
                        )
                    };

                    let lesson = Some(LessonSubGroup {
                        cabinet: None,
                        teacher: Some(teacher_name),
                    });

                    match number {
                        0 => {
                            // Whole-group teacher: one shared subgroup, stop.
                            subgroups[0] = lesson;
                            subgroups[1] = None;
                            shared_subgroup = true;
                            break;
                        }
                        num => {
                            // 1 - 1 = 0 | 2 - 1 = 1 | 3 - 1 = 2 (schedule index to array index)
                            // 0 % 2 = 0 | 1 % 2 = 1 | 2 % 2 = 0 (clamp)
                            let normalised = (num - 1) % 2;

                            subgroups[normalised as usize] = lesson;
                        }
                    }
                }

                if shared_subgroup {
                    Vec::from([subgroups[0].take()])
                } else {
                    Vec::from(subgroups)
                }
            };

            // Text before the teacher list is the lesson name; text after it
            // is the "modifier" used to guess a non-standard lesson type.
            let name = text[..capture.start()].trim().to_string();
            let extra = text[capture.end()..].trim().to_string();

            let lesson_type = if extra.len() > 4 {
                let result = guess_lesson_type(&extra);

                if result.is_none() {
                    #[cfg(not(debug_assertions))]
                    sentry::capture_message(
                        &*format!("Не удалось угадать тип пары '{}'!", extra),
                        sentry::Level::Warning,
                    );

                    #[cfg(debug_assertions)]
                    log::warn!("Не удалось угадать тип пары '{}'!", extra);
                }

                result
            } else {
                None
            };

            (name, subgroups, lesson_type)
        }
        // No teacher list found: the whole cell text is the lesson name.
        None => (text, Vec::new(), None),
    };

    Ok((lesson_name, subgroups, lesson_type))
}
|
||||
|
||||
/// Getting the start and end of a pair from a cell in the first column of a document.
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `cell_data`: text in cell.
|
||||
/// * `date`: date of the current day.
|
||||
fn parse_lesson_boundaries_cell(
|
||||
cell_data: &String,
|
||||
date: DateTime<Utc>,
|
||||
) -> Option<LessonBoundaries> {
|
||||
static TIME_RE: LazyLock<Regex> =
|
||||
LazyLock::new(|| Regex::new(r"(\d+\.\d+)-(\d+\.\d+)").unwrap());
|
||||
|
||||
let parse_res = if let Some(captures) = TIME_RE.captures(cell_data) {
|
||||
captures
|
||||
} else {
|
||||
return None;
|
||||
};
|
||||
|
||||
let start_match = parse_res.get(1).unwrap().as_str();
|
||||
let start_parts: Vec<&str> = start_match.split(".").collect();
|
||||
|
||||
let end_match = parse_res.get(2).unwrap().as_str();
|
||||
let end_parts: Vec<&str> = end_match.split(".").collect();
|
||||
|
||||
static GET_TIME: fn(DateTime<Utc>, &Vec<&str>) -> DateTime<Utc> = |date, parts| {
|
||||
date + Duration::hours(parts[0].parse::<i64>().unwrap() - 4)
|
||||
+ Duration::minutes(parts[1].parse::<i64>().unwrap())
|
||||
};
|
||||
|
||||
Some(LessonBoundaries {
|
||||
start: GET_TIME(date.clone(), &start_parts),
|
||||
end: GET_TIME(date, &end_parts),
|
||||
})
|
||||
}
|
||||
|
||||
/// Parse the column of the document to obtain a list of day's lesson boundaries.
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `worksheet`: document.
|
||||
/// * `date`: date of the current day.
|
||||
/// * `row_range`: row boundaries of the current day.
|
||||
/// * `column`: column with the required data.
|
||||
fn parse_day_boundaries(
|
||||
worksheet: &WorkSheet,
|
||||
date: DateTime<Utc>,
|
||||
row_range: (u32, u32),
|
||||
column: u32,
|
||||
) -> Result<Vec<BoundariesCellInfo>, crate::parser::error::Error> {
|
||||
let mut day_times: Vec<BoundariesCellInfo> = Vec::new();
|
||||
|
||||
for row in row_range.0..row_range.1 {
|
||||
let time_cell = if let Some(str) = worksheet.get_string_from_cell(row, column) {
|
||||
str
|
||||
} else {
|
||||
continue;
|
||||
};
|
||||
|
||||
let lesson_time = parse_lesson_boundaries_cell(&time_cell, date.clone()).ok_or(
|
||||
error::Error::LessonBoundaries(ErrorCell::new(row, column, time_cell.clone())),
|
||||
)?;
|
||||
|
||||
// type
|
||||
let lesson_type = if time_cell.contains("пара") {
|
||||
LessonType::Default
|
||||
} else {
|
||||
LessonType::Additional
|
||||
};
|
||||
|
||||
// lesson index
|
||||
let default_index = if lesson_type == LessonType::Default {
|
||||
Some(
|
||||
time_cell
|
||||
.chars()
|
||||
.next()
|
||||
.unwrap()
|
||||
.to_string()
|
||||
.parse::<u32>()
|
||||
.unwrap(),
|
||||
)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
day_times.push(BoundariesCellInfo {
|
||||
time_range: lesson_time,
|
||||
lesson_type,
|
||||
default_index,
|
||||
xls_range: worksheet.get_merge_from_start(row, column),
|
||||
});
|
||||
}
|
||||
|
||||
Ok(day_times)
|
||||
}
|
||||
|
||||
/// Parse the column of the document to obtain a list of week's lesson boundaries.
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `worksheet`: document.
|
||||
/// * `week_markup`: markup of the current week.
|
||||
fn parse_week_boundaries(
|
||||
worksheet: &WorkSheet,
|
||||
week_markup: &Vec<DayCellInfo>,
|
||||
) -> Result<Vec<Vec<BoundariesCellInfo>>, crate::parser::error::Error> {
|
||||
let mut result: Vec<Vec<BoundariesCellInfo>> = Vec::new();
|
||||
|
||||
let worksheet_end_row = worksheet.end().unwrap().0;
|
||||
let lesson_time_column = week_markup[0].column + 1;
|
||||
|
||||
for day_index in 0..week_markup.len() {
|
||||
let day_markup = &week_markup[day_index];
|
||||
|
||||
// Если текущий день не последнему, то индекс строки следующего дня.
|
||||
// Если текущий день - последний, то индекс последней строки документа.
|
||||
let end_row = if day_index != week_markup.len() - 1 {
|
||||
week_markup[day_index + 1].row
|
||||
} else {
|
||||
worksheet_end_row
|
||||
};
|
||||
|
||||
let day_boundaries = parse_day_boundaries(
|
||||
&worksheet,
|
||||
day_markup.date.clone(),
|
||||
(day_markup.row, end_row),
|
||||
lesson_time_column,
|
||||
)?;
|
||||
|
||||
result.push(day_boundaries);
|
||||
}
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
/// Conversion of the list of couples of groups in the list of lessons of teachers.
///
/// Builds a per-teacher schedule by scanning every group's lessons and
/// re-attributing each subgroup's lesson to its teacher, tagged with the
/// group name. Breaks and teacher-less subgroups are skipped.
fn convert_groups_to_teachers(
    groups: &HashMap<String, ScheduleEntry>,
) -> HashMap<String, ScheduleEntry> {
    let mut teachers: HashMap<String, ScheduleEntry> = HashMap::new();

    // Template week: same day names/streets/dates as any group, no lessons.
    // NOTE(review): `unwrap` panics when `groups` is empty — callers currently
    // always pass a non-empty map; confirm before reusing elsewhere.
    let empty_days: Vec<Day> = groups
        .values()
        .next()
        .unwrap()
        .days
        .iter()
        .map(|day| Day {
            name: day.name.clone(),
            street: day.street.clone(),
            date: day.date.clone(),
            lessons: vec![],
        })
        .collect();

    for group in groups.values() {
        for (index, day) in group.days.iter().enumerate() {
            for group_lesson in &day.lessons {
                // Breaks are per-group filler; they don't belong to a teacher.
                if group_lesson.lesson_type == Break {
                    continue;
                }

                if group_lesson.subgroups.is_none() {
                    continue;
                }

                let subgroups = group_lesson.subgroups.as_ref().unwrap();

                for subgroup in subgroups {
                    let teacher = match subgroup {
                        None => continue,
                        Some(subgroup) => match &subgroup.teacher {
                            None => continue,
                            Some(teacher) => teacher,
                        },
                    };

                    // Placeholder teacher inserted for malformed cells.
                    if teacher == "Ошибка в расписании" {
                        continue;
                    }

                    if !teachers.contains_key(teacher) {
                        teachers.insert(
                            teacher.clone(),
                            ScheduleEntry {
                                name: teacher.clone(),
                                days: empty_days.to_vec(),
                            },
                        );
                    }

                    let teacher_day = teachers
                        .get_mut(teacher)
                        .unwrap()
                        .days
                        .get_mut(index)
                        .unwrap();

                    teacher_day.lessons.push({
                        // Clone the lesson and tag it with the source group.
                        let mut lesson = group_lesson.clone();
                        lesson.group = Some(group.name.clone());

                        lesson
                    });
                }
            }
        }
    }

    // Order each teacher's day by the lesson's last slot index.
    // NOTE(review): assumes every non-Break lesson reaching this point has
    // `range: Some(..)` — an Additional lesson with `range: None` would panic
    // here; TODO confirm.
    teachers.iter_mut().for_each(|(_, teacher)| {
        teacher.days.iter_mut().for_each(|day| {
            day.lessons
                .sort_by(|a, b| a.range.as_ref().unwrap()[1].cmp(&b.range.as_ref().unwrap()[1]))
        })
    });

    teachers
}
|
||||
|
||||
/// Reading XLS Document from the buffer and converting it into the schedule ready to use.
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `buffer`: XLS data containing schedule.
|
||||
///
|
||||
/// returns: Result<ParseResult, crate::parser::error::Error>
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// ```
|
||||
/// use schedule_parser::parse_xls;
|
||||
///
|
||||
/// let result = parse_xls(&include_bytes!("../../schedule.xls").to_vec());
|
||||
///
|
||||
/// assert!(result.is_ok(), "{}", result.err().unwrap());
|
||||
///
|
||||
/// assert_ne!(result.as_ref().unwrap().groups.len(), 0);
|
||||
/// assert_ne!(result.as_ref().unwrap().teachers.len(), 0);
|
||||
/// ```
|
||||
pub fn parse_xls(buffer: &Vec<u8>) -> Result<ParsedSchedule, crate::parser::error::Error> {
|
||||
let cursor = Cursor::new(&buffer);
|
||||
let mut workbook: Xls<_> =
|
||||
open_workbook_from_rs(cursor).map_err(|e| error::Error::BadXLS(std::sync::Arc::new(e)))?;
|
||||
|
||||
let worksheet = {
|
||||
let (worksheet_name, worksheet) = workbook
|
||||
.worksheets()
|
||||
.first()
|
||||
.ok_or(error::Error::NoWorkSheets)?
|
||||
.clone();
|
||||
|
||||
let worksheet_merges = workbook
|
||||
.worksheet_merge_cells(&*worksheet_name)
|
||||
.ok_or(error::Error::NoWorkSheets)?;
|
||||
|
||||
WorkSheet {
|
||||
data: worksheet,
|
||||
merges: worksheet_merges,
|
||||
}
|
||||
};
|
||||
|
||||
let (week_markup, groups_markup) = parse_skeleton(&worksheet)?;
|
||||
let week_boundaries = parse_week_boundaries(&worksheet, &week_markup)?;
|
||||
|
||||
let mut groups: HashMap<String, ScheduleEntry> = HashMap::new();
|
||||
|
||||
for group_markup in groups_markup {
|
||||
let mut group = ScheduleEntry {
|
||||
name: group_markup.name,
|
||||
days: Vec::new(),
|
||||
};
|
||||
|
||||
for day_index in 0..(&week_markup).len() {
|
||||
let day_markup = &week_markup[day_index];
|
||||
|
||||
let mut day = Day {
|
||||
name: day_markup.name.clone(),
|
||||
street: None,
|
||||
date: day_markup.date,
|
||||
lessons: Vec::new(),
|
||||
};
|
||||
|
||||
let day_boundaries = &week_boundaries[day_index];
|
||||
|
||||
for lesson_boundaries in day_boundaries {
|
||||
match &mut parse_lesson(
|
||||
&worksheet,
|
||||
&day,
|
||||
&day_boundaries,
|
||||
&lesson_boundaries,
|
||||
group_markup.column,
|
||||
)? {
|
||||
Lessons(lesson) => day.lessons.append(lesson),
|
||||
Street(street) => day.street = Some(street.to_owned()),
|
||||
}
|
||||
}
|
||||
|
||||
group.days.push(day);
|
||||
}
|
||||
|
||||
groups.insert(group.name.clone(), group);
|
||||
}
|
||||
|
||||
Ok(ParsedSchedule {
|
||||
teachers: convert_groups_to_teachers(&groups),
|
||||
groups,
|
||||
})
|
||||
}
|
||||
|
||||
#[cfg(any(test, feature = "test"))]
pub mod test_utils {
    //! Shared test fixture: parses the bundled reference XLS schedule.
    use super::*;
    use base::ParsedSchedule;

    /// Parses the reference schedule file shipped with the repository.
    pub fn test_result() -> Result<ParsedSchedule, crate::parser::error::Error> {
        parse_xls(&include_bytes!("../../../../test-data/engels-polytechnic.xls").to_vec())
    }
}
|
||||
|
||||
#[cfg(test)]
pub mod tests {
    /// Smoke test: the reference file parses and yields non-empty schedules.
    #[test]
    fn read() {
        let result = super::test_utils::test_result();

        assert!(result.is_ok(), "{}", result.err().unwrap());

        assert_ne!(result.as_ref().unwrap().groups.len(), 0);
        assert_ne!(result.as_ref().unwrap().teachers.len(), 0);
    }

    /// Checks that a lesson split into two subgroups gets one cabinet per
    /// subgroup (fixture: group "ИС-214/23", Thursday).
    #[test]
    fn test_split_lesson() {
        let result = super::test_utils::test_result();
        assert!(result.is_ok(), "{}", result.err().unwrap());

        let result = result.unwrap();
        assert!(result.groups.contains_key("ИС-214/23"));

        let group = result.groups.get("ИС-214/23").unwrap();

        // Day index 3 == Thursday (week starts at Monday).
        let thursday = group.days.get(3).unwrap();
        assert_eq!(thursday.lessons.len(), 1);

        let lesson = &thursday.lessons[0];
        assert_eq!(lesson.range.unwrap()[1], 3);
        assert!(lesson.subgroups.is_some());

        let subgroups = lesson.subgroups.as_ref().unwrap();
        assert_eq!(subgroups.len(), 2);

        assert_eq!(
            subgroups[0].as_ref().unwrap().cabinet,
            Some("44".to_string())
        );

        assert_eq!(
            subgroups[1].as_ref().unwrap().cabinet,
            Some("43".to_string())
        );
    }
}
|
||||
@@ -0,0 +1,58 @@
|
||||
use regex::Regex;
|
||||
use std::ops::Deref;
|
||||
use std::sync::LazyLock;
|
||||
|
||||
/// XLS WorkSheet data.
pub struct WorkSheet {
    // Raw cell grid from calamine.
    pub data: calamine::Range<calamine::Data>,
    // Merged-cell regions, needed to resolve multi-row/column cells.
    pub merges: Vec<calamine::Dimensions>,
}
|
||||
|
||||
// Lets `calamine::Range` methods (`get`, `start`, `end`, …) be called
// directly on a `WorkSheet`.
impl Deref for WorkSheet {
    type Target = calamine::Range<calamine::Data>;

    fn deref(&self) -> &Self::Target {
        &self.data
    }
}
|
||||
|
||||
impl WorkSheet {
|
||||
/// Getting a line from the required cell.
|
||||
pub fn get_string_from_cell(&self, row: u32, col: u32) -> Option<String> {
|
||||
let cell_data = if let Some(data) = self.get((row as usize, col as usize)) {
|
||||
data.to_string()
|
||||
} else {
|
||||
return None;
|
||||
};
|
||||
|
||||
if cell_data.trim().is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
static NL_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"[\n\r]+").unwrap());
|
||||
static SP_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\s+").unwrap());
|
||||
|
||||
let trimmed_data = SP_RE
|
||||
.replace_all(&NL_RE.replace_all(&cell_data, " "), " ")
|
||||
.trim()
|
||||
.to_string();
|
||||
|
||||
if trimmed_data.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(trimmed_data)
|
||||
}
|
||||
}
|
||||
|
||||
/// Obtaining the boundaries of the cell along its upper left coordinate.
|
||||
pub fn get_merge_from_start(&self, row: u32, column: u32) -> ((u32, u32), (u32, u32)) {
|
||||
match self
|
||||
.merges
|
||||
.iter()
|
||||
.find(|merge| merge.start.0 == row && merge.start.1 == column)
|
||||
{
|
||||
Some(merge) => (merge.start, (merge.end.0 + 1, merge.end.1 + 1)),
|
||||
None => ((row, column), (row + 1, column + 1)),
|
||||
}
|
||||
}
|
||||
}
|
||||
263
providers/provider-engels-polytechnic/src/updater.rs
Normal file
263
providers/provider-engels-polytechnic/src/updater.rs
Normal file
@@ -0,0 +1,263 @@
|
||||
use crate::parser::parse_xls;
|
||||
use crate::updater::error::{Error, QueryUrlError, SnapshotCreationError};
|
||||
use crate::xls_downloader::{FetchError, XlsDownloader};
|
||||
use base::ScheduleSnapshot;
|
||||
|
||||
/// Where the updater obtains its schedule data from.
pub enum UpdateSource {
    /// Use an already-built snapshot as-is, without any downloading.
    Prepared(ScheduleSnapshot),

    /// Always download the XLS file from this fixed URL.
    Url(String),

    /// Resolve the current XLS URL at runtime by invoking a Yandex Cloud
    /// Function.
    GrabFromSite {
        /// Yandex Cloud API key used to authorize the function call.
        yandex_api_key: String,
        /// ID of the Yandex Cloud Function to invoke.
        yandex_func_id: String,
    },
}
|
||||
|
||||
/// Downloads, parses, and refreshes the schedule from the configured source.
pub struct Updater {
    // Stateful XLS downloader; remembers the last URL it fetched from.
    downloader: XlsDownloader,
    // Where new schedule data comes from (prepared snapshot, fixed URL, or FaaS).
    update_source: UpdateSource,
}
|
||||
|
||||
/// Error types returned by [`Updater`] operations.
pub mod error {
    use crate::xls_downloader::FetchError;
    use derive_more::{Display, Error};

    /// Top-level updater error.
    #[derive(Debug, Display, Error)]
    pub enum Error {
        /// An error occurred while querying the Yandex Cloud API for a URL.
        ///
        /// This may result from network failures, invalid API credentials, or issues with the Yandex Cloud Function invocation.
        /// See [`QueryUrlError`] for more details about specific causes.
        QueryUrlFailed(QueryUrlError),

        /// The schedule snapshot creation process failed.
        ///
        /// This can happen due to URL conflicts (same URL already in use), failed network requests,
        /// download errors, or invalid XLS file content. See [`SnapshotCreationError`] for details.
        SnapshotCreationFailed(SnapshotCreationError),
    }

    /// Errors that may occur when querying the Yandex Cloud API to retrieve a URL.
    #[derive(Debug, Display, Error)]
    pub enum QueryUrlError {
        /// Occurs when the request to the Yandex Cloud API fails.
        ///
        /// This may be due to network issues, invalid API key, incorrect function ID, or other
        /// problems with the Yandex Cloud Function invocation.
        #[display("An error occurred during the request to the Yandex Cloud API: {_0}")]
        RequestFailed(reqwest::Error),
    }

    /// Errors that may occur during the creation of a schedule snapshot.
    #[derive(Debug, Display, Error)]
    pub enum SnapshotCreationError {
        /// The URL is the same as the one already being used (no update needed).
        #[display("The URL is the same as the one already being used.")]
        SameUrl,

        /// The URL query for the XLS file failed to execute, either due to network issues or invalid API parameters.
        #[display("Failed to fetch URL: {_0}")]
        FetchFailed(FetchError),

        /// Downloading the XLS file content failed after successfully obtaining the URL.
        #[display("Download failed: {_0}")]
        DownloadFailed(FetchError),

        /// The XLS file could not be parsed into a valid schedule format.
        #[display("Schedule data is invalid: {_0}")]
        InvalidSchedule(crate::parser::error::Error),
    }
}
|
||||
|
||||
impl Updater {
|
||||
/// Constructs a new `ScheduleSnapshot` by downloading and parsing schedule data from the specified URL.
|
||||
///
|
||||
/// This method first checks if the provided URL is the same as the one already configured in the downloader.
|
||||
/// If different, it updates the downloader's URL, fetches the XLS content, parses it, and creates a snapshot.
|
||||
/// Errors are returned for URL conflicts, network issues, download failures, or invalid data.
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `downloader`: A mutable reference to an `XLSDownloader` implementation used to fetch and parse the schedule data.
|
||||
/// * `url`: The source URL pointing to the XLS file containing schedule data.
|
||||
///
|
||||
/// returns: Result<ScheduleSnapshot, SnapshotCreationError>
|
||||
pub async fn new_snapshot(
|
||||
downloader: &mut XlsDownloader,
|
||||
url: String,
|
||||
) -> Result<ScheduleSnapshot, SnapshotCreationError> {
|
||||
if downloader.url.as_ref().is_some_and(|_url| _url.eq(&url)) {
|
||||
return Err(SnapshotCreationError::SameUrl);
|
||||
}
|
||||
|
||||
let head_result = downloader.set_url(&*url).await.map_err(|error| {
|
||||
if let FetchError::Unknown(error) = &error {
|
||||
sentry::capture_error(&error);
|
||||
}
|
||||
|
||||
SnapshotCreationError::FetchFailed(error)
|
||||
})?;
|
||||
|
||||
let xls_data = downloader
|
||||
.fetch(false)
|
||||
.await
|
||||
.map_err(|error| {
|
||||
if let FetchError::Unknown(error) = &error {
|
||||
sentry::capture_error(&error);
|
||||
}
|
||||
|
||||
SnapshotCreationError::DownloadFailed(error)
|
||||
})?
|
||||
.data
|
||||
.unwrap();
|
||||
|
||||
let parse_result = parse_xls(&xls_data).map_err(|error| {
|
||||
sentry::capture_error(&error);
|
||||
|
||||
SnapshotCreationError::InvalidSchedule(error)
|
||||
})?;
|
||||
|
||||
Ok(ScheduleSnapshot {
|
||||
fetched_at: head_result.requested_at,
|
||||
updated_at: head_result.uploaded_at,
|
||||
url,
|
||||
data: parse_result,
|
||||
})
|
||||
}
|
||||
|
||||
/// Queries the Yandex Cloud Function (FaaS) to obtain a URL for the schedule file.
|
||||
///
|
||||
/// This sends a POST request to the specified Yandex Cloud Function endpoint,
|
||||
/// using the provided API key for authentication. The returned URI is combined
|
||||
/// with the "https://politehnikum-eng.ru" base domain to form the complete URL.
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `api_key` - Authentication token for Yandex Cloud API
|
||||
/// * `func_id` - ID of the target Yandex Cloud Function to invoke
|
||||
///
|
||||
/// # Returns
|
||||
///
|
||||
/// Result containing:
|
||||
/// - `Ok(String)` - Complete URL constructed from the Function's response
|
||||
/// - `Err(QueryUrlError)` - If the request or response processing fails
|
||||
async fn query_url(api_key: &str, func_id: &str) -> Result<String, QueryUrlError> {
|
||||
let client = reqwest::Client::new();
|
||||
|
||||
let uri = client
|
||||
.post(format!(
|
||||
"https://functions.yandexcloud.net/{}?integration=raw",
|
||||
func_id
|
||||
))
|
||||
.header("Authorization", format!("Api-Key {}", api_key))
|
||||
.send()
|
||||
.await
|
||||
.map_err(|error| QueryUrlError::RequestFailed(error))?
|
||||
.text()
|
||||
.await
|
||||
.map_err(|error| QueryUrlError::RequestFailed(error))?;
|
||||
|
||||
Ok(format!("https://politehnikum-eng.ru{}", uri.trim()))
|
||||
}
|
||||
|
||||
/// Initializes the schedule by fetching the URL from the environment or Yandex Cloud Function (FaaS)
|
||||
/// and creating a [`ScheduleSnapshot`] with the downloaded data.
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `downloader`: Mutable reference to an `XLSDownloader` implementation used to fetch and parse the schedule
|
||||
/// * `app_env`: Reference to the application environment containing either a predefined URL or Yandex Cloud credentials
|
||||
///
|
||||
/// # Returns
|
||||
///
|
||||
/// Returns `Ok(())` if the snapshot was successfully initialized, or an `Error` if:
|
||||
/// - URL query to Yandex Cloud failed ([`QueryUrlError`])
|
||||
/// - Schedule snapshot creation failed ([`SnapshotCreationError`])
|
||||
pub async fn new(update_source: UpdateSource) -> Result<(Self, ScheduleSnapshot), Error> {
|
||||
let mut this = Updater {
|
||||
downloader: XlsDownloader::new(),
|
||||
update_source,
|
||||
};
|
||||
|
||||
if let UpdateSource::Prepared(snapshot) = &this.update_source {
|
||||
let snapshot = snapshot.clone();
|
||||
return Ok((this, snapshot));
|
||||
}
|
||||
|
||||
let url = match &this.update_source {
|
||||
UpdateSource::Url(url) => {
|
||||
log::info!("The default link {} will be used", url);
|
||||
url.clone()
|
||||
}
|
||||
UpdateSource::GrabFromSite {
|
||||
yandex_api_key,
|
||||
yandex_func_id,
|
||||
} => {
|
||||
log::info!("Obtaining a link using FaaS...");
|
||||
Self::query_url(yandex_api_key, yandex_func_id)
|
||||
.await
|
||||
.map_err(|error| Error::QueryUrlFailed(error))?
|
||||
}
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
log::info!("For the initial setup, a link {} will be used", url);
|
||||
|
||||
let snapshot = Self::new_snapshot(&mut this.downloader, url)
|
||||
.await
|
||||
.map_err(|error| Error::SnapshotCreationFailed(error))?;
|
||||
|
||||
log::info!("Schedule snapshot successfully created!");
|
||||
|
||||
Ok((this, snapshot))
|
||||
}
|
||||
|
||||
/// Updates the schedule snapshot by querying the latest URL from FaaS and checking for changes.
|
||||
/// If the URL hasn't changed, only updates the [`fetched_at`] timestamp. If changed, downloads
|
||||
/// and parses the new schedule data.
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `downloader`: XLS file downloader used to fetch and parse the schedule data
|
||||
/// * `app_env`: Application environment containing Yandex Cloud configuration and auto-update settings
|
||||
///
|
||||
/// returns: `Result<(), Error>` - Returns error if URL query fails or schedule parsing encounters issues
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// Use `unsafe` to access the initialized snapshot, guaranteed valid by prior `init()` call
|
||||
pub async fn update(
|
||||
&mut self,
|
||||
current_snapshot: &ScheduleSnapshot,
|
||||
) -> Result<ScheduleSnapshot, Error> {
|
||||
if let UpdateSource::Prepared(snapshot) = &self.update_source {
|
||||
let mut snapshot = snapshot.clone();
|
||||
snapshot.update();
|
||||
return Ok(snapshot);
|
||||
}
|
||||
|
||||
let url = match &self.update_source {
|
||||
UpdateSource::Url(url) => url.clone(),
|
||||
UpdateSource::GrabFromSite {
|
||||
yandex_api_key,
|
||||
yandex_func_id,
|
||||
} => Self::query_url(yandex_api_key.as_str(), yandex_func_id.as_str())
|
||||
.await
|
||||
.map_err(|error| Error::QueryUrlFailed(error))?,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
let snapshot = match Self::new_snapshot(&mut self.downloader, url).await {
|
||||
Ok(snapshot) => snapshot,
|
||||
Err(SnapshotCreationError::SameUrl) => {
|
||||
let mut clone = current_snapshot.clone();
|
||||
clone.update();
|
||||
|
||||
clone
|
||||
}
|
||||
Err(error) => return Err(Error::SnapshotCreationFailed(error)),
|
||||
};
|
||||
|
||||
Ok(snapshot)
|
||||
}
|
||||
}
|
||||
237
providers/provider-engels-polytechnic/src/xls_downloader.rs
Normal file
237
providers/provider-engels-polytechnic/src/xls_downloader.rs
Normal file
@@ -0,0 +1,237 @@
|
||||
use chrono::{DateTime, Utc};
|
||||
use derive_more::{Display, Error};
|
||||
use std::mem::discriminant;
|
||||
use std::sync::Arc;
|
||||
use utoipa::ToSchema;
|
||||
|
||||
/// XLS data retrieval errors.
#[derive(Clone, Debug, ToSchema, Display, Error)]
pub enum FetchError {
    /// File url is not set (no successful `set_url` call yet).
    #[display("The link to the timetable was not provided earlier.")]
    NoUrlProvided,

    /// Unknown transport-level error from the HTTP client.
    #[display("An unknown error occurred while downloading the file.")]
    #[schema(value_type = String)]
    Unknown(Arc<reqwest::Error>),

    /// Server returned a status code different from 200.
    #[display("Server returned a status code {status_code}.")]
    BadStatusCode { status_code: u16 },

    /// The url leads to a file of a different type (not an XLS document).
    #[display("The link leads to a file of type '{content_type}'.")]
    BadContentType { content_type: String },

    /// Server doesn't return expected headers.
    #[display("Server doesn't return expected header(s) '{expected_header}'.")]
    BadHeaders { expected_header: String },
}
|
||||
|
||||
impl FetchError {
|
||||
pub fn unknown(error: Arc<reqwest::Error>) -> Self {
|
||||
Self::Unknown(error)
|
||||
}
|
||||
|
||||
pub fn bad_status_code(status_code: u16) -> Self {
|
||||
Self::BadStatusCode { status_code }
|
||||
}
|
||||
|
||||
pub fn bad_content_type(content_type: &str) -> Self {
|
||||
Self::BadContentType {
|
||||
content_type: content_type.to_string(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn bad_headers(expected_header: &str) -> Self {
|
||||
Self::BadHeaders {
|
||||
expected_header: expected_header.to_string(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Equality compares variants only: two errors of the same variant are equal
// even when their payloads (status code, content type, inner error) differ.
// The downloader tests rely on this when matching error kinds.
impl PartialEq for FetchError {
    fn eq(&self, other: &Self) -> bool {
        discriminant(self) == discriminant(other)
    }
}
|
||||
|
||||
/// Result of XLS data retrieval.
#[derive(Debug, PartialEq)]
pub struct FetchOk {
    /// When the file was uploaded to the server (taken from `Last-Modified`).
    pub uploaded_at: DateTime<Utc>,

    /// When this fetch was performed.
    pub requested_at: DateTime<Utc>,

    /// File contents; `None` for HEAD-only requests.
    pub data: Option<Vec<u8>>,
}
|
||||
|
||||
impl FetchOk {
|
||||
/// Result without file content.
|
||||
pub fn head(uploaded_at: DateTime<Utc>) -> Self {
|
||||
FetchOk {
|
||||
uploaded_at,
|
||||
requested_at: Utc::now(),
|
||||
data: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Full result.
|
||||
pub fn get(uploaded_at: DateTime<Utc>, data: Vec<u8>) -> Self {
|
||||
FetchOk {
|
||||
uploaded_at,
|
||||
requested_at: Utc::now(),
|
||||
data: Some(data),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Shorthand for the outcome of any fetch operation.
pub type FetchResult = Result<FetchOk, FetchError>;

/// Downloader for the schedule XLS file; remembers the most recent valid URL.
pub struct XlsDownloader {
    /// Current file URL; `None` until a `set_url` call succeeds.
    pub url: Option<String>,
}
|
||||
|
||||
impl XlsDownloader {
|
||||
pub fn new() -> Self {
|
||||
XlsDownloader { url: None }
|
||||
}
|
||||
|
||||
async fn fetch_specified(url: &str, head: bool) -> FetchResult {
|
||||
let client = reqwest::Client::new();
|
||||
|
||||
let response = if head {
|
||||
client.head(url)
|
||||
} else {
|
||||
client.get(url)
|
||||
}
|
||||
.header("User-Agent", ua_generator::ua::spoof_chrome_ua())
|
||||
.send()
|
||||
.await
|
||||
.map_err(|e| FetchError::unknown(Arc::new(e)))?;
|
||||
|
||||
if response.status().as_u16() != 200 {
|
||||
return Err(FetchError::bad_status_code(response.status().as_u16()));
|
||||
}
|
||||
|
||||
let headers = response.headers();
|
||||
|
||||
let content_type = headers
|
||||
.get("Content-Type")
|
||||
.ok_or(FetchError::bad_headers("Content-Type"))?;
|
||||
|
||||
if !headers.contains_key("etag") {
|
||||
return Err(FetchError::bad_headers("etag"));
|
||||
}
|
||||
|
||||
let last_modified = headers
|
||||
.get("last-modified")
|
||||
.ok_or(FetchError::bad_headers("last-modified"))?;
|
||||
|
||||
if content_type != "application/vnd.ms-excel" {
|
||||
return Err(FetchError::bad_content_type(content_type.to_str().unwrap()));
|
||||
}
|
||||
|
||||
let last_modified = DateTime::parse_from_rfc2822(&last_modified.to_str().unwrap())
|
||||
.unwrap()
|
||||
.with_timezone(&Utc);
|
||||
|
||||
Ok(if head {
|
||||
FetchOk::head(last_modified)
|
||||
} else {
|
||||
FetchOk::get(last_modified, response.bytes().await.unwrap().to_vec())
|
||||
})
|
||||
}
|
||||
|
||||
pub async fn fetch(&self, head: bool) -> FetchResult {
|
||||
if self.url.is_none() {
|
||||
Err(FetchError::NoUrlProvided)
|
||||
} else {
|
||||
Self::fetch_specified(&*self.url.as_ref().unwrap(), head).await
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn set_url(&mut self, url: &str) -> FetchResult {
|
||||
let result = Self::fetch_specified(url, true).await;
|
||||
|
||||
if let Ok(_) = result {
|
||||
self.url = Some(url.to_string());
|
||||
}
|
||||
|
||||
result
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    //! Network-dependent tests: each one performs a real HTTP request
    //! (against Google or fixture files on S3), so they require internet
    //! access and may fail if the remote fixtures move.

    use crate::xls_downloader::{FetchError, XlsDownloader};

    /// A malformed URL must make `set_url` fail.
    #[tokio::test]
    async fn bad_url() {
        let url = "bad_url";

        let mut downloader = XlsDownloader::new();
        assert!(downloader.set_url(url).await.is_err());
    }

    /// A reachable host returning 404 maps to `FetchError::BadStatusCode`.
    #[tokio::test]
    async fn bad_status_code() {
        let url = "https://www.google.com/not-found";

        let mut downloader = XlsDownloader::new();
        assert_eq!(
            downloader.set_url(url).await,
            Err(FetchError::bad_status_code(404))
        );
    }

    /// A response missing the expected headers maps to `FetchError::BadHeaders`.
    ///
    /// NOTE: `FetchError`'s `PartialEq` compares discriminants only, so the
    /// "ETag" payload here does not need to match the actual header name.
    #[tokio::test]
    async fn bad_headers() {
        let url = "https://www.google.com/favicon.ico";

        let mut downloader = XlsDownloader::new();
        assert_eq!(
            downloader.set_url(url).await,
            Err(FetchError::BadHeaders {
                expected_header: "ETag".to_string(),
            })
        );
    }

    /// A non-XLS file must be rejected by the content-type check.
    #[tokio::test]
    async fn bad_content_type() {
        let url = "https://s3.aero-storage.ldragol.ru/679e5d1145a6ad00843ad3f1/67ddb59fd46303008396ac96%2Fexample.txt";

        let mut downloader = XlsDownloader::new();
        assert!(downloader.set_url(url).await.is_err());
    }

    /// A valid schedule XLS fixture passes `set_url` validation.
    #[tokio::test]
    async fn ok() {
        let url = "https://s3.aero-storage.ldragol.ru/679e5d1145a6ad00843ad3f1/67ddb5fad46303008396ac97%2Fschedule.xls";

        let mut downloader = XlsDownloader::new();
        assert!(downloader.set_url(url).await.is_ok());
    }

    /// After a successful `set_url`, a full GET fetch succeeds too.
    #[tokio::test]
    async fn downloader_ok() {
        let url = "https://s3.aero-storage.ldragol.ru/679e5d1145a6ad00843ad3f1/67ddb5fad46303008396ac97%2Fschedule.xls";

        let mut downloader = XlsDownloader::new();
        assert!(downloader.set_url(url).await.is_ok());
        assert!(downloader.fetch(false).await.is_ok());
    }

    /// Fetching before any URL is configured yields `NoUrlProvided`.
    #[tokio::test]
    async fn downloader_no_url_provided() {
        let downloader = XlsDownloader::new();

        let result = downloader.fetch(false).await;
        assert_eq!(result, Err(FetchError::NoUrlProvided));
    }
}
|
||||
Reference in New Issue
Block a user