26 Commits

Author SHA1 Message Date
cdc89b5bcd fix(parser): fix sentry error sending 2025-10-10 03:00:47 +04:00
ad86f6cd64 feat(parser): limit names regex to maximum 2 elements
This means we no longer need to worry about overflowing the subgroups array index, and it lets us handle non-standard cases better (a sketch illustrating this is shown right after the commit list).
2025-10-10 01:39:54 +04:00
a3b4a501db feat(parser): improve names regex to exclude some non-standard cases
Like "Название ФАмилия. И.О.".
In that case regex will grab "Название ФА", instead of "Амилия. И. О." (we can't add 'Ф', bc it will make regex checks way more complex).

Now it will ignore "Название ФА" if after that lower or upper char is placed.
Previously only lower chars are excluded and check won't exclude "Название ФА" and grabs "Название Ф" bc after 'Ф' uppercase char is present.
2025-10-10 01:37:52 +04:00
df0e99a4d0 feat(parser): make lesson cell range less strict to support upcoming split-lessons 2025-10-10 01:31:55 +04:00
a8cf8fb0f5 feat(parser): improve street regex 2025-10-10 01:30:56 +04:00
7ed866138e feat(error): add error for unknown lesson type 2025-10-10 01:30:30 +04:00
7bac48f8fc feat(error): add more intuitive CellPos formatting and get rid of ErrorCell 2025-10-10 01:27:05 +04:00
191ec36fef chore: remove useless commented code 2025-10-10 01:25:12 +04:00
f121a04f1b refactor: refactor providers code 2025-10-02 07:55:07 +04:00
df74ab03a1 chore(ci): make building non-verbose 2025-10-02 07:54:09 +04:00
1b79d1cf1e chore: bump provider version to 0.2.2 2025-10-02 07:44:35 +04:00
2b9b1ea66b chore(deps): update dependencies 2025-10-02 07:43:37 +04:00
ca713d8d51 refactor: prevent updater stop because of errors 2025-10-02 07:40:44 +04:00
69df538467 refactor(updater): don't cancel token when uri fetch error occurred 2025-09-29 08:27:25 +04:00
aa019f8fcf fix(parser): rework teacher name parsing 2025-09-29 08:27:24 +04:00
b664ba578d chore(clippy): fix all clippy warnings 2025-09-25 03:42:34 +04:00
983967f8b0 chore(downloader): suppress unused_assignments warning 2025-09-25 03:27:55 +04:00
e5760120e2 chore(release): bump version to 1.3.0 2025-09-25 03:17:14 +04:00
a28fb66dd4 feat(downloader): add retry-mechanism for querying uri from yandex-cloud api (#18) 2025-09-25 03:15:36 +04:00
3780fb3136 feat(downloader): implement etag-based difference check for schedule 2025-09-25 03:14:39 +04:00
6c71bc19f5 chore(parser): fix crash caused by another mistype in schedule 2025-09-25 02:50:43 +04:00
2d0041dc8b feat(schedule): add practice lesson type 2025-09-25 02:49:23 +04:00
b5d372e109 feat(ci): build and push image to docker registry on every push to master 2025-09-10 20:05:11 +04:00
84dca02c34 fix(database): use migrator and change connection options 2025-09-10 20:04:19 +04:00
6c9d3b3b31 chore(release): bump version to 1.2.2 2025-09-08 07:16:53 +04:00
a348b1b99b refactor(auth): interpret failure to find user as an error 2025-09-08 07:15:22 +04:00
18 changed files with 1138 additions and 888 deletions
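The two parser commits above, ad86f6cd64 and a3b4a501db, both adjust the teacher-name regex. A hypothetical, simplified sketch of the idea behind them: cap the number of name matches at two, so the fixed-size subgroups array used by the parser can never be indexed out of bounds. The pattern below is an illustrative stand-in, not the exact NAME_RE from the parser, and the cell text is invented example data.

use regex::Regex;

fn main() {
    // Example cell text: lesson name, two teachers with explicit subgroup
    // numbers, and a trailing "modifier" such as "консультация".
    let text = "Математика Иванов И.И.(1 подгруппа), Петров П.П.(2 подгруппа) консультация";

    // Simplified teacher pattern: surname + two initials + optional "(n подгруппа)".
    let re = Regex::new(r"[А-Я][а-я]+\s?(?:[А-Я][\s.]*){2}(?:\(\s*\d\s*[а-я\s]+\))?").unwrap();

    // Taking at most two matches is what lets the parser keep a fixed
    // [Option<LessonSubGroup>; 2] array without risk of overflow: an
    // unnumbered teacher fills the first free slot, and a numbered one
    // maps to index (n - 1) % 2, as the parser diff below does.
    for m in re.find_iter(text).take(2) {
        println!("teacher: {}", m.as_str());
    }
}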

.github/workflows/build.yml (vendored, new file)
View File

@@ -0,0 +1,142 @@
name: build

on:
  push:
    branches: [ "master" ]
    tags-ignore: [ "release/v*" ]

permissions:
  contents: write

env:
  CARGO_TERM_COLOR: always
  BINARY_NAME: schedule-parser-rusted
  TEST_DB: ${{ secrets.TEST_DATABASE_URL }}
  SENTRY_AUTH_TOKEN: ${{ secrets.SENTRY_AUTH_TOKEN }}
  SENTRY_ORG: ${{ secrets.SENTRY_ORG }}
  SENTRY_PROJECT: ${{ secrets.SENTRY_PROJECT }}
  DOCKER_IMAGE_NAME: ${{ github.repository }}
  DOCKER_REGISTRY_HOST: registry.n08i40k.ru
  DOCKER_REGISTRY_USERNAME: ${{ github.repository_owner }}
  DOCKER_REGISTRY_PASSWORD: ${{ secrets.DOCKER_REGISTRY_PASSWORD }}

jobs:
  test:
    name: Test
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Setup Rust
        uses: actions-rust-lang/setup-rust-toolchain@v1.11.0
        with:
          toolchain: stable

      - name: Test
        run: |
          cargo test
        env:
          DATABASE_URL: ${{ env.TEST_DB }}
          SCHEDULE_DISABLE_AUTO_UPDATE: 1
          JWT_SECRET: "test-secret-at-least-256-bits-used"
          VK_ID_CLIENT_ID: 0
          VK_ID_REDIRECT_URI: "vk0://vk.com/blank.html"
          TELEGRAM_BOT_ID: 0
          TELEGRAM_MINI_APP_HOST: example.com
          TELEGRAM_TEST_DC: false
          YANDEX_CLOUD_API_KEY: ""
          YANDEX_CLOUD_FUNC_ID: ""

  build:
    name: Build
    runs-on: ubuntu-latest
    needs: test
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Setup Rust
        uses: actions-rust-lang/setup-rust-toolchain@v1.11.0
        with:
          toolchain: stable

      - name: Build
        run: cargo build --release

      - name: Extract debug symbols
        run: |
          objcopy --only-keep-debug target/release/${{ env.BINARY_NAME }}{,.d}
          objcopy --strip-debug --strip-unneeded target/release/${{ env.BINARY_NAME }}
          objcopy --add-gnu-debuglink target/release/${{ env.BINARY_NAME }}{.d,}

      - name: Setup sentry-cli
        uses: matbour/setup-sentry-cli@v2.0.0
        with:
          version: latest
          token: ${{ env.SENTRY_AUTH_TOKEN }}
          organization: ${{ env.SENTRY_ORG }}
          project: ${{ env.SENTRY_PROJECT }}

      - name: Upload debug symbols to Sentry
        run: |
          sentry-cli debug-files upload --include-sources .

      - name: Upload build binary artifact
        uses: actions/upload-artifact@v4
        with:
          name: release-binary
          path: target/release/${{ env.BINARY_NAME }}

      - name: Upload build debug symbols artifact
        uses: actions/upload-artifact@v4
        with:
          name: release-symbols
          path: target/release/${{ env.BINARY_NAME }}.d

  docker:
    name: Build & Push Docker Image
    runs-on: ubuntu-latest
    needs: build
    steps:
      - uses: actions/checkout@v4

      - name: Download build artifacts
        uses: actions/download-artifact@v4
        with:
          name: release-binary

      - name: Setup Docker Buildx
        uses: docker/setup-buildx-action@v3.10.0

      - name: Login to Registry
        uses: docker/login-action@v3.4.0
        with:
          registry: ${{ env.DOCKER_REGISTRY_HOST }}
          username: ${{ env.DOCKER_REGISTRY_USERNAME }}
          password: ${{ env.DOCKER_REGISTRY_PASSWORD }}

      - name: Extract Docker metadata
        id: meta
        uses: docker/metadata-action@v5.7.0
        with:
          images: ${{ env.DOCKER_REGISTRY_HOST }}/${{ env.DOCKER_IMAGE_NAME }}

      - name: Build and push Docker image
        id: build-and-push
        uses: docker/build-push-action@v6.15.0
        with:
          context: .
          push: true
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          cache-from: type=gha
          cache-to: type=gha,mode=max
          build-args: |
            "BINARY_NAME=${{ env.BINARY_NAME }}"

View File

@@ -2,7 +2,7 @@ name: cargo test
 on:
   push:
-    branches: [ "master" ]
+    branches: [ "development" ]
     tags-ignore: [ "release/v*" ]
 permissions:

Cargo.lock (generated)

File diff suppressed because it is too large.

View File

@@ -3,7 +3,7 @@ members = ["actix-macros", "actix-test", "providers"]
 [package]
 name = "schedule-parser-rusted"
-version = "1.2.1"
+version = "1.3.1"
 edition = "2024"
 publish = false
@@ -21,7 +21,7 @@ actix-macros = { path = "actix-macros" }
 actix-web = "4.11.0"
 # basic
-chrono = { version = "0.4.41", features = ["serde"] }
+chrono = { version = "0.4.42", features = ["serde"] }
 derive_more = { version = "2.0.1", features = ["full"] }
 dotenvy = "0.15.7"
@@ -48,13 +48,13 @@ reqwest = { version = "0.12.23", features = ["json"] }
 mime = "0.3.17"
 # error handling
-sentry = "0.42.0"
+sentry = "0.43.0"
-sentry-actix = "0.42.0"
+sentry-actix = "0.43.0"
 # [de]serializing
-serde = { version = "1.0.219", features = ["derive"] }
+serde = { version = "1", features = ["derive"] }
-serde_json = "1.0.143"
+serde_json = "1"
-serde_with = "3.14.0"
+serde_with = "3.14"
 sha1 = "0.11.0-rc.2"
@@ -65,12 +65,12 @@ utoipa-actix-web = "0.1.2"
 uuid = { version = "1.18.1", features = ["v4"] }
 hex-literal = "1"
-log = "0.4.27"
+log = "0.4.28"
 # telegram webdata deciding and verify
 base64 = "0.22.1"
 percent-encoding = "2.3.2"
-ed25519-dalek = "3.0.0-pre.0"
+ed25519-dalek = "3.0.0-pre.1"
 # development tracing
 console-subscriber = { version = "0.4.1", optional = true }

View File

@@ -100,6 +100,9 @@ pub enum LessonType {
     /// Защита курсового проекта.
     CourseProjectDefense,
+    /// Практическое занятие.
+    Practice,
 }

 #[derive(Clone, Hash, Debug, Serialize, Deserialize, ToSchema)]
@@ -209,70 +212,6 @@ impl ScheduleSnapshot {
     }
 }

-// #[derive(Clone, Debug, Display, Error, ToSchema)]
-// #[display("row {row}, column {column}")]
-// pub struct ErrorCellPos {
-// pub row: u32,
-// pub column: u32,
-// }
-//
-// #[derive(Clone, Debug, Display, Error, ToSchema)]
-// #[display("'{data}' at {pos}")]
-// pub struct ErrorCell {
-// pub pos: ErrorCellPos,
-// pub data: String,
-// }
-//
-// impl ErrorCell {
-// pub fn new(row: u32, column: u32, data: String) -> Self {
-// Self {
-// pos: ErrorCellPos { row, column },
-// data,
-// }
-// }
-// }
-// #[derive(Clone, Debug, Display, Error, ToSchema)]
-// pub enum ParseError {
-// /// Errors related to reading XLS file.
-// #[display("{_0:?}: Failed to read XLS file.")]
-// #[schema(value_type = String)]
-// BadXLS(Arc<calamine::XlsError>),
-//
-// /// Not a single sheet was found.
-// #[display("No work sheets found.")]
-// NoWorkSheets,
-//
-// /// There are no data on the boundaries of the sheet.
-// #[display("There is no data on work sheet boundaries.")]
-// UnknownWorkSheetRange,
-//
-// /// Failed to read the beginning and end of the lesson from the cell
-// #[display("Failed to read lesson start and end from {_0}.")]
-// LessonBoundaries(ErrorCell),
-//
-// /// Not found the beginning and the end corresponding to the lesson.
-// #[display("No start and end times matching the lesson (at {_0}) was found.")]
-// LessonTimeNotFound(ErrorCellPos),
-// }
-//
-// impl Serialize for ParseError {
-// fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
-// where
-// S: Serializer,
-// {
-// match self {
-// ParseError::BadXLS(_) => serializer.serialize_str("BAD_XLS"),
-// ParseError::NoWorkSheets => serializer.serialize_str("NO_WORK_SHEETS"),
-// ParseError::UnknownWorkSheetRange => {
-// serializer.serialize_str("UNKNOWN_WORK_SHEET_RANGE")
-// }
-// ParseError::LessonBoundaries(_) => serializer.serialize_str("GLOBAL_TIME"),
-// ParseError::LessonTimeNotFound(_) => serializer.serialize_str("LESSON_TIME_NOT_FOUND"),
-// }
-// }
-// }

 #[async_trait]
 pub trait ScheduleProvider
 where

View File

@@ -1,6 +1,6 @@
 [package]
 name = "provider-engels-polytechnic"
-version = "0.1.0"
+version = "0.2.3"
 edition = "2024"

 [features]
@@ -14,13 +14,11 @@ tokio-util = "0.7.16"
 chrono = { version = "0.4.41", features = ["serde"] }
-serde = { version = "1.0.219", features = ["derive"] }
-derive_more = { version = "2.0.1", features = ["error", "display"] }
+derive_more = { version = "2.0.1", features = ["error", "display", "from"] }
 utoipa = { version = "5.4.0", features = ["macros", "chrono"] }
-calamine = "0.30.0"
+calamine = "0.31"
 async-trait = "0.1.89"
 reqwest = "0.12.23"
@@ -28,5 +26,6 @@ ua_generator = "0.5.22"
 regex = "1.11.2"
 strsim = "0.11.1"
 log = "0.4.27"
-sentry = "0.42.0"
+sentry = "0.43.0"
+fancy-regex = "0.16.2"

View File

@@ -1,4 +1,4 @@
-use crate::updater::Updater;
+pub use crate::updater::{UpdateSource, Updater};
 use async_trait::async_trait;
 use base::{ScheduleProvider, ScheduleSnapshot};
 use std::ops::DerefMut;
@@ -8,8 +8,6 @@ use tokio::sync::RwLock;
 use tokio::time::interval;
 use tokio_util::sync::CancellationToken;
-pub use crate::updater::UpdateSource;
 mod parser;
 mod updater;
 mod xls_downloader;
@@ -27,7 +25,7 @@ pub struct EngelsPolytechnicProvider {
 impl EngelsPolytechnicProvider {
     pub async fn get(
         update_source: UpdateSource,
-    ) -> Result<Arc<dyn ScheduleProvider>, crate::updater::error::Error> {
+    ) -> Result<Arc<dyn ScheduleProvider>, crate::updater::Error> {
         let (updater, snapshot) = Updater::new(update_source).await?;

         Ok(Arc::new(Wrapper {
@@ -65,9 +63,10 @@ impl ScheduleProvider for Wrapper {
                 this.snapshot = Arc::new(snapshot);
             },
+            Err(updater::Error::EmptyUri) => {},
             Err(err) => {
-                cancellation_token.cancel();
-                return Err(err.into());
+                sentry::capture_error(&err);
             }
         }
     }

View File

@@ -0,0 +1,25 @@
use crate::parser::worksheet::CellPos;
use derive_more::{Display, Error, From};

#[derive(Debug, Display, Error, From)]
pub enum Error {
    #[from]
    BadXls(calamine::XlsError),

    #[display("No work sheets found.")]
    NoWorkSheets,

    #[display("There is no data on work sheet boundaries.")]
    UnknownWorkSheetRange,

    #[display("Failed to read lesson start and end of lesson at {_0}.")]
    NoLessonBoundaries(CellPos),

    #[display("No start and end times matching the lesson (at {_0}) was found.")]
    LessonTimeNotFound(CellPos),

    #[display("Unknown lesson type `{type}` at {pos}")]
    UnknownLessonType { pos: CellPos, r#type: String },
}

pub type Result<T> = core::result::Result<T, Error>;

View File

@@ -1,6 +1,6 @@
pub use self::error::{Error, Result};
use crate::or_continue; use crate::or_continue;
use crate::parser::error::{Error, ErrorCell, ErrorCellPos}; use crate::parser::worksheet::{CellPos, CellRange, WorkSheet};
use crate::parser::worksheet::WorkSheet;
use crate::parser::LessonParseResult::{Lessons, Street}; use crate::parser::LessonParseResult::{Lessons, Street};
use base::LessonType::Break; use base::LessonType::Break;
use base::{ use base::{
@@ -13,82 +13,12 @@ use std::collections::HashMap;
use std::io::Cursor; use std::io::Cursor;
use std::sync::LazyLock; use std::sync::LazyLock;
mod error;
mod macros; mod macros;
mod worksheet; mod worksheet;
pub mod error {
use derive_more::{Display, Error};
use serde::{Serialize, Serializer};
use std::sync::Arc;
use utoipa::ToSchema;
#[derive(Clone, Debug, Display, Error, ToSchema)]
#[display("row {row}, column {column}")]
pub struct ErrorCellPos {
pub row: u32,
pub column: u32,
}
#[derive(Clone, Debug, Display, Error, ToSchema)]
#[display("'{data}' at {pos}")]
pub struct ErrorCell {
pub pos: ErrorCellPos,
pub data: String,
}
impl ErrorCell {
pub fn new(row: u32, column: u32, data: String) -> Self {
Self {
pos: ErrorCellPos { row, column },
data,
}
}
}
#[derive(Clone, Debug, Display, Error, ToSchema)]
pub enum Error {
/// Errors related to reading XLS file.
#[display("{_0:?}: Failed to read XLS file.")]
#[schema(value_type = String)]
BadXLS(Arc<calamine::XlsError>),
/// Not a single sheet was found.
#[display("No work sheets found.")]
NoWorkSheets,
/// There are no data on the boundaries of the sheet.
#[display("There is no data on work sheet boundaries.")]
UnknownWorkSheetRange,
/// Failed to read the beginning and end of the lesson from the cell
#[display("Failed to read lesson start and end from {_0}.")]
LessonBoundaries(ErrorCell),
/// Not found the beginning and the end corresponding to the lesson.
#[display("No start and end times matching the lesson (at {_0}) was found.")]
LessonTimeNotFound(ErrorCellPos),
}
impl Serialize for Error {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
match self {
Error::BadXLS(_) => serializer.serialize_str("BAD_XLS"),
Error::NoWorkSheets => serializer.serialize_str("NO_WORK_SHEETS"),
Error::UnknownWorkSheetRange => {
serializer.serialize_str("UNKNOWN_WORK_SHEET_RANGE")
}
Error::LessonBoundaries(_) => serializer.serialize_str("GLOBAL_TIME"),
Error::LessonTimeNotFound(_) => serializer.serialize_str("LESSON_TIME_NOT_FOUND"),
}
}
}
}
/// Data cell storing the group name. /// Data cell storing the group name.
pub struct GroupCellInfo { pub struct GroupMarkup {
/// Column index. /// Column index.
pub column: u32, pub column: u32,
@@ -97,7 +27,7 @@ pub struct GroupCellInfo {
} }
/// Data cell storing the line. /// Data cell storing the line.
pub struct DayCellInfo { pub struct DayMarkup {
/// Line index. /// Line index.
pub row: u32, pub row: u32,
@@ -111,8 +41,13 @@ pub struct DayCellInfo {
pub date: DateTime<Utc>, pub date: DateTime<Utc>,
} }
pub struct WorkSheetMarkup {
days: Box<[DayMarkup]>,
groups: Box<[GroupMarkup]>,
}
/// Data on the time of lessons from the second column of the schedule. /// Data on the time of lessons from the second column of the schedule.
pub struct BoundariesCellInfo { pub struct BoundariesData {
/// Temporary segment of the lesson. /// Temporary segment of the lesson.
pub time_range: LessonBoundaries, pub time_range: LessonBoundaries,
@@ -123,23 +58,26 @@ pub struct BoundariesCellInfo {
pub default_index: Option<u32>, pub default_index: Option<u32>,
/// The frame of the cell. /// The frame of the cell.
pub xls_range: ((u32, u32), (u32, u32)), pub range: CellRange,
} }
/// Obtaining a "skeleton" schedule from the working sheet. /// Obtaining a "skeleton" schedule from the working sheet.
fn parse_skeleton( fn parse_markup(worksheet: &WorkSheet) -> Result<WorkSheetMarkup> {
worksheet: &WorkSheet, struct PartialDayMarkup {
) -> Result<(Vec<DayCellInfo>, Vec<GroupCellInfo>), crate::parser::error::Error> { row: u32,
let mut groups: Vec<GroupCellInfo> = Vec::new(); name: String,
let mut days: Vec<(u32, String, Option<DateTime<Utc>>)> = Vec::new(); date: Option<DateTime<Utc>>,
}
let worksheet_start = worksheet let mut groups: Vec<GroupMarkup> = Vec::new();
.start() let mut days: Vec<PartialDayMarkup> = Vec::new();
.ok_or(error::Error::UnknownWorkSheetRange)?;
let worksheet_end = worksheet.end().ok_or(error::Error::UnknownWorkSheetRange)?;
let mut row = worksheet_start.0; let (start_row, start_col) = worksheet.start().ok_or(Error::UnknownWorkSheetRange)?;
let (end_row, end_col) = worksheet.end().ok_or(Error::UnknownWorkSheetRange)?;
while row < worksheet_end.0 { let mut row = start_row;
while row < end_row {
row += 1; row += 1;
let day_full_name = or_continue!(worksheet.get_string_from_cell(row, 0)); let day_full_name = or_continue!(worksheet.get_string_from_cell(row, 0));
@@ -149,8 +87,8 @@ fn parse_skeleton(
// переход на предыдущую строку // переход на предыдущую строку
row -= 1; row -= 1;
for column in (worksheet_start.1 + 2)..=worksheet_end.1 { for column in (start_col + 2)..=end_col {
groups.push(GroupCellInfo { groups.push(GroupMarkup {
column, column,
name: or_continue!(worksheet.get_string_from_cell(row, column)) name: or_continue!(worksheet.get_string_from_cell(row, column))
.replace(" ", ""), .replace(" ", ""),
@@ -183,37 +121,44 @@ fn parse_skeleton(
(name, date) (name, date)
}; };
days.push((row, day_name, day_date)); days.push(PartialDayMarkup {
row,
name: day_name,
date: day_date,
});
} }
// fix unparsable day dates // fix unparsable day dates
let days_max = days.len().min(5); let days_max = days.len().min(5);
for i in 0..days_max { for i in 0..days_max {
if days[i].2.is_none() && days[i + 1].2.is_some() { if days[i].date.is_none() && days[i + 1].date.is_some() {
days[i].2 = Some(days[i + 1].2.unwrap() - Duration::days(1)); days[i].date = Some(days[i + 1].date.unwrap() - Duration::days(1));
} }
} }
for i in 0..days_max { for i in 0..days_max {
let i = days_max - i; let i = days_max - i;
if days[i - 1].2.is_none() && days[i].2.is_some() { if days[i - 1].date.is_none() && days[i].date.is_some() {
days[i - 1].2 = Some(days[i].2.unwrap() - Duration::days(1)); days[i - 1].date = Some(days[i].date.unwrap() - Duration::days(1));
} }
} }
let days = days let days = days
.into_iter() .into_iter()
.map(|day| DayCellInfo { .map(|day| DayMarkup {
row: day.0, row: day.row,
column: 0, column: 0,
name: day.1, name: day.name,
date: day.2.unwrap(), date: day.date.unwrap(),
}) })
.collect(); .collect();
Ok((days, groups)) Ok(WorkSheetMarkup {
days,
groups: groups.into_boxed_slice(),
})
} }
/// The result of obtaining a lesson from the cell. /// The result of obtaining a lesson from the cell.
@@ -233,6 +178,7 @@ enum LessonParseResult {
fn guess_lesson_type(text: &str) -> Option<LessonType> { fn guess_lesson_type(text: &str) -> Option<LessonType> {
static MAP: LazyLock<HashMap<&str, LessonType>> = LazyLock::new(|| { static MAP: LazyLock<HashMap<&str, LessonType>> = LazyLock::new(|| {
HashMap::from([ HashMap::from([
("о важном", LessonType::Additional),
("консультация", LessonType::Consultation), ("консультация", LessonType::Consultation),
("самостоятельная работа", LessonType::IndependentWork), ("самостоятельная работа", LessonType::IndependentWork),
("зачет", LessonType::Exam), ("зачет", LessonType::Exam),
@@ -240,6 +186,7 @@ fn guess_lesson_type(text: &str) -> Option<LessonType> {
("экзамен", LessonType::ExamDefault), ("экзамен", LessonType::ExamDefault),
("курсовой проект", LessonType::CourseProject), ("курсовой проект", LessonType::CourseProject),
("защита курсового проекта", LessonType::CourseProjectDefense), ("защита курсового проекта", LessonType::CourseProjectDefense),
("практическое занятие", LessonType::Practice),
]) ])
}); });
@@ -256,11 +203,11 @@ fn guess_lesson_type(text: &str) -> Option<LessonType> {
fn parse_lesson( fn parse_lesson(
worksheet: &WorkSheet, worksheet: &WorkSheet,
day: &Day, day: &Day,
day_boundaries: &[BoundariesCellInfo], day_boundaries: &[BoundariesData],
lesson_boundaries: &BoundariesCellInfo, lesson_boundaries: &BoundariesData,
group_column: u32, group_column: u32,
) -> Result<LessonParseResult, crate::parser::error::Error> { ) -> Result<LessonParseResult> {
let row = lesson_boundaries.xls_range.0.0; let row = lesson_boundaries.range.start.row;
let name = { let name = {
let cell_data = match worksheet.get_string_from_cell(row, group_column) { let cell_data = match worksheet.get_string_from_cell(row, group_column) {
@@ -269,7 +216,7 @@ fn parse_lesson(
}; };
static OTHER_STREET_RE: LazyLock<Regex> = static OTHER_STREET_RE: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r"^[А-Я][а-я]+[,\s]\d+$").unwrap()); LazyLock::new(|| Regex::new(r"^[А-Я][а-я]+[,\s]+д\.\s\d+$").unwrap());
if OTHER_STREET_RE.is_match(&cell_data) { if OTHER_STREET_RE.is_match(&cell_data) {
return Ok(Street(cell_data)); return Ok(Street(cell_data));
@@ -278,20 +225,22 @@ fn parse_lesson(
cell_data cell_data
}; };
let cell_range = worksheet.get_merge_from_start(row, group_column); let lesson_cell_range = worksheet.get_merge_from_start(row, group_column);
let (default_range, lesson_time) = { let (default_range, lesson_time) = {
let end_time_arr = day_boundaries let end_time_arr = day_boundaries
.iter() .iter()
.filter(|time| time.xls_range.1.0 == cell_range.1.0) .filter(
.collect::<Vec<&BoundariesCellInfo>>(); |BoundariesData {
range: CellRange { end, .. },
..
}| { lesson_cell_range.end.row <= end.row },
)
.collect::<Vec<&BoundariesData>>();
let end_time = end_time_arr let end_time = end_time_arr
.first() .first()
.ok_or(error::Error::LessonTimeNotFound(ErrorCellPos { .ok_or(Error::LessonTimeNotFound(CellPos::new(row, group_column)))?;
row,
column: group_column,
}))?;
let range: Option<[u8; 2]> = if lesson_boundaries.default_index.is_some() { let range: Option<[u8; 2]> = if lesson_boundaries.default_index.is_some() {
let default = lesson_boundaries.default_index.unwrap() as u8; let default = lesson_boundaries.default_index.unwrap() as u8;
@@ -305,19 +254,19 @@ fn parse_lesson(
end: end_time.time_range.end, end: end_time.time_range.end,
}; };
Ok((range, time)) (range, time)
}?; };
let ParsedLessonName { let ParsedLessonName {
name, name,
mut subgroups, mut subgroups,
r#type: lesson_type, r#type: lesson_type,
} = parse_name_and_subgroups(&name)?; } = parse_name_and_subgroups(&name, row, group_column)?;
{ {
let cabinets: Vec<String> = parse_cabinets( let cabinets: Vec<String> = parse_cabinets(
worksheet, worksheet,
(cell_range.0.0, cell_range.1.0), (lesson_cell_range.start.row, lesson_cell_range.end.row),
group_column + 1, group_column + 1,
); );
@@ -419,118 +368,136 @@ struct ParsedLessonName {
//noinspection GrazieInspection //noinspection GrazieInspection
/// Getting the "pure" name of the lesson and list of teachers from the text of the lesson cell. /// Getting the "pure" name of the lesson and list of teachers from the text of the lesson cell.
fn parse_name_and_subgroups(text: &str) -> Result<ParsedLessonName, Error> { fn parse_name_and_subgroups(text: &str, row: u32, column: u32) -> Result<ParsedLessonName> {
// Части названия пары: // Части названия пары:
// 1. Само название. // 1. Само название.
// 2. Список преподавателей и подгрупп. // 2. Список преподавателей и подгрупп.
// 3. "Модификатор" (чаще всего). // 3. "Модификатор" (чаще всего).
// //
// Регулярное выражение для получения ФИО преподавателей и номеров подгрупп (aka. второй части). // Регулярное выражение для получения ФИО преподавателей и номеров подгрупп (aka. второй части).
// (?:[А-Я][а-я]+\s?(?:[А-Я][\s.]*){2}(?:\(\d\s?[а-я]+\))?(?:, )?)+[\s.]* static NAME_RE: LazyLock<fancy_regex::Regex> = LazyLock::new(|| {
// fancy_regex::Regex::new(
// Подробнее: r"([А-Я][а-я]+(?:[\s.]*[А-Я]){1,2})(?=[^А-Яа-я])[.\s]*(?:\(?(\d)[\sа-я]*\)?)?",
// (?:
// [А-Я][а-я]+ - Фамилия.
// \s? - Кто знает, будет ли там пробел.
// (?:[А-Я][\s.]*){2} - Имя и отчество с учётом случайных пробелов и точек.
// (?:
// \( - Открытие подгруппы.
// \s? - Кто знает, будет ли там пробел.
// \d - Номер подгруппы.
// \s? - Кто знает, будет ли там пробел.
// [а-я\s]+ - Слово "подгруппа" с учётов ошибок.
// \) - Закрытие подгруппы.
// )? - Явное указание подгруппы может отсутствовать по понятным причинам.
// (?:, )? - Разделители между отдельными частями.
// )+
// [\s.]* - Забираем с собой всякий мусор, что бы не передать его в третью часть.
static NAMES_REGEX: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(
r"(?:[А-Я][а-я]+\s?(?:[А-Я][\s.]*){2}(?:\(\s*\d\s*[а-я\s]+\))?(?:[\s,]+)?){1,2}+[\s.,]*",
) )
.unwrap() .unwrap()
}); });
// Отчистка let text = text
static CLEAN_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"[\s\n\t]+").unwrap()); .chars()
.filter(|c: &char| {
c.is_whitespace()
|| c.is_ascii_digit()
|| (*c >= 'а' && *c <= 'я')
|| (*c >= 'А' && *c <= 'Я')
|| *c == '.'
|| *c == '-'
})
.collect::<String>()
.replace(r"\s+", " ");
let text = CLEAN_RE let mut lesson_name: Option<&str> = None;
.replace(&text.replace([' ', '\t', '\n'], " "), " ") let mut extra: Option<&str> = None;
.to_string();
let (lesson_name, subgroups, lesson_type) = match NAMES_REGEX.captures(&text) { let mut shared_subgroup = true;
Some(captures) => {
let capture = captures.get(0).unwrap();
let subgroups: Vec<Option<LessonSubGroup>> = {
let src = capture.as_str().replace([' ', '.'], "");
let mut shared_subgroup = false;
let mut subgroups: [Option<LessonSubGroup>; 2] = [None, None]; let mut subgroups: [Option<LessonSubGroup>; 2] = [None, None];
for name in src.split(',') { for capture in NAME_RE.captures_iter(&text).take(2) {
let open_bracket_index = name.find('('); let capture = capture.unwrap();
let number: u8 = open_bracket_index if lesson_name.is_none() {
.map_or(0, |index| name[(index + 1)..(index + 2)].parse().unwrap()); lesson_name = Some(&text[..capture.get(0).unwrap().start()]);
}
extra = Some(&text[capture.get(0).unwrap().end()..]);
let teacher_name = { let teacher_name = {
let name_end = open_bracket_index.unwrap_or(name.len()); let clean = capture
.get(1)
.unwrap()
.as_str()
.chars()
.filter(|c| c.is_alphabetic())
.collect::<Vec<char>>();
// Я ебал. Как же я долго до этого доходил. if clean.get(clean.len() - 2).is_some_and(|c| c.is_uppercase()) {
let (name, remaining) = clean.split_at(clean.len() - 2);
format!( format!(
"{} {}.{}.", "{} {}.{}.",
name.get(..name_end - 4).unwrap(), name.iter().collect::<String>(),
name.get(name_end - 4..name_end - 2).unwrap(), remaining[0],
name.get(name_end - 2..name_end).unwrap(), remaining[1]
) )
} else {
let (remaining, name) = clean.split_last().unwrap();
format!("{} {}.", name.iter().collect::<String>(), remaining)
}
}; };
let lesson = Some(LessonSubGroup { let subgroup_index = capture.get(2).map(|m| m.as_str().parse::<u32>().unwrap());
let subgroup = Some(LessonSubGroup {
cabinet: None, cabinet: None,
teacher: Some(teacher_name), teacher: Some(teacher_name),
}); });
match number { match subgroup_index {
0 => { None => {
subgroups[0] = lesson; // we have only 2 matches max so more than 2 subgroups we cant have 100%
subgroups[1] = None; *subgroups.iter_mut().find(|x| x.is_none()).unwrap() = subgroup;
shared_subgroup = true;
break;
} }
num => { Some(num) => {
// bc we have indexed subgroup
shared_subgroup = false;
// 1 - 1 = 0 | 2 - 1 = 1 | 3 - 1 = 2 (schedule index to array index) // 1 - 1 = 0 | 2 - 1 = 1 | 3 - 1 = 2 (schedule index to array index)
// 0 % 2 = 0 | 1 % 2 = 1 | 2 % 2 = 0 (clamp) // 0 % 2 = 0 | 1 % 2 = 1 | 2 % 2 = 0 (clamp)
let normalised = (num - 1) % 2; let subgroup_index = ((num - 1) % 2) as usize;
subgroups[normalised as usize] = lesson; // if we have subgroup in that index (probably non-indexed, we change it index to free)
if subgroups[subgroup_index].is_some() {
subgroups.swap(0, 1);
}
subgroups[subgroup_index] = subgroup;
} }
} }
} }
if shared_subgroup { let subgroups = if lesson_name.is_none() {
Vec::from([subgroups[0].take()]) Vec::new()
} else if shared_subgroup {
Vec::from([subgroups.into_iter().next().unwrap()])
} else { } else {
Vec::from(subgroups) Vec::from(subgroups)
}
}; };
let name = text[..capture.start()].trim().to_string(); if extra.is_none() {
let extra = text[capture.end()..].trim().to_string(); extra = text
.rfind(" ")
.and_then(|i| text[..i].rfind(" "))
.map(|i| &text[i + 1..]);
}
let lesson_type = if extra.len() > 4 { let lesson_type = if let Some(extra) = extra
let result = guess_lesson_type(&extra); && extra.len() > 4
{
let result = guess_lesson_type(extra);
if result.is_none() { if result.is_none() {
#[cfg(not(debug_assertions))] #[cfg(not(debug_assertions))]
sentry::capture_message( sentry::capture_error(&Error::UnknownLessonType {
&*format!("Не удалось угадать тип пары '{}'!", extra), r#type: extra.to_string(),
sentry::Level::Warning, pos: CellPos::new(row, column),
); });
#[cfg(debug_assertions)] #[cfg(debug_assertions)]
log::warn!("Не удалось угадать тип пары '{}'!", extra); log::warn!(
"{}",
Error::UnknownLessonType {
r#type: extra.to_string(),
pos: CellPos::new(row, column),
}
);
} }
result result
@@ -538,13 +505,8 @@ fn parse_name_and_subgroups(text: &str) -> Result<ParsedLessonName, Error> {
None None
}; };
(name, subgroups, lesson_type)
}
None => (text, Vec::new(), None),
};
Ok(ParsedLessonName { Ok(ParsedLessonName {
name: lesson_name, name: lesson_name.unwrap_or(&text).to_string(),
subgroups, subgroups,
r#type: lesson_type, r#type: lesson_type,
}) })
@@ -592,8 +554,8 @@ fn parse_day_boundaries(
date: DateTime<Utc>, date: DateTime<Utc>,
row_range: (u32, u32), row_range: (u32, u32),
column: u32, column: u32,
) -> Result<Vec<BoundariesCellInfo>, crate::parser::error::Error> { ) -> Result<Vec<BoundariesData>> {
let mut day_times: Vec<BoundariesCellInfo> = Vec::new(); let mut day_times: Vec<BoundariesData> = Vec::new();
for row in row_range.0..row_range.1 { for row in row_range.0..row_range.1 {
let time_cell = if let Some(str) = worksheet.get_string_from_cell(row, column) { let time_cell = if let Some(str) = worksheet.get_string_from_cell(row, column) {
@@ -602,9 +564,8 @@ fn parse_day_boundaries(
continue; continue;
}; };
let lesson_time = parse_lesson_boundaries_cell(&time_cell, date).ok_or( let lesson_time = parse_lesson_boundaries_cell(&time_cell, date)
error::Error::LessonBoundaries(ErrorCell::new(row, column, time_cell.clone())), .ok_or(Error::NoLessonBoundaries(CellPos::new(row, column)))?;
)?;
// type // type
let lesson_type = if time_cell.contains("пара") { let lesson_type = if time_cell.contains("пара") {
@@ -628,11 +589,11 @@ fn parse_day_boundaries(
None None
}; };
day_times.push(BoundariesCellInfo { day_times.push(BoundariesData {
time_range: lesson_time, time_range: lesson_time,
lesson_type, lesson_type,
default_index, default_index,
xls_range: worksheet.get_merge_from_start(row, column), range: worksheet.get_merge_from_start(row, column),
}); });
} }
@@ -647,9 +608,9 @@ fn parse_day_boundaries(
/// * `week_markup`: markup of the current week. /// * `week_markup`: markup of the current week.
fn parse_week_boundaries( fn parse_week_boundaries(
worksheet: &WorkSheet, worksheet: &WorkSheet,
week_markup: &[DayCellInfo], week_markup: &[DayMarkup],
) -> Result<Vec<Vec<BoundariesCellInfo>>, crate::parser::error::Error> { ) -> Result<Vec<Vec<BoundariesData>>> {
let mut result: Vec<Vec<BoundariesCellInfo>> = Vec::new(); let mut result: Vec<Vec<BoundariesData>> = Vec::new();
let worksheet_end_row = worksheet.end().unwrap().0; let worksheet_end_row = worksheet.end().unwrap().0;
let lesson_time_column = week_markup[0].column + 1; let lesson_time_column = week_markup[0].column + 1;
@@ -768,22 +729,21 @@ fn convert_groups_to_teachers(
/// ///
/// * `buffer`: XLS data containing schedule. /// * `buffer`: XLS data containing schedule.
/// ///
/// returns: Result<ParseResult, crate::parser::error::Error> /// returns: Result<ParseResult, Error>
pub fn parse_xls(buffer: &Vec<u8>) -> Result<ParsedSchedule, crate::parser::error::Error> { pub fn parse_xls(buffer: &Vec<u8>) -> Result<ParsedSchedule> {
let cursor = Cursor::new(&buffer); let cursor = Cursor::new(&buffer);
let mut workbook: Xls<_> = let mut workbook: Xls<_> = open_workbook_from_rs(cursor)?;
open_workbook_from_rs(cursor).map_err(|e| error::Error::BadXLS(std::sync::Arc::new(e)))?;
let worksheet = { let worksheet = {
let (worksheet_name, worksheet) = workbook let (worksheet_name, worksheet) = workbook
.worksheets() .worksheets()
.first() .first()
.ok_or(error::Error::NoWorkSheets)? .ok_or(Error::NoWorkSheets)?
.clone(); .clone();
let worksheet_merges = workbook let worksheet_merges = workbook
.worksheet_merge_cells(&worksheet_name) .worksheet_merge_cells(&worksheet_name)
.ok_or(error::Error::NoWorkSheets)?; .ok_or(Error::NoWorkSheets)?;
WorkSheet { WorkSheet {
data: worksheet, data: worksheet,
@@ -791,7 +751,11 @@ pub fn parse_xls(buffer: &Vec<u8>) -> Result<ParsedSchedule, crate::parser::erro
} }
}; };
let (week_markup, groups_markup) = parse_skeleton(&worksheet)?; let WorkSheetMarkup {
days: week_markup,
groups: groups_markup,
} = parse_markup(&worksheet)?;
let week_boundaries = parse_week_boundaries(&worksheet, &week_markup)?; let week_boundaries = parse_week_boundaries(&worksheet, &week_markup)?;
let mut groups: HashMap<String, ScheduleEntry> = HashMap::new(); let mut groups: HashMap<String, ScheduleEntry> = HashMap::new();
@@ -844,7 +808,7 @@ pub mod test_utils {
use super::*; use super::*;
use base::ParsedSchedule; use base::ParsedSchedule;
pub fn test_result() -> Result<ParsedSchedule, crate::parser::error::Error> { pub fn test_result() -> Result<ParsedSchedule> {
parse_xls(&include_bytes!("../../../../test-data/engels-polytechnic.xls").to_vec()) parse_xls(&include_bytes!("../../../../test-data/engels-polytechnic.xls").to_vec())
} }
} }

View File

@@ -1,4 +1,5 @@
use regex::Regex; use regex::Regex;
use std::fmt::{Display, Formatter};
use std::ops::Deref; use std::ops::Deref;
use std::sync::LazyLock; use std::sync::LazyLock;
@@ -8,6 +9,40 @@ pub struct WorkSheet {
pub merges: Vec<calamine::Dimensions>, pub merges: Vec<calamine::Dimensions>,
} }
#[derive(Clone, Debug, derive_more::Error)]
pub struct CellPos {
pub row: u32,
pub column: u32,
}
fn format_column_index(index: u32) -> String {
// https://stackoverflow.com/a/297214
let quotient = index / 26;
let char = char::from((65 + (index % 26)) as u8);
if quotient > 0 {
return format!("{}{}", format_column_index(quotient - 1), char);
}
return char.to_string();
}
impl Display for CellPos {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
f.write_fmt(format_args!(
"column {}, row {}",
format_column_index(self.column),
self.row + 1,
))
}
}
pub struct CellRange {
pub start: CellPos,
pub end: CellPos,
}
impl Deref for WorkSheet { impl Deref for WorkSheet {
type Target = calamine::Range<calamine::Data>; type Target = calamine::Range<calamine::Data>;
@@ -45,14 +80,26 @@ impl WorkSheet {
} }
/// Obtaining the boundaries of the cell along its upper left coordinate. /// Obtaining the boundaries of the cell along its upper left coordinate.
pub fn get_merge_from_start(&self, row: u32, column: u32) -> ((u32, u32), (u32, u32)) { pub fn get_merge_from_start(&self, row: u32, column: u32) -> CellRange {
match self match self
.merges .merges
.iter() .iter()
.find(|merge| merge.start.0 == row && merge.start.1 == column) .find(|merge| merge.start.0 == row && merge.start.1 == column)
{ {
Some(merge) => (merge.start, (merge.end.0 + 1, merge.end.1 + 1)), Some(merge) => CellRange {
None => ((row, column), (row + 1, column + 1)), start: CellPos::new(merge.start.0, merge.start.1),
end: CellPos::new(merge.end.0 + 1, merge.end.1 + 1),
},
None => CellRange {
start: CellPos::new(row, column),
end: CellPos::new(row + 1, column + 1),
},
} }
} }
} }
impl CellPos {
pub fn new(row: u32, column: u32) -> Self {
Self { row, column }
}
}

View File

@@ -0,0 +1,33 @@
use crate::xls_downloader::FetchError;
use derive_more::{Display, Error, From};

#[derive(Debug, Display, Error, From)]
pub enum Error {
    /// Occurs when the request to the Yandex Cloud API fails.
    ///
    /// This may be due to network issues, invalid API key, incorrect function ID, or other
    /// problems with the Yandex Cloud Function invocation.
    #[display("An error occurred during the request to the Yandex Cloud API: {_0}")]
    Reqwest(reqwest::Error),

    #[display("Unable to get URI in 3 retries")]
    EmptyUri,

    /// The ETag is the same (no update needed).
    #[display("The ETag is the same.")]
    SameETag,

    /// The URL query for the XLS file failed to execute, either due to network issues or invalid API parameters.
    #[display("Failed to fetch URL: {_0}")]
    ScheduleFetchFailed(FetchError),

    /// Downloading the XLS file content failed after successfully obtaining the URL.
    #[display("Download failed: {_0}")]
    ScheduleDownloadFailed(FetchError),

    /// The XLS file could not be parsed into a valid schedule format.
    #[from]
    InvalidSchedule(crate::parser::Error),
}

pub type Result<T> = core::result::Result<T, Error>;

View File

@@ -1,7 +1,8 @@
pub use self::error::{Error, Result};
use crate::parser::parse_xls; use crate::parser::parse_xls;
use crate::updater::error::{Error, QueryUrlError, SnapshotCreationError};
use crate::xls_downloader::{FetchError, XlsDownloader}; use crate::xls_downloader::{FetchError, XlsDownloader};
use base::ScheduleSnapshot; use base::ScheduleSnapshot;
mod error;
pub enum UpdateSource { pub enum UpdateSource {
Prepared(ScheduleSnapshot), Prepared(ScheduleSnapshot),
@@ -19,56 +20,6 @@ pub struct Updater {
update_source: UpdateSource, update_source: UpdateSource,
} }
pub mod error {
use crate::xls_downloader::FetchError;
use derive_more::{Display, Error};
#[derive(Debug, Display, Error)]
pub enum Error {
/// An error occurred while querying the Yandex Cloud API for a URL.
///
/// This may result from network failures, invalid API credentials, or issues with the Yandex Cloud Function invocation.
/// See [`QueryUrlError`] for more details about specific causes.
QueryUrlFailed(QueryUrlError),
/// The schedule snapshot creation process failed.
///
/// This can happen due to URL conflicts (same URL already in use), failed network requests,
/// download errors, or invalid XLS file content. See [`SnapshotCreationError`] for details.
SnapshotCreationFailed(SnapshotCreationError),
}
/// Errors that may occur when querying the Yandex Cloud API to retrieve a URL.
#[derive(Debug, Display, Error)]
pub enum QueryUrlError {
/// Occurs when the request to the Yandex Cloud API fails.
///
/// This may be due to network issues, invalid API key, incorrect function ID, or other
/// problems with the Yandex Cloud Function invocation.
#[display("An error occurred during the request to the Yandex Cloud API: {_0}")]
RequestFailed(reqwest::Error),
}
/// Errors that may occur during the creation of a schedule snapshot.
#[derive(Debug, Display, Error)]
pub enum SnapshotCreationError {
/// The URL is the same as the one already being used (no update needed).
#[display("The URL is the same as the one already being used.")]
SameUrl,
/// The URL query for the XLS file failed to execute, either due to network issues or invalid API parameters.
#[display("Failed to fetch URL: {_0}")]
FetchFailed(FetchError),
/// Downloading the XLS file content failed after successfully obtaining the URL.
#[display("Download failed: {_0}")]
DownloadFailed(FetchError),
/// The XLS file could not be parsed into a valid schedule format.
#[display("Schedule data is invalid: {_0}")]
InvalidSchedule(crate::parser::error::Error),
}
}
impl Updater { impl Updater {
/// Constructs a new `ScheduleSnapshot` by downloading and parsing schedule data from the specified URL. /// Constructs a new `ScheduleSnapshot` by downloading and parsing schedule data from the specified URL.
/// ///
@@ -82,40 +33,33 @@ impl Updater {
/// * `url`: The source URL pointing to the XLS file containing schedule data. /// * `url`: The source URL pointing to the XLS file containing schedule data.
/// ///
/// returns: Result<ScheduleSnapshot, SnapshotCreationError> /// returns: Result<ScheduleSnapshot, SnapshotCreationError>
pub async fn new_snapshot( async fn new_snapshot(downloader: &mut XlsDownloader, url: String) -> Result<ScheduleSnapshot> {
downloader: &mut XlsDownloader,
url: String,
) -> Result<ScheduleSnapshot, SnapshotCreationError> {
if downloader.url.as_ref().is_some_and(|_url| _url.eq(&url)) {
return Err(SnapshotCreationError::SameUrl);
}
let head_result = downloader.set_url(&url).await.map_err(|error| { let head_result = downloader.set_url(&url).await.map_err(|error| {
if let FetchError::Unknown(error) = &error { if let FetchError::Reqwest(error) = &error {
sentry::capture_error(&error); sentry::capture_error(&error);
} }
SnapshotCreationError::FetchFailed(error) Error::ScheduleFetchFailed(error)
})?; })?;
if downloader.etag == Some(head_result.etag) {
return Err(Error::SameETag);
}
let xls_data = downloader let xls_data = downloader
.fetch(false) .fetch(false)
.await .await
.map_err(|error| { .map_err(|error| {
if let FetchError::Unknown(error) = &error { if let FetchError::Reqwest(error) = &error {
sentry::capture_error(&error); sentry::capture_error(&error);
} }
SnapshotCreationError::DownloadFailed(error) Error::ScheduleDownloadFailed(error)
})? })?
.data .data
.unwrap(); .unwrap();
let parse_result = parse_xls(&xls_data).map_err(|error| { let parse_result = parse_xls(&xls_data)?;
sentry::capture_error(&error);
SnapshotCreationError::InvalidSchedule(error)
})?;
Ok(ScheduleSnapshot { Ok(ScheduleSnapshot {
fetched_at: head_result.requested_at, fetched_at: head_result.requested_at,
@@ -141,10 +85,24 @@ impl Updater {
/// Result containing: /// Result containing:
/// - `Ok(String)` - Complete URL constructed from the Function's response /// - `Ok(String)` - Complete URL constructed from the Function's response
/// - `Err(QueryUrlError)` - If the request or response processing fails /// - `Err(QueryUrlError)` - If the request or response processing fails
async fn query_url(api_key: &str, func_id: &str) -> Result<String, QueryUrlError> { async fn query_url(api_key: &str, func_id: &str) -> Result<String> {
let client = reqwest::Client::new(); let client = reqwest::Client::new();
let uri = client let uri = {
// вот бы добавили named-scopes как в котлине,
// чтоб мне не пришлось такой хуйнёй страдать.
#[allow(unused_assignments)]
let mut uri = String::new();
let mut counter = 0;
loop {
if counter == 3 {
return Err(Error::EmptyUri);
}
counter += 1;
uri = client
.post(format!( .post(format!(
"https://functions.yandexcloud.net/{}?integration=raw", "https://functions.yandexcloud.net/{}?integration=raw",
func_id func_id
@@ -152,10 +110,21 @@ impl Updater {
.header("Authorization", format!("Api-Key {}", api_key)) .header("Authorization", format!("Api-Key {}", api_key))
.send() .send()
.await .await
.map_err(QueryUrlError::RequestFailed)? .map_err(Error::Reqwest)?
.text() .text()
.await .await
.map_err(QueryUrlError::RequestFailed)?; .map_err(Error::Reqwest)?;
if uri.is_empty() {
log::warn!("[{}] Unable to get uri! Retrying in 5 seconds...", counter);
continue;
}
break;
}
uri
};
Ok(format!("https://politehnikum-eng.ru{}", uri.trim())) Ok(format!("https://politehnikum-eng.ru{}", uri.trim()))
} }
@@ -173,7 +142,7 @@ impl Updater {
/// Returns `Ok(())` if the snapshot was successfully initialized, or an `Error` if: /// Returns `Ok(())` if the snapshot was successfully initialized, or an `Error` if:
/// - URL query to Yandex Cloud failed ([`QueryUrlError`]) /// - URL query to Yandex Cloud failed ([`QueryUrlError`])
/// - Schedule snapshot creation failed ([`SnapshotCreationError`]) /// - Schedule snapshot creation failed ([`SnapshotCreationError`])
pub async fn new(update_source: UpdateSource) -> Result<(Self, ScheduleSnapshot), Error> { pub async fn new(update_source: UpdateSource) -> Result<(Self, ScheduleSnapshot)> {
let mut this = Updater { let mut this = Updater {
downloader: XlsDownloader::new(), downloader: XlsDownloader::new(),
update_source, update_source,
@@ -194,19 +163,14 @@ impl Updater {
yandex_func_id, yandex_func_id,
} => { } => {
log::info!("Obtaining a link using FaaS..."); log::info!("Obtaining a link using FaaS...");
Self::query_url(yandex_api_key, yandex_func_id) Self::query_url(yandex_api_key, yandex_func_id).await?
.await
.map_err(Error::QueryUrlFailed)?
} }
_ => unreachable!(), _ => unreachable!(),
}; };
log::info!("For the initial setup, a link {} will be used", url); log::info!("For the initial setup, a link {} will be used", url);
let snapshot = Self::new_snapshot(&mut this.downloader, url) let snapshot = Self::new_snapshot(&mut this.downloader, url).await?;
.await
.map_err(Error::SnapshotCreationFailed)?;
log::info!("Schedule snapshot successfully created!"); log::info!("Schedule snapshot successfully created!");
Ok((this, snapshot)) Ok((this, snapshot))
@@ -229,7 +193,7 @@ impl Updater {
pub async fn update( pub async fn update(
&mut self, &mut self,
current_snapshot: &ScheduleSnapshot, current_snapshot: &ScheduleSnapshot,
) -> Result<ScheduleSnapshot, Error> { ) -> Result<ScheduleSnapshot> {
if let UpdateSource::Prepared(snapshot) = &self.update_source { if let UpdateSource::Prepared(snapshot) = &self.update_source {
let mut snapshot = snapshot.clone(); let mut snapshot = snapshot.clone();
snapshot.update(); snapshot.update();
@@ -241,21 +205,19 @@ impl Updater {
UpdateSource::GrabFromSite { UpdateSource::GrabFromSite {
yandex_api_key, yandex_api_key,
yandex_func_id, yandex_func_id,
} => Self::query_url(yandex_api_key.as_str(), yandex_func_id.as_str()) } => Self::query_url(yandex_api_key.as_str(), yandex_func_id.as_str()).await?,
.await
.map_err(Error::QueryUrlFailed)?,
_ => unreachable!(), _ => unreachable!(),
}; };
let snapshot = match Self::new_snapshot(&mut self.downloader, url).await { let snapshot = match Self::new_snapshot(&mut self.downloader, url).await {
Ok(snapshot) => snapshot, Ok(snapshot) => snapshot,
Err(SnapshotCreationError::SameUrl) => { Err(Error::SameETag) => {
let mut clone = current_snapshot.clone(); let mut clone = current_snapshot.clone();
clone.update(); clone.update();
clone clone
} }
Err(error) => return Err(Error::SnapshotCreationFailed(error)), Err(error) => return Err(error),
}; };
Ok(snapshot) Ok(snapshot)

View File

@@ -14,7 +14,7 @@ pub enum FetchError {
/// Unknown error. /// Unknown error.
#[display("An unknown error occurred while downloading the file.")] #[display("An unknown error occurred while downloading the file.")]
#[schema(value_type = String)] #[schema(value_type = String)]
Unknown(Arc<reqwest::Error>), Reqwest(Arc<reqwest::Error>),
/// Server returned a status code different from 200. /// Server returned a status code different from 200.
#[display("Server returned a status code {status_code}.")] #[display("Server returned a status code {status_code}.")]
@@ -31,7 +31,7 @@ pub enum FetchError {
impl FetchError { impl FetchError {
pub fn unknown(error: Arc<reqwest::Error>) -> Self { pub fn unknown(error: Arc<reqwest::Error>) -> Self {
Self::Unknown(error) Self::Reqwest(error)
} }
pub fn bad_status_code(status_code: u16) -> Self { pub fn bad_status_code(status_code: u16) -> Self {
@@ -66,25 +66,30 @@ pub struct FetchOk {
/// Date data received. /// Date data received.
pub requested_at: DateTime<Utc>, pub requested_at: DateTime<Utc>,
/// Etag.
pub etag: String,
/// File data. /// File data.
pub data: Option<Vec<u8>>, pub data: Option<Vec<u8>>,
} }
impl FetchOk { impl FetchOk {
/// Result without file content. /// Result without file content.
pub fn head(uploaded_at: DateTime<Utc>) -> Self { pub fn head(uploaded_at: DateTime<Utc>, etag: String) -> Self {
FetchOk { FetchOk {
uploaded_at, uploaded_at,
requested_at: Utc::now(), requested_at: Utc::now(),
etag,
data: None, data: None,
} }
} }
/// Full result. /// Full result.
pub fn get(uploaded_at: DateTime<Utc>, data: Vec<u8>) -> Self { pub fn get(uploaded_at: DateTime<Utc>, etag: String, data: Vec<u8>) -> Self {
FetchOk { FetchOk {
uploaded_at, uploaded_at,
requested_at: Utc::now(), requested_at: Utc::now(),
etag,
data: Some(data), data: Some(data),
} }
} }
@@ -94,11 +99,15 @@ pub type FetchResult = Result<FetchOk, FetchError>;
pub struct XlsDownloader { pub struct XlsDownloader {
pub url: Option<String>, pub url: Option<String>,
pub etag: Option<String>,
} }
impl XlsDownloader { impl XlsDownloader {
pub fn new() -> Self { pub fn new() -> Self {
XlsDownloader { url: None } XlsDownloader {
url: None,
etag: None,
}
} }
async fn fetch_specified(url: &str, head: bool) -> FetchResult { async fn fetch_specified(url: &str, head: bool) -> FetchResult {
@@ -124,9 +133,12 @@ impl XlsDownloader {
.get("Content-Type") .get("Content-Type")
.ok_or(FetchError::bad_headers("Content-Type"))?; .ok_or(FetchError::bad_headers("Content-Type"))?;
if !headers.contains_key("etag") { let etag = headers
return Err(FetchError::bad_headers("etag")); .get("etag")
} .ok_or(FetchError::bad_headers("etag"))?
.to_str()
.or(Err(FetchError::bad_headers("etag")))?
.to_string();
let last_modified = headers let last_modified = headers
.get("last-modified") .get("last-modified")
@@ -141,9 +153,13 @@ impl XlsDownloader {
.with_timezone(&Utc); .with_timezone(&Utc);
Ok(if head { Ok(if head {
FetchOk::head(last_modified) FetchOk::head(last_modified, etag)
} else { } else {
FetchOk::get(last_modified, response.bytes().await.unwrap().to_vec()) FetchOk::get(
last_modified,
etag,
response.bytes().await.unwrap().to_vec(),
)
}) })
} }

View File

@@ -2,16 +2,6 @@ use jsonwebtoken::errors::ErrorKind;
 use jsonwebtoken::{Algorithm, DecodingKey, Validation, decode};
 use serde::{Deserialize, Serialize};

-#[derive(Deserialize, Serialize)]
-struct TokenData {
-    iis: String,
-    sub: i32,
-    app: i32,
-    exp: i32,
-    iat: i32,
-    jti: i32,
-}
-
 #[derive(Debug, Serialize, Deserialize)]
 struct Claims {
     sub: i32,

View File

@@ -185,7 +185,7 @@ mod tests {
             id: Set(id.clone()),
             username: Set(username),
             password: Set(Some(
-                bcrypt::hash("example".to_string(), bcrypt::DEFAULT_COST).unwrap(),
+                bcrypt::hash("example", bcrypt::DEFAULT_COST).unwrap(),
             )),
             vk_id: Set(None),
             telegram_id: Set(None),

View File

@@ -49,10 +49,12 @@ pub async fn telegram_auth(
let web_app_user = let web_app_user =
serde_json::from_str::<WebAppUser>(init_data.data_map.get("user").unwrap()).unwrap(); serde_json::from_str::<WebAppUser>(init_data.data_map.get("user").unwrap()).unwrap();
let user = let user = match Query::find_user_by_telegram_id(app_state.get_database(), web_app_user.id)
match Query::find_user_by_telegram_id(app_state.get_database(), web_app_user.id).await { .await
Ok(Some(value)) => Ok(value), .expect("Failed to find user by telegram id")
_ => { {
Some(value) => value,
None => {
let new_user = ActiveUser { let new_user = ActiveUser {
id: Set(ObjectId::new().unwrap().to_string()), id: Set(ObjectId::new().unwrap().to_string()),
username: Set(format!("telegram_{}", web_app_user.id)), // можно оставить, а можно поменять username: Set(format!("telegram_{}", web_app_user.id)), // можно оставить, а можно поменять
@@ -64,10 +66,12 @@ pub async fn telegram_auth(
android_version: Set(None), android_version: Set(None),
}; };
new_user.insert(app_state.get_database()).await new_user
.insert(app_state.get_database())
.await
.expect("Failed to insert user")
} }
} };
.expect("Failed to get or add user");
let access_token = utility::jwt::encode(&user.id); let access_token = utility::jwt::encode(&user.id);
Ok(Response::new(&access_token, user.group.is_some())).into() Ok(Response::new(&access_token, user.group.is_some())).into()

View File

@@ -2,10 +2,12 @@ mod env;
pub use crate::state::env::AppEnv; pub use crate::state::env::AppEnv;
use actix_web::web; use actix_web::web;
use database::sea_orm::{Database, DatabaseConnection}; use database::migration::{Migrator, MigratorTrait};
use database::sea_orm::{ConnectOptions, Database, DatabaseConnection};
use providers::base::{ScheduleProvider, ScheduleSnapshot}; use providers::base::{ScheduleProvider, ScheduleSnapshot};
use std::collections::HashMap; use std::collections::HashMap;
use std::sync::Arc; use std::sync::Arc;
use std::time::Duration;
use tokio_util::sync::CancellationToken; use tokio_util::sync::CancellationToken;
/// Common data provided to endpoints. /// Common data provided to endpoints.
@@ -55,9 +57,24 @@ impl AppState {
database database
} else { } else {
let database_url = std::env::var("DATABASE_URL").expect("DATABASE_URL must be set"); let database_url = std::env::var("DATABASE_URL").expect("DATABASE_URL must be set");
Database::connect(&database_url)
let mut opt = ConnectOptions::new(database_url.clone());
opt.max_connections(4)
.min_connections(2)
.connect_timeout(Duration::from_secs(10))
.idle_timeout(Duration::from_secs(8))
.sqlx_logging(true);
let database = Database::connect(opt)
.await .await
.unwrap_or_else(|_| panic!("Error connecting to {}", database_url)) .unwrap_or_else(|_| panic!("Error connecting to {}", database_url));
Migrator::up(&database, None)
.await
.expect("Failed to run database migrations");
database
}, },
env, env,
providers, providers,

View File

@@ -24,14 +24,13 @@ static ENCODING_KEY: LazyLock<EncodingKey> = LazyLock::new(|| {
 });

 /// Token verification errors.
-#[allow(dead_code)]
 #[derive(Debug)]
 pub enum Error {
     /// The token has a different signature.
     InvalidSignature,

     /// Token reading error.
-    InvalidToken(ErrorKind),
+    InvalidToken,

     /// Token expired.
     Expired,
@@ -82,7 +81,7 @@ pub fn verify_and_decode(token: &str) -> Result<String, Error> {
         Err(err) => Err(match err.into_kind() {
             ErrorKind::InvalidSignature => Error::InvalidSignature,
             ErrorKind::ExpiredSignature => Error::Expired,
-            kind => Error::InvalidToken(kind),
+            _ => Error::InvalidToken,
         }),
     }
 }
@@ -115,7 +114,7 @@ mod tests {
     fn test_encode() {
         test_env();

-        assert_eq!(encode(&"test".to_string()).is_empty(), false);
+        assert!(!encode("test").is_empty());
     }

     #[test]
@@ -128,7 +127,7 @@
         assert!(result.is_err());
         assert_eq!(
             result.err().unwrap(),
-            Error::InvalidToken(ErrorKind::InvalidToken)
+            Error::InvalidToken
         );
     }