diff --git a/.idea/schedule-parser-rusted.iml b/.idea/schedule-parser-rusted.iml index 9084b1e..23d1b34 100644 --- a/.idea/schedule-parser-rusted.iml +++ b/.idea/schedule-parser-rusted.iml @@ -4,9 +4,10 @@ - + + diff --git a/Cargo.lock b/Cargo.lock index ce5a742..46e4ed1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -696,6 +696,15 @@ version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1cb3c4a0d3776f7535c32793be81d6d5fec0d48ac70955d9834e643aa249a52f" +[[package]] +name = "convert_case" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb402b8d4c85569410425650ce3eddc7d698ed96d39a73f941b08fb63082f1e7" +dependencies = [ + "unicode-segmentation", +] + [[package]] name = "cookie" version = "0.16.2" @@ -753,25 +762,22 @@ dependencies = [ [[package]] name = "criterion" -version = "0.5.1" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f" +checksum = "3bf7af66b0989381bd0be551bd7cc91912a655a58c6918420c9527b1fd8b4679" dependencies = [ "anes", "cast", "ciborium", "clap", "criterion-plot", - "is-terminal", - "itertools", + "itertools 0.13.0", "num-traits", - "once_cell", "oorandom", "plotters", "rayon", "regex", "serde", - "serde_derive", "serde_json", "tinytemplate", "walkdir", @@ -784,7 +790,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" dependencies = [ "cast", - "itertools", + "itertools 0.10.5", ] [[package]] @@ -918,6 +924,7 @@ version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bda628edc44c4bb645fbe0f758797143e4e07926f7ebf4e9bdfbd3d2ce621df3" dependencies = [ + "convert_case", "proc-macro2", "quote", "syn 2.0.100", @@ -1404,12 +1411,6 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" -[[package]] -name = "hermit-abi" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fbd780fe5cc30f81464441920d82ac8740e2e46b29a6fad543ddd075229ce37e" - [[package]] name = "hex" version = "0.4.3" @@ -1848,17 +1849,6 @@ version = "2.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "469fb0b9cefa57e3ef31275ee7cacb78f2fdca44e4765491884a2b119d4eb130" -[[package]] -name = "is-terminal" -version = "0.4.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e04d7f318608d35d4b61ddd75cbdaee86b023ebe2bd5a66ee0915f0bf93095a9" -dependencies = [ - "hermit-abi", - "libc", - "windows-sys 0.59.0", -] - [[package]] name = "is_terminal_polyfill" version = "1.70.1" @@ -1874,6 +1864,15 @@ dependencies = [ "either", ] +[[package]] +name = "itertools" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" +dependencies = [ + "either", +] + [[package]] name = "itoa" version = "1.0.15" @@ -2396,7 +2395,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a56d757972c98b346a9b766e3f02746cde6dd1cd1d1d563472929fdd74bec4d" dependencies = [ "anyhow", - "itertools", + "itertools 0.10.5", "proc-macro2", "quote", "syn 2.0.100", @@ -2441,7 +2440,7 @@ dependencies = [ "rustc-hash", "rustls", "socket2", - "thiserror 2.0.12", + "thiserror", "tokio", "tracing", "web-time", @@ -2461,7 +2460,7 @@ dependencies = [ "rustls", "rustls-pki-types", "slab", - "thiserror 2.0.12", + "thiserror", "tinyvec", "tracing", "web-time", @@ -2874,6 +2873,21 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "schedule-parser" +version = "0.1.0" +dependencies = [ + "calamine", + "chrono", + "criterion", + "derive_more", + "fuzzy-matcher", + "regex", + "serde", + "serde_repr", + "utoipa", +] + [[package]] name = "schedule-parser-rusted" version = "1.0.3" @@ -2882,9 +2896,7 @@ dependencies = [ "actix-test", "actix-web", "bcrypt", - "calamine", "chrono", - "criterion", "derive_more", "diesel", "diesel-derive-enum", @@ -2892,19 +2904,17 @@ dependencies = [ "env_logger", "firebase-messaging-rs", "futures-util", - "fuzzy-matcher", "hex", "jsonwebtoken", "mime", "objectid", "rand 0.9.0", - "regex", "reqwest", + "schedule-parser", "sentry", "sentry-actix", "serde", "serde_json", - "serde_repr", "serde_with", "sha1 0.11.0-pre.5", "tokio", @@ -2977,13 +2987,14 @@ checksum = "56e6fa9c48d24d85fb3de5ad847117517440f6beceb7798af16b4a87d616b8d0" [[package]] name = "sentry" -version = "0.37.0" +version = "0.38.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "255914a8e53822abd946e2ce8baa41d4cded6b8e938913b7f7b9da5b7ab44335" +checksum = "a505499b38861edd82b5a688fa06ba4ba5875bb832adeeeba22b7b23fc4bc39a" dependencies = [ "httpdate", "native-tls", "reqwest", + "sentry-actix", "sentry-backtrace", "sentry-contexts", "sentry-core", @@ -2996,9 +3007,9 @@ dependencies = [ [[package]] name = "sentry-actix" -version = "0.37.0" +version = "0.38.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a927aed43cce0e9240f7477ac81cdfa2ffb048e0e2b17000eb5976e14f063993" +checksum = "39ad8bfdcfbc6e0d0dacaa5728555085ef459fa9226cfc2fe64eefa4b8038b7f" dependencies = [ "actix-http", "actix-web", @@ -3009,21 +3020,20 @@ dependencies = [ [[package]] name = "sentry-backtrace" -version = "0.37.0" +version = "0.38.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "00293cd332a859961f24fd69258f7e92af736feaeb91020cff84dac4188a4302" +checksum = "8dace796060e4ad10e3d1405b122ae184a8b2e71dce05ae450e4f81b7686b0d9" dependencies = [ "backtrace", - "once_cell", "regex", "sentry-core", ] [[package]] name = "sentry-contexts" -version = "0.37.0" +version = "0.38.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "961990f9caa76476c481de130ada05614cd7f5aa70fb57c2142f0e09ad3fb2aa" +checksum = "87bd9e6b51ffe2bc7188ebe36cb67557cb95749c08a3f81f33e8c9b135e0d1bc" dependencies = [ "hostname 0.4.1", "libc", @@ -3035,12 +3045,11 @@ dependencies = [ [[package]] name = "sentry-core" -version = "0.37.0" +version = "0.38.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a6409d845707d82415c800290a5d63be5e3df3c2e417b0997c60531dfbd35ef" +checksum = "7426d4beec270cfdbb50f85f0bb2ce176ea57eed0b11741182a163055a558187" dependencies = [ - "once_cell", - "rand 0.8.5", + "rand 0.9.0", "sentry-types", "serde", "serde_json", @@ -3048,20 +3057,19 @@ dependencies = [ [[package]] name = "sentry-debug-images" -version = "0.37.0" +version = "0.38.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "71ab5df4f3b64760508edfe0ba4290feab5acbbda7566a79d72673065888e5cc" +checksum = "9df15c066c04f34c4dfd496a8e76590106b93283f72ef1a47d8fb24d88493424" dependencies = [ "findshlibs", - "once_cell", "sentry-core", ] [[package]] name = "sentry-panic" -version = "0.37.0" +version = "0.38.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "609b1a12340495ce17baeec9e08ff8ed423c337c1a84dffae36a178c783623f3" +checksum = "c92beed69b776a162b6d269bef1eaa3e614090b6df45a88d9b239c4fdbffdfba" dependencies = [ "sentry-backtrace", "sentry-core", @@ -3069,9 +3077,9 @@ dependencies = [ [[package]] name = "sentry-tracing" -version = "0.37.0" +version = "0.38.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49f4e86402d5c50239dc7d8fd3f6d5e048221d5fcb4e026d8d50ab57fe4644cb" +checksum = "55c323492795de90824f3198562e33dd74ae3bc852fbb13c0cabec54a1cf73cd" dependencies = [ "sentry-backtrace", "sentry-core", @@ -3081,16 +3089,16 @@ dependencies = [ [[package]] name = "sentry-types" -version = "0.37.0" +version = "0.38.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d3f117b8755dbede8260952de2aeb029e20f432e72634e8969af34324591631" +checksum = "04b6c9287202294685cb1f749b944dbbce8160b81a1061ecddc073025fed129f" dependencies = [ "debugid", "hex", - "rand 0.8.5", + "rand 0.9.0", "serde", "serde_json", - "thiserror 1.0.69", + "thiserror", "time 0.3.40", "url", "uuid", @@ -3232,7 +3240,7 @@ checksum = "297f631f50729c8c99b84667867963997ec0b50f32b2a7dbcab828ef0541e8bb" dependencies = [ "num-bigint", "num-traits", - "thiserror 2.0.12", + "thiserror", "time 0.3.40", ] @@ -3355,33 +3363,13 @@ dependencies = [ "windows-sys 0.59.0", ] -[[package]] -name = "thiserror" -version = "1.0.69" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" -dependencies = [ - "thiserror-impl 1.0.69", -] - [[package]] name = "thiserror" version = "2.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "567b8a2dae586314f7be2a752ec7474332959c6460e02bde30d702a66d488708" dependencies = [ - "thiserror-impl 2.0.12", -] - -[[package]] -name = "thiserror-impl" -version = "1.0.69" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.100", + "thiserror-impl", ] [[package]] @@ -3722,6 +3710,12 @@ version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" +[[package]] +name = "unicode-segmentation" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" + [[package]] name = "unicode-xid" version = "0.2.6" @@ -4367,7 +4361,7 @@ dependencies = [ "flate2", "indexmap 2.8.0", "memchr", - "thiserror 2.0.12", + "thiserror", "zopfli", ] diff --git a/Cargo.toml b/Cargo.toml index 43ae97b..d81a0c6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,5 +1,5 @@ [workspace] -members = ["actix-macros", "actix-test"] +members = ["actix-macros", "actix-test", "schedule-parser"] [package] name = "schedule-parser-rusted" @@ -13,29 +13,26 @@ debug = true [dependencies] actix-web = "4.10.2" actix-macros = { path = "actix-macros" } +schedule-parser = { path = "schedule-parser", features = ["test-utils"] } bcrypt = "0.17.0" -calamine = "0.26.1" chrono = { version = "0.4.40", features = ["serde"] } -derive_more = "2.0.1" +derive_more = { version = "2", features = ["full"] } diesel = { version = "2.2.8", features = ["postgres"] } diesel-derive-enum = { git = "https://github.com/Havunen/diesel-derive-enum.git", features = ["postgres"] } dotenvy = "0.15.7" env_logger = "0.11.7" firebase-messaging-rs = { git = "https://github.com/i10416/firebase-messaging-rs.git" } futures-util = "0.3.31" -fuzzy-matcher = "0.3.7" jsonwebtoken = { version = "9.3.1", features = ["use_pem"] } hex = "0.4.3" mime = "0.3.17" objectid = "0.2.0" -regex = "1.11.1" reqwest = { version = "0.12.15", features = ["json"] } -sentry = "0.37.0" -sentry-actix = "0.37.0" +sentry = "0.38" +sentry-actix = "0.38" serde = { version = "1.0.219", features = ["derive"] } serde_json = "1.0.140" serde_with = "3.12.0" -serde_repr = "0.1.20" sha1 = "0.11.0-pre.5" tokio = { version = "1.44.1", features = ["macros", "rt-multi-thread"] } rand = "0.9.0" @@ -46,8 +43,3 @@ uuid = { version = "1.16.0", features = ["v4"] } [dev-dependencies] actix-test = { path = "actix-test" } -criterion = "0.5.1" - -[[bench]] -name = "parse" -harness = false \ No newline at end of file diff --git a/schedule-parser/Cargo.toml b/schedule-parser/Cargo.toml new file mode 100644 index 0000000..381f74f --- /dev/null +++ b/schedule-parser/Cargo.toml @@ -0,0 +1,24 @@ +[package] +name = "schedule-parser" +version = "0.1.0" +edition = "2024" + +[features] +test-utils = [] + +[dependencies] +calamine = "0.26" +chrono = { version = "0.4", features = ["serde"] } +derive_more = { version = "2", features = ["full"] } +serde = { version = "1.0.219", features = ["derive"] } +serde_repr = "0.1.20" +fuzzy-matcher = "0.3.7" +regex = "1.11.1" +utoipa = { version = "5", features = ["chrono"] } + +[dev-dependencies] +criterion = "0.6" + +[[bench]] +name = "parse" +harness = false \ No newline at end of file diff --git a/benches/parse.rs b/schedule-parser/benches/parse.rs similarity index 68% rename from benches/parse.rs rename to schedule-parser/benches/parse.rs index 91d5f8f..2889f6a 100644 --- a/benches/parse.rs +++ b/schedule-parser/benches/parse.rs @@ -1,9 +1,9 @@ use criterion::{Criterion, criterion_group, criterion_main}; -use schedule_parser_rusted::parser::parse_xls; +use schedule_parser::parse_xls; pub fn bench_parse_xls(c: &mut Criterion) { - let buffer: Vec = include_bytes!("../schedule.xls").to_vec(); + let buffer: Vec = include_bytes!("../../schedule.xls").to_vec(); c.bench_function("parse_xls", |b| b.iter(|| parse_xls(&buffer).unwrap())); } diff --git a/src/parser/mod.rs b/schedule-parser/src/lib.rs similarity index 59% rename from src/parser/mod.rs rename to schedule-parser/src/lib.rs index 550d9f2..698a2d4 100644 --- a/src/parser/mod.rs +++ b/schedule-parser/src/lib.rs @@ -1,7 +1,7 @@ -use crate::parser::LessonParseResult::{Lessons, Street}; -use crate::parser::schema::LessonType::Break; -use crate::parser::schema::{ - Day, ErrorCell, ErrorCellPos, Lesson, LessonSubGroup, LessonTime, LessonType, ParseError, +use crate::LessonParseResult::{Lessons, Street}; +use crate::schema::LessonType::Break; +use crate::schema::{ + Day, ErrorCell, ErrorCellPos, Lesson, LessonBoundaries, LessonSubGroup, LessonType, ParseError, ParseResult, ScheduleEntry, }; use calamine::{Reader, Xls, open_workbook_from_rs}; @@ -11,15 +11,14 @@ use fuzzy_matcher::skim::SkimMatcherV2; use regex::Regex; use std::collections::HashMap; use std::io::Cursor; +use std::ops::Deref; use std::sync::LazyLock; +mod macros; pub mod schema; -/// Data cell storing the line. -struct InternalId { - /// Line index. - row: u32, - +/// Data cell storing the group name. +struct GroupCellInfo { /// Column index. column: u32, @@ -27,10 +26,25 @@ struct InternalId { name: String, } +/// Data cell storing the line. +struct DayCellInfo { + /// Line index. + row: u32, + + /// Column index. + column: u32, + + /// Day name. + name: String, + + /// Date of the day. + date: DateTime, +} + /// Data on the time of lessons from the second column of the schedule. -struct InternalTime { +struct BoundariesCellInfo { /// Temporary segment of the lesson. - time_range: LessonTime, + time_range: LessonBoundaries, /// Type of lesson. lesson_type: LessonType, @@ -108,58 +122,58 @@ fn get_merge_from_start(worksheet: &WorkSheet, row: u32, column: u32) -> ((u32, } /// Obtaining a "skeleton" schedule from the working sheet. -fn parse_skeleton(worksheet: &WorkSheet) -> Result<(Vec, Vec), ParseError> { - let range = &worksheet; +fn parse_skeleton( + worksheet: &WorkSheet, +) -> Result<(Vec, Vec), ParseError> { + let mut groups: Vec = Vec::new(); + let mut days: Vec = Vec::new(); - let mut is_parsed = false; + let worksheet_start = worksheet.start().ok_or(ParseError::UnknownWorkSheetRange)?; + let worksheet_end = worksheet.end().ok_or(ParseError::UnknownWorkSheetRange)?; - let mut groups: Vec = Vec::new(); - let mut days: Vec = Vec::new(); + let mut row = worksheet_start.0; - let start = range.start().ok_or(ParseError::UnknownWorkSheetRange)?; - let end = range.end().ok_or(ParseError::UnknownWorkSheetRange)?; - - let mut row = start.0; - while row < end.0 { + while row < worksheet_end.0 { row += 1; - let day_name_opt = get_string_from_cell(&worksheet, row, 0); - if day_name_opt.is_none() { - continue; - } - - let day_name = day_name_opt.unwrap(); - - if !is_parsed { - is_parsed = true; + let day_full_name = or_continue!(get_string_from_cell(&worksheet, row, 0)); + // parse groups row when days column will found + if groups.is_empty() { + // переход на предыдущую строку row -= 1; - for column in (start.1 + 2)..=end.1 { - let group_name = get_string_from_cell(&worksheet, row, column); - if group_name.is_none() { - continue; - } - - groups.push(InternalId { - row, + for column in (worksheet_start.1 + 2)..=worksheet_end.1 { + groups.push(GroupCellInfo { column, - name: group_name.unwrap(), + name: or_continue!(get_string_from_cell(&worksheet, row, column)), }); } + // возврат на текущую строку row += 1; } - days.push(InternalId { + let (day_name, day_date) = { + let space_index = day_full_name.find(' ').unwrap(); + + let name = day_full_name[..space_index].to_string(); + + let date_raw = day_full_name[space_index + 1..].to_string(); + let date_add = format!("{} 00:00:00", date_raw); + + let date = + or_break!(NaiveDateTime::parse_from_str(&*date_add, "%d.%m.%Y %H:%M:%S").ok()); + + (name, date.and_utc()) + }; + + days.push(DayCellInfo { row, column: 0, - name: day_name.clone(), + name: day_name, + date: day_date, }); - - if days.len() > 2 && day_name.starts_with("Суббота") { - break; - } } Ok((days, groups)) @@ -238,106 +252,104 @@ fn guess_lesson_type(name: &String) -> Option<(String, LessonType)> { fn parse_lesson( worksheet: &WorkSheet, day: &mut Day, - day_times: &Vec, - time: &InternalTime, + day_boundaries: &Vec, + lesson_boundaries: &BoundariesCellInfo, column: u32, ) -> Result { - let row = time.xls_range.0.0; + let row = lesson_boundaries.xls_range.0.0; let (name, lesson_type) = { - let raw_name_opt = get_string_from_cell(&worksheet, row, column); - if raw_name_opt.is_none() { - return Ok(Lessons(Vec::new())); - } - - let raw_name = raw_name_opt.unwrap(); + let full_name = match get_string_from_cell(&worksheet, row, column) { + Some(x) => x, + None => return Ok(Lessons(Vec::new())), + }; static OTHER_STREET_RE: LazyLock = LazyLock::new(|| Regex::new(r"^[А-Я][а-я]+,?\s?[0-9]+$").unwrap()); - if OTHER_STREET_RE.is_match(&raw_name) { - return Ok(Street(raw_name)); + if OTHER_STREET_RE.is_match(&full_name) { + return Ok(Street(full_name)); } - if let Some(guess) = guess_lesson_type(&raw_name) { - guess - } else { - (raw_name, time.lesson_type.clone()) + match guess_lesson_type(&full_name) { + Some(x) => x, + None => (full_name, lesson_boundaries.lesson_type.clone()), } }; - let (default_range, lesson_time) = || -> Result<(Option<[u8; 2]>, LessonTime), ParseError> { - // check if multi-lesson + let (default_range, lesson_time) = { let cell_range = get_merge_from_start(worksheet, row, column); - - let end_time_arr = day_times + + let end_time_arr = day_boundaries .iter() .filter(|time| time.xls_range.1.0 == cell_range.1.0) - .collect::>(); + .collect::>(); let end_time = end_time_arr .first() .ok_or(ParseError::LessonTimeNotFound(ErrorCellPos { row, column }))?; - let range: Option<[u8; 2]> = if time.default_index != None { - let default = time.default_index.unwrap() as u8; + let range: Option<[u8; 2]> = if lesson_boundaries.default_index != None { + let default = lesson_boundaries.default_index.unwrap() as u8; Some([default, end_time.default_index.unwrap() as u8]) } else { None }; - let time = LessonTime { - start: time.time_range.start, + let time = LessonBoundaries { + start: lesson_boundaries.time_range.start, end: end_time.time_range.end, }; Ok((range, time)) - }()?; + }?; let (name, mut subgroups) = parse_name_and_subgroups(&name)?; { let cabinets: Vec = parse_cabinets(worksheet, row, column + 1); - // Если количество кабинетов равно 1, назначаем этот кабинет всем подгруппам - if cabinets.len() == 1 { - for subgroup in &mut subgroups { - subgroup.cabinet = Some(cabinets.get(0).or(Some(&String::new())).unwrap().clone()) + match cabinets.len() { + // Если кабинетов нет, но есть подгруппы, назначаем им кабинет "??" + 0 => { + for subgroup in &mut subgroups { + subgroup.cabinet = Some("??".to_string()); + } } - } - // Если количество кабинетов совпадает с количеством подгрупп, назначаем кабинеты по порядку - else if cabinets.len() == subgroups.len() { - for subgroup in &mut subgroups { - subgroup.cabinet = Some( - cabinets - .get((subgroup.number - 1) as usize) - .unwrap() - .clone(), - ); - } - } - // Если количество кабинетов больше количества подгрупп, делаем ещё одну подгруппу. - else if cabinets.len() > subgroups.len() { - for index in 0..subgroups.len() { - subgroups[index].cabinet = Some(cabinets[index].clone()); + // Назначаем этот кабинет всем подгруппам + 1 => { + for subgroup in &mut subgroups { + subgroup.cabinet = + Some(cabinets.get(0).or(Some(&String::new())).unwrap().clone()) + } } + len => { + // Если количество кабинетов совпадает с количеством подгрупп, назначаем кабинеты по порядку + if len == subgroups.len() { + for subgroup in &mut subgroups { + subgroup.cabinet = Some( + cabinets + .get((subgroup.number - 1) as usize) + .unwrap() + .clone(), + ); + } + // Если количество кабинетов больше количества подгрупп, делаем ещё одну подгруппу. + } else if len > subgroups.len() { + for index in 0..subgroups.len() { + subgroups[index].cabinet = Some(cabinets[index].clone()); + } - while cabinets.len() > subgroups.len() { - subgroups.push(LessonSubGroup { - number: (subgroups.len() + 1) as u8, - cabinet: Some(cabinets[subgroups.len()].clone()), - teacher: "Ошибка в расписании".to_string(), - }); + while cabinets.len() > subgroups.len() { + subgroups.push(LessonSubGroup { + number: (subgroups.len() + 1) as u8, + cabinet: Some(cabinets[subgroups.len()].clone()), + teacher: "Ошибка в расписании".to_string(), + }); + } + } } - } - // Если кабинетов нет, но есть подгруппы, назначаем им значение "??" - else { - for subgroup in &mut subgroups { - subgroup.cabinet = Some("??".to_string()); - } - } - - cabinets + }; }; let lesson = Lesson { @@ -349,7 +361,7 @@ fn parse_lesson( group: None, }; - let prev_lesson = if day.lessons.len() == 0 { + let prev_lesson = if day.lessons.is_empty() { return Ok(Lessons(Vec::from([lesson]))); } else { &day.lessons[day.lessons.len() - 1] @@ -360,7 +372,7 @@ fn parse_lesson( lesson_type: Break, default_range: None, name: None, - time: LessonTime { + time: LessonBoundaries { start: prev_lesson.time.end, end: lesson.time.start, }, @@ -474,6 +486,122 @@ fn parse_name_and_subgroups(name: &String) -> Result<(String, Vec, +) -> Option { + static TIME_RE: LazyLock = + LazyLock::new(|| Regex::new(r"(\d+\.\d+)-(\d+\.\d+)").unwrap()); + + let parse_res = if let Some(captures) = TIME_RE.captures(cell_data) { + captures + } else { + return None; + }; + + let start_match = parse_res.get(1).unwrap().as_str(); + let start_parts: Vec<&str> = start_match.split(".").collect(); + + let end_match = parse_res.get(2).unwrap().as_str(); + let end_parts: Vec<&str> = end_match.split(".").collect(); + + static GET_TIME: fn(DateTime, &Vec<&str>) -> DateTime = |date, parts| { + date + Duration::hours(parts[0].parse::().unwrap() - 4) + + Duration::minutes(parts[1].parse::().unwrap()) + }; + + Some(LessonBoundaries { + start: GET_TIME(date.clone(), &start_parts), + end: GET_TIME(date, &end_parts), + }) +} + +fn parse_day_boundaries_column( + worksheet: &WorkSheet, + day_markup: &DayCellInfo, + lesson_time_column: u32, + row_distance: u32, +) -> Result, ParseError> { + let mut day_times: Vec = Vec::new(); + + for row in day_markup.row..(day_markup.row + row_distance) { + let time_cell = if let Some(str) = get_string_from_cell(&worksheet, row, lesson_time_column) + { + str + } else { + continue; + }; + + let lesson_time = parse_lesson_boundaries_cell(&time_cell, day_markup.date.clone()).ok_or( + ParseError::LessonBoundaries(ErrorCell::new( + row, + lesson_time_column, + time_cell.clone(), + )), + )?; + + // type + let lesson_type = if time_cell.contains("пара") { + LessonType::Default + } else { + LessonType::Additional + }; + + // lesson index + let default_index = if lesson_type == LessonType::Default { + Some( + time_cell + .chars() + .next() + .unwrap() + .to_string() + .parse::() + .unwrap(), + ) + } else { + None + }; + + day_times.push(BoundariesCellInfo { + time_range: lesson_time, + lesson_type, + default_index, + xls_range: get_merge_from_start(&worksheet, row, lesson_time_column), + }); + } + + return Ok(day_times); +} + +fn parse_week_boundaries_column( + worksheet: &WorkSheet, + week_markup: &Vec, +) -> Result>, ParseError> { + let mut result: Vec> = Vec::new(); + + let worksheet_end_row = worksheet.end().unwrap().0; + let lesson_time_column = week_markup[0].column + 1; + + for day_index in 0..week_markup.len() { + let day_markup = &week_markup[day_index]; + + // Если текущий день не последнему, то индекс строки следующего дня минус индекс строки текущего дня. + // Если текущий день - последний, то индекс последней строки документа минус индекс строки текущего дня. + let row_distance = if day_index != week_markup.len() - 1 { + week_markup[day_index + 1].row + } else { + worksheet_end_row + } - day_markup.row; + + let day_boundaries = + parse_day_boundaries_column(&worksheet, day_markup, lesson_time_column, row_distance)?; + + result.push(day_boundaries); + } + + Ok(result) +} + /// Conversion of the list of couples of groups in the list of lessons of teachers. fn convert_groups_to_teachers( groups: &HashMap, @@ -562,11 +690,11 @@ fn convert_groups_to_teachers( /// # Examples /// /// ``` -/// use schedule_parser_rusted::parser::parse_xls; +/// use schedule_parser::parse_xls; /// /// let result = parse_xls(&include_bytes!("../../schedule.xls").to_vec()); /// -/// assert!(result.is_ok()); +/// assert!(result.is_ok(), "{}", result.err().unwrap()); /// /// assert_ne!(result.as_ref().unwrap().groups.len(), 0); /// assert_ne!(result.as_ref().unwrap().teachers.len(), 0); @@ -583,12 +711,10 @@ pub fn parse_xls(buffer: &Vec) -> Result { .1 .to_owned(); - let (days_markup, groups_markup) = parse_skeleton(&worksheet)?; + let (week_markup, groups_markup) = parse_skeleton(&worksheet)?; + let week_boundaries = parse_week_boundaries_column(&worksheet, &week_markup)?; let mut groups: HashMap = HashMap::new(); - let mut days_times: Vec> = Vec::new(); - - let saturday_end_row = worksheet.end().unwrap().0; for group_markup in groups_markup { let mut group = ScheduleEntry { @@ -596,118 +722,28 @@ pub fn parse_xls(buffer: &Vec) -> Result { days: Vec::new(), }; - for day_index in 0..(&days_markup).len() { - let day_markup = &days_markup[day_index]; + for day_index in 0..(&week_markup).len() { + let day_markup = &week_markup[day_index]; - let mut day = { - let space_index = day_markup.name.find(' ').unwrap(); - - let name = day_markup.name[..space_index].to_string(); - - let date_raw = day_markup.name[space_index + 1..].to_string(); - let date_add = format!("{} 00:00:00", date_raw); - - let date = NaiveDateTime::parse_from_str(&*date_add, "%d.%m.%Y %H:%M:%S"); - - Day { - name, - street: None, - date: date.unwrap().and_utc(), - lessons: Vec::new(), - } + let mut day = Day { + name: day_markup.name.clone(), + street: None, + date: day_markup.date, + lessons: Vec::new(), }; - let lesson_time_column = days_markup[0].column + 1; + let day_boundaries = &week_boundaries[day_index]; - let row_distance = if day_index != days_markup.len() - 1 { - days_markup[day_index + 1].row - } else { - saturday_end_row - } - day_markup.row; - - if days_times.len() != 6 { - let mut day_times: Vec = Vec::new(); - - for row in day_markup.row..(day_markup.row + row_distance) { - // time - let time_opt = get_string_from_cell(&worksheet, row, lesson_time_column); - if time_opt.is_none() { - continue; - } - - let time = time_opt.unwrap(); - - // type - let lesson_type = if time.contains("пара") { - LessonType::Default - } else { - LessonType::Additional - }; - - // lesson index - let default_index = if lesson_type == LessonType::Default { - Some( - time.chars() - .next() - .unwrap() - .to_string() - .parse::() - .unwrap(), - ) - } else { - None - }; - - // time - let time_range = { - static TIME_RE: LazyLock = - LazyLock::new(|| Regex::new(r"(\d+\.\d+)-(\d+\.\d+)").unwrap()); - - let parse_res = TIME_RE.captures(&time).ok_or(ParseError::GlobalTime( - ErrorCell::new(row, lesson_time_column, time.clone()), - ))?; - - let start_match = parse_res.get(1).unwrap().as_str(); - let start_parts: Vec<&str> = start_match.split(".").collect(); - - let end_match = parse_res.get(2).unwrap().as_str(); - let end_parts: Vec<&str> = end_match.split(".").collect(); - - static GET_TIME: fn(DateTime, &Vec<&str>) -> DateTime = - |date, parts| { - date + Duration::hours(parts[0].parse::().unwrap() - 4) - + Duration::minutes(parts[1].parse::().unwrap()) - }; - - LessonTime { - start: GET_TIME(day.date.clone(), &start_parts), - end: GET_TIME(day.date.clone(), &end_parts), - } - }; - - day_times.push(InternalTime { - time_range, - lesson_type, - default_index, - xls_range: get_merge_from_start(&worksheet, row, lesson_time_column), - }); - } - - days_times.push(day_times); - } - - let day_times = &days_times[day_index]; - - for time in day_times { + for lesson_boundaries in day_boundaries { match &mut parse_lesson( &worksheet, &mut day, - &day_times, - &time, + &day_boundaries, + &lesson_boundaries, group_markup.column, )? { - Lessons(l) => day.lessons.append(l), - Street(s) => day.street = Some(s.to_owned()), + Lessons(lesson) => day.lessons.append(lesson), + Street(street) => day.street = Some(street.to_owned()), } } @@ -723,21 +759,39 @@ pub fn parse_xls(buffer: &Vec) -> Result { }) } -#[cfg(test)] -pub mod tests { +#[cfg(any(test, feature = "test-utils"))] +pub mod test_utils { use super::*; pub fn test_result() -> Result { parse_xls(&include_bytes!("../../schedule.xls").to_vec()) } +} +#[cfg(test)] +pub mod tests { #[test] fn read() { - let result = test_result(); + let result = super::test_utils::test_result(); - assert!(result.is_ok()); + assert!(result.is_ok(), "{}", result.err().unwrap()); assert_ne!(result.as_ref().unwrap().groups.len(), 0); assert_ne!(result.as_ref().unwrap().teachers.len(), 0); } + + #[test] + fn test_split_lesson() { + let result = super::test_utils::test_result(); + assert!(result.is_ok(), "{}", result.err().unwrap()); + + let result = result.unwrap(); + assert!(result.groups.contains_key("ИС-214/23")); + + let group = result.groups.get("ИС-214/23").unwrap(); + let thursday = group.days.get(3).unwrap(); + + assert_eq!(thursday.lessons.len(), 1); + assert_eq!(thursday.lessons[0].default_range.unwrap()[1], 3); + } } diff --git a/schedule-parser/src/macros.rs b/schedule-parser/src/macros.rs new file mode 100644 index 0000000..460421d --- /dev/null +++ b/schedule-parser/src/macros.rs @@ -0,0 +1,25 @@ +#[macro_export] +macro_rules! or_continue { + ( $e:expr ) => { + { + if let Some(x) = $e { + x + } else { + continue; + } + } + } +} + +#[macro_export] +macro_rules! or_break { + ( $e:expr ) => { + { + if let Some(x) = $e { + x + } else { + break; + } + } + } +} \ No newline at end of file diff --git a/src/parser/schema.rs b/schedule-parser/src/schema.rs similarity index 94% rename from src/parser/schema.rs rename to schedule-parser/src/schema.rs index 526d1c9..296340c 100644 --- a/src/parser/schema.rs +++ b/schedule-parser/src/schema.rs @@ -8,7 +8,7 @@ use utoipa::ToSchema; /// The beginning and end of the lesson. #[derive(Clone, Hash, Debug, Serialize, Deserialize, ToSchema)] -pub struct LessonTime { +pub struct LessonBoundaries { /// The beginning of a lesson. pub start: DateTime, @@ -72,7 +72,7 @@ pub struct Lesson { pub name: Option, /// The beginning and end. - pub time: LessonTime, + pub time: LessonBoundaries, /// List of subgroups. #[serde(rename = "subGroups")] @@ -153,9 +153,9 @@ pub enum ParseError { #[display("There is no data on work sheet boundaries.")] UnknownWorkSheetRange, - /// Failed to read the beginning and end of the lesson from the line - #[display("Failed to read lesson start and end times from {_0}.")] - GlobalTime(ErrorCell), + /// Failed to read the beginning and end of the lesson from the cell + #[display("Failed to read lesson start and end from {_0}.")] + LessonBoundaries(ErrorCell), /// Not found the beginning and the end corresponding to the lesson. #[display("No start and end times matching the lesson (at {_0}) was found.")] @@ -173,7 +173,7 @@ impl Serialize for ParseError { ParseError::UnknownWorkSheetRange => { serializer.serialize_str("UNKNOWN_WORK_SHEET_RANGE") } - ParseError::GlobalTime(_) => serializer.serialize_str("GLOBAL_TIME"), + ParseError::LessonBoundaries(_) => serializer.serialize_str("GLOBAL_TIME"), ParseError::LessonTimeNotFound(_) => serializer.serialize_str("LESSON_TIME_NOT_FOUND"), } } diff --git a/src/app_state.rs b/src/app_state.rs index bd7b4e3..ecdde37 100644 --- a/src/app_state.rs +++ b/src/app_state.rs @@ -1,4 +1,4 @@ -use crate::parser::schema::ParseResult; +use schedule_parser::schema::ParseResult; use crate::utility::hasher::DigestHasher; use crate::xls_downloader::basic_impl::BasicXlsDownloader; use actix_web::web; diff --git a/src/lib.rs b/src/lib.rs deleted file mode 100644 index b2819a7..0000000 --- a/src/lib.rs +++ /dev/null @@ -1 +0,0 @@ -pub mod parser; \ No newline at end of file diff --git a/src/main.rs b/src/main.rs index be9844d..6c17a20 100644 --- a/src/main.rs +++ b/src/main.rs @@ -13,7 +13,6 @@ mod app_state; mod database; -mod parser; mod xls_downloader; mod extractors; diff --git a/src/routes/schedule/group.rs b/src/routes/schedule/group.rs index dc6081b..90a4015 100644 --- a/src/routes/schedule/group.rs +++ b/src/routes/schedule/group.rs @@ -39,7 +39,7 @@ pub async fn group(user: SyncExtractor, app_state: web::Data) -> } mod schema { - use crate::parser::schema::ScheduleEntry; + use schedule_parser::schema::ScheduleEntry; use actix_macros::{IntoResponseErrorNamed, StatusCode}; use chrono::{DateTime, NaiveDateTime, Utc}; use derive_more::Display; diff --git a/src/routes/schedule/schema.rs b/src/routes/schedule/schema.rs index 4842c0a..b279661 100644 --- a/src/routes/schedule/schema.rs +++ b/src/routes/schedule/schema.rs @@ -1,5 +1,5 @@ use crate::app_state::{AppState, Schedule}; -use crate::parser::schema::ScheduleEntry; +use schedule_parser::schema::ScheduleEntry; use actix_macros::{IntoResponseErrorNamed, ResponderJson, StatusCode}; use actix_web::web; use chrono::{DateTime, Duration, Utc}; diff --git a/src/routes/schedule/teacher.rs b/src/routes/schedule/teacher.rs index 318d5e6..b8fb7a5 100644 --- a/src/routes/schedule/teacher.rs +++ b/src/routes/schedule/teacher.rs @@ -40,7 +40,7 @@ pub async fn teacher( } mod schema { - use crate::parser::schema::ScheduleEntry; + use schedule_parser::schema::ScheduleEntry; use actix_macros::{IntoResponseErrorNamed, StatusCode}; use chrono::{DateTime, NaiveDateTime, Utc}; use derive_more::Display; diff --git a/src/routes/schedule/update_download_url.rs b/src/routes/schedule/update_download_url.rs index 4bb8e48..f5e5504 100644 --- a/src/routes/schedule/update_download_url.rs +++ b/src/routes/schedule/update_download_url.rs @@ -1,7 +1,7 @@ use self::schema::*; use crate::AppState; use crate::app_state::Schedule; -use crate::parser::parse_xls; +use schedule_parser::parse_xls; use crate::routes::schedule::schema::CacheStatus; use crate::routes::schema::{IntoResponseAsError, ResponseError}; use crate::xls_downloader::interface::{FetchError, XLSDownloader}; @@ -79,7 +79,7 @@ pub async fn update_download_url( } mod schema { - use crate::parser::schema::ParseError; + use schedule_parser::schema::ParseError; use crate::routes::schedule::schema::CacheStatus; use actix_macros::{IntoResponseErrorNamed, StatusCode}; use derive_more::Display; diff --git a/src/test_env.rs b/src/test_env.rs index d4b634f..caa1104 100644 --- a/src/test_env.rs +++ b/src/test_env.rs @@ -1,7 +1,7 @@ #[cfg(test)] pub(crate) mod tests { use crate::app_state::{AppState, Schedule, app_state}; - use crate::parser::tests::test_result; + use schedule_parser::test_utils::test_result; use crate::utility::mutex::MutexScope; use actix_web::web; use std::default::Default;