From 5e39fc9acca4a694dfd5f8fb6ed163fe89ad18ea Mon Sep 17 00:00:00 2001 From: n08i40k Date: Tue, 2 Sep 2025 08:54:22 +0400 Subject: [PATCH] feat(schedule)!: move schedule parser, downloader, and updater to external library This can be used to support more schedule formats in the future. --- .idea/schedule-parser-rusted.iml | 3 + Cargo.lock | 480 ++++++------------ Cargo.toml | 11 +- providers/Cargo.toml | 12 + providers/base/Cargo.toml | 17 + {src/utility => providers/base/src}/hasher.rs | 0 providers/base/src/lib.rs | 289 +++++++++++ .../provider-engels-polytechnic/Cargo.toml | 32 ++ .../provider-engels-polytechnic/src/lib.rs | 85 ++++ .../src/parser}/macros.rs | 0 .../src/parser/mod.rs | 180 +++++-- .../src/parser}/worksheet.rs | 0 .../src/updater.rs | 263 ++++++++++ .../src/xls_downloader.rs | 237 +++++++++ providers/src/lib.rs | 9 + schedule-parser/Cargo.toml | 27 - schedule-parser/benches/parse.rs | 12 - schedule-parser/src/schema.rs | 227 --------- src/extractors/authorized_user.rs | 4 +- src/main.rs | 2 - src/routes/auth/sign_up.rs | 3 +- src/routes/flow/telegram_complete.rs | 3 +- src/routes/schedule/group.rs | 3 +- src/routes/schedule/group_names.rs | 3 +- src/routes/schedule/schema.rs | 19 +- src/routes/schedule/teacher.rs | 5 +- src/routes/schedule/teacher_names.rs | 3 +- src/routes/users/change_group.rs | 3 +- src/state/env/mod.rs | 6 + src/state/env/schedule.rs | 2 + src/state/mod.rs | 91 ++-- src/state/schedule.rs | 290 ----------- src/test_env.rs | 13 +- src/xls_downloader/basic_impl.rs | 199 -------- src/xls_downloader/interface.rs | 100 ---- src/xls_downloader/mod.rs | 2 - .../engels-polytechnic.xls | Bin 37 files changed, 1364 insertions(+), 1271 deletions(-) create mode 100644 providers/Cargo.toml create mode 100644 providers/base/Cargo.toml rename {src/utility => providers/base/src}/hasher.rs (100%) create mode 100644 providers/base/src/lib.rs create mode 100644 providers/provider-engels-polytechnic/Cargo.toml create mode 100644 providers/provider-engels-polytechnic/src/lib.rs rename {schedule-parser/src => providers/provider-engels-polytechnic/src/parser}/macros.rs (100%) rename schedule-parser/src/lib.rs => providers/provider-engels-polytechnic/src/parser/mod.rs (83%) rename {schedule-parser/src => providers/provider-engels-polytechnic/src/parser}/worksheet.rs (100%) create mode 100644 providers/provider-engels-polytechnic/src/updater.rs create mode 100644 providers/provider-engels-polytechnic/src/xls_downloader.rs create mode 100644 providers/src/lib.rs delete mode 100644 schedule-parser/Cargo.toml delete mode 100644 schedule-parser/benches/parse.rs delete mode 100644 schedule-parser/src/schema.rs delete mode 100644 src/state/schedule.rs delete mode 100644 src/xls_downloader/basic_impl.rs delete mode 100644 src/xls_downloader/interface.rs delete mode 100644 src/xls_downloader/mod.rs rename schedule.xls => test-data/engels-polytechnic.xls (100%) diff --git a/.idea/schedule-parser-rusted.iml b/.idea/schedule-parser-rusted.iml index 23d1b34..b9cbb45 100644 --- a/.idea/schedule-parser-rusted.iml +++ b/.idea/schedule-parser-rusted.iml @@ -8,6 +8,9 @@ + + + diff --git a/Cargo.lock b/Cargo.lock index 3c8276a..346b3cb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -114,7 +114,7 @@ dependencies = [ "futures-core", "futures-util", "mio", - "socket2", + "socket2 0.5.10", "tokio", "tracing", ] @@ -184,7 +184,7 @@ dependencies = [ "serde_json", "serde_urlencoded", "smallvec", - "socket2", + "socket2 0.5.10", "time 0.3.41", "tracing", "url", @@ -256,12 +256,6 @@ dependencies = [ "libc", ] -[[package]] -name = "anes" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" - [[package]] name = "anstream" version = "0.6.19" @@ -364,9 +358,9 @@ dependencies = [ [[package]] name = "async-trait" -version = "0.1.88" +version = "0.1.89" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e539d3fca749fcee5236ab05e93a52867dd549cc157c8cb7f99595f3cedffdb5" +checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" dependencies = [ "proc-macro2", "quote", @@ -456,6 +450,19 @@ dependencies = [ "windows-targets", ] +[[package]] +name = "base" +version = "0.1.0" +dependencies = [ + "async-trait", + "chrono", + "serde", + "serde_repr", + "sha1 0.11.0-rc.0", + "tokio-util", + "utoipa", +] + [[package]] name = "base64" version = "0.22.1" @@ -595,12 +602,6 @@ dependencies = [ "zip", ] -[[package]] -name = "cast" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" - [[package]] name = "cc" version = "1.2.26" @@ -639,33 +640,6 @@ dependencies = [ "windows-link", ] -[[package]] -name = "ciborium" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e" -dependencies = [ - "ciborium-io", - "ciborium-ll", - "serde", -] - -[[package]] -name = "ciborium-io" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757" - -[[package]] -name = "ciborium-ll" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9" -dependencies = [ - "ciborium-io", - "half", -] - [[package]] name = "cipher" version = "0.4.4" @@ -676,31 +650,6 @@ dependencies = [ "inout", ] -[[package]] -name = "clap" -version = "4.5.40" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "40b6887a1d8685cebccf115538db5c0efe625ccac9696ad45c409d96566e910f" -dependencies = [ - "clap_builder", -] - -[[package]] -name = "clap_builder" -version = "4.5.40" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0c66c08ce9f0c698cbce5c0279d0bb6ac936d8674174fe48f736533b964f59e" -dependencies = [ - "anstyle", - "clap_lex", -] - -[[package]] -name = "clap_lex" -version = "0.7.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b94f61472cee1439c0b966b47e3aca9ae07e45d070759512cd390ea2bebc6675" - [[package]] name = "codepage" version = "0.1.2" @@ -792,70 +741,6 @@ dependencies = [ "cfg-if", ] -[[package]] -name = "criterion" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3bf7af66b0989381bd0be551bd7cc91912a655a58c6918420c9527b1fd8b4679" -dependencies = [ - "anes", - "cast", - "ciborium", - "clap", - "criterion-plot", - "itertools 0.13.0", - "num-traits", - "oorandom", - "plotters", - "rayon", - "regex", - "serde", - "serde_json", - "tinytemplate", - "walkdir", -] - -[[package]] -name = "criterion-plot" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" -dependencies = [ - "cast", - "itertools 0.10.5", -] - -[[package]] -name = "crossbeam-deque" -version = "0.8.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" -dependencies = [ - "crossbeam-epoch", - "crossbeam-utils", -] - -[[package]] -name = "crossbeam-epoch" -version = "0.9.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" -dependencies = [ - "crossbeam-utils", -] - -[[package]] -name = "crossbeam-utils" -version = "0.8.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" - -[[package]] -name = "crunchy" -version = "0.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43da5946c66ffcc7745f48db692ffbb10a83bfe0afd96235c5c2a4fb23994929" - [[package]] name = "crypto-common" version = "0.1.6" @@ -960,6 +845,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e7c1832837b905bbfb5101e07cc24c8deddf52f93225eee6ead5f4d63d53ddcb" dependencies = [ "const-oid 0.9.6", + "pem-rfc7468", "zeroize", ] @@ -1486,16 +1372,6 @@ dependencies = [ "tracing", ] -[[package]] -name = "half" -version = "2.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "459196ed295495a68f7d7fe1d84f6c4b7ff0e21fe3017b2f283c6fac3ad803c9" -dependencies = [ - "cfg-if", - "crunchy", -] - [[package]] name = "hashbrown" version = "0.12.3" @@ -1640,7 +1516,7 @@ dependencies = [ "httpdate", "itoa", "pin-project-lite", - "socket2", + "socket2 0.5.10", "tokio", "tower-service", "tracing", @@ -1745,7 +1621,7 @@ dependencies = [ "libc", "percent-encoding", "pin-project-lite", - "socket2", + "socket2 0.5.10", "system-configuration", "tokio", "tower-service", @@ -1927,6 +1803,17 @@ dependencies = [ "generic-array", ] +[[package]] +name = "io-uring" +version = "0.7.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "046fa2d4d00aea763528b4950358d0ead425372445dc8ff86312b3c69ff7727b" +dependencies = [ + "bitflags", + "cfg-if", + "libc", +] + [[package]] name = "ipnet" version = "2.11.0" @@ -1949,24 +1836,6 @@ version = "1.70.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" -[[package]] -name = "itertools" -version = "0.10.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" -dependencies = [ - "either", -] - -[[package]] -name = "itertools" -version = "0.13.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" -dependencies = [ - "either", -] - [[package]] name = "itertools" version = "0.14.0" @@ -2262,12 +2131,6 @@ version = "1.70.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a4895175b425cb1f87721b59f0f286c2092bd4af812243672510e1ac53e2e0ad" -[[package]] -name = "oorandom" -version = "11.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e" - [[package]] name = "openssl" version = "0.10.73" @@ -2357,6 +2220,15 @@ dependencies = [ "serde", ] +[[package]] +name = "pem-rfc7468" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88b39c9bfcfc231068454382784bb460aae594343fb030d46e9f50a645418412" +dependencies = [ + "base64ct", +] + [[package]] name = "percent-encoding" version = "2.3.1" @@ -2444,34 +2316,6 @@ dependencies = [ "time 0.3.41", ] -[[package]] -name = "plotters" -version = "0.3.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5aeb6f403d7a4911efb1e33402027fc44f29b5bf6def3effcc22d7bb75f2b747" -dependencies = [ - "num-traits", - "plotters-backend", - "plotters-svg", - "wasm-bindgen", - "web-sys", -] - -[[package]] -name = "plotters-backend" -version = "0.3.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df42e13c12958a16b3f7f4386b9ab1f3e7933914ecea48da7139435263a4172a" - -[[package]] -name = "plotters-svg" -version = "0.3.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51bae2ac328883f7acdfea3d66a7c35751187f870bc81f94563733a154d7a670" -dependencies = [ - "plotters-backend", -] - [[package]] name = "portable-atomic" version = "1.11.1" @@ -2547,7 +2391,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a56d757972c98b346a9b766e3f02746cde6dd1cd1d1d563472929fdd74bec4d" dependencies = [ "anyhow", - "itertools 0.14.0", + "itertools", "proc-macro2", "quote", "syn 2.0.102", @@ -2562,6 +2406,35 @@ dependencies = [ "prost", ] +[[package]] +name = "provider-engels-polytechnic" +version = "0.1.0" +dependencies = [ + "async-trait", + "base", + "calamine", + "chrono", + "derive_more", + "log", + "regex", + "reqwest", + "sentry", + "serde", + "strsim", + "tokio", + "tokio-util", + "ua_generator", + "utoipa", +] + +[[package]] +name = "providers" +version = "0.1.0" +dependencies = [ + "base", + "provider-engels-polytechnic", +] + [[package]] name = "quick-error" version = "1.2.3" @@ -2591,7 +2464,7 @@ dependencies = [ "quinn-udp", "rustc-hash", "rustls", - "socket2", + "socket2 0.5.10", "thiserror", "tokio", "tracing", @@ -2628,7 +2501,7 @@ dependencies = [ "cfg_aliases", "libc", "once_cell", - "socket2", + "socket2 0.5.10", "tracing", "windows-sys 0.59.0", ] @@ -2745,26 +2618,6 @@ dependencies = [ "getrandom 0.3.3", ] -[[package]] -name = "rayon" -version = "1.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa" -dependencies = [ - "either", - "rayon-core", -] - -[[package]] -name = "rayon-core" -version = "1.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" -dependencies = [ - "crossbeam-deque", - "crossbeam-utils", -] - [[package]] name = "rdrand" version = "0.4.0" @@ -2820,9 +2673,9 @@ checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" [[package]] name = "reqwest" -version = "0.12.20" +version = "0.12.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eabf4c97d9130e2bf606614eb937e86edac8292eaa6f422f995d7e8de1eb1813" +checksum = "d429f34c8092b2d42c7c93cec323bb4adeb7c67698f70839adec842ec10c7ceb" dependencies = [ "async-compression", "base64", @@ -3004,15 +2857,6 @@ version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" -[[package]] -name = "same-file" -version = "1.0.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" -dependencies = [ - "winapi-util", -] - [[package]] name = "schannel" version = "0.1.27" @@ -3022,23 +2866,6 @@ dependencies = [ "windows-sys 0.59.0", ] -[[package]] -name = "schedule-parser" -version = "0.1.0" -dependencies = [ - "calamine", - "chrono", - "criterion", - "derive_more", - "log", - "regex", - "sentry", - "serde", - "serde_repr", - "strsim", - "utoipa", -] - [[package]] name = "schedule-parser-rusted" version = "1.0.5" @@ -3063,8 +2890,8 @@ dependencies = [ "mime", "objectid", "percent-encoding", + "providers", "reqwest", - "schedule-parser", "sentry", "sentry-actix", "serde", @@ -3072,7 +2899,7 @@ dependencies = [ "serde_with", "sha1 0.11.0-rc.0", "tokio", - "ua_generator", + "tokio-util", "utoipa", "utoipa-actix-web", "utoipa-rapidoc", @@ -3142,9 +2969,9 @@ checksum = "56e6fa9c48d24d85fb3de5ad847117517440f6beceb7798af16b4a87d616b8d0" [[package]] name = "sentry" -version = "0.39.0" +version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0eb7daabbc631b13f48e991f4d828f12ec43e2acd3fb2972b445bdc138231ee2" +checksum = "989425268ab5c011e06400187eed6c298272f8ef913e49fcadc3fda788b45030" dependencies = [ "httpdate", "native-tls", @@ -3157,14 +2984,14 @@ dependencies = [ "sentry-panic", "sentry-tracing", "tokio", - "ureq", + "ureq 3.1.0", ] [[package]] name = "sentry-actix" -version = "0.39.0" +version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1adf30c8878b392fa48e6a922ccbd5e6f10333f246d81b838750a9ac8f249b0" +checksum = "a5c675bdf6118764a8e265c3395c311b4d905d12866c92df52870c0223d2ffc1" dependencies = [ "actix-http", "actix-web", @@ -3175,9 +3002,9 @@ dependencies = [ [[package]] name = "sentry-backtrace" -version = "0.39.0" +version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0cbd964934e339213e5ffe2682575ae390327945856c067408ae121d1ceae72" +checksum = "68e299dd3f7bcf676875eee852c9941e1d08278a743c32ca528e2debf846a653" dependencies = [ "backtrace", "regex", @@ -3186,9 +3013,9 @@ dependencies = [ [[package]] name = "sentry-contexts" -version = "0.39.0" +version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "356d87fef14b05475f46e8554bf26833230c92bcac106d74f0f5719dce4a7850" +checksum = "fac0c5d6892cd4c414492fc957477b620026fb3411fca9fa12774831da561c88" dependencies = [ "hostname 0.4.1", "libc", @@ -3200,21 +3027,22 @@ dependencies = [ [[package]] name = "sentry-core" -version = "0.39.0" +version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e73bfcabac5a7f9e2e38f898ba0afc6bed36fb96108b246213b698e00f12268c" +checksum = "deaa38b94e70820ff3f1f9db3c8b0aef053b667be130f618e615e0ff2492cbcc" dependencies = [ "rand 0.9.1", "sentry-types", "serde", "serde_json", + "url", ] [[package]] name = "sentry-debug-images" -version = "0.39.0" +version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1cf558b1a0feaa58965051f67a55b1d2ee0a807b326c965564f9867b967c2045" +checksum = "00950648aa0d371c7f57057434ad5671bd4c106390df7e7284739330786a01b6" dependencies = [ "findshlibs", "sentry-core", @@ -3222,9 +3050,9 @@ dependencies = [ [[package]] name = "sentry-panic" -version = "0.39.0" +version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29abe64e33902f6fefafdb652dc0efed303cea655324e85db9fbb8d83d503c44" +checksum = "2b7a23b13c004873de3ce7db86eb0f59fe4adfc655a31f7bbc17fd10bacc9bfe" dependencies = [ "sentry-backtrace", "sentry-core", @@ -3232,10 +3060,11 @@ dependencies = [ [[package]] name = "sentry-tracing" -version = "0.39.0" +version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbfd32bfd54bd829a92ee9b32894b05c726edcdc40d9c580ce7b3f4a56f13a45" +checksum = "fac841c7050aa73fc2bec8f7d8e9cb1159af0b3095757b99820823f3e54e5080" dependencies = [ + "bitflags", "sentry-backtrace", "sentry-core", "tracing-core", @@ -3244,9 +3073,9 @@ dependencies = [ [[package]] name = "sentry-types" -version = "0.39.0" +version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a7205055c5bde8131c62e4f5367f4291956c331a64e1a32e737e8d81b26b3bcb" +checksum = "e477f4d4db08ddb4ab553717a8d3a511bc9e81dde0c808c680feacbb8105c412" dependencies = [ "debugid", "hex", @@ -3453,6 +3282,16 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "socket2" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "233504af464074f9d066d7b5416c5f9b894a5862a6506e306f7b816cdd6f1807" +dependencies = [ + "libc", + "windows-sys 0.59.0", +] + [[package]] name = "spki" version = "0.7.3" @@ -3629,16 +3468,6 @@ dependencies = [ "zerovec", ] -[[package]] -name = "tinytemplate" -version = "1.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" -dependencies = [ - "serde", - "serde_json", -] - [[package]] name = "tinyvec" version = "1.9.0" @@ -3656,20 +3485,22 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.45.1" +version = "1.47.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75ef51a33ef1da925cea3e4eb122833cb377c61439ca401b770f54902b806779" +checksum = "89e49afdadebb872d3145a5638b59eb0691ea23e46ca484037cfab3b76b95038" dependencies = [ "backtrace", "bytes", + "io-uring", "libc", "mio", "parking_lot", "pin-project-lite", "signal-hook-registry", - "socket2", + "slab", + "socket2 0.6.0", "tokio-macros", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -3716,9 +3547,9 @@ dependencies = [ [[package]] name = "tokio-util" -version = "0.7.15" +version = "0.7.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "66a539a9ad6d5d281510d5bd368c973d636c02dbf8a67300bfb6b950696ad7df" +checksum = "14307c986784f72ef81c89db7d9e28d6ac26d16213b109ea501696195e6e3ce5" dependencies = [ "bytes", "futures-core", @@ -3791,7 +3622,7 @@ dependencies = [ "prost", "rustls-native-certs", "rustls-pemfile", - "socket2", + "socket2 0.5.10", "tokio", "tokio-rustls", "tokio-stream", @@ -3934,16 +3765,16 @@ checksum = "1dccffe3ce07af9386bfd29e80c0ab1a8205a2fc34e4bcd40364df902cfa8f3f" [[package]] name = "ua_generator" -version = "0.5.16" +version = "0.5.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fffa7e1ef86f4ed29df5ecbac2f47160cdfbc3296c25b609cd83835ec3b7151" +checksum = "3a68ea0a55d5ad9e86e85f767180daff9f24a030490ac66e8490861e8484d7ed" dependencies = [ "dotenvy", "fastrand", "serde", "serde_json", "toml", - "ureq", + "ureq 2.12.1", ] [[package]] @@ -3996,7 +3827,6 @@ dependencies = [ "encoding_rs", "flate2", "log", - "native-tls", "once_cell", "rustls", "rustls-pki-types", @@ -4006,6 +3836,36 @@ dependencies = [ "webpki-roots 0.26.11", ] +[[package]] +name = "ureq" +version = "3.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "00432f493971db5d8e47a65aeb3b02f8226b9b11f1450ff86bb772776ebadd70" +dependencies = [ + "base64", + "der", + "log", + "native-tls", + "percent-encoding", + "rustls-pemfile", + "rustls-pki-types", + "ureq-proto", + "utf-8", + "webpki-root-certs", +] + +[[package]] +name = "ureq-proto" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbe120bb823a0061680e66e9075942fcdba06d46551548c2c259766b9558bc9a" +dependencies = [ + "base64", + "http 1.3.1", + "httparse", + "log", +] + [[package]] name = "url" version = "2.5.4" @@ -4018,6 +3878,12 @@ dependencies = [ "serde", ] +[[package]] +name = "utf-8" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" + [[package]] name = "utf8_iter" version = "1.0.4" @@ -4032,9 +3898,9 @@ checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" [[package]] name = "utoipa" -version = "5.3.1" +version = "5.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "435c6f69ef38c9017b4b4eea965dfb91e71e53d869e896db40d1cf2441dd75c0" +checksum = "2fcc29c80c21c31608227e0912b2d7fddba57ad76b606890627ba8ee7964e993" dependencies = [ "indexmap 2.9.0", "serde", @@ -4055,9 +3921,9 @@ dependencies = [ [[package]] name = "utoipa-gen" -version = "5.3.1" +version = "5.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a77d306bc75294fd52f3e99b13ece67c02c1a2789190a6f31d32f736624326f7" +checksum = "6d79d08d92ab8af4c5e8a6da20c47ae3f61a0f1dabc1997cdf2d082b757ca08b" dependencies = [ "proc-macro2", "quote", @@ -4107,16 +3973,6 @@ version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" -[[package]] -name = "walkdir" -version = "2.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" -dependencies = [ - "same-file", - "winapi-util", -] - [[package]] name = "want" version = "0.3.1" @@ -4251,6 +4107,15 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "webpki-root-certs" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e4ffd8df1c57e87c325000a3d6ef93db75279dc3a231125aac571650f22b12a" +dependencies = [ + "rustls-pki-types", +] + [[package]] name = "webpki-roots" version = "0.26.11" @@ -4285,15 +4150,6 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" -[[package]] -name = "winapi-util" -version = "0.1.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" -dependencies = [ - "windows-sys 0.59.0", -] - [[package]] name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" diff --git a/Cargo.toml b/Cargo.toml index f762648..9499084 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,5 +1,5 @@ [workspace] -members = ["actix-macros", "actix-test", "schedule-parser"] +members = ["actix-macros", "actix-test", "providers"] [package] name = "schedule-parser-rusted" @@ -11,7 +11,7 @@ publish = false debug = true [dependencies] -schedule-parser = { path = "schedule-parser", features = ["test-utils"] } +providers = { path = "providers" } actix-macros = { path = "actix-macros" } # serve api @@ -34,6 +34,7 @@ firebase-messaging-rs = { git = "https://github.com/i10416/firebase-messaging-rs # async tokio = { version = "1.44.1", features = ["macros", "rt-multi-thread"] } +tokio-util = "0.7.16" futures-util = "0.3.31" # authorization @@ -45,12 +46,11 @@ objectid = "0.2.0" # schedule downloader reqwest = { version = "0.12.15", features = ["json"] } -ua_generator = "0.5" mime = "0.3.17" # error handling -sentry = "0.39" -sentry-actix = "0.39" +sentry = "0.42.0" +sentry-actix = "0.42.0" # [de]serializing serde = { version = "1.0.219", features = ["derive"] } @@ -74,4 +74,5 @@ percent-encoding = "2.3" ed25519-dalek = "2" [dev-dependencies] +providers = { path = "providers", features = ["test"] } actix-test = { path = "actix-test" } diff --git a/providers/Cargo.toml b/providers/Cargo.toml new file mode 100644 index 0000000..f1fc7f9 --- /dev/null +++ b/providers/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "providers" +version = "0.1.0" +edition = "2024" + +[features] +test = ["provider-engels-polytechnic/test"] + +[dependencies] +base = { path = "base" } + +provider-engels-polytechnic = { path = "provider-engels-polytechnic" } \ No newline at end of file diff --git a/providers/base/Cargo.toml b/providers/base/Cargo.toml new file mode 100644 index 0000000..330a56e --- /dev/null +++ b/providers/base/Cargo.toml @@ -0,0 +1,17 @@ +[package] +name = "base" +version = "0.1.0" +edition = "2024" + +[dependencies] +tokio-util = "0.7.16" +async-trait = "0.1.89" + +chrono = { version = "0.4.41", features = ["serde"] } + +serde = { version = "1.0.219", features = ["derive"] } +serde_repr = "0.1.20" + +utoipa = { version = "5.4.0", features = ["macros", "chrono"] } + +sha1 = "0.11.0-rc.0" diff --git a/src/utility/hasher.rs b/providers/base/src/hasher.rs similarity index 100% rename from src/utility/hasher.rs rename to providers/base/src/hasher.rs diff --git a/providers/base/src/lib.rs b/providers/base/src/lib.rs new file mode 100644 index 0000000..726d611 --- /dev/null +++ b/providers/base/src/lib.rs @@ -0,0 +1,289 @@ +use crate::hasher::DigestHasher; +use async_trait::async_trait; +use chrono::{DateTime, Utc}; +use serde::{Deserialize, Serialize}; +use serde_repr::{Deserialize_repr, Serialize_repr}; +use sha1::{Digest, Sha1}; +use std::collections::HashMap; +use std::hash::Hash; +use std::sync::Arc; +use tokio_util::sync::CancellationToken; +use utoipa::ToSchema; + +mod hasher; + +// pub(crate) mod internal { +// use super::{LessonBoundaries, LessonType}; +// use chrono::{DateTime, Utc}; +// +// /// Data cell storing the group name. +// pub struct GroupCellInfo { +// /// Column index. +// pub column: u32, +// +// /// Text in the cell. +// pub name: String, +// } +// +// /// Data cell storing the line. +// pub struct DayCellInfo { +// /// Line index. +// pub row: u32, +// +// /// Column index. +// pub column: u32, +// +// /// Day name. +// pub name: String, +// +// /// Date of the day. +// pub date: DateTime, +// } +// +// /// Data on the time of lessons from the second column of the schedule. +// pub struct BoundariesCellInfo { +// /// Temporary segment of the lesson. +// pub time_range: LessonBoundaries, +// +// /// Type of lesson. +// pub lesson_type: LessonType, +// +// /// The lesson index. +// pub default_index: Option, +// +// /// The frame of the cell. +// pub xls_range: ((u32, u32), (u32, u32)), +// } +// } + +/// The beginning and end of the lesson. +#[derive(Clone, Hash, Debug, Serialize, Deserialize, ToSchema)] +pub struct LessonBoundaries { + /// The beginning of a lesson. + pub start: DateTime, + + /// The end of the lesson. + pub end: DateTime, +} + +/// Type of lesson. +#[derive(Clone, Hash, PartialEq, Debug, Serialize_repr, Deserialize_repr, ToSchema)] +#[serde(rename_all = "SCREAMING_SNAKE_CASE")] +#[repr(u8)] +pub enum LessonType { + /// Обычная. + Default = 0, + + /// Допы. + Additional, + + /// Перемена. + Break, + + /// Консультация. + Consultation, + + /// Самостоятельная работа. + IndependentWork, + + /// Зачёт. + Exam, + + /// Зачёт с оценкой. + ExamWithGrade, + + /// Экзамен. + ExamDefault, + + /// Курсовой проект. + CourseProject, + + /// Защита курсового проекта. + CourseProjectDefense, +} + +#[derive(Clone, Hash, Debug, Serialize, Deserialize, ToSchema)] +pub struct LessonSubGroup { + /// Cabinet, if present. + pub cabinet: Option, + + /// Full name of the teacher. + pub teacher: Option, +} + +#[derive(Clone, Hash, Debug, Serialize, Deserialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct Lesson { + /// Type. + #[serde(rename = "type")] + pub lesson_type: LessonType, + + /// Lesson indexes, if present. + pub range: Option<[u8; 2]>, + + /// Name. + pub name: Option, + + /// The beginning and end. + pub time: LessonBoundaries, + + /// List of subgroups. + #[serde(rename = "subgroups")] + pub subgroups: Option>>, + + /// Group name, if this is a schedule for teachers. + pub group: Option, +} + +#[derive(Clone, Hash, Debug, Serialize, Deserialize, ToSchema)] +pub struct Day { + /// Day of the week. + pub name: String, + + /// Address of another corps. + pub street: Option, + + /// Date. + pub date: DateTime, + + /// List of lessons on this day. + pub lessons: Vec, +} + +#[derive(Clone, Hash, Debug, Serialize, Deserialize, ToSchema)] +pub struct ScheduleEntry { + /// The name of the group or name of the teacher. + pub name: String, + + /// List of six days. + pub days: Vec, +} + +#[derive(Clone)] +pub struct ParsedSchedule { + /// List of groups. + pub groups: HashMap, + + /// List of teachers. + pub teachers: HashMap, +} + +/// Represents a snapshot of the schedule parsed from an XLS file. +#[derive(Clone)] +pub struct ScheduleSnapshot { + /// Timestamp when the Polytechnic website was queried for the schedule. + pub fetched_at: DateTime, + + /// Timestamp indicating when the schedule was last updated on the Polytechnic website. + /// + /// + /// This value is determined by the website's content and does not depend on the application. + /// + pub updated_at: DateTime, + + /// URL pointing to the XLS file containing the source schedule data. + pub url: String, + + /// Parsed schedule data in the application's internal representation. + pub data: ParsedSchedule, +} + +impl ScheduleSnapshot { + /// Converting the schedule data into a hash. + /// ### Important! + /// The hash does not depend on the dates. + /// If the application is restarted, but the file with source schedule will remain unchanged, then the hash will not change. + pub fn hash(&self) -> String { + let mut hasher = DigestHasher::from(Sha1::new()); + + self.data.teachers.iter().for_each(|e| e.hash(&mut hasher)); + self.data.groups.iter().for_each(|e| e.hash(&mut hasher)); + + hasher.finalize() + } + + /// Simply updates the value of [`ScheduleSnapshot::fetched_at`]. + /// Used for auto-updates. + pub fn update(&mut self) { + self.fetched_at = Utc::now(); + } +} + +// #[derive(Clone, Debug, Display, Error, ToSchema)] +// #[display("row {row}, column {column}")] +// pub struct ErrorCellPos { +// pub row: u32, +// pub column: u32, +// } +// +// #[derive(Clone, Debug, Display, Error, ToSchema)] +// #[display("'{data}' at {pos}")] +// pub struct ErrorCell { +// pub pos: ErrorCellPos, +// pub data: String, +// } +// +// impl ErrorCell { +// pub fn new(row: u32, column: u32, data: String) -> Self { +// Self { +// pos: ErrorCellPos { row, column }, +// data, +// } +// } +// } + +// #[derive(Clone, Debug, Display, Error, ToSchema)] +// pub enum ParseError { +// /// Errors related to reading XLS file. +// #[display("{_0:?}: Failed to read XLS file.")] +// #[schema(value_type = String)] +// BadXLS(Arc), +// +// /// Not a single sheet was found. +// #[display("No work sheets found.")] +// NoWorkSheets, +// +// /// There are no data on the boundaries of the sheet. +// #[display("There is no data on work sheet boundaries.")] +// UnknownWorkSheetRange, +// +// /// Failed to read the beginning and end of the lesson from the cell +// #[display("Failed to read lesson start and end from {_0}.")] +// LessonBoundaries(ErrorCell), +// +// /// Not found the beginning and the end corresponding to the lesson. +// #[display("No start and end times matching the lesson (at {_0}) was found.")] +// LessonTimeNotFound(ErrorCellPos), +// } +// +// impl Serialize for ParseError { +// fn serialize(&self, serializer: S) -> Result +// where +// S: Serializer, +// { +// match self { +// ParseError::BadXLS(_) => serializer.serialize_str("BAD_XLS"), +// ParseError::NoWorkSheets => serializer.serialize_str("NO_WORK_SHEETS"), +// ParseError::UnknownWorkSheetRange => { +// serializer.serialize_str("UNKNOWN_WORK_SHEET_RANGE") +// } +// ParseError::LessonBoundaries(_) => serializer.serialize_str("GLOBAL_TIME"), +// ParseError::LessonTimeNotFound(_) => serializer.serialize_str("LESSON_TIME_NOT_FOUND"), +// } +// } +// } + +#[async_trait] +pub trait ScheduleProvider +where + Self: Sync + Send, +{ + /// Returns ok when task has been canceled. + /// Returns err when error appeared while trying to parse or download schedule + async fn start_auto_update_task( + &self, + cancellation_token: CancellationToken, + ) -> Result<(), Box>; + + async fn get_schedule(&self) -> Arc; +} diff --git a/providers/provider-engels-polytechnic/Cargo.toml b/providers/provider-engels-polytechnic/Cargo.toml new file mode 100644 index 0000000..2c164ed --- /dev/null +++ b/providers/provider-engels-polytechnic/Cargo.toml @@ -0,0 +1,32 @@ +[package] +name = "provider-engels-polytechnic" +version = "0.1.0" +edition = "2024" + +[features] +test = [] + +[dependencies] +base = { path = "../base" } + +tokio = { version = "1.47.1", features = ["sync", "macros", "time"] } +tokio-util = "0.7.16" + +chrono = { version = "0.4.41", features = ["serde"] } + +serde = { version = "1.0.219", features = ["derive"] } + +derive_more = { version = "2.0.1", features = ["error", "display"] } + +utoipa = { version = "5.4.0", features = ["macros", "chrono"] } + +calamine = { git = "https://github.com/prophittcorey/calamine.git", branch = "fix/zip-3.0" } +async-trait = "0.1.89" + +reqwest = "0.12.23" +ua_generator = "0.5.22" +regex = "1.11.1" +strsim = "0.11.1" +log = "0.4.27" +sentry = "0.42.0" + diff --git a/providers/provider-engels-polytechnic/src/lib.rs b/providers/provider-engels-polytechnic/src/lib.rs new file mode 100644 index 0000000..f6e2a08 --- /dev/null +++ b/providers/provider-engels-polytechnic/src/lib.rs @@ -0,0 +1,85 @@ +use crate::updater::Updater; +use async_trait::async_trait; +use base::{ScheduleProvider, ScheduleSnapshot}; +use std::ops::DerefMut; +use std::sync::Arc; +use std::time::Duration; +use tokio::sync::RwLock; +use tokio::time::interval; +use tokio_util::sync::CancellationToken; + +pub use crate::updater::UpdateSource; + +mod parser; +mod updater; +mod xls_downloader; + +#[cfg(feature = "test")] +pub mod test_utils { + pub use crate::parser::test_utils::test_result; +} + +pub struct EngelsPolytechnicProvider { + updater: Updater, + snapshot: Arc, +} + +impl EngelsPolytechnicProvider { + pub async fn new( + update_source: UpdateSource, + ) -> Result, crate::updater::error::Error> { + let (updater, snapshot) = Updater::new(update_source).await?; + + Ok(Arc::new(Wrapper { + inner: RwLock::new(Self { + updater, + snapshot: Arc::new(snapshot), + }), + })) + } +} + +pub struct Wrapper { + inner: RwLock, +} + +#[async_trait] +impl ScheduleProvider for Wrapper { + async fn start_auto_update_task( + &self, + cancellation_token: CancellationToken, + ) -> Result<(), Box> { + let mut ticker = interval(Duration::from_secs(60 * 30)); + ticker.tick().await; // bc we already have the latest schedule, when instantiating provider + + loop { + tokio::select! { + _ = ticker.tick() => { + let mut lock = self.inner.write().await; + let this= lock.deref_mut(); + + log::info!("Updating schedule..."); + + match this.updater.update(&mut this.snapshot).await { + Ok(snapshot) => { + this.snapshot = Arc::new(snapshot); + }, + + Err(err) => { + cancellation_token.cancel(); + return Err(err.into()); + } + } + } + + _ = cancellation_token.cancelled() => { + return Ok(()); + } + } + } + } + + async fn get_schedule(&self) -> Arc { + self.inner.read().await.snapshot.clone() + } +} diff --git a/schedule-parser/src/macros.rs b/providers/provider-engels-polytechnic/src/parser/macros.rs similarity index 100% rename from schedule-parser/src/macros.rs rename to providers/provider-engels-polytechnic/src/parser/macros.rs diff --git a/schedule-parser/src/lib.rs b/providers/provider-engels-polytechnic/src/parser/mod.rs similarity index 83% rename from schedule-parser/src/lib.rs rename to providers/provider-engels-polytechnic/src/parser/mod.rs index 284f585..c9eb554 100644 --- a/schedule-parser/src/lib.rs +++ b/providers/provider-engels-polytechnic/src/parser/mod.rs @@ -1,12 +1,12 @@ -use crate::LessonParseResult::{Lessons, Street}; -use crate::schema::LessonType::Break; -use crate::schema::internal::{BoundariesCellInfo, DayCellInfo, GroupCellInfo}; -use crate::schema::{ - Day, ErrorCell, ErrorCellPos, Lesson, LessonBoundaries, LessonSubGroup, LessonType, ParseError, - ParseResult, ScheduleEntry, +use crate::or_continue; +use crate::parser::error::{ErrorCell, ErrorCellPos}; +use crate::parser::worksheet::WorkSheet; +use crate::parser::LessonParseResult::{Lessons, Street}; +use base::LessonType::Break; +use base::{ + Day, Lesson, LessonBoundaries, LessonSubGroup, LessonType, ParsedSchedule, ScheduleEntry, }; -use crate::worksheet::WorkSheet; -use calamine::{Reader, Xls, open_workbook_from_rs}; +use calamine::{open_workbook_from_rs, Reader, Xls}; use chrono::{DateTime, Duration, NaiveDate, NaiveTime, Utc}; use regex::Regex; use std::collections::HashMap; @@ -14,18 +14,128 @@ use std::io::Cursor; use std::sync::LazyLock; mod macros; -pub mod schema; mod worksheet; +pub mod error { + use derive_more::{Display, Error}; + use serde::{Serialize, Serializer}; + use std::sync::Arc; + use utoipa::ToSchema; + + #[derive(Clone, Debug, Display, Error, ToSchema)] + #[display("row {row}, column {column}")] + pub struct ErrorCellPos { + pub row: u32, + pub column: u32, + } + + #[derive(Clone, Debug, Display, Error, ToSchema)] + #[display("'{data}' at {pos}")] + pub struct ErrorCell { + pub pos: ErrorCellPos, + pub data: String, + } + + impl ErrorCell { + pub fn new(row: u32, column: u32, data: String) -> Self { + Self { + pos: ErrorCellPos { row, column }, + data, + } + } + } + + #[derive(Clone, Debug, Display, Error, ToSchema)] + pub enum Error { + /// Errors related to reading XLS file. + #[display("{_0:?}: Failed to read XLS file.")] + #[schema(value_type = String)] + BadXLS(Arc), + + /// Not a single sheet was found. + #[display("No work sheets found.")] + NoWorkSheets, + + /// There are no data on the boundaries of the sheet. + #[display("There is no data on work sheet boundaries.")] + UnknownWorkSheetRange, + + /// Failed to read the beginning and end of the lesson from the cell + #[display("Failed to read lesson start and end from {_0}.")] + LessonBoundaries(ErrorCell), + + /// Not found the beginning and the end corresponding to the lesson. + #[display("No start and end times matching the lesson (at {_0}) was found.")] + LessonTimeNotFound(ErrorCellPos), + } + + impl Serialize for Error { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + match self { + Error::BadXLS(_) => serializer.serialize_str("BAD_XLS"), + Error::NoWorkSheets => serializer.serialize_str("NO_WORK_SHEETS"), + Error::UnknownWorkSheetRange => { + serializer.serialize_str("UNKNOWN_WORK_SHEET_RANGE") + } + Error::LessonBoundaries(_) => serializer.serialize_str("GLOBAL_TIME"), + Error::LessonTimeNotFound(_) => serializer.serialize_str("LESSON_TIME_NOT_FOUND"), + } + } + } +} + +/// Data cell storing the group name. +pub struct GroupCellInfo { + /// Column index. + pub column: u32, + + /// Text in the cell. + pub name: String, +} + +/// Data cell storing the line. +pub struct DayCellInfo { + /// Line index. + pub row: u32, + + /// Column index. + pub column: u32, + + /// Day name. + pub name: String, + + /// Date of the day. + pub date: DateTime, +} + +/// Data on the time of lessons from the second column of the schedule. +pub struct BoundariesCellInfo { + /// Temporary segment of the lesson. + pub time_range: LessonBoundaries, + + /// Type of lesson. + pub lesson_type: LessonType, + + /// The lesson index. + pub default_index: Option, + + /// The frame of the cell. + pub xls_range: ((u32, u32), (u32, u32)), +} /// Obtaining a "skeleton" schedule from the working sheet. fn parse_skeleton( worksheet: &WorkSheet, -) -> Result<(Vec, Vec), ParseError> { +) -> Result<(Vec, Vec), crate::parser::error::Error> { let mut groups: Vec = Vec::new(); let mut days: Vec<(u32, String, Option>)> = Vec::new(); - let worksheet_start = worksheet.start().ok_or(ParseError::UnknownWorkSheetRange)?; - let worksheet_end = worksheet.end().ok_or(ParseError::UnknownWorkSheetRange)?; + let worksheet_start = worksheet + .start() + .ok_or(error::Error::UnknownWorkSheetRange)?; + let worksheet_end = worksheet.end().ok_or(error::Error::UnknownWorkSheetRange)?; let mut row = worksheet_start.0; @@ -42,7 +152,8 @@ fn parse_skeleton( for column in (worksheet_start.1 + 2)..=worksheet_end.1 { groups.push(GroupCellInfo { column, - name: or_continue!(worksheet.get_string_from_cell(row, column)), + name: or_continue!(worksheet.get_string_from_cell(row, column)) + .replace(" ", ""), }); } @@ -152,7 +263,7 @@ fn parse_lesson( day_boundaries: &Vec, lesson_boundaries: &BoundariesCellInfo, group_column: u32, -) -> Result { +) -> Result { let row = lesson_boundaries.xls_range.0.0; let name = { @@ -179,13 +290,12 @@ fn parse_lesson( .filter(|time| time.xls_range.1.0 == cell_range.1.0) .collect::>(); - let end_time = - end_time_arr - .first() - .ok_or(ParseError::LessonTimeNotFound(ErrorCellPos { - row, - column: group_column, - }))?; + let end_time = end_time_arr + .first() + .ok_or(error::Error::LessonTimeNotFound(ErrorCellPos { + row, + column: group_column, + }))?; let range: Option<[u8; 2]> = if lesson_boundaries.default_index != None { let default = lesson_boundaries.default_index.unwrap() as u8; @@ -310,7 +420,8 @@ fn parse_cabinets(worksheet: &WorkSheet, row_range: (u32, u32), column: u32) -> /// Getting the "pure" name of the lesson and list of teachers from the text of the lesson cell. fn parse_name_and_subgroups( text: &String, -) -> Result<(String, Vec>, Option), ParseError> { +) -> Result<(String, Vec>, Option), crate::parser::error::Error> +{ // Части названия пары: // 1. Само название. // 2. Список преподавателей и подгрупп. @@ -486,7 +597,7 @@ fn parse_day_boundaries( date: DateTime, row_range: (u32, u32), column: u32, -) -> Result, ParseError> { +) -> Result, crate::parser::error::Error> { let mut day_times: Vec = Vec::new(); for row in row_range.0..row_range.1 { @@ -497,7 +608,7 @@ fn parse_day_boundaries( }; let lesson_time = parse_lesson_boundaries_cell(&time_cell, date.clone()).ok_or( - ParseError::LessonBoundaries(ErrorCell::new(row, column, time_cell.clone())), + error::Error::LessonBoundaries(ErrorCell::new(row, column, time_cell.clone())), )?; // type @@ -542,7 +653,7 @@ fn parse_day_boundaries( fn parse_week_boundaries( worksheet: &WorkSheet, week_markup: &Vec, -) -> Result>, ParseError> { +) -> Result>, crate::parser::error::Error> { let mut result: Vec> = Vec::new(); let worksheet_end_row = worksheet.end().unwrap().0; @@ -662,7 +773,7 @@ fn convert_groups_to_teachers( /// /// * `buffer`: XLS data containing schedule. /// -/// returns: Result +/// returns: Result /// /// # Examples /// @@ -676,21 +787,21 @@ fn convert_groups_to_teachers( /// assert_ne!(result.as_ref().unwrap().groups.len(), 0); /// assert_ne!(result.as_ref().unwrap().teachers.len(), 0); /// ``` -pub fn parse_xls(buffer: &Vec) -> Result { +pub fn parse_xls(buffer: &Vec) -> Result { let cursor = Cursor::new(&buffer); let mut workbook: Xls<_> = - open_workbook_from_rs(cursor).map_err(|e| ParseError::BadXLS(std::sync::Arc::new(e)))?; + open_workbook_from_rs(cursor).map_err(|e| error::Error::BadXLS(std::sync::Arc::new(e)))?; let worksheet = { let (worksheet_name, worksheet) = workbook .worksheets() .first() - .ok_or(ParseError::NoWorkSheets)? + .ok_or(error::Error::NoWorkSheets)? .clone(); let worksheet_merges = workbook .worksheet_merge_cells(&*worksheet_name) - .ok_or(ParseError::NoWorkSheets)?; + .ok_or(error::Error::NoWorkSheets)?; WorkSheet { data: worksheet, @@ -740,18 +851,19 @@ pub fn parse_xls(buffer: &Vec) -> Result { groups.insert(group.name.clone(), group); } - Ok(ParseResult { + Ok(ParsedSchedule { teachers: convert_groups_to_teachers(&groups), groups, }) } -#[cfg(any(test, feature = "test-utils"))] +#[cfg(any(test, feature = "test"))] pub mod test_utils { use super::*; + use base::ParsedSchedule; - pub fn test_result() -> Result { - parse_xls(&include_bytes!("../../schedule.xls").to_vec()) + pub fn test_result() -> Result { + parse_xls(&include_bytes!("../../../../test-data/engels-polytechnic.xls").to_vec()) } } diff --git a/schedule-parser/src/worksheet.rs b/providers/provider-engels-polytechnic/src/parser/worksheet.rs similarity index 100% rename from schedule-parser/src/worksheet.rs rename to providers/provider-engels-polytechnic/src/parser/worksheet.rs diff --git a/providers/provider-engels-polytechnic/src/updater.rs b/providers/provider-engels-polytechnic/src/updater.rs new file mode 100644 index 0000000..3214974 --- /dev/null +++ b/providers/provider-engels-polytechnic/src/updater.rs @@ -0,0 +1,263 @@ +use crate::parser::parse_xls; +use crate::updater::error::{Error, QueryUrlError, SnapshotCreationError}; +use crate::xls_downloader::{FetchError, XlsDownloader}; +use base::ScheduleSnapshot; + +pub enum UpdateSource { + Prepared(ScheduleSnapshot), + + Url(String), + + GrabFromSite { + yandex_api_key: String, + yandex_func_id: String, + }, +} + +pub struct Updater { + downloader: XlsDownloader, + update_source: UpdateSource, +} + +pub mod error { + use crate::xls_downloader::FetchError; + use derive_more::{Display, Error}; + + #[derive(Debug, Display, Error)] + pub enum Error { + /// An error occurred while querying the Yandex Cloud API for a URL. + /// + /// This may result from network failures, invalid API credentials, or issues with the Yandex Cloud Function invocation. + /// See [`QueryUrlError`] for more details about specific causes. + QueryUrlFailed(QueryUrlError), + + /// The schedule snapshot creation process failed. + /// + /// This can happen due to URL conflicts (same URL already in use), failed network requests, + /// download errors, or invalid XLS file content. See [`SnapshotCreationError`] for details. + SnapshotCreationFailed(SnapshotCreationError), + } + /// Errors that may occur when querying the Yandex Cloud API to retrieve a URL. + #[derive(Debug, Display, Error)] + pub enum QueryUrlError { + /// Occurs when the request to the Yandex Cloud API fails. + /// + /// This may be due to network issues, invalid API key, incorrect function ID, or other + /// problems with the Yandex Cloud Function invocation. + #[display("An error occurred during the request to the Yandex Cloud API: {_0}")] + RequestFailed(reqwest::Error), + } + + /// Errors that may occur during the creation of a schedule snapshot. + #[derive(Debug, Display, Error)] + pub enum SnapshotCreationError { + /// The URL is the same as the one already being used (no update needed). + #[display("The URL is the same as the one already being used.")] + SameUrl, + + /// The URL query for the XLS file failed to execute, either due to network issues or invalid API parameters. + #[display("Failed to fetch URL: {_0}")] + FetchFailed(FetchError), + + /// Downloading the XLS file content failed after successfully obtaining the URL. + #[display("Download failed: {_0}")] + DownloadFailed(FetchError), + + /// The XLS file could not be parsed into a valid schedule format. + #[display("Schedule data is invalid: {_0}")] + InvalidSchedule(crate::parser::error::Error), + } +} + +impl Updater { + /// Constructs a new `ScheduleSnapshot` by downloading and parsing schedule data from the specified URL. + /// + /// This method first checks if the provided URL is the same as the one already configured in the downloader. + /// If different, it updates the downloader's URL, fetches the XLS content, parses it, and creates a snapshot. + /// Errors are returned for URL conflicts, network issues, download failures, or invalid data. + /// + /// # Arguments + /// + /// * `downloader`: A mutable reference to an `XLSDownloader` implementation used to fetch and parse the schedule data. + /// * `url`: The source URL pointing to the XLS file containing schedule data. + /// + /// returns: Result + pub async fn new_snapshot( + downloader: &mut XlsDownloader, + url: String, + ) -> Result { + if downloader.url.as_ref().is_some_and(|_url| _url.eq(&url)) { + return Err(SnapshotCreationError::SameUrl); + } + + let head_result = downloader.set_url(&*url).await.map_err(|error| { + if let FetchError::Unknown(error) = &error { + sentry::capture_error(&error); + } + + SnapshotCreationError::FetchFailed(error) + })?; + + let xls_data = downloader + .fetch(false) + .await + .map_err(|error| { + if let FetchError::Unknown(error) = &error { + sentry::capture_error(&error); + } + + SnapshotCreationError::DownloadFailed(error) + })? + .data + .unwrap(); + + let parse_result = parse_xls(&xls_data).map_err(|error| { + sentry::capture_error(&error); + + SnapshotCreationError::InvalidSchedule(error) + })?; + + Ok(ScheduleSnapshot { + fetched_at: head_result.requested_at, + updated_at: head_result.uploaded_at, + url, + data: parse_result, + }) + } + + /// Queries the Yandex Cloud Function (FaaS) to obtain a URL for the schedule file. + /// + /// This sends a POST request to the specified Yandex Cloud Function endpoint, + /// using the provided API key for authentication. The returned URI is combined + /// with the "https://politehnikum-eng.ru" base domain to form the complete URL. + /// + /// # Arguments + /// + /// * `api_key` - Authentication token for Yandex Cloud API + /// * `func_id` - ID of the target Yandex Cloud Function to invoke + /// + /// # Returns + /// + /// Result containing: + /// - `Ok(String)` - Complete URL constructed from the Function's response + /// - `Err(QueryUrlError)` - If the request or response processing fails + async fn query_url(api_key: &str, func_id: &str) -> Result { + let client = reqwest::Client::new(); + + let uri = client + .post(format!( + "https://functions.yandexcloud.net/{}?integration=raw", + func_id + )) + .header("Authorization", format!("Api-Key {}", api_key)) + .send() + .await + .map_err(|error| QueryUrlError::RequestFailed(error))? + .text() + .await + .map_err(|error| QueryUrlError::RequestFailed(error))?; + + Ok(format!("https://politehnikum-eng.ru{}", uri.trim())) + } + + /// Initializes the schedule by fetching the URL from the environment or Yandex Cloud Function (FaaS) + /// and creating a [`ScheduleSnapshot`] with the downloaded data. + /// + /// # Arguments + /// + /// * `downloader`: Mutable reference to an `XLSDownloader` implementation used to fetch and parse the schedule + /// * `app_env`: Reference to the application environment containing either a predefined URL or Yandex Cloud credentials + /// + /// # Returns + /// + /// Returns `Ok(())` if the snapshot was successfully initialized, or an `Error` if: + /// - URL query to Yandex Cloud failed ([`QueryUrlError`]) + /// - Schedule snapshot creation failed ([`SnapshotCreationError`]) + pub async fn new(update_source: UpdateSource) -> Result<(Self, ScheduleSnapshot), Error> { + let mut this = Updater { + downloader: XlsDownloader::new(), + update_source, + }; + + if let UpdateSource::Prepared(snapshot) = &this.update_source { + let snapshot = snapshot.clone(); + return Ok((this, snapshot)); + } + + let url = match &this.update_source { + UpdateSource::Url(url) => { + log::info!("The default link {} will be used", url); + url.clone() + } + UpdateSource::GrabFromSite { + yandex_api_key, + yandex_func_id, + } => { + log::info!("Obtaining a link using FaaS..."); + Self::query_url(yandex_api_key, yandex_func_id) + .await + .map_err(|error| Error::QueryUrlFailed(error))? + } + _ => unreachable!(), + }; + + log::info!("For the initial setup, a link {} will be used", url); + + let snapshot = Self::new_snapshot(&mut this.downloader, url) + .await + .map_err(|error| Error::SnapshotCreationFailed(error))?; + + log::info!("Schedule snapshot successfully created!"); + + Ok((this, snapshot)) + } + + /// Updates the schedule snapshot by querying the latest URL from FaaS and checking for changes. + /// If the URL hasn't changed, only updates the [`fetched_at`] timestamp. If changed, downloads + /// and parses the new schedule data. + /// + /// # Arguments + /// + /// * `downloader`: XLS file downloader used to fetch and parse the schedule data + /// * `app_env`: Application environment containing Yandex Cloud configuration and auto-update settings + /// + /// returns: `Result<(), Error>` - Returns error if URL query fails or schedule parsing encounters issues + /// + /// # Safety + /// + /// Use `unsafe` to access the initialized snapshot, guaranteed valid by prior `init()` call + pub async fn update( + &mut self, + current_snapshot: &ScheduleSnapshot, + ) -> Result { + if let UpdateSource::Prepared(snapshot) = &self.update_source { + let mut snapshot = snapshot.clone(); + snapshot.update(); + return Ok(snapshot); + } + + let url = match &self.update_source { + UpdateSource::Url(url) => url.clone(), + UpdateSource::GrabFromSite { + yandex_api_key, + yandex_func_id, + } => Self::query_url(yandex_api_key.as_str(), yandex_func_id.as_str()) + .await + .map_err(|error| Error::QueryUrlFailed(error))?, + _ => unreachable!(), + }; + + let snapshot = match Self::new_snapshot(&mut self.downloader, url).await { + Ok(snapshot) => snapshot, + Err(SnapshotCreationError::SameUrl) => { + let mut clone = current_snapshot.clone(); + clone.update(); + + clone + } + Err(error) => return Err(Error::SnapshotCreationFailed(error)), + }; + + Ok(snapshot) + } +} diff --git a/providers/provider-engels-polytechnic/src/xls_downloader.rs b/providers/provider-engels-polytechnic/src/xls_downloader.rs new file mode 100644 index 0000000..c5e465b --- /dev/null +++ b/providers/provider-engels-polytechnic/src/xls_downloader.rs @@ -0,0 +1,237 @@ +use chrono::{DateTime, Utc}; +use derive_more::{Display, Error}; +use std::mem::discriminant; +use std::sync::Arc; +use utoipa::ToSchema; + +/// XLS data retrieval errors. +#[derive(Clone, Debug, ToSchema, Display, Error)] +pub enum FetchError { + /// File url is not set. + #[display("The link to the timetable was not provided earlier.")] + NoUrlProvided, + + /// Unknown error. + #[display("An unknown error occurred while downloading the file.")] + #[schema(value_type = String)] + Unknown(Arc), + + /// Server returned a status code different from 200. + #[display("Server returned a status code {status_code}.")] + BadStatusCode { status_code: u16 }, + + /// The url leads to a file of a different type. + #[display("The link leads to a file of type '{content_type}'.")] + BadContentType { content_type: String }, + + /// Server doesn't return expected headers. + #[display("Server doesn't return expected header(s) '{expected_header}'.")] + BadHeaders { expected_header: String }, +} + +impl FetchError { + pub fn unknown(error: Arc) -> Self { + Self::Unknown(error) + } + + pub fn bad_status_code(status_code: u16) -> Self { + Self::BadStatusCode { status_code } + } + + pub fn bad_content_type(content_type: &str) -> Self { + Self::BadContentType { + content_type: content_type.to_string(), + } + } + + pub fn bad_headers(expected_header: &str) -> Self { + Self::BadHeaders { + expected_header: expected_header.to_string(), + } + } +} + +impl PartialEq for FetchError { + fn eq(&self, other: &Self) -> bool { + discriminant(self) == discriminant(other) + } +} + +/// Result of XLS data retrieval. +#[derive(Debug, PartialEq)] +pub struct FetchOk { + /// File upload date. + pub uploaded_at: DateTime, + + /// Date data received. + pub requested_at: DateTime, + + /// File data. + pub data: Option>, +} + +impl FetchOk { + /// Result without file content. + pub fn head(uploaded_at: DateTime) -> Self { + FetchOk { + uploaded_at, + requested_at: Utc::now(), + data: None, + } + } + + /// Full result. + pub fn get(uploaded_at: DateTime, data: Vec) -> Self { + FetchOk { + uploaded_at, + requested_at: Utc::now(), + data: Some(data), + } + } +} + +pub type FetchResult = Result; + +pub struct XlsDownloader { + pub url: Option, +} + +impl XlsDownloader { + pub fn new() -> Self { + XlsDownloader { url: None } + } + + async fn fetch_specified(url: &str, head: bool) -> FetchResult { + let client = reqwest::Client::new(); + + let response = if head { + client.head(url) + } else { + client.get(url) + } + .header("User-Agent", ua_generator::ua::spoof_chrome_ua()) + .send() + .await + .map_err(|e| FetchError::unknown(Arc::new(e)))?; + + if response.status().as_u16() != 200 { + return Err(FetchError::bad_status_code(response.status().as_u16())); + } + + let headers = response.headers(); + + let content_type = headers + .get("Content-Type") + .ok_or(FetchError::bad_headers("Content-Type"))?; + + if !headers.contains_key("etag") { + return Err(FetchError::bad_headers("etag")); + } + + let last_modified = headers + .get("last-modified") + .ok_or(FetchError::bad_headers("last-modified"))?; + + if content_type != "application/vnd.ms-excel" { + return Err(FetchError::bad_content_type(content_type.to_str().unwrap())); + } + + let last_modified = DateTime::parse_from_rfc2822(&last_modified.to_str().unwrap()) + .unwrap() + .with_timezone(&Utc); + + Ok(if head { + FetchOk::head(last_modified) + } else { + FetchOk::get(last_modified, response.bytes().await.unwrap().to_vec()) + }) + } + + pub async fn fetch(&self, head: bool) -> FetchResult { + if self.url.is_none() { + Err(FetchError::NoUrlProvided) + } else { + Self::fetch_specified(&*self.url.as_ref().unwrap(), head).await + } + } + + pub async fn set_url(&mut self, url: &str) -> FetchResult { + let result = Self::fetch_specified(url, true).await; + + if let Ok(_) = result { + self.url = Some(url.to_string()); + } + + result + } +} + +#[cfg(test)] +mod tests { + use crate::xls_downloader::{FetchError, XlsDownloader}; + + #[tokio::test] + async fn bad_url() { + let url = "bad_url"; + + let mut downloader = XlsDownloader::new(); + assert!(downloader.set_url(url).await.is_err()); + } + + #[tokio::test] + async fn bad_status_code() { + let url = "https://www.google.com/not-found"; + + let mut downloader = XlsDownloader::new(); + assert_eq!( + downloader.set_url(url).await, + Err(FetchError::bad_status_code(404)) + ); + } + + #[tokio::test] + async fn bad_headers() { + let url = "https://www.google.com/favicon.ico"; + + let mut downloader = XlsDownloader::new(); + assert_eq!( + downloader.set_url(url).await, + Err(FetchError::BadHeaders { + expected_header: "ETag".to_string(), + }) + ); + } + + #[tokio::test] + async fn bad_content_type() { + let url = "https://s3.aero-storage.ldragol.ru/679e5d1145a6ad00843ad3f1/67ddb59fd46303008396ac96%2Fexample.txt"; + + let mut downloader = XlsDownloader::new(); + assert!(downloader.set_url(url).await.is_err()); + } + + #[tokio::test] + async fn ok() { + let url = "https://s3.aero-storage.ldragol.ru/679e5d1145a6ad00843ad3f1/67ddb5fad46303008396ac97%2Fschedule.xls"; + + let mut downloader = XlsDownloader::new(); + assert!(downloader.set_url(url).await.is_ok()); + } + + #[tokio::test] + async fn downloader_ok() { + let url = "https://s3.aero-storage.ldragol.ru/679e5d1145a6ad00843ad3f1/67ddb5fad46303008396ac97%2Fschedule.xls"; + + let mut downloader = XlsDownloader::new(); + assert!(downloader.set_url(url).await.is_ok()); + assert!(downloader.fetch(false).await.is_ok()); + } + + #[tokio::test] + async fn downloader_no_url_provided() { + let downloader = XlsDownloader::new(); + + let result = downloader.fetch(false).await; + assert_eq!(result, Err(FetchError::NoUrlProvided)); + } +} diff --git a/providers/src/lib.rs b/providers/src/lib.rs new file mode 100644 index 0000000..1642c36 --- /dev/null +++ b/providers/src/lib.rs @@ -0,0 +1,9 @@ +pub use base; + +pub use provider_engels_polytechnic::EngelsPolytechnicProvider; +pub use provider_engels_polytechnic::UpdateSource as EngelsPolytechnicUpdateSource; + +#[cfg(feature = "test")] +pub mod test_utils { + pub use provider_engels_polytechnic::test_utils as engels_polytechnic; +} diff --git a/schedule-parser/Cargo.toml b/schedule-parser/Cargo.toml deleted file mode 100644 index 903238c..0000000 --- a/schedule-parser/Cargo.toml +++ /dev/null @@ -1,27 +0,0 @@ -[package] -name = "schedule-parser" -version = "0.1.0" -edition = "2024" - -[features] -test-utils = [] - -[dependencies] -# because original repo is using yanked zip dependency -calamine = { git = "https://github.com/prophittcorey/calamine.git", branch = "fix/zip-3.0" } -chrono = { version = "0.4", features = ["serde"] } -derive_more = { version = "2", features = ["full"] } -sentry = "0.39" -serde = { version = "1.0.219", features = ["derive"] } -serde_repr = "0.1.20" -regex = "1.11.1" -utoipa = { version = "5", features = ["chrono"] } -strsim = "0.11.1" -log = "0.4.26" - -[dev-dependencies] -criterion = "0.6" - -[[bench]] -name = "parse" -harness = false \ No newline at end of file diff --git a/schedule-parser/benches/parse.rs b/schedule-parser/benches/parse.rs deleted file mode 100644 index 2889f6a..0000000 --- a/schedule-parser/benches/parse.rs +++ /dev/null @@ -1,12 +0,0 @@ -use criterion::{Criterion, criterion_group, criterion_main}; - -use schedule_parser::parse_xls; - -pub fn bench_parse_xls(c: &mut Criterion) { - let buffer: Vec = include_bytes!("../../schedule.xls").to_vec(); - - c.bench_function("parse_xls", |b| b.iter(|| parse_xls(&buffer).unwrap())); -} - -criterion_group!(benches, bench_parse_xls); -criterion_main!(benches); diff --git a/schedule-parser/src/schema.rs b/schedule-parser/src/schema.rs deleted file mode 100644 index 176b8fb..0000000 --- a/schedule-parser/src/schema.rs +++ /dev/null @@ -1,227 +0,0 @@ -use chrono::{DateTime, Utc}; -use derive_more::{Display, Error}; -use serde::{Deserialize, Serialize, Serializer}; -use serde_repr::{Deserialize_repr, Serialize_repr}; -use std::collections::HashMap; -use std::sync::Arc; -use utoipa::ToSchema; - -pub(crate) mod internal { - use crate::schema::{LessonBoundaries, LessonType}; - use chrono::{DateTime, Utc}; - - /// Data cell storing the group name. - pub struct GroupCellInfo { - /// Column index. - pub column: u32, - - /// Text in the cell. - pub name: String, - } - - /// Data cell storing the line. - pub struct DayCellInfo { - /// Line index. - pub row: u32, - - /// Column index. - pub column: u32, - - /// Day name. - pub name: String, - - /// Date of the day. - pub date: DateTime, - } - - /// Data on the time of lessons from the second column of the schedule. - pub struct BoundariesCellInfo { - /// Temporary segment of the lesson. - pub time_range: LessonBoundaries, - - /// Type of lesson. - pub lesson_type: LessonType, - - /// The lesson index. - pub default_index: Option, - - /// The frame of the cell. - pub xls_range: ((u32, u32), (u32, u32)), - } -} - -/// The beginning and end of the lesson. -#[derive(Clone, Hash, Debug, Serialize, Deserialize, ToSchema)] -pub struct LessonBoundaries { - /// The beginning of a lesson. - pub start: DateTime, - - /// The end of the lesson. - pub end: DateTime, -} - -/// Type of lesson. -#[derive(Clone, Hash, PartialEq, Debug, Serialize_repr, Deserialize_repr, ToSchema)] -#[serde(rename_all = "SCREAMING_SNAKE_CASE")] -#[repr(u8)] -pub enum LessonType { - /// Обычная. - Default = 0, - - /// Допы. - Additional, - - /// Перемена. - Break, - - /// Консультация. - Consultation, - - /// Самостоятельная работа. - IndependentWork, - - /// Зачёт. - Exam, - - /// Зачёт с оценкой. - ExamWithGrade, - - /// Экзамен. - ExamDefault, - - /// Курсовой проект. - CourseProject, - - /// Защита курсового проекта. - CourseProjectDefense, -} - -#[derive(Clone, Hash, Debug, Serialize, Deserialize, ToSchema)] -pub struct LessonSubGroup { - /// Cabinet, if present. - pub cabinet: Option, - - /// Full name of the teacher. - pub teacher: Option, -} - -#[derive(Clone, Hash, Debug, Serialize, Deserialize, ToSchema)] -#[serde(rename_all = "camelCase")] -pub struct Lesson { - /// Type. - #[serde(rename = "type")] - pub lesson_type: LessonType, - - /// Lesson indexes, if present. - pub range: Option<[u8; 2]>, - - /// Name. - pub name: Option, - - /// The beginning and end. - pub time: LessonBoundaries, - - /// List of subgroups. - #[serde(rename = "subgroups")] - pub subgroups: Option>>, - - /// Group name, if this is a schedule for teachers. - pub group: Option, -} - -#[derive(Clone, Hash, Debug, Serialize, Deserialize, ToSchema)] -pub struct Day { - /// Day of the week. - pub name: String, - - /// Address of another corps. - pub street: Option, - - /// Date. - pub date: DateTime, - - /// List of lessons on this day. - pub lessons: Vec, -} - -#[derive(Clone, Hash, Debug, Serialize, Deserialize, ToSchema)] -pub struct ScheduleEntry { - /// The name of the group or name of the teacher. - pub name: String, - - /// List of six days. - pub days: Vec, -} - -#[derive(Clone)] -pub struct ParseResult { - /// List of groups. - pub groups: HashMap, - - /// List of teachers. - pub teachers: HashMap, -} - -#[derive(Clone, Debug, Display, Error, ToSchema)] -#[display("row {row}, column {column}")] -pub struct ErrorCellPos { - pub row: u32, - pub column: u32, -} - -#[derive(Clone, Debug, Display, Error, ToSchema)] -#[display("'{data}' at {pos}")] -pub struct ErrorCell { - pub pos: ErrorCellPos, - pub data: String, -} - -impl ErrorCell { - pub fn new(row: u32, column: u32, data: String) -> Self { - Self { - pos: ErrorCellPos { row, column }, - data, - } - } -} - -#[derive(Clone, Debug, Display, Error, ToSchema)] -pub enum ParseError { - /// Errors related to reading XLS file. - #[display("{_0:?}: Failed to read XLS file.")] - #[schema(value_type = String)] - BadXLS(Arc), - - /// Not a single sheet was found. - #[display("No work sheets found.")] - NoWorkSheets, - - /// There are no data on the boundaries of the sheet. - #[display("There is no data on work sheet boundaries.")] - UnknownWorkSheetRange, - - /// Failed to read the beginning and end of the lesson from the cell - #[display("Failed to read lesson start and end from {_0}.")] - LessonBoundaries(ErrorCell), - - /// Not found the beginning and the end corresponding to the lesson. - #[display("No start and end times matching the lesson (at {_0}) was found.")] - LessonTimeNotFound(ErrorCellPos), -} - -impl Serialize for ParseError { - fn serialize(&self, serializer: S) -> Result - where - S: Serializer, - { - match self { - ParseError::BadXLS(_) => serializer.serialize_str("BAD_XLS"), - ParseError::NoWorkSheets => serializer.serialize_str("NO_WORK_SHEETS"), - ParseError::UnknownWorkSheetRange => { - serializer.serialize_str("UNKNOWN_WORK_SHEET_RANGE") - } - ParseError::LessonBoundaries(_) => serializer.serialize_str("GLOBAL_TIME"), - ParseError::LessonTimeNotFound(_) => serializer.serialize_str("LESSON_TIME_NOT_FOUND"), - } - } -} diff --git a/src/extractors/authorized_user.rs b/src/extractors/authorized_user.rs index e0d7741..24567e4 100644 --- a/src/extractors/authorized_user.rs +++ b/src/extractors/authorized_user.rs @@ -1,5 +1,5 @@ use crate::database::driver; -use crate::database::models::{FCM, User}; +use crate::database::models::{User, FCM}; use crate::extractors::base::{AsyncExtractor, FromRequestAsync}; use crate::state::AppState; use crate::utility::jwt; @@ -7,7 +7,7 @@ use actix_macros::MiddlewareError; use actix_web::body::BoxBody; use actix_web::dev::Payload; use actix_web::http::header; -use actix_web::{FromRequest, HttpRequest, web}; +use actix_web::{web, FromRequest, HttpRequest}; use derive_more::Display; use serde::{Deserialize, Serialize}; use std::fmt::Debug; diff --git a/src/main.rs b/src/main.rs index 4620572..1b11485 100644 --- a/src/main.rs +++ b/src/main.rs @@ -14,8 +14,6 @@ mod state; mod database; -mod xls_downloader; - mod extractors; mod middlewares; mod routes; diff --git a/src/routes/auth/sign_up.rs b/src/routes/auth/sign_up.rs index 6245709..67226c4 100644 --- a/src/routes/auth/sign_up.rs +++ b/src/routes/auth/sign_up.rs @@ -18,8 +18,9 @@ async fn sign_up_combined( } if !app_state - .get_schedule_snapshot() + .get_schedule_snapshot("eng_polytechnic") .await + .unwrap() .data .groups .contains_key(&data.group) diff --git a/src/routes/flow/telegram_complete.rs b/src/routes/flow/telegram_complete.rs index 5ce1678..4485aa4 100644 --- a/src/routes/flow/telegram_complete.rs +++ b/src/routes/flow/telegram_complete.rs @@ -40,8 +40,9 @@ pub async fn telegram_complete( // проверка на существование группы if !app_state - .get_schedule_snapshot() + .get_schedule_snapshot("eng_polytechnic") .await + .unwrap() .data .groups .contains_key(&data.group) diff --git a/src/routes/schedule/group.rs b/src/routes/schedule/group.rs index a9d3f0c..943197e 100644 --- a/src/routes/schedule/group.rs +++ b/src/routes/schedule/group.rs @@ -31,8 +31,9 @@ pub async fn group(user: AsyncExtractor, app_state: web::Data) - None => Err(ErrorCode::SignUpNotCompleted), Some(group) => match app_state - .get_schedule_snapshot() + .get_schedule_snapshot("eng_polytechnic") .await + .unwrap() .data .groups .get(group) diff --git a/src/routes/schedule/group_names.rs b/src/routes/schedule/group_names.rs index 50275c0..c9ab94f 100644 --- a/src/routes/schedule/group_names.rs +++ b/src/routes/schedule/group_names.rs @@ -6,8 +6,9 @@ use actix_web::{get, web}; #[get("/group-names")] pub async fn group_names(app_state: web::Data) -> Response { let mut names: Vec = app_state - .get_schedule_snapshot() + .get_schedule_snapshot("eng_polytechnic") .await + .unwrap() .data .groups .keys() diff --git a/src/routes/schedule/schema.rs b/src/routes/schedule/schema.rs index 463d088..6aa5219 100644 --- a/src/routes/schedule/schema.rs +++ b/src/routes/schedule/schema.rs @@ -1,7 +1,7 @@ -use crate::state::{AppState, ScheduleSnapshot}; +use crate::state::AppState; use actix_macros::{OkResponse, ResponderJson}; use actix_web::web; -use schedule_parser::schema::ScheduleEntry; +use providers::base::{ScheduleEntry, ScheduleSnapshot}; use serde::{Deserialize, Serialize}; use std::collections::HashMap; use std::ops::Deref; @@ -32,7 +32,12 @@ impl From for ScheduleEntryResponse { impl ScheduleView { pub async fn from(app_state: &web::Data) -> Self { - let schedule = app_state.get_schedule_snapshot().await.clone(); + let schedule = app_state + .get_schedule_snapshot("eng_polytechnic") + .await + .unwrap() + .deref() + .clone(); Self { url: schedule.url, @@ -60,7 +65,13 @@ pub struct CacheStatus { impl CacheStatus { pub async fn from(value: &web::Data) -> Self { - From::<&ScheduleSnapshot>::from(value.get_schedule_snapshot().await.deref()) + From::<&ScheduleSnapshot>::from( + value + .get_schedule_snapshot("eng_polytechnic") + .await + .unwrap() + .deref(), + ) } } diff --git a/src/routes/schedule/teacher.rs b/src/routes/schedule/teacher.rs index 36fd9a4..06e3f76 100644 --- a/src/routes/schedule/teacher.rs +++ b/src/routes/schedule/teacher.rs @@ -2,7 +2,7 @@ use self::schema::*; use crate::AppState; use crate::routes::schema::ResponseError; use actix_web::{get, web}; -use schedule_parser::schema::ScheduleEntry; +use providers::base::ScheduleEntry; #[utoipa::path(responses( (status = OK, body = ScheduleEntry), @@ -18,8 +18,9 @@ use schedule_parser::schema::ScheduleEntry; #[get("/teacher/{name}")] pub async fn teacher(name: web::Path, app_state: web::Data) -> ServiceResponse { match app_state - .get_schedule_snapshot() + .get_schedule_snapshot("eng_polytechnic") .await + .unwrap() .data .teachers .get(&name.into_inner()) diff --git a/src/routes/schedule/teacher_names.rs b/src/routes/schedule/teacher_names.rs index 9a4e335..d35f1eb 100644 --- a/src/routes/schedule/teacher_names.rs +++ b/src/routes/schedule/teacher_names.rs @@ -6,8 +6,9 @@ use actix_web::{get, web}; #[get("/teacher-names")] pub async fn teacher_names(app_state: web::Data) -> Response { let mut names: Vec = app_state - .get_schedule_snapshot() + .get_schedule_snapshot("eng_polytechnic") .await + .unwrap() .data .teachers .keys() diff --git a/src/routes/users/change_group.rs b/src/routes/users/change_group.rs index 31a7008..28b11f7 100644 --- a/src/routes/users/change_group.rs +++ b/src/routes/users/change_group.rs @@ -19,8 +19,9 @@ pub async fn change_group( } if !app_state - .get_schedule_snapshot() + .get_schedule_snapshot("eng_polytechnic") .await + .unwrap() .data .groups .contains_key(&data.group) diff --git a/src/state/env/mod.rs b/src/state/env/mod.rs index 25245dd..0e2c9db 100644 --- a/src/state/env/mod.rs +++ b/src/state/env/mod.rs @@ -1,11 +1,15 @@ pub mod schedule; pub mod telegram; pub mod vk_id; + +#[cfg(not(test))] pub mod yandex_cloud; pub use self::schedule::ScheduleEnvData; pub use self::telegram::TelegramEnvData; pub use self::vk_id::VkIdEnvData; + +#[cfg(not(test))] pub use self::yandex_cloud::YandexCloudEnvData; #[derive(Default)] @@ -13,5 +17,7 @@ pub struct AppEnv { pub schedule: ScheduleEnvData, pub telegram: TelegramEnvData, pub vk_id: VkIdEnvData, + + #[cfg(not(test))] pub yandex_cloud: YandexCloudEnvData, } diff --git a/src/state/env/schedule.rs b/src/state/env/schedule.rs index 8ccc48e..74b2ae5 100644 --- a/src/state/env/schedule.rs +++ b/src/state/env/schedule.rs @@ -2,6 +2,7 @@ use std::env; #[derive(Clone)] pub struct ScheduleEnvData { + #[cfg(not(test))] pub url: Option, pub auto_update: bool, } @@ -9,6 +10,7 @@ pub struct ScheduleEnvData { impl Default for ScheduleEnvData { fn default() -> Self { Self { + #[cfg(not(test))] url: env::var("SCHEDULE_INIT_URL").ok(), auto_update: !env::var("SCHEDULE_DISABLE_AUTO_UPDATE") .is_ok_and(|v| v.eq("1") || v.eq("true")), diff --git a/src/state/mod.rs b/src/state/mod.rs index 7e1fee2..8892685 100644 --- a/src/state/mod.rs +++ b/src/state/mod.rs @@ -1,69 +1,88 @@ mod env; mod fcm_client; -mod schedule; +pub use crate::state::env::AppEnv; use crate::state::fcm_client::FCMClientData; -use crate::xls_downloader::basic_impl::BasicXlsDownloader; use actix_web::web; use diesel::{Connection, PgConnection}; use firebase_messaging_rs::FCMClient; -use std::ops::DerefMut; -use tokio::sync::{MappedMutexGuard, Mutex, MutexGuard}; - -pub use self::schedule::{Schedule, ScheduleSnapshot}; -pub use crate::state::env::AppEnv; +use providers::base::{ScheduleProvider, ScheduleSnapshot}; +use std::collections::HashMap; +use std::sync::Arc; +use tokio::sync::{Mutex, MutexGuard}; +use tokio_util::sync::CancellationToken; /// Common data provided to endpoints. pub struct AppState { + cancel_token: CancellationToken, database: Mutex, - downloader: Mutex, - schedule: Mutex, + providers: HashMap>, env: AppEnv, fcm_client: Option>, } impl AppState { - pub async fn new() -> Result { + pub async fn new() -> Result> { let database_url = std::env::var("DATABASE_URL").expect("DATABASE_URL must be set"); - let mut _self = Self { - downloader: Mutex::new(BasicXlsDownloader::new()), + let env = AppEnv::default(); + let providers: HashMap> = HashMap::from([( + "eng_polytechnic".to_string(), + providers::EngelsPolytechnicProvider::new({ + #[cfg(test)] + { + providers::EngelsPolytechnicUpdateSource::Prepared(ScheduleSnapshot { + url: "".to_string(), + fetched_at: chrono::DateTime::default(), + updated_at: chrono::DateTime::default(), + data: providers::test_utils::engels_polytechnic::test_result().unwrap(), + }) + } - schedule: Mutex::new(Schedule::default()), + #[cfg(not(test))] + { + if let Some(url) = &env.schedule.url { + providers::EngelsPolytechnicUpdateSource::Url(url.clone()) + } else { + providers::EngelsPolytechnicUpdateSource::GrabFromSite { + yandex_api_key: env.yandex_cloud.api_key.clone(), + yandex_func_id: env.yandex_cloud.func_id.clone(), + } + } + } + }) + .await?, + )]); + + let this = Self { + cancel_token: CancellationToken::new(), database: Mutex::new( PgConnection::establish(&database_url) .unwrap_or_else(|_| panic!("Error connecting to {}", database_url)), ), - env: AppEnv::default(), + env, + providers, fcm_client: FCMClientData::new().await, }; - if _self.env.schedule.auto_update { - _self - .get_schedule() - .await - .init(_self.get_downloader().await.deref_mut(), &_self.env) - .await?; + if this.env.schedule.auto_update { + for (_, provider) in &this.providers { + let provider = provider.clone(); + let cancel_token = this.cancel_token.clone(); + + tokio::spawn(async move { provider.start_auto_update_task(cancel_token).await }); + } } - Ok(_self) + Ok(this) } - pub async fn get_downloader(&'_ self) -> MutexGuard<'_, BasicXlsDownloader> { - self.downloader.lock().await - } + pub async fn get_schedule_snapshot(&'_ self, provider: &str) -> Option> { + if let Some(provider) = self.providers.get(provider) { + return Some(provider.get_schedule().await); + } - pub async fn get_schedule(&'_ self) -> MutexGuard<'_, Schedule> { - self.schedule.lock().await - } - - pub async fn get_schedule_snapshot(&'_ self) -> MappedMutexGuard<'_, ScheduleSnapshot> { - let snapshot = - MutexGuard::<'_, Schedule>::map(self.schedule.lock().await, |schedule| unsafe { - schedule.snapshot.assume_init_mut() - }); - - snapshot + None } pub async fn get_database(&'_ self) -> MutexGuard<'_, PgConnection> { @@ -83,6 +102,6 @@ impl AppState { } /// Create a new object web::Data. -pub async fn new_app_state() -> Result, self::schedule::Error> { +pub async fn new_app_state() -> Result, Box> { Ok(web::Data::new(AppState::new().await?)) } diff --git a/src/state/schedule.rs b/src/state/schedule.rs deleted file mode 100644 index 8a74759..0000000 --- a/src/state/schedule.rs +++ /dev/null @@ -1,290 +0,0 @@ -use crate::state::env::AppEnv; -use crate::utility::hasher::DigestHasher; -use chrono::{DateTime, Utc}; -use derive_more::{Display, Error}; -use schedule_parser::parse_xls; -use schedule_parser::schema::{ParseError, ParseResult}; -use sha1::{Digest, Sha1}; -use std::hash::Hash; -use std::mem::MaybeUninit; - -use crate::xls_downloader::basic_impl::BasicXlsDownloader; -use crate::xls_downloader::interface::{FetchError, XLSDownloader}; - -/// Represents errors that can occur during schedule-related operations. -#[derive(Debug, Display, Error)] -pub enum Error { - /// An error occurred while querying the Yandex Cloud API for a URL. - /// - /// This may result from network failures, invalid API credentials, or issues with the Yandex Cloud Function invocation. - /// See [`QueryUrlError`] for more details about specific causes. - QueryUrlFailed(QueryUrlError), - - /// The schedule snapshot creation process failed. - /// - /// This can happen due to URL conflicts (same URL already in use), failed network requests, - /// download errors, or invalid XLS file content. See [`SnapshotCreationError`] for details. - SnapshotCreationFailed(SnapshotCreationError), -} -/// Errors that may occur when querying the Yandex Cloud API to retrieve a URL. -#[derive(Debug, Display, Error)] -pub enum QueryUrlError { - /// Occurs when the request to the Yandex Cloud API fails. - /// - /// This may be due to network issues, invalid API key, incorrect function ID, or other - /// problems with the Yandex Cloud Function invocation. - #[display("An error occurred during the request to the Yandex Cloud API: {_0}")] - RequestFailed(reqwest::Error), -} - -/// Errors that may occur during the creation of a schedule snapshot. -#[derive(Debug, Display, Error)] -pub enum SnapshotCreationError { - /// The URL is the same as the one already being used (no update needed). - #[display("The URL is the same as the one already being used.")] - SameUrl, - - /// The URL query for the XLS file failed to execute, either due to network issues or invalid API parameters. - #[display("Failed to fetch URL: {_0}")] - FetchFailed(FetchError), - - /// Downloading the XLS file content failed after successfully obtaining the URL. - #[display("Download failed: {_0}")] - DownloadFailed(FetchError), - - /// The XLS file could not be parsed into a valid schedule format. - #[display("Schedule data is invalid: {_0}")] - InvalidSchedule(ParseError), -} - -/// Represents a snapshot of the schedule parsed from an XLS file. -#[derive(Clone)] -pub struct ScheduleSnapshot { - /// Timestamp when the Polytechnic website was queried for the schedule. - pub fetched_at: DateTime, - - /// Timestamp indicating when the schedule was last updated on the Polytechnic website. - /// - /// - /// This value is determined by the website's content and does not depend on the application. - /// - pub updated_at: DateTime, - - /// URL pointing to the XLS file containing the source schedule data. - pub url: String, - - /// Parsed schedule data in the application's internal representation. - pub data: ParseResult, -} - -impl ScheduleSnapshot { - /// Converting the schedule data into a hash. - /// ### Important! - /// The hash does not depend on the dates. - /// If the application is restarted, but the file with source schedule will remain unchanged, then the hash will not change. - pub fn hash(&self) -> String { - let mut hasher = DigestHasher::from(Sha1::new()); - - self.data.teachers.iter().for_each(|e| e.hash(&mut hasher)); - self.data.groups.iter().for_each(|e| e.hash(&mut hasher)); - - hasher.finalize() - } - - /// Simply updates the value of [`ScheduleSnapshot::fetched_at`]. - /// Used for auto-updates. - pub fn update(&mut self) { - self.fetched_at = Utc::now(); - } - - /// Constructs a new `ScheduleSnapshot` by downloading and parsing schedule data from the specified URL. - /// - /// This method first checks if the provided URL is the same as the one already configured in the downloader. - /// If different, it updates the downloader's URL, fetches the XLS content, parses it, and creates a snapshot. - /// Errors are returned for URL conflicts, network issues, download failures, or invalid data. - /// - /// # Arguments - /// - /// * `downloader`: A mutable reference to an `XLSDownloader` implementation used to fetch and parse the schedule data. - /// * `url`: The source URL pointing to the XLS file containing schedule data. - /// - /// returns: Result - pub async fn new( - downloader: &mut BasicXlsDownloader, - url: String, - ) -> Result { - if downloader.url.as_ref().is_some_and(|_url| _url.eq(&url)) { - return Err(SnapshotCreationError::SameUrl); - } - - let head_result = downloader.set_url(&*url).await.map_err(|error| { - if let FetchError::Unknown(error) = &error { - sentry::capture_error(&error); - } - - SnapshotCreationError::FetchFailed(error) - })?; - - let xls_data = downloader - .fetch(false) - .await - .map_err(|error| { - if let FetchError::Unknown(error) = &error { - sentry::capture_error(&error); - } - - SnapshotCreationError::DownloadFailed(error) - })? - .data - .unwrap(); - - let parse_result = parse_xls(&xls_data).map_err(|error| { - sentry::capture_error(&error); - - SnapshotCreationError::InvalidSchedule(error) - })?; - - Ok(ScheduleSnapshot { - fetched_at: head_result.requested_at, - updated_at: head_result.uploaded_at, - url, - data: parse_result, - }) - } -} - -pub struct Schedule { - pub snapshot: MaybeUninit, -} - -impl Default for Schedule { - fn default() -> Self { - Self { - snapshot: MaybeUninit::uninit(), - } - } -} - -impl Schedule { - /// Queries the Yandex Cloud Function (FaaS) to obtain a URL for the schedule file. - /// - /// This sends a POST request to the specified Yandex Cloud Function endpoint, - /// using the provided API key for authentication. The returned URI is combined - /// with the "https://politehnikum-eng.ru" base domain to form the complete URL. - /// - /// # Arguments - /// - /// * `api_key` - Authentication token for Yandex Cloud API - /// * `func_id` - ID of the target Yandex Cloud Function to invoke - /// - /// # Returns - /// - /// Result containing: - /// - `Ok(String)` - Complete URL constructed from the Function's response - /// - `Err(QueryUrlError)` - If the request or response processing fails - async fn query_url(api_key: &str, func_id: &str) -> Result { - let client = reqwest::Client::new(); - - let uri = client - .post(format!( - "https://functions.yandexcloud.net/{}?integration=raw", - func_id - )) - .header("Authorization", format!("Api-Key {}", api_key)) - .send() - .await - .map_err(|error| QueryUrlError::RequestFailed(error))? - .text() - .await - .map_err(|error| QueryUrlError::RequestFailed(error))?; - - Ok(format!("https://politehnikum-eng.ru{}", uri.trim())) - } - - /// Initializes the schedule by fetching the URL from the environment or Yandex Cloud Function (FaaS) - /// and creating a [`ScheduleSnapshot`] with the downloaded data. - /// - /// # Arguments - /// - /// * `downloader`: Mutable reference to an `XLSDownloader` implementation used to fetch and parse the schedule - /// * `app_env`: Reference to the application environment containing either a predefined URL or Yandex Cloud credentials - /// - /// # Returns - /// - /// Returns `Ok(())` if the snapshot was successfully initialized, or an `Error` if: - /// - URL query to Yandex Cloud failed ([`QueryUrlError`]) - /// - Schedule snapshot creation failed ([`SnapshotCreationError`]) - pub async fn init( - &mut self, - downloader: &mut BasicXlsDownloader, - app_env: &AppEnv, - ) -> Result<(), Error> { - let url = if let Some(url) = &app_env.schedule.url { - log::info!("The default link {} will be used", url); - url.clone() - } else { - log::info!("Obtaining a link using FaaS..."); - Self::query_url( - &*app_env.yandex_cloud.api_key, - &*app_env.yandex_cloud.func_id, - ) - .await - .map_err(|error| Error::QueryUrlFailed(error))? - }; - - log::info!("For the initial setup, a link {} will be used", url); - - let snapshot = ScheduleSnapshot::new(downloader, url) - .await - .map_err(|error| Error::SnapshotCreationFailed(error))?; - - log::info!("Schedule snapshot successfully created!"); - - self.snapshot.write(snapshot); - - Ok(()) - } - - /// Updates the schedule snapshot by querying the latest URL from FaaS and checking for changes. - /// If the URL hasn't changed, only updates the [`fetched_at`] timestamp. If changed, downloads - /// and parses the new schedule data. - /// - /// # Arguments - /// - /// * `downloader`: XLS file downloader used to fetch and parse the schedule data - /// * `app_env`: Application environment containing Yandex Cloud configuration and auto-update settings - /// - /// returns: `Result<(), Error>` - Returns error if URL query fails or schedule parsing encounters issues - /// - /// # Safety - /// - /// Uses `unsafe` to access the initialized snapshot, guaranteed valid by prior `init()` call - #[allow(unused)] // TODO: сделать авто апдейт - pub async fn update( - &mut self, - downloader: &mut BasicXlsDownloader, - app_env: &AppEnv, - ) -> Result<(), Error> { - assert!(app_env.schedule.auto_update); - - let url = Self::query_url( - &*app_env.yandex_cloud.api_key, - &*app_env.yandex_cloud.func_id, - ) - .await - .map_err(|error| Error::QueryUrlFailed(error))?; - - let snapshot = match ScheduleSnapshot::new(downloader, url).await { - Ok(snapshot) => snapshot, - Err(SnapshotCreationError::SameUrl) => { - unsafe { self.snapshot.assume_init_mut() }.update(); - return Ok(()); - } - Err(error) => return Err(Error::SnapshotCreationFailed(error)), - }; - - self.snapshot.write(snapshot); - - Ok(()) - } -} diff --git a/src/test_env.rs b/src/test_env.rs index 26b2edc..3867d82 100644 --- a/src/test_env.rs +++ b/src/test_env.rs @@ -1,10 +1,8 @@ #[cfg(test)] pub(crate) mod tests { - use crate::state::{AppState, ScheduleSnapshot, new_app_state}; + use crate::state::{new_app_state, AppState}; use actix_web::web; use log::info; - use schedule_parser::test_utils::test_result; - use std::default::Default; use tokio::sync::OnceCell; pub fn test_env() { @@ -17,19 +15,12 @@ pub(crate) mod tests { pub async fn test_app_state() -> web::Data { let state = new_app_state().await.unwrap(); - state.get_schedule().await.snapshot.write(ScheduleSnapshot { - fetched_at: Default::default(), - updated_at: Default::default(), - url: "".to_string(), - data: test_result().unwrap(), - }); - state.clone() } pub async fn static_app_state() -> web::Data { static STATE: OnceCell> = OnceCell::const_new(); - + STATE.get_or_init(|| test_app_state()).await.clone() } } diff --git a/src/xls_downloader/basic_impl.rs b/src/xls_downloader/basic_impl.rs deleted file mode 100644 index dc53fff..0000000 --- a/src/xls_downloader/basic_impl.rs +++ /dev/null @@ -1,199 +0,0 @@ -use crate::xls_downloader::interface::{FetchError, FetchOk, FetchResult, XLSDownloader}; -use chrono::{DateTime, Utc}; -use std::sync::Arc; - -pub struct BasicXlsDownloader { - pub url: Option, -} - -async fn fetch_specified(url: &str, head: bool) -> FetchResult { - let client = reqwest::Client::new(); - - let response = if head { - client.head(url) - } else { - client.get(url) - } - .header("User-Agent", ua_generator::ua::spoof_chrome_ua()) - .send() - .await - .map_err(|e| FetchError::unknown(Arc::new(e)))?; - - if response.status().as_u16() != 200 { - return Err(FetchError::bad_status_code(response.status().as_u16())); - } - - let headers = response.headers(); - - let content_type = headers - .get("Content-Type") - .ok_or(FetchError::bad_headers("Content-Type"))?; - - if !headers.contains_key("etag") { - return Err(FetchError::bad_headers("etag")); - } - - let last_modified = headers - .get("last-modified") - .ok_or(FetchError::bad_headers("last-modified"))?; - - if content_type != "application/vnd.ms-excel" { - return Err(FetchError::bad_content_type(content_type.to_str().unwrap())); - } - - let last_modified = DateTime::parse_from_rfc2822(&last_modified.to_str().unwrap()) - .unwrap() - .with_timezone(&Utc); - - Ok(if head { - FetchOk::head(last_modified) - } else { - FetchOk::get(last_modified, response.bytes().await.unwrap().to_vec()) - }) -} - -impl BasicXlsDownloader { - pub fn new() -> Self { - BasicXlsDownloader { url: None } - } -} - -impl XLSDownloader for BasicXlsDownloader { - async fn fetch(&self, head: bool) -> FetchResult { - if self.url.is_none() { - Err(FetchError::NoUrlProvided) - } else { - fetch_specified(&*self.url.as_ref().unwrap(), head).await - } - } - - async fn set_url(&mut self, url: &str) -> FetchResult { - let result = fetch_specified(url, true).await; - - if let Ok(_) = result { - self.url = Some(url.to_string()); - } - - result - } -} - -#[cfg(test)] -mod tests { - use crate::xls_downloader::basic_impl::{BasicXlsDownloader, fetch_specified}; - use crate::xls_downloader::interface::{FetchError, XLSDownloader}; - - #[tokio::test] - async fn bad_url() { - let url = "bad_url"; - - let results = [ - fetch_specified(url, true).await, - fetch_specified(url, false).await, - ]; - - assert!(results[0].is_err()); - assert!(results[1].is_err()); - } - - #[tokio::test] - async fn bad_status_code() { - let url = "https://www.google.com/not-found"; - - let results = [ - fetch_specified(url, true).await, - fetch_specified(url, false).await, - ]; - - assert!(results[0].is_err()); - assert!(results[1].is_err()); - - let expected_error = FetchError::BadStatusCode { status_code: 404 }; - - assert_eq!(*results[0].as_ref().err().unwrap(), expected_error); - assert_eq!(*results[1].as_ref().err().unwrap(), expected_error); - } - - #[tokio::test] - async fn bad_headers() { - let url = "https://www.google.com/favicon.ico"; - - let results = [ - fetch_specified(url, true).await, - fetch_specified(url, false).await, - ]; - - assert!(results[0].is_err()); - assert!(results[1].is_err()); - - let expected_error = FetchError::BadHeaders { - expected_header: "ETag".to_string(), - }; - - assert_eq!(*results[0].as_ref().err().unwrap(), expected_error); - assert_eq!(*results[1].as_ref().err().unwrap(), expected_error); - } - - #[tokio::test] - async fn bad_content_type() { - let url = "https://s3.aero-storage.ldragol.ru/679e5d1145a6ad00843ad3f1/67ddb59fd46303008396ac96%2Fexample.txt"; - - let results = [ - fetch_specified(url, true).await, - fetch_specified(url, false).await, - ]; - - assert!(results[0].is_err()); - assert!(results[1].is_err()); - } - - #[tokio::test] - async fn ok() { - let url = "https://s3.aero-storage.ldragol.ru/679e5d1145a6ad00843ad3f1/67ddb5fad46303008396ac97%2Fschedule.xls"; - - let results = [ - fetch_specified(url, true).await, - fetch_specified(url, false).await, - ]; - - assert!(results[0].is_ok()); - assert!(results[1].is_ok()); - } - - #[tokio::test] - async fn downloader_set_ok() { - let url = "https://s3.aero-storage.ldragol.ru/679e5d1145a6ad00843ad3f1/67ddb5fad46303008396ac97%2Fschedule.xls"; - - let mut downloader = BasicXlsDownloader::new(); - - assert!(downloader.set_url(url).await.is_ok()); - } - - #[tokio::test] - async fn downloader_set_err() { - let url = "bad_url"; - - let mut downloader = BasicXlsDownloader::new(); - - assert!(downloader.set_url(url).await.is_err()); - } - - #[tokio::test] - async fn downloader_ok() { - let url = "https://s3.aero-storage.ldragol.ru/679e5d1145a6ad00843ad3f1/67ddb5fad46303008396ac97%2Fschedule.xls"; - - let mut downloader = BasicXlsDownloader::new(); - - assert!(downloader.set_url(url).await.is_ok()); - assert!(downloader.fetch(false).await.is_ok()); - } - - #[tokio::test] - async fn downloader_no_url_provided() { - let downloader = BasicXlsDownloader::new(); - let result = downloader.fetch(false).await; - - assert!(result.is_err()); - assert_eq!(result.err().unwrap(), FetchError::NoUrlProvided); - } -} diff --git a/src/xls_downloader/interface.rs b/src/xls_downloader/interface.rs deleted file mode 100644 index 8e138ad..0000000 --- a/src/xls_downloader/interface.rs +++ /dev/null @@ -1,100 +0,0 @@ -use chrono::{DateTime, Utc}; -use derive_more::{Display, Error}; -use std::mem::discriminant; -use std::sync::Arc; -use utoipa::ToSchema; - -/// XLS data retrieval errors. -#[derive(Clone, Debug, ToSchema, Display, Error)] -pub enum FetchError { - /// File url is not set. - #[display("The link to the timetable was not provided earlier.")] - NoUrlProvided, - - /// Unknown error. - #[display("An unknown error occurred while downloading the file.")] - #[schema(value_type = String)] - Unknown(Arc), - - /// Server returned a status code different from 200. - #[display("Server returned a status code {status_code}.")] - BadStatusCode { status_code: u16 }, - - /// The url leads to a file of a different type. - #[display("The link leads to a file of type '{content_type}'.")] - BadContentType { content_type: String }, - - /// Server doesn't return expected headers. - #[display("Server doesn't return expected header(s) '{expected_header}'.")] - BadHeaders { expected_header: String }, -} - -impl FetchError { - pub fn unknown(error: Arc) -> Self { - Self::Unknown(error) - } - - pub fn bad_status_code(status_code: u16) -> Self { - Self::BadStatusCode { status_code } - } - - pub fn bad_content_type(content_type: &str) -> Self { - Self::BadContentType { - content_type: content_type.to_string(), - } - } - - pub fn bad_headers(expected_header: &str) -> Self { - Self::BadHeaders { - expected_header: expected_header.to_string(), - } - } -} - -impl PartialEq for FetchError { - fn eq(&self, other: &Self) -> bool { - discriminant(self) == discriminant(other) - } -} - -/// Result of XLS data retrieval. -pub struct FetchOk { - /// File upload date. - pub uploaded_at: DateTime, - - /// Date data received. - pub requested_at: DateTime, - - /// File data. - pub data: Option>, -} - -impl FetchOk { - /// Result without file content. - pub fn head(uploaded_at: DateTime) -> Self { - FetchOk { - uploaded_at, - requested_at: Utc::now(), - data: None, - } - } - - /// Full result. - pub fn get(uploaded_at: DateTime, data: Vec) -> Self { - FetchOk { - uploaded_at, - requested_at: Utc::now(), - data: Some(data), - } - } -} - -pub type FetchResult = Result; - -pub trait XLSDownloader { - /// Get data about the file, and optionally its content. - async fn fetch(&self, head: bool) -> FetchResult; - - /// Setting the file link. - async fn set_url(&mut self, url: &str) -> FetchResult; -} diff --git a/src/xls_downloader/mod.rs b/src/xls_downloader/mod.rs deleted file mode 100644 index 3e7bffa..0000000 --- a/src/xls_downloader/mod.rs +++ /dev/null @@ -1,2 +0,0 @@ -pub mod basic_impl; -pub mod interface; diff --git a/schedule.xls b/test-data/engels-polytechnic.xls similarity index 100% rename from schedule.xls rename to test-data/engels-polytechnic.xls