Mirror of https://github.com/n08i40k/schedule-parser-rusted.git (synced 2025-12-06 09:47:50 +03:00)
Commit: Download the XLS document by URL
.github/workflows/test.yml (vendored, 2 lines changed)
@@ -22,4 +22,4 @@ jobs:
       - name: Build
         run: cargo build --verbose
       - name: Run tests
-        run: cargo test --verbose -p schedule_parser
+        run: cargo test --verbose -p schedule-parser-rusted -p schedule_parser
Cargo.lock (generated, 1249 lines changed)
File diff suppressed because it is too large.
Cargo.toml
@@ -3,11 +3,14 @@ members = ["lib/schedule_parser"]
 
 [package]
 name = "schedule-parser-rusted"
-version = "0.2.0"
+version = "0.3.0"
 edition = "2024"
 publish = false
 
 [dependencies]
 serde = { version = "1.0.219", features = ["derive"] }
 serde_json = "1.0.140"
 schedule_parser = { path = "./lib/schedule_parser" }
+chrono = "0.4.40"
+reqwest = "0.12.15"
+tokio = { version = "1.44.1", features = ["macros", "rt-multi-thread"] }
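The three new dependencies back the download path added below: reqwest performs the HTTP requests, chrono parses the last-modified and date headers, and tokio's "macros" and "rt-multi-thread" features are what make the #[tokio::main] and #[tokio::test] attributes used in the rest of the commit available. As a rough sketch (an assumption about what the attribute expands to, not code from this commit), #[tokio::main] on an async main is approximately equivalent to building the runtime by hand:

use tokio::runtime::Builder;

fn main() {
    // Multi-threaded runtime; enable_all turns on the I/O and time drivers.
    let runtime = Builder::new_multi_thread()
        .enable_all()
        .build()
        .expect("failed to build tokio runtime");

    runtime.block_on(async {
        // async body would go here
    });
}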
@@ -1,11 +1,10 @@
 use criterion::{Criterion, criterion_group, criterion_main};
 use schedule_parser::parse_xls;
-use std::path::Path;
 
 pub fn bench_parse_xls(c: &mut Criterion) {
-    c.bench_function("parse_xls", |b| {
-        b.iter(|| parse_xls(Path::new("../../schedule.xls")))
-    });
+    let buffer: Vec<u8> = include_bytes!("../../../schedule.xls").to_vec();
+
+    c.bench_function("parse_xls", |b| b.iter(|| parse_xls(&buffer)));
 }
 
 criterion_group!(benches, bench_parse_xls);
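Note that include_bytes! is resolved at compile time relative to the source file containing the macro call (hence the extra ../ compared to the old runtime path), and it moves file I/O out of the measured loop entirely. A minimal alternative sketch, with an assumed path, that loads the workbook once at runtime instead of embedding it in the benchmark binary:

use criterion::{Criterion, criterion_group, criterion_main};
use schedule_parser::parse_xls;

pub fn bench_parse_xls(c: &mut Criterion) {
    // Assumed path: the workbook is read once, outside the measured closure.
    let buffer: Vec<u8> = std::fs::read("../../schedule.xls").expect("schedule.xls not found");

    c.bench_function("parse_xls", |b| b.iter(|| parse_xls(&buffer)));
}

criterion_group!(benches, bench_parse_xls);
criterion_main!(benches);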
@@ -1,13 +1,13 @@
-use crate::LessonParseResult::{Lessons, Street};
 use crate::schema::LessonType::Break;
 use crate::schema::{Day, Lesson, LessonSubGroup, LessonTime, LessonType, ScheduleEntity};
-use calamine::{Reader, Xls, open_workbook};
+use crate::LessonParseResult::{Lessons, Street};
+use calamine::{open_workbook_from_rs, Reader, Xls};
 use chrono::{Duration, NaiveDateTime};
-use fuzzy_matcher::FuzzyMatcher;
 use fuzzy_matcher::skim::SkimMatcherV2;
+use fuzzy_matcher::FuzzyMatcher;
 use regex::Regex;
 use std::collections::HashMap;
-use std::path::Path;
+use std::io::Cursor;
 use std::sync::LazyLock;
 
 mod schema;
@@ -539,12 +539,13 @@ fn convert_groups_to_teachers(
 }
 
 pub fn parse_xls(
-    path: &Path,
+    buffer: &Vec<u8>,
 ) -> (
     HashMap<String, ScheduleEntity>,
     HashMap<String, ScheduleEntity>,
 ) {
-    let mut workbook: Xls<_> = open_workbook(path).expect("Can't open workbook");
+    let cursor = Cursor::new(&buffer);
+    let mut workbook: Xls<_> = open_workbook_from_rs(cursor).expect("Can't open workbook");
 
     let worksheet: WorkSheet = workbook
         .worksheets()
@@ -693,9 +694,10 @@ mod tests {
     use super::*;
 
     #[test]
-    fn it_works() {
-        let result = parse_xls(Path::new("../../schedule.xls"));
+    fn read() {
+        let buffer: Vec<u8> = include_bytes!("../../../../schedule.xls").to_vec();
+        let result = parse_xls(&buffer);
 
         assert_ne!(result.0.len(), 0);
         assert_ne!(result.1.len(), 0);
     }
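parse_xls now accepts the raw workbook bytes and opens them through a Cursor with calamine's open_workbook_from_rs, so any in-memory byte source (an HTTP response body, an embedded test fixture, a cached download) can feed the parser. A minimal caller sketch, assuming a local file and following the destructuring order used in src/main.rs:

use schedule_parser::parse_xls;

fn main() {
    // Any Vec<u8> works; here the bytes come from a local file (assumed path).
    let buffer: Vec<u8> = std::fs::read("./schedule.xls").expect("failed to read schedule.xls");

    let (teachers, groups) = parse_xls(&buffer);
    println!("parsed {} teacher and {} group entries", teachers.len(), groups.len());
}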
src/main.rs (26 lines changed)
@@ -1,9 +1,25 @@
-use std::fs;
-use std::path::Path;
+use crate::xls_downloader::basic_impl::BasicXlsDownloader;
+use crate::xls_downloader::interface::XLSDownloader;
 use schedule_parser::parse_xls;
+use std::{env, fs};
 
-fn main() {
-    let (teachers, groups) = parse_xls(Path::new("./schedule.xls"));
+mod xls_downloader;
+
+#[tokio::main]
+async fn main() {
+    let args: Vec<String> = env::args().collect();
+    assert_ne!(args.len(), 1);
+
+    let mut downloader = BasicXlsDownloader::new();
+
+    downloader
+        .set_url(args[1].to_string())
+        .await
+        .expect("Failed to set url");
+
+    let fetch_res = downloader.fetch(false).await.expect("Failed to fetch xls");
+
+    let (teachers, groups) = parse_xls(fetch_res.data.as_ref().unwrap());
 
     fs::write(
         "./schedule.json",
@@ -12,7 +28,7 @@ fn main() {
             .as_bytes(),
     )
     .expect("Failed to write schedule");
 
     fs::write(
         "./teachers.json",
         serde_json::to_string_pretty(&teachers)
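With this change the binary no longer reads ./schedule.xls from disk. It takes the document URL as its first command-line argument (roughly cargo run -- <url>), validates the URL with a HEAD request inside set_url, downloads the workbook with fetch(false), and only then parses it and writes schedule.json and teachers.json as before.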
src/xls_downloader/basic_impl.rs (new file, 219 lines)
@@ -0,0 +1,219 @@
+use crate::xls_downloader::interface::{FetchError, FetchOk, FetchResult, XLSDownloader};
+use chrono::{DateTime, Utc};
+
+pub struct BasicXlsDownloader {
+    url: Option<String>,
+}
+
+async fn fetch_specified(url: &String, user_agent: String, head: bool) -> FetchResult {
+    let client = reqwest::Client::new();
+
+    let response = if head {
+        client.head(url)
+    } else {
+        client.get(url)
+    }
+    .header("User-Agent", user_agent)
+    .send()
+    .await;
+
+    match response {
+        Ok(r) => {
+            if r.status().as_u16() != 200 {
+                return Err(FetchError::BadStatusCode);
+            }
+
+            let headers = r.headers();
+
+            let content_type = headers.get("Content-Type");
+            let etag = headers.get("etag");
+            let last_modified = headers.get("last-modified");
+            let date = headers.get("date");
+
+            if content_type.is_none() || etag.is_none() || last_modified.is_none() || date.is_none()
+            {
+                Err(FetchError::BadHeaders)
+            } else if content_type.unwrap() != "application/vnd.ms-excel" {
+                Err(FetchError::BadContentType)
+            } else {
+                let etag = etag.unwrap().to_str().unwrap().to_string();
+                let last_modified =
+                    DateTime::parse_from_rfc2822(&last_modified.unwrap().to_str().unwrap())
+                        .unwrap()
+                        .with_timezone(&Utc);
+
+                Ok(if head {
+                    FetchOk::head(etag, last_modified)
+                } else {
+                    FetchOk::get(etag, last_modified, r.bytes().await.unwrap().to_vec())
+                })
+            }
+        }
+        Err(_) => Err(FetchError::Unknown),
+    }
+}
+
+impl BasicXlsDownloader {
+    pub fn new() -> Self {
+        BasicXlsDownloader { url: None }
+    }
+}
+
+impl XLSDownloader for BasicXlsDownloader {
+    async fn fetch(&self, head: bool) -> FetchResult {
+        if self.url.is_none() {
+            Err(FetchError::NoUrlProvided)
+        } else {
+            fetch_specified(
+                self.url.as_ref().unwrap(),
+                "t.me/polytechnic_next".to_string(),
+                head,
+            )
+            .await
+        }
+    }
+
+    async fn set_url(&mut self, url: String) -> Result<(), FetchError> {
+        let result = fetch_specified(&url, "t.me/polytechnic_next".to_string(), true).await;
+
+        if let Ok(_) = result {
+            Ok(self.url = Some(url))
+        } else {
+            Err(result.err().unwrap())
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::xls_downloader::basic_impl::{BasicXlsDownloader, fetch_specified};
+    use crate::xls_downloader::interface::{FetchError, XLSDownloader};
+
+    #[tokio::test]
+    async fn bad_url() {
+        let url = "bad_url".to_string();
+        let user_agent = String::new();
+
+        let results = [
+            fetch_specified(&url, user_agent.clone(), true).await,
+            fetch_specified(&url, user_agent.clone(), false).await,
+        ];
+
+        assert!(results[0].is_err());
+        assert!(results[1].is_err());
+    }
+
+    #[tokio::test]
+    async fn bad_status_code() {
+        let url = "https://www.google.com/not-found".to_string();
+        let user_agent = String::new();
+
+        let results = [
+            fetch_specified(&url, user_agent.clone(), true).await,
+            fetch_specified(&url, user_agent.clone(), false).await,
+        ];
+
+        assert!(results[0].is_err());
+        assert!(results[1].is_err());
+
+        assert_eq!(
+            *results[0].as_ref().err().unwrap(),
+            FetchError::BadStatusCode
+        );
+        assert_eq!(
+            *results[1].as_ref().err().unwrap(),
+            FetchError::BadStatusCode
+        );
+    }
+
+    #[tokio::test]
+    async fn bad_headers() {
+        let url = "https://www.google.com/favicon.ico".to_string();
+        let user_agent = String::new();
+
+        let results = [
+            fetch_specified(&url, user_agent.clone(), true).await,
+            fetch_specified(&url, user_agent.clone(), false).await,
+        ];
+
+        assert!(results[0].is_err());
+        assert!(results[1].is_err());
+
+        assert_eq!(*results[0].as_ref().err().unwrap(), FetchError::BadHeaders);
+        assert_eq!(*results[1].as_ref().err().unwrap(), FetchError::BadHeaders);
+    }
+
+    #[tokio::test]
+    async fn bad_content_type() {
+        let url = "https://s3.aero-storage.ldragol.ru/679e5d1145a6ad00843ad3f1/67ddb59fd46303008396ac96%2Fexample.txt".to_string();
+        let user_agent = String::new();
+
+        let results = [
+            fetch_specified(&url, user_agent.clone(), true).await,
+            fetch_specified(&url, user_agent.clone(), false).await,
+        ];
+
+        assert!(results[0].is_err());
+        assert!(results[1].is_err());
+
+        assert_eq!(
+            *results[0].as_ref().err().unwrap(),
+            FetchError::BadContentType
+        );
+        assert_eq!(
+            *results[1].as_ref().err().unwrap(),
+            FetchError::BadContentType
+        );
+    }
+
+    #[tokio::test]
+    async fn ok() {
+        let url = "https://s3.aero-storage.ldragol.ru/679e5d1145a6ad00843ad3f1/67ddb5fad46303008396ac97%2Fschedule.xls".to_string();
+        let user_agent = String::new();
+
+        let results = [
+            fetch_specified(&url, user_agent.clone(), true).await,
+            fetch_specified(&url, user_agent.clone(), false).await,
+        ];
+
+        assert!(results[0].is_ok());
+        assert!(results[1].is_ok());
+    }
+
+    #[tokio::test]
+    async fn downloader_set_ok() {
+        let url = "https://s3.aero-storage.ldragol.ru/679e5d1145a6ad00843ad3f1/67ddb5fad46303008396ac97%2Fschedule.xls".to_string();
+
+        let mut downloader = BasicXlsDownloader::new();
+
+        assert!(downloader.set_url(url).await.is_ok());
+    }
+
+    #[tokio::test]
+    async fn downloader_set_err() {
+        let url = "bad_url".to_string();
+
+        let mut downloader = BasicXlsDownloader::new();
+
+        assert!(downloader.set_url(url).await.is_err());
+    }
+
+    #[tokio::test]
+    async fn downloader_ok() {
+        let url = "https://s3.aero-storage.ldragol.ru/679e5d1145a6ad00843ad3f1/67ddb5fad46303008396ac97%2Fschedule.xls".to_string();
+
+        let mut downloader = BasicXlsDownloader::new();
+
+        assert!(downloader.set_url(url).await.is_ok());
+        assert!(downloader.fetch(false).await.is_ok());
+    }
+
+    #[tokio::test]
+    async fn downloader_no_url_provided() {
+        let downloader = BasicXlsDownloader::new();
+        let result = downloader.fetch(false).await;
+
+        assert!(result.is_err());
+        assert_eq!(result.err().unwrap(), FetchError::NoUrlProvided);
+    }
+}
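fetch(true) issues a HEAD request and returns only the etag and last-modified metadata (data stays None), while fetch(false) downloads the body. A hypothetical caller-side sketch (not part of this commit; refresh_if_changed and last_etag are illustrative names) of using the HEAD mode to skip re-downloading an unchanged document:

use crate::xls_downloader::interface::{FetchError, XLSDownloader};

async fn refresh_if_changed<D: XLSDownloader>(
    downloader: &D,
    last_etag: Option<&str>,
) -> Result<Option<Vec<u8>>, FetchError> {
    // Cheap HEAD request: metadata only, no body.
    let head = downloader.fetch(true).await?;
    if Some(head.etag.as_str()) == last_etag {
        return Ok(None); // unchanged, keep whatever was parsed last time
    }

    // Changed (or no cached etag yet): do the full GET.
    let full = downloader.fetch(false).await?;
    Ok(full.data)
}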
src/xls_downloader/interface.rs (new file, 44 lines)
@@ -0,0 +1,44 @@
+use chrono::{DateTime, Utc};
+
+#[derive(PartialEq, Debug)]
+pub enum FetchError {
+    NoUrlProvided,
+    Unknown,
+    BadStatusCode,
+    BadContentType,
+    BadHeaders,
+}
+
+pub struct FetchOk {
+    pub etag: String,
+    pub uploaded_at: DateTime<Utc>,
+    pub requested_at: DateTime<Utc>,
+    pub data: Option<Vec<u8>>,
+}
+
+impl FetchOk {
+    pub fn head(etag: String, uploaded_at: DateTime<Utc>) -> Self {
+        FetchOk {
+            etag,
+            uploaded_at,
+            requested_at: Utc::now(),
+            data: None,
+        }
+    }
+
+    pub fn get(etag: String, uploaded_at: DateTime<Utc>, data: Vec<u8>) -> Self {
+        FetchOk {
+            etag,
+            uploaded_at,
+            requested_at: Utc::now(),
+            data: Some(data),
+        }
+    }
+}
+
+pub type FetchResult = Result<FetchOk, FetchError>;
+
+pub trait XLSDownloader {
+    async fn fetch(&self, head: bool) -> FetchResult;
+    async fn set_url(&mut self, url: String) -> Result<(), FetchError>;
+}
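Since XLSDownloader is an ordinary trait with async methods (usable directly on edition 2024), other implementations can be dropped in behind the same interface. A hypothetical in-memory test double, assuming the same crate layout; StaticXlsDownloader is an illustrative name, not something added by this commit:

use crate::xls_downloader::interface::{FetchError, FetchOk, FetchResult, XLSDownloader};
use chrono::Utc;

pub struct StaticXlsDownloader {
    bytes: Vec<u8>,
}

impl XLSDownloader for StaticXlsDownloader {
    async fn fetch(&self, head: bool) -> FetchResult {
        // Serve a fixed buffer using the same constructors as the real downloader.
        Ok(if head {
            FetchOk::head("static-etag".to_string(), Utc::now())
        } else {
            FetchOk::get("static-etag".to_string(), Utc::now(), self.bytes.clone())
        })
    }

    async fn set_url(&mut self, _url: String) -> Result<(), FetchError> {
        // Nothing to validate for an in-memory source.
        Ok(())
    }
}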
src/xls_downloader/mod.rs (new file, 2 lines)
@@ -0,0 +1,2 @@
+pub mod basic_impl;
+pub mod interface;