mirror of
https://github.com/n08i40k/schedule-parser-rusted.git
synced 2025-12-06 09:47:50 +03:00
Скачивание XLS документа по ссылке
This commit is contained in:
2
.github/workflows/test.yml
vendored
2
.github/workflows/test.yml
vendored
@@ -22,4 +22,4 @@ jobs:
|
||||
- name: Build
|
||||
run: cargo build --verbose
|
||||
- name: Run tests
|
||||
run: cargo test --verbose -p schedule_parser
|
||||
run: cargo test --verbose -p schedule-parser-rusted -p schedule_parser
|
||||
|
||||
1249
Cargo.lock
generated
1249
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
@@ -3,11 +3,14 @@ members = ["lib/schedule_parser"]
|
||||
|
||||
[package]
|
||||
name = "schedule-parser-rusted"
|
||||
version = "0.2.0"
|
||||
version = "0.3.0"
|
||||
edition = "2024"
|
||||
publish = false
|
||||
|
||||
[dependencies]
|
||||
serde = { version = "1.0.219", features = ["derive"] }
|
||||
serde_json = "1.0.140"
|
||||
schedule_parser = { path = "./lib/schedule_parser" }
|
||||
schedule_parser = { path = "./lib/schedule_parser" }
|
||||
chrono = "0.4.40"
|
||||
reqwest = "0.12.15"
|
||||
tokio = { version = "1.44.1", features = ["macros", "rt-multi-thread"] }
|
||||
|
||||
@@ -1,11 +1,10 @@
|
||||
use criterion::{Criterion, criterion_group, criterion_main};
|
||||
use schedule_parser::parse_xls;
|
||||
use std::path::Path;
|
||||
|
||||
pub fn bench_parse_xls(c: &mut Criterion) {
|
||||
c.bench_function("parse_xls", |b| {
|
||||
b.iter(|| parse_xls(Path::new("../../schedule.xls")))
|
||||
});
|
||||
let buffer: Vec<u8> = include_bytes!("../../../schedule.xls").to_vec();
|
||||
|
||||
c.bench_function("parse_xls", |b| b.iter(|| parse_xls(&buffer)));
|
||||
}
|
||||
|
||||
criterion_group!(benches, bench_parse_xls);
|
||||
|
||||
@@ -1,13 +1,13 @@
|
||||
use crate::LessonParseResult::{Lessons, Street};
|
||||
use crate::schema::LessonType::Break;
|
||||
use crate::schema::{Day, Lesson, LessonSubGroup, LessonTime, LessonType, ScheduleEntity};
|
||||
use calamine::{Reader, Xls, open_workbook};
|
||||
use crate::LessonParseResult::{Lessons, Street};
|
||||
use calamine::{open_workbook_from_rs, Reader, Xls};
|
||||
use chrono::{Duration, NaiveDateTime};
|
||||
use fuzzy_matcher::FuzzyMatcher;
|
||||
use fuzzy_matcher::skim::SkimMatcherV2;
|
||||
use fuzzy_matcher::FuzzyMatcher;
|
||||
use regex::Regex;
|
||||
use std::collections::HashMap;
|
||||
use std::path::Path;
|
||||
use std::io::Cursor;
|
||||
use std::sync::LazyLock;
|
||||
|
||||
mod schema;
|
||||
@@ -539,12 +539,13 @@ fn convert_groups_to_teachers(
|
||||
}
|
||||
|
||||
pub fn parse_xls(
|
||||
path: &Path,
|
||||
buffer: &Vec<u8>,
|
||||
) -> (
|
||||
HashMap<String, ScheduleEntity>,
|
||||
HashMap<String, ScheduleEntity>,
|
||||
) {
|
||||
let mut workbook: Xls<_> = open_workbook(path).expect("Can't open workbook");
|
||||
let cursor = Cursor::new(&buffer);
|
||||
let mut workbook: Xls<_> = open_workbook_from_rs(cursor).expect("Can't open workbook");
|
||||
|
||||
let worksheet: WorkSheet = workbook
|
||||
.worksheets()
|
||||
@@ -693,9 +694,10 @@ mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn it_works() {
|
||||
let result = parse_xls(Path::new("../../schedule.xls"));
|
||||
|
||||
fn read() {
|
||||
let buffer: Vec<u8> = include_bytes!("../../../../schedule.xls").to_vec();
|
||||
let result = parse_xls(&buffer);
|
||||
|
||||
assert_ne!(result.0.len(), 0);
|
||||
assert_ne!(result.1.len(), 0);
|
||||
}
|
||||
|
||||
26
src/main.rs
26
src/main.rs
@@ -1,9 +1,25 @@
|
||||
use std::fs;
|
||||
use std::path::Path;
|
||||
use crate::xls_downloader::basic_impl::BasicXlsDownloader;
|
||||
use crate::xls_downloader::interface::XLSDownloader;
|
||||
use schedule_parser::parse_xls;
|
||||
use std::{env, fs};
|
||||
|
||||
fn main() {
|
||||
let (teachers, groups) = parse_xls(Path::new("./schedule.xls"));
|
||||
mod xls_downloader;
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() {
|
||||
let args: Vec<String> = env::args().collect();
|
||||
assert_ne!(args.len(), 1);
|
||||
|
||||
let mut downloader = BasicXlsDownloader::new();
|
||||
|
||||
downloader
|
||||
.set_url(args[1].to_string())
|
||||
.await
|
||||
.expect("Failed to set url");
|
||||
|
||||
let fetch_res = downloader.fetch(false).await.expect("Failed to fetch xls");
|
||||
|
||||
let (teachers, groups) = parse_xls(fetch_res.data.as_ref().unwrap());
|
||||
|
||||
fs::write(
|
||||
"./schedule.json",
|
||||
@@ -12,7 +28,7 @@ fn main() {
|
||||
.as_bytes(),
|
||||
)
|
||||
.expect("Failed to write schedule");
|
||||
|
||||
|
||||
fs::write(
|
||||
"./teachers.json",
|
||||
serde_json::to_string_pretty(&teachers)
|
||||
|
||||
219
src/xls_downloader/basic_impl.rs
Normal file
219
src/xls_downloader/basic_impl.rs
Normal file
@@ -0,0 +1,219 @@
|
||||
use crate::xls_downloader::interface::{FetchError, FetchOk, FetchResult, XLSDownloader};
|
||||
use chrono::{DateTime, Utc};
|
||||
|
||||
/// Downloader that fetches an XLS document over HTTP from a single configured URL.
///
/// A freshly created value has no URL; `fetch` reports
/// `FetchError::NoUrlProvided` until `set_url` has stored one.
#[derive(Debug, Default)]
pub struct BasicXlsDownloader {
    /// URL of the document to download, or `None` while no URL has been accepted yet.
    url: Option<String>,
}
|
||||
|
||||
async fn fetch_specified(url: &String, user_agent: String, head: bool) -> FetchResult {
|
||||
let client = reqwest::Client::new();
|
||||
|
||||
let response = if head {
|
||||
client.head(url)
|
||||
} else {
|
||||
client.get(url)
|
||||
}
|
||||
.header("User-Agent", user_agent)
|
||||
.send()
|
||||
.await;
|
||||
|
||||
match response {
|
||||
Ok(r) => {
|
||||
if r.status().as_u16() != 200 {
|
||||
return Err(FetchError::BadStatusCode);
|
||||
}
|
||||
|
||||
let headers = r.headers();
|
||||
|
||||
let content_type = headers.get("Content-Type");
|
||||
let etag = headers.get("etag");
|
||||
let last_modified = headers.get("last-modified");
|
||||
let date = headers.get("date");
|
||||
|
||||
if content_type.is_none() || etag.is_none() || last_modified.is_none() || date.is_none()
|
||||
{
|
||||
Err(FetchError::BadHeaders)
|
||||
} else if content_type.unwrap() != "application/vnd.ms-excel" {
|
||||
Err(FetchError::BadContentType)
|
||||
} else {
|
||||
let etag = etag.unwrap().to_str().unwrap().to_string();
|
||||
let last_modified =
|
||||
DateTime::parse_from_rfc2822(&last_modified.unwrap().to_str().unwrap())
|
||||
.unwrap()
|
||||
.with_timezone(&Utc);
|
||||
|
||||
Ok(if head {
|
||||
FetchOk::head(etag, last_modified)
|
||||
} else {
|
||||
FetchOk::get(etag, last_modified, r.bytes().await.unwrap().to_vec())
|
||||
})
|
||||
}
|
||||
}
|
||||
Err(_) => Err(FetchError::Unknown),
|
||||
}
|
||||
}
|
||||
|
||||
impl BasicXlsDownloader {
|
||||
pub fn new() -> Self {
|
||||
BasicXlsDownloader { url: None }
|
||||
}
|
||||
}
|
||||
|
||||
impl XLSDownloader for BasicXlsDownloader {
|
||||
async fn fetch(&self, head: bool) -> FetchResult {
|
||||
if self.url.is_none() {
|
||||
Err(FetchError::NoUrlProvided)
|
||||
} else {
|
||||
fetch_specified(
|
||||
self.url.as_ref().unwrap(),
|
||||
"t.me/polytechnic_next".to_string(),
|
||||
head,
|
||||
)
|
||||
.await
|
||||
}
|
||||
}
|
||||
|
||||
async fn set_url(&mut self, url: String) -> Result<(), FetchError> {
|
||||
let result = fetch_specified(&url, "t.me/polytechnic_next".to_string(), true).await;
|
||||
|
||||
if let Ok(_) = result {
|
||||
Ok(self.url = Some(url))
|
||||
} else {
|
||||
Err(result.err().unwrap())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use crate::xls_downloader::basic_impl::{BasicXlsDownloader, fetch_specified};
    use crate::xls_downloader::interface::{FetchError, FetchResult, XLSDownloader};

    // NOTE: every test except `downloader_no_url_provided` performs real HTTP
    // requests against the hosts below.

    /// Sample schedule document; the tests expect requests for it to succeed.
    const SCHEDULE_URL: &str = "https://s3.aero-storage.ldragol.ru/679e5d1145a6ad00843ad3f1/67ddb5fad46303008396ac97%2Fschedule.xls";

    /// Sample text document; the tests expect it to be rejected for its content type.
    const TEXT_URL: &str = "https://s3.aero-storage.ldragol.ru/679e5d1145a6ad00843ad3f1/67ddb59fd46303008396ac96%2Fexample.txt";

    /// Requests `url` with HEAD and then with GET, using an empty user agent.
    async fn fetch_both(url: &str) -> [FetchResult; 2] {
        let url = url.to_string();
        let user_agent = String::new();

        [
            fetch_specified(&url, user_agent.clone(), true).await,
            fetch_specified(&url, user_agent.clone(), false).await,
        ]
    }

    /// Asserts that every result is an error equal to `expected`.
    fn assert_all_failed_with(results: &[FetchResult], expected: FetchError) {
        for result in results {
            assert_eq!(result.as_ref().err(), Some(&expected));
        }
    }

    #[tokio::test]
    async fn bad_url() {
        let results = fetch_both("bad_url").await;

        assert!(results.iter().all(|result| result.is_err()));
    }

    #[tokio::test]
    async fn bad_status_code() {
        let results = fetch_both("https://www.google.com/not-found").await;

        assert_all_failed_with(&results, FetchError::BadStatusCode);
    }

    #[tokio::test]
    async fn bad_headers() {
        let results = fetch_both("https://www.google.com/favicon.ico").await;

        assert_all_failed_with(&results, FetchError::BadHeaders);
    }

    #[tokio::test]
    async fn bad_content_type() {
        let results = fetch_both(TEXT_URL).await;

        assert_all_failed_with(&results, FetchError::BadContentType);
    }

    #[tokio::test]
    async fn ok() {
        let results = fetch_both(SCHEDULE_URL).await;

        assert!(results.iter().all(|result| result.is_ok()));
    }

    #[tokio::test]
    async fn downloader_set_ok() {
        let mut downloader = BasicXlsDownloader::new();

        assert!(downloader.set_url(SCHEDULE_URL.to_string()).await.is_ok());
    }

    #[tokio::test]
    async fn downloader_set_err() {
        let mut downloader = BasicXlsDownloader::new();

        assert!(downloader.set_url("bad_url".to_string()).await.is_err());
    }

    #[tokio::test]
    async fn downloader_ok() {
        let mut downloader = BasicXlsDownloader::new();

        assert!(downloader.set_url(SCHEDULE_URL.to_string()).await.is_ok());
        assert!(downloader.fetch(false).await.is_ok());
    }

    #[tokio::test]
    async fn downloader_no_url_provided() {
        let downloader = BasicXlsDownloader::new();
        let result = downloader.fetch(false).await;

        assert_eq!(result.err(), Some(FetchError::NoUrlProvided));
    }
}
|
||||
44
src/xls_downloader/interface.rs
Normal file
44
src/xls_downloader/interface.rs
Normal file
@@ -0,0 +1,44 @@
|
||||
use chrono::{DateTime, Utc};
|
||||
|
||||
/// Reasons a download attempt can fail.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FetchError {
    /// A fetch was attempted before any URL was configured.
    NoUrlProvided,
    /// The request failed for a reason not covered by the other variants.
    Unknown,
    /// The server answered with a status code other than the expected one.
    BadStatusCode,
    /// The response's content type is not the expected one.
    BadContentType,
    /// A required response header is missing or unusable.
    BadHeaders,
}

impl std::fmt::Display for FetchError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let message = match self {
            FetchError::NoUrlProvided => "no URL has been provided",
            FetchError::Unknown => "request failed for an unknown reason",
            FetchError::BadStatusCode => "server responded with an unexpected status code",
            FetchError::BadContentType => "response has an unexpected content type",
            FetchError::BadHeaders => "response is missing required headers",
        };

        f.write_str(message)
    }
}

impl std::error::Error for FetchError {}
|
||||
|
||||
pub struct FetchOk {
|
||||
pub etag: String,
|
||||
pub uploaded_at: DateTime<Utc>,
|
||||
pub requested_at: DateTime<Utc>,
|
||||
pub data: Option<Vec<u8>>,
|
||||
}
|
||||
|
||||
impl FetchOk {
|
||||
pub fn head(etag: String, uploaded_at: DateTime<Utc>) -> Self {
|
||||
FetchOk {
|
||||
etag,
|
||||
uploaded_at,
|
||||
requested_at: Utc::now(),
|
||||
data: None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get(etag: String, uploaded_at: DateTime<Utc>, data: Vec<u8>) -> Self {
|
||||
FetchOk {
|
||||
etag,
|
||||
uploaded_at,
|
||||
requested_at: Utc::now(),
|
||||
data: Some(data),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Result of a fetch: `FetchOk` on success, `FetchError` on failure.
pub type FetchResult = Result<FetchOk, FetchError>;
|
||||
|
||||
/// Interface of a downloader that fetches a document from a configurable URL.
pub trait XLSDownloader {
|
||||
/// Fetches the document from the configured URL.
///
/// `head` is a request-mode flag. In `BasicXlsDownloader`, `true` issues a HEAD
/// request and `false` a GET request.
async fn fetch(&self, head: bool) -> FetchResult;
|
||||
/// Sets the URL to fetch from, returning a `FetchError` if it is rejected.
///
/// `BasicXlsDownloader` probes the URL with a HEAD request and stores it only when
/// that probe succeeds.
async fn set_url(&mut self, url: String) -> Result<(), FetchError>;
|
||||
}
|
||||
2
src/xls_downloader/mod.rs
Normal file
2
src/xls_downloader/mod.rs
Normal file
@@ -0,0 +1,2 @@
|
||||
pub mod basic_impl;
|
||||
pub mod interface;
|
||||
Reference in New Issue
Block a user