Скачивание XLS документа по ссылке

This commit is contained in:
2025-03-21 23:55:16 +04:00
parent d75d3fbc97
commit 126ba23001
9 changed files with 1541 additions and 35 deletions

View File

@@ -1,9 +1,25 @@
use std::fs;
use std::path::Path;
use crate::xls_downloader::basic_impl::BasicXlsDownloader;
use crate::xls_downloader::interface::XLSDownloader;
use schedule_parser::parse_xls;
use std::{env, fs};
fn main() {
let (teachers, groups) = parse_xls(Path::new("./schedule.xls"));
mod xls_downloader;
#[tokio::main]
async fn main() {
let args: Vec<String> = env::args().collect();
assert_ne!(args.len(), 1);
let mut downloader = BasicXlsDownloader::new();
downloader
.set_url(args[1].to_string())
.await
.expect("Failed to set url");
let fetch_res = downloader.fetch(false).await.expect("Failed to fetch xls");
let (teachers, groups) = parse_xls(fetch_res.data.as_ref().unwrap());
fs::write(
"./schedule.json",
@@ -12,7 +28,7 @@ fn main() {
.as_bytes(),
)
.expect("Failed to write schedule");
fs::write(
"./teachers.json",
serde_json::to_string_pretty(&teachers)

View File

@@ -0,0 +1,219 @@
use crate::xls_downloader::interface::{FetchError, FetchOk, FetchResult, XLSDownloader};
use chrono::{DateTime, Utc};
/// XLS downloader that keeps track of a single source URL.
///
/// A freshly constructed value has no URL; `XLSDownloader::set_url` stores one
/// and `XLSDownloader::fetch` reports `FetchError::NoUrlProvided` until then.
pub struct BasicXlsDownloader {
/// Address of the XLS document, or `None` while no URL has been accepted.
url: Option<String>,
}
/// Requests `url` and validates the response as an XLS document.
///
/// Sends a `HEAD` request when `head` is `true`, otherwise a `GET`, using
/// `user_agent` as the `User-Agent` header.
///
/// # Errors
///
/// * [`FetchError::Unknown`] – the request could not be sent or the body could
///   not be read.
/// * [`FetchError::BadStatusCode`] – the status code is not `200`.
/// * [`FetchError::BadHeaders`] – `Content-Type`, `ETag`, `Last-Modified` or
///   `Date` is missing, or `ETag` / `Last-Modified` cannot be decoded.
/// * [`FetchError::BadContentType`] – the content type is not
///   `application/vnd.ms-excel`.
///
/// Everything read from the response is server-controlled, so malformed values
/// are reported as errors rather than causing a panic.
async fn fetch_specified(url: &str, user_agent: String, head: bool) -> FetchResult {
    let client = reqwest::Client::new();
    let request = if head {
        client.head(url)
    } else {
        client.get(url)
    };

    let response = request
        .header("User-Agent", user_agent)
        .send()
        .await
        .map_err(|_| FetchError::Unknown)?;

    if response.status().as_u16() != 200 {
        return Err(FetchError::BadStatusCode);
    }

    let headers = response.headers();
    // `Date` is not used afterwards, but its presence stays mandatory.
    let (Some(content_type), Some(etag), Some(last_modified), Some(_)) = (
        headers.get("Content-Type"),
        headers.get("etag"),
        headers.get("last-modified"),
        headers.get("date"),
    ) else {
        return Err(FetchError::BadHeaders);
    };

    if content_type != "application/vnd.ms-excel" {
        return Err(FetchError::BadContentType);
    }

    // Convert to owned values now: reading the body below consumes `response`.
    let etag = etag
        .to_str()
        .map_err(|_| FetchError::BadHeaders)?
        .to_string();
    let last_modified = last_modified
        .to_str()
        .ok()
        .and_then(|raw| DateTime::parse_from_rfc2822(raw).ok())
        .ok_or(FetchError::BadHeaders)?
        .with_timezone(&Utc);

    if head {
        Ok(FetchOk::head(etag, last_modified))
    } else {
        let body = response.bytes().await.map_err(|_| FetchError::Unknown)?;
        Ok(FetchOk::get(etag, last_modified, body.to_vec()))
    }
}
impl BasicXlsDownloader {
pub fn new() -> Self {
BasicXlsDownloader { url: None }
}
}
impl XLSDownloader for BasicXlsDownloader {
    /// Fetches the document at the stored URL.
    ///
    /// `head` is forwarded to `fetch_specified`: `true` requests only the
    /// metadata, `false` also downloads the body.
    ///
    /// # Errors
    ///
    /// Returns [`FetchError::NoUrlProvided`] when no URL has been stored, or
    /// whatever error `fetch_specified` reports.
    async fn fetch(&self, head: bool) -> FetchResult {
        let Some(url) = self.url.as_ref() else {
            return Err(FetchError::NoUrlProvided);
        };

        fetch_specified(url, "t.me/polytechnic_next".to_string(), head).await
    }

    /// Validates `url` with a `HEAD` request and stores it on success.
    ///
    /// # Errors
    ///
    /// Propagates the error from the validation request; the previously stored
    /// URL (if any) is left untouched in that case.
    async fn set_url(&mut self, url: String) -> Result<(), FetchError> {
        fetch_specified(&url, "t.me/polytechnic_next".to_string(), true).await?;
        self.url = Some(url);
        Ok(())
    }
}
#[cfg(test)]
mod tests {
    use crate::xls_downloader::basic_impl::{BasicXlsDownloader, fetch_specified};
    use crate::xls_downloader::interface::{FetchError, FetchResult, XLSDownloader};

    /// Remote fixture that these tests expect to pass every check.
    const SCHEDULE_URL: &str = "https://s3.aero-storage.ldragol.ru/679e5d1145a6ad00843ad3f1/67ddb5fad46303008396ac97%2Fschedule.xls";

    /// Remote fixture that these tests expect to be rejected for its content type.
    const TEXT_URL: &str = "https://s3.aero-storage.ldragol.ru/679e5d1145a6ad00843ad3f1/67ddb59fd46303008396ac96%2Fexample.txt";

    /// Runs a HEAD request followed by a GET request against `address`,
    /// both with an empty user agent, and returns the outcomes in that order.
    async fn head_then_get(address: &str) -> [FetchResult; 2] {
        let address = address.to_string();
        let agent = String::new();

        [
            fetch_specified(&address, agent.clone(), true).await,
            fetch_specified(&address, agent.clone(), false).await,
        ]
    }

    /// Asserts that every outcome is an error equal to `expected`.
    fn assert_all_fail_with(outcomes: &[FetchResult; 2], expected: FetchError) {
        for outcome in outcomes {
            assert_eq!(outcome.as_ref().err(), Some(&expected));
        }
    }

    #[tokio::test]
    async fn bad_url() {
        let outcomes = head_then_get("bad_url").await;

        assert!(outcomes.iter().all(|outcome| outcome.is_err()));
    }

    #[tokio::test]
    async fn bad_status_code() {
        let outcomes = head_then_get("https://www.google.com/not-found").await;

        assert_all_fail_with(&outcomes, FetchError::BadStatusCode);
    }

    #[tokio::test]
    async fn bad_headers() {
        let outcomes = head_then_get("https://www.google.com/favicon.ico").await;

        assert_all_fail_with(&outcomes, FetchError::BadHeaders);
    }

    #[tokio::test]
    async fn bad_content_type() {
        let outcomes = head_then_get(TEXT_URL).await;

        assert_all_fail_with(&outcomes, FetchError::BadContentType);
    }

    #[tokio::test]
    async fn ok() {
        let outcomes = head_then_get(SCHEDULE_URL).await;

        assert!(outcomes.iter().all(|outcome| outcome.is_ok()));
    }

    #[tokio::test]
    async fn downloader_set_ok() {
        let mut downloader = BasicXlsDownloader::new();

        assert!(downloader.set_url(SCHEDULE_URL.to_string()).await.is_ok());
    }

    #[tokio::test]
    async fn downloader_set_err() {
        let mut downloader = BasicXlsDownloader::new();

        assert!(downloader.set_url("bad_url".to_string()).await.is_err());
    }

    #[tokio::test]
    async fn downloader_ok() {
        let mut downloader = BasicXlsDownloader::new();

        assert!(downloader.set_url(SCHEDULE_URL.to_string()).await.is_ok());
        assert!(downloader.fetch(false).await.is_ok());
    }

    #[tokio::test]
    async fn downloader_no_url_provided() {
        let downloader = BasicXlsDownloader::new();

        let outcome = downloader.fetch(false).await;

        assert_eq!(outcome.err(), Some(FetchError::NoUrlProvided));
    }
}

View File

@@ -0,0 +1,44 @@
use chrono::{DateTime, Utc};
/// Reasons a download attempt can fail.
///
/// Implements [`std::error::Error`] so it can be propagated with `?` into
/// boxed or wrapped error types.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub enum FetchError {
    /// A fetch was requested before any URL had been stored.
    NoUrlProvided,
    /// The request could not be completed for a reason that is not classified further.
    Unknown,
    /// The server answered with a status code other than `200`.
    BadStatusCode,
    /// The `Content-Type` header is not `application/vnd.ms-excel`.
    BadContentType,
    /// The response headers were not acceptable: `Content-Type`, `ETag`,
    /// `Last-Modified` and `Date` are all required.
    BadHeaders,
}

impl std::fmt::Display for FetchError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let message = match self {
            FetchError::NoUrlProvided => "no URL has been provided",
            FetchError::Unknown => "request failed for an unknown reason",
            FetchError::BadStatusCode => "unexpected HTTP status code",
            FetchError::BadContentType => "unexpected content type",
            FetchError::BadHeaders => "missing or invalid response headers",
        };
        f.write_str(message)
    }
}

impl std::error::Error for FetchError {}
/// Metadata (and optionally the body) of a successfully validated response.
///
/// Derives `Debug` so that `FetchResult` works with `unwrap_err`, `expect_err`
/// and `assert_eq!`, all of which require the `Ok` type to be `Debug`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FetchOk {
    /// Value of the response's `ETag` header.
    pub etag: String,
    /// Time taken from the response's `Last-Modified` header, converted to UTC.
    pub uploaded_at: DateTime<Utc>,
    /// Time at which this value was constructed.
    pub requested_at: DateTime<Utc>,
    /// Response body; `None` when only the headers were requested.
    pub data: Option<Vec<u8>>,
}
impl FetchOk {
    /// Builds the result of a metadata-only request: no body is attached.
    pub fn head(etag: String, uploaded_at: DateTime<Utc>) -> Self {
        Self::stamped(etag, uploaded_at, None)
    }

    /// Builds the result of a full download, carrying `data` as the body.
    pub fn get(etag: String, uploaded_at: DateTime<Utc>, data: Vec<u8>) -> Self {
        Self::stamped(etag, uploaded_at, Some(data))
    }

    /// Shared constructor: records the current UTC time as `requested_at`.
    fn stamped(etag: String, uploaded_at: DateTime<Utc>, data: Option<Vec<u8>>) -> Self {
        let requested_at = Utc::now();
        Self {
            etag,
            uploaded_at,
            requested_at,
            data,
        }
    }
}
/// Outcome of a download attempt: `FetchOk` on success, `FetchError` otherwise.
pub type FetchResult = Result<FetchOk, FetchError>;
/// Contract for components that download an XLS document from a configurable URL.
pub trait XLSDownloader {
/// Requests the document at the configured URL.
///
/// In `BasicXlsDownloader`, `head == true` issues a HEAD request (metadata
/// only) and `head == false` issues a GET request that also returns the body;
/// it fails with `FetchError::NoUrlProvided` when no URL has been set.
async fn fetch(&self, head: bool) -> FetchResult;
/// Sets the URL used by later `fetch` calls.
///
/// In `BasicXlsDownloader`, the URL is first checked with a HEAD request and
/// stored only when that request succeeds; otherwise its error is returned.
async fn set_url(&mut self, url: String) -> Result<(), FetchError>;
}

View File

@@ -0,0 +1,2 @@
pub mod basic_impl;
pub mod interface;