mirror of
https://github.com/n08i40k/schedule-parser-next.git
synced 2025-12-06 17:57:45 +03:00
Обход бана по IP за частые обращения к сайту политехникума за счёт скачивания кода страницы на стороне клиентов.
This commit is contained in:
@@ -5,47 +5,57 @@ import {
|
||||
} from "./xls-downloader.base";
|
||||
import axios from "axios";
|
||||
import { JSDOM } from "jsdom";
|
||||
import { NotAcceptableException } from "@nestjs/common";
|
||||
|
||||
export class BasicXlsDownloader extends XlsDownloaderBase {
|
||||
cache: XlsDownloaderResult | null = null;
|
||||
preparedData: { downloadLink: string; updateDate: string } | null = null;
|
||||
private lastUpdate: number = 0;
|
||||
|
||||
private async getDOM(): Promise<JSDOM> {
|
||||
const response = await axios.get(this.url);
|
||||
|
||||
if (response.status !== 200) {
|
||||
throw new Error(`Не удалось получить данные с основной страницы!
|
||||
Статус код: ${response.status}
|
||||
${response.statusText}`);
|
||||
private async getDOM(preparedData: any): Promise<JSDOM | null> {
|
||||
try {
|
||||
return new JSDOM(atob(preparedData), {
|
||||
url: this.url,
|
||||
contentType: "text/html",
|
||||
});
|
||||
} catch {
|
||||
throw new NotAcceptableException(
|
||||
"Передан некорректный код страницы",
|
||||
);
|
||||
}
|
||||
|
||||
return new JSDOM(response.data, {
|
||||
url: this.url,
|
||||
contentType: "text/html",
|
||||
});
|
||||
}
|
||||
|
||||
private parseData(dom: JSDOM): {
|
||||
downloadLink: string;
|
||||
updateDate: string;
|
||||
} {
|
||||
const scheduleBlock = dom.window.document.getElementById("cont-i");
|
||||
if (scheduleBlock === null)
|
||||
throw new Error("Не удалось найти блок расписаний!");
|
||||
try {
|
||||
const scheduleBlock = dom.window.document.getElementById("cont-i");
|
||||
if (scheduleBlock === null)
|
||||
// noinspection ExceptionCaughtLocallyJS
|
||||
throw new Error("Не удалось найти блок расписаний!");
|
||||
|
||||
const schedules = scheduleBlock.getElementsByTagName("div");
|
||||
if (schedules === null || schedules.length === 0)
|
||||
throw new Error("Не удалось найти строку с расписанием!");
|
||||
const schedules = scheduleBlock.getElementsByTagName("div");
|
||||
if (schedules === null || schedules.length === 0)
|
||||
// noinspection ExceptionCaughtLocallyJS
|
||||
throw new Error("Не удалось найти строку с расписанием!");
|
||||
|
||||
const poltavskaya = schedules[0];
|
||||
const link = poltavskaya.getElementsByTagName("a")[0]!;
|
||||
const poltavskaya = schedules[0];
|
||||
const link = poltavskaya.getElementsByTagName("a")[0]!;
|
||||
|
||||
const spans = poltavskaya.getElementsByTagName("span");
|
||||
const updateDate = spans[3].textContent!.trimStart();
|
||||
const spans = poltavskaya.getElementsByTagName("span");
|
||||
const updateDate = spans[3].textContent!.trimStart();
|
||||
|
||||
return {
|
||||
downloadLink: link.href,
|
||||
updateDate: updateDate,
|
||||
};
|
||||
return {
|
||||
downloadLink: link.href,
|
||||
updateDate: updateDate,
|
||||
};
|
||||
} catch (exception) {
|
||||
console.error(exception);
|
||||
throw new NotAcceptableException(
|
||||
"Передан некорректный код страницы",
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
public async getCachedXLS(): Promise<XlsDownloaderResult | null> {
|
||||
@@ -56,6 +66,17 @@ ${response.statusText}`);
|
||||
return this.cache;
|
||||
}
|
||||
|
||||
public isUpdateRequired(): boolean {
|
||||
return (Date.now() - this.lastUpdate) / 1000 / 60 > 5;
|
||||
}
|
||||
|
||||
public async setPreparedData(preparedData: string): Promise<void> {
|
||||
const dom = await this.getDOM(preparedData);
|
||||
this.preparedData = this.parseData(dom);
|
||||
|
||||
this.lastUpdate = Date.now();
|
||||
}
|
||||
|
||||
public async downloadXLS(): Promise<XlsDownloaderResult> {
|
||||
if (
|
||||
this.cacheMode === XlsDownloaderCacheMode.HARD &&
|
||||
@@ -63,12 +84,18 @@ ${response.statusText}`);
|
||||
)
|
||||
return this.getCachedXLS();
|
||||
|
||||
const dom = await this.getDOM();
|
||||
const parseData = this.parseData(dom);
|
||||
if (!this.preparedData) {
|
||||
return {
|
||||
updateRequired: true,
|
||||
etag: "",
|
||||
new: true,
|
||||
fileData: new ArrayBuffer(1),
|
||||
updateDate: "",
|
||||
};
|
||||
}
|
||||
|
||||
// FIX-ME: Что такое Annotator и почему он выдаёт пустое предупреждение?
|
||||
// noinspection Annotator
|
||||
const response = await axios.get(parseData.downloadLink, {
|
||||
const response = await axios.get(this.preparedData.downloadLink, {
|
||||
responseType: "arraybuffer",
|
||||
});
|
||||
if (response.status !== 200) {
|
||||
@@ -79,12 +106,13 @@ ${response.statusText}`);
|
||||
|
||||
const result: XlsDownloaderResult = {
|
||||
fileData: response.data.buffer,
|
||||
updateDate: parseData.updateDate,
|
||||
updateDate: this.preparedData.updateDate,
|
||||
etag: response.headers["etag"],
|
||||
new:
|
||||
this.cacheMode === XlsDownloaderCacheMode.NONE
|
||||
? true
|
||||
: this.cache?.etag !== response.headers["etag"],
|
||||
updateRequired: this.isUpdateRequired(),
|
||||
};
|
||||
|
||||
if (this.cacheMode !== XlsDownloaderCacheMode.NONE) this.cache = result;
|
||||
|
||||
Reference in New Issue
Block a user