mirror of
https://github.com/n08i40k/schedule-parser-next.git
synced 2025-12-06 09:47:46 +03:00
Обход бана по IP за частые обращения к сайту политехникума за счёт скачивания кода страницы на стороне клиентов.
This commit is contained in:
@@ -1,5 +1,7 @@
|
||||
import {
|
||||
IsArray,
|
||||
IsBase64,
|
||||
IsBoolean,
|
||||
IsDate,
|
||||
IsEnum,
|
||||
IsNumber,
|
||||
@@ -240,6 +242,14 @@ export class ScheduleDto {
|
||||
})
|
||||
@Type(() => Object)
|
||||
lastChangedDays: Array<Array<number>>;
|
||||
|
||||
@ApiProperty({
|
||||
example: false,
|
||||
description:
|
||||
"Требуется ли пользовательское обновление ссылки для скачивания расписания",
|
||||
})
|
||||
@IsBoolean()
|
||||
updateRequired: boolean;
|
||||
}
|
||||
|
||||
/**
 * Request body used when asking for a single group's schedule.
 *
 * Built with `PickType`, so it exposes only the `name` property of `GroupDto`.
 */
export class GroupScheduleRequestDto extends PickType(GroupDto, ["name"]) {}
|
||||
@@ -269,4 +279,21 @@ export class GroupScheduleDto extends OmitType(ScheduleDto, [
|
||||
@ValidateNested({ each: true })
|
||||
@Type(() => Number)
|
||||
lastChangedDays: Array<number>;
|
||||
|
||||
@ApiProperty({
|
||||
example: false,
|
||||
description:
|
||||
"Требуется ли пользовательское обновление ссылки для скачивания расписания",
|
||||
})
|
||||
@IsBoolean()
|
||||
updateRequired: boolean;
|
||||
}
|
||||
|
||||
/**
 * Request body carrying a copy of the polytechnic site's main page.
 *
 * `mainPage` is validated with `@IsBase64()`, so it must hold the Base64
 * encoding of the page source rather than raw HTML.
 */
export class SiteMainPageDto {
  @ApiProperty({
    // Base64 of "<div></div>". A raw HTML example would be rejected by the
    // @IsBase64() validator below, making the documented sample unusable.
    example: "PGRpdj48L2Rpdj4=",
    description: "Код страницы политехникума для скачивания",
  })
  @IsBase64()
  mainPage: string;
}
|
||||
|
||||
@@ -20,6 +20,7 @@ export class ScheduleParseResult {
|
||||
etag: string;
|
||||
groups: Array<GroupDto>;
|
||||
affectedDays: Array<Array<number>>;
|
||||
updateRequired: boolean;
|
||||
}
|
||||
|
||||
export class ScheduleParser {
|
||||
@@ -114,6 +115,10 @@ export class ScheduleParser {
|
||||
return { daySkeletons: days, groupSkeletons: groups };
|
||||
}
|
||||
|
||||
/**
 * Gives access to the downloader held by this parser.
 *
 * @returns the value of the `xlsDownloader` field
 */
getXlsDownloader(): XlsDownloaderBase {
  const { xlsDownloader } = this;
  return xlsDownloader;
}
|
||||
|
||||
async getSchedule(
|
||||
forceCached: boolean = false,
|
||||
): Promise<ScheduleParseResult> {
|
||||
@@ -121,6 +126,15 @@ export class ScheduleParser {
|
||||
|
||||
const downloadData = await this.xlsDownloader.downloadXLS();
|
||||
|
||||
if (downloadData.updateRequired && downloadData.etag.length === 0) {
|
||||
return {
|
||||
updateRequired: true,
|
||||
groups: [],
|
||||
etag: "",
|
||||
affectedDays: [],
|
||||
};
|
||||
}
|
||||
|
||||
if (
|
||||
!downloadData.new &&
|
||||
this.lastResult &&
|
||||
@@ -241,6 +255,7 @@ export class ScheduleParser {
|
||||
etag: downloadData.etag,
|
||||
groups: groups,
|
||||
affectedDays: this.getAffectedDays(this.lastResult?.groups, groups),
|
||||
updateRequired: downloadData.updateRequired,
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
@@ -5,47 +5,57 @@ import {
|
||||
} from "./xls-downloader.base";
|
||||
import axios from "axios";
|
||||
import { JSDOM } from "jsdom";
|
||||
import { NotAcceptableException } from "@nestjs/common";
|
||||
|
||||
export class BasicXlsDownloader extends XlsDownloaderBase {
|
||||
cache: XlsDownloaderResult | null = null;
|
||||
preparedData: { downloadLink: string; updateDate: string } | null = null;
|
||||
private lastUpdate: number = 0;
|
||||
|
||||
/**
 * Builds a DOM from a Base64-encoded copy of the main page.
 *
 * The page is no longer fetched here; the caller passes in its source.
 *
 * @param preparedData Base64 string containing the page source
 * @returns the parsed document, created with `this.url` as its URL and
 *   `text/html` as its content type
 * @throws NotAcceptableException when decoding or DOM construction throws
 */
private async getDOM(preparedData: string): Promise<JSDOM> {
  try {
    // NOTE(review): atob produces one character per decoded byte. If clients
    // upload UTF-8 bytes, non-ASCII text may need an explicit decode — confirm
    // against the client implementation.
    const html = atob(preparedData);

    return new JSDOM(html, {
      url: this.url,
      contentType: "text/html",
    });
  } catch {
    // Any failure is reported as bad client input rather than a server error.
    throw new NotAcceptableException("Передан некорректный код страницы");
  }
}
|
||||
|
||||
/**
 * Extracts the schedule download link and its update-date text from the page.
 *
 * Looks at the first `div` inside the element with id `cont-i`: the `href`
 * of its first anchor is the download link, and the text of its fourth
 * `span` (leading whitespace removed) is the update date.
 *
 * @param dom parsed main page
 * @returns the download link and the update-date text
 * @throws NotAcceptableException on any failure while reading the page; the
 *   underlying error is written to `console.error` first
 */
private parseData(dom: JSDOM): {
  downloadLink: string;
  updateDate: string;
} {
  try {
    const container = dom.window.document.getElementById("cont-i");
    if (!container) {
      // noinspection ExceptionCaughtLocallyJS
      throw new Error("Не удалось найти блок расписаний!");
    }

    const entries = container.getElementsByTagName("div");
    if (!entries || entries.length === 0) {
      // noinspection ExceptionCaughtLocallyJS
      throw new Error("Не удалось найти строку с расписанием!");
    }

    // Only the first entry is used.
    const firstEntry = entries[0];
    const anchor = firstEntry.getElementsByTagName("a")[0]!;
    const updateDate = firstEntry
      .getElementsByTagName("span")[3]
      .textContent!.trimStart();

    return { downloadLink: anchor.href, updateDate };
  } catch (error) {
    // Log the real cause, then report the page as unacceptable client input.
    console.error(error);
    throw new NotAcceptableException("Передан некорректный код страницы");
  }
}
|
||||
|
||||
public async getCachedXLS(): Promise<XlsDownloaderResult | null> {
|
||||
@@ -56,6 +66,17 @@ ${response.statusText}`);
|
||||
return this.cache;
|
||||
}
|
||||
|
||||
/**
 * Reports whether the prepared page data is considered stale.
 *
 * @returns `true` when more than five minutes have elapsed since `lastUpdate`
 */
public isUpdateRequired(): boolean {
  const elapsedMinutes = (Date.now() - this.lastUpdate) / 1000 / 60;
  return elapsedMinutes > 5;
}
|
||||
|
||||
/**
 * Parses the supplied page source and stores the extracted link data.
 *
 * `lastUpdate` is refreshed only after parsing succeeds; if `getDOM` or
 * `parseData` throws, neither field is modified.
 *
 * @param preparedData page source as passed on to `getDOM`
 */
public async setPreparedData(preparedData: string): Promise<void> {
  this.preparedData = this.parseData(await this.getDOM(preparedData));
  this.lastUpdate = Date.now();
}
|
||||
|
||||
public async downloadXLS(): Promise<XlsDownloaderResult> {
|
||||
if (
|
||||
this.cacheMode === XlsDownloaderCacheMode.HARD &&
|
||||
@@ -63,12 +84,18 @@ ${response.statusText}`);
|
||||
)
|
||||
return this.getCachedXLS();
|
||||
|
||||
const dom = await this.getDOM();
|
||||
const parseData = this.parseData(dom);
|
||||
if (!this.preparedData) {
|
||||
return {
|
||||
updateRequired: true,
|
||||
etag: "",
|
||||
new: true,
|
||||
fileData: new ArrayBuffer(1),
|
||||
updateDate: "",
|
||||
};
|
||||
}
|
||||
|
||||
// FIX-ME: Что такое Annotator и почему он выдаёт пустое предупреждение?
|
||||
// noinspection Annotator
|
||||
const response = await axios.get(parseData.downloadLink, {
|
||||
const response = await axios.get(this.preparedData.downloadLink, {
|
||||
responseType: "arraybuffer",
|
||||
});
|
||||
if (response.status !== 200) {
|
||||
@@ -79,12 +106,13 @@ ${response.statusText}`);
|
||||
|
||||
const result: XlsDownloaderResult = {
|
||||
fileData: response.data.buffer,
|
||||
updateDate: parseData.updateDate,
|
||||
updateDate: this.preparedData.updateDate,
|
||||
etag: response.headers["etag"],
|
||||
new:
|
||||
this.cacheMode === XlsDownloaderCacheMode.NONE
|
||||
? true
|
||||
: this.cache?.etag !== response.headers["etag"],
|
||||
updateRequired: this.isUpdateRequired(),
|
||||
};
|
||||
|
||||
if (this.cacheMode !== XlsDownloaderCacheMode.NONE) this.cache = result;
|
||||
|
||||
@@ -3,6 +3,7 @@ export type XlsDownloaderResult = {
|
||||
updateDate: string;
|
||||
etag: string;
|
||||
new: boolean;
|
||||
updateRequired: boolean;
|
||||
};
|
||||
|
||||
export enum XlsDownloaderCacheMode {
|
||||
@@ -21,6 +22,10 @@ export abstract class XlsDownloaderBase {
|
||||
|
||||
/**
 * Resolves to a download result or `null`.
 * NOTE(review): presumably `null` means nothing has been cached yet — confirm in implementations.
 */
public abstract getCachedXLS(): Promise<XlsDownloaderResult | null>;
|
||||
|
||||
/** Tells the caller whether new prepared data should be supplied through `setPreparedData`. */
public abstract isUpdateRequired(): boolean;
|
||||
|
||||
/**
 * Supplies externally prepared page data to the downloader.
 *
 * @param preparedData the data as a string; its exact format is defined by the implementation
 */
public abstract setPreparedData(preparedData: string): Promise<void>;
|
||||
|
||||
/**
 * Reads the cache mode of this downloader.
 *
 * @returns the value of the `cacheMode` field
 */
public getCacheMode(): XlsDownloaderCacheMode {
  const { cacheMode } = this;
  return cacheMode;
}
|
||||
|
||||
@@ -14,10 +14,12 @@ import {
|
||||
GroupScheduleRequestDto,
|
||||
ScheduleDto,
|
||||
ScheduleGroupsDto,
|
||||
SiteMainPageDto,
|
||||
} from "../dto/schedule.dto";
|
||||
import { ResultDto } from "../utility/validation/class-validator.interceptor";
|
||||
import {
|
||||
ApiExtraModels,
|
||||
ApiNotAcceptableResponse,
|
||||
ApiNotFoundResponse,
|
||||
ApiOkResponse,
|
||||
ApiOperation,
|
||||
@@ -38,8 +40,8 @@ export class ScheduleController {
|
||||
@ResultDto(ScheduleDto)
|
||||
@HttpCode(HttpStatus.OK)
|
||||
@Get("get")
|
||||
getSchedule(): Promise<ScheduleDto> {
|
||||
return this.scheduleService.getSchedule();
|
||||
async getSchedule(): Promise<ScheduleDto> {
|
||||
return await this.scheduleService.getSchedule();
|
||||
}
|
||||
|
||||
@ApiExtraModels(GroupScheduleDto)
|
||||
@@ -55,10 +57,10 @@ export class ScheduleController {
|
||||
@ResultDto(GroupScheduleDto)
|
||||
@HttpCode(HttpStatus.OK)
|
||||
@Post("get-group")
|
||||
getGroupSchedule(
|
||||
async getGroupSchedule(
|
||||
@Body() groupDto: GroupScheduleRequestDto,
|
||||
): Promise<GroupScheduleDto> {
|
||||
return this.scheduleService.getGroup(groupDto.name);
|
||||
return await this.scheduleService.getGroup(groupDto.name);
|
||||
}
|
||||
|
||||
@ApiExtraModels(ScheduleGroupsDto)
|
||||
@@ -75,6 +77,24 @@ export class ScheduleController {
|
||||
@HttpCode(HttpStatus.OK)
|
||||
@Get("get-group-names")
|
||||
async getGroupNames(): Promise<ScheduleGroupsDto> {
|
||||
return this.scheduleService.getGroupNames();
|
||||
return await this.scheduleService.getGroupNames();
|
||||
}
|
||||
|
||||
/**
 * Endpoint that receives a client-supplied copy of the polytechnic main page
 * and forwards it to the schedule service.
 *
 * @param siteMainPageDto request body holding the page source
 */
@ApiExtraModels(SiteMainPageDto)
@ApiOperation({
  summary: "Обновление данных основной страницы политехникума",
  tags: ["schedule"],
})
@ApiOkResponse({ description: "Данные обновлены успешно" })
@ApiNotAcceptableResponse({
  description: "Передан некорректный код страницы",
})
@ResultDto(null)
@HttpCode(HttpStatus.OK)
@Post("update-site-main-page")
async updateSiteMainPage(
  @Body() siteMainPageDto: SiteMainPageDto,
): Promise<void> {
  await this.scheduleService.updateSiteMainPage(siteMainPageDto);
}
|
||||
}
|
||||
|
||||
@@ -1,4 +1,9 @@
|
||||
import { Inject, Injectable, NotFoundException } from "@nestjs/common";
|
||||
import {
|
||||
Inject,
|
||||
Injectable,
|
||||
NotFoundException,
|
||||
ServiceUnavailableException,
|
||||
} from "@nestjs/common";
|
||||
import {
|
||||
ScheduleParser,
|
||||
ScheduleParseResult,
|
||||
@@ -10,6 +15,7 @@ import {
|
||||
GroupScheduleDto,
|
||||
ScheduleDto,
|
||||
ScheduleGroupsDto,
|
||||
SiteMainPageDto,
|
||||
} from "../dto/schedule.dto";
|
||||
import { Cache, CACHE_MANAGER } from "@nestjs/cache-manager";
|
||||
import { instanceToPlain } from "class-transformer";
|
||||
@@ -38,6 +44,11 @@ export class ScheduleService {
|
||||
schedule.groups,
|
||||
) as Array<GroupDto>;
|
||||
|
||||
if (schedule.updateRequired && schedule.etag.length === 0)
|
||||
throw new ServiceUnavailableException(
|
||||
"Отсутствует начальная ссылка на скачивание!",
|
||||
);
|
||||
|
||||
return schedule;
|
||||
});
|
||||
}
|
||||
@@ -66,6 +77,7 @@ export class ScheduleService {
|
||||
groups: ScheduleService.toObject(sourceSchedule.groups),
|
||||
etag: sourceSchedule.etag,
|
||||
lastChangedDays: this.lastChangedDays,
|
||||
updateRequired: sourceSchedule.updateRequired,
|
||||
};
|
||||
});
|
||||
}
|
||||
@@ -84,6 +96,7 @@ export class ScheduleService {
|
||||
group: schedule.groups[group],
|
||||
etag: schedule.etag,
|
||||
lastChangedDays: this.lastChangedDays[group] ?? [],
|
||||
updateRequired: schedule.updateRequired,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -107,4 +120,12 @@ export class ScheduleService {
|
||||
|
||||
return groupNames;
|
||||
}
|
||||
|
||||
/**
 * Passes the uploaded main page to the parser's downloader and then resets
 * the cache manager.
 *
 * The cache is reset only if the downloader accepted the data; a rejection
 * from `setPreparedData` propagates to the caller.
 *
 * @param siteMainPageDto request body whose `mainPage` field is forwarded
 */
async updateSiteMainPage(siteMainPageDto: SiteMainPageDto): Promise<void> {
  const downloader = this.scheduleParser.getXlsDownloader();
  await downloader.setPreparedData(siteMainPageDto.mainPage);

  await this.cacheManager.reset();
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user