This commit is contained in:
2024-09-06 23:13:44 +04:00
parent 2b2018c317
commit 31906fbbd1
29 changed files with 2061 additions and 90 deletions

View File

@@ -0,0 +1,295 @@
import {
XlsDownloaderBase,
XlsDownloaderCacheMode,
XlsDownloaderResult,
} from "../xls-downloader/xls-downloader.base";
import * as XLSX from "xlsx";
import {
DayDto,
GroupDto,
LessonDto,
LessonTimeDto,
LessonTypeDto,
} from "../../../dto/schedule.dto";
import { trimAll } from "../../../utility/string.util";
/** Row/column coordinates of a worksheet cell together with the name read from it. */
type InternalId = { row: number; column: number; name: string };
/** A day cell (`InternalId`) extended with the list of lesson cells attached to that day. */
type InternalDay = InternalId & { lessons: Array<InternalId> };
/** Outcome of a schedule parse, as returned by `ScheduleParser.getSchedule`. */
export type ScheduleParseResult = {
// ETag copied from the download result the schedule was parsed from.
etag: string;
// Parsed schedule of the requested group.
group: GroupDto;
// Indices of days that differ from the previously parsed result.
affectedDays: Array<number>;
};
/**
 * Obtains the schedule workbook through an {@link XlsDownloaderBase} and
 * extracts the timetable of one group from the workbook's first worksheet.
 *
 * The latest parse result is kept in memory so that a download reported as
 * "not new" can be answered without parsing the workbook again.
 */
export class ScheduleParser {
    private lastResult: ScheduleParseResult | null = null;

    /**
     * @param xlsDownloader source of the workbook bytes (fresh or cached).
     * @param group exact group name to look for in the worksheet header row.
     */
    public constructor(
        private readonly xlsDownloader: XlsDownloaderBase,
        private readonly group: string,
    ) {}

    /**
     * Reads the raw value (`v`) of the cell at the given row/column.
     *
     * @returns the cell value, or `null` when the worksheet has no such cell.
     */
    private static getCellName(
        worksheet: XLSX.Sheet,
        row: number,
        column: number,
    ): any | null {
        const cell = worksheet[XLSX.utils.encode_cell({ r: row, c: column })];
        return cell ? cell.v : null;
    }

    /**
     * Splits a raw cabinet cell value into individual cabinet names.
     *
     * Any run of whitespace acts as a separator, and an absent cell yields an
     * empty list.
     */
    private static parseCabinets(rawCabinets: unknown): Array<string> {
        if (rawCabinets === null || rawCabinets === undefined) return [];

        return String(rawCabinets)
            .split(/\s+/)
            .filter((cabinet) => cabinet.length > 0);
    }

    /**
     * Tells whether a freshly parsed day matches its cached counterpart.
     *
     * Lessons of the fresh day that have an empty name are not compared.
     * NOTE(review): this means a lesson that disappeared is not reported as a
     * change — confirm this is intended.
     * NOTE(review): `time.start` / `time.end` are compared with `!==`; this
     * assumes they are primitives — confirm against LessonTimeDto.
     */
    private static isSameDay(
        currentDay: DayDto,
        cachedDay: DayDto | null | undefined,
    ): boolean {
        if (
            !cachedDay ||
            cachedDay.lessons.length !== currentDay.lessons.length
        )
            return false;

        return currentDay.lessons.every((lesson, lessonIdx) => {
            if (lesson.name.length === 0) return true;

            const cachedLesson = cachedDay.lessons[lessonIdx];
            return (
                lesson.name === cachedLesson.name &&
                lesson.time.start === cachedLesson.time.start &&
                lesson.time.end === cachedLesson.time.end &&
                lesson.cabinets.toString() ===
                    cachedLesson.cabinets.toString() &&
                lesson.teacherNames.toString() ===
                    cachedLesson.teacherNames.toString()
            );
        });
    }

    /**
     * Separates the trailing list of teachers ("Surname I.O.", optionally
     * followed by "(N подгруппа)") from the lesson title.
     *
     * @returns the title without the teacher list and the extracted teachers;
     * when no trailing teacher list is found the input is returned unchanged
     * with an empty teacher list.
     */
    private parseTeacherFullNames(lessonName: string): {
        name: string;
        teacherFullNames: Array<string>;
    } {
        // Matches the whole comma-separated teacher list at the end of a line.
        const firstRegex =
            /(?:[А-ЯЁ][а-яё]+\s[А-ЯЁ]\.[А-ЯЁ]\.(?:\s\([0-9] подгруппа\))?(?:,\s)?)+$/gm;
        // Matches a single teacher entry inside that list.
        const secondRegex =
            /(?:[А-ЯЁ][а-яё]+\s[А-ЯЁ]\.[А-ЯЁ]\.(?:\s\([0-9] подгруппа\))?)+/gm;

        const fm = firstRegex.exec(lessonName);
        if (fm === null) return { name: lessonName, teacherFullNames: [] };

        const teacherFullNames: Array<string> = [];
        let teacherFullNameMatch: RegExpExecArray | null;
        while ((teacherFullNameMatch = secondRegex.exec(fm[0])) !== null) {
            // Guard against an endless loop on a zero-length match.
            if (teacherFullNameMatch.index === secondRegex.lastIndex)
                secondRegex.lastIndex++;
            teacherFullNames.push(teacherFullNameMatch[0].trim());
        }

        if (teacherFullNames.length === 0)
            return { name: lessonName, teacherFullNames: [] };

        return {
            name: lessonName.substring(0, fm.index).trim(),
            teacherFullNames: teacherFullNames,
        };
    }

    /**
     * Locates the group header cell and the day header cells of the worksheet.
     *
     * Column 0 is scanned for non-empty cells, each of which starts a day.
     * The row directly above the first such cell is treated as the header row
     * and searched (starting two columns after the range start) for the group.
     * Scanning stops once the entry following the one whose name starts with
     * "Суббота" has been collected; that trailing entry serves as the end
     * marker of the last day.
     *
     * @returns the day cells and the group cell (`null` when the group name
     * is not present in the header row).
     */
    parseSkeleton(worksheet: XLSX.Sheet): {
        groupSkeleton: InternalId | null;
        daySkeletons: Array<InternalDay>;
    } {
        const range = XLSX.utils.decode_range(worksheet["!ref"] || "");
        let isHeaderParsed = false;
        let group: InternalId | null = null;
        const days: Array<InternalDay> = [];

        for (let row = range.s.r + 1; row <= range.e.r; ++row) {
            const dayName = ScheduleParser.getCellName(worksheet, row, 0);
            if (!dayName) continue;

            if (!isHeaderParsed) {
                isHeaderParsed = true;

                const headerRow = row - 1;
                for (
                    let column = range.s.c + 2;
                    column <= range.e.c;
                    ++column
                ) {
                    const groupName = ScheduleParser.getCellName(
                        worksheet,
                        headerRow,
                        column,
                    );
                    if (!groupName || this.group !== groupName) continue;

                    group = { row: headerRow, column: column, name: groupName };
                    break;
                }
            }

            days.push({ row: row, column: 0, name: dayName, lessons: [] });

            if (
                days.length > 2 &&
                days[days.length - 2].name.startsWith("Суббота")
            )
                break;
        }

        return { daySkeletons: days, groupSkeleton: group };
    }

    /**
     * Builds a lesson from one worksheet row.
     *
     * @returns the lesson, or `null` when the row has no textual time cell
     * (such rows are not lessons and are skipped).
     */
    private parseLesson(
        workSheet: XLSX.Sheet,
        row: number,
        lessonTimeColumn: number,
        groupColumn: number,
    ): LessonDto | null {
        // Check the type BEFORE calling string methods: numeric cells must be
        // skipped, not crash the whole parse.
        const rawTime: unknown = ScheduleParser.getCellName(
            workSheet,
            row,
            lessonTimeColumn,
        );
        if (typeof rawTime !== "string") return null;

        const time = rawTime.replaceAll(" ", "");
        if (time.length === 0) return null;

        const rawNameCell: unknown = ScheduleParser.getCellName(
            workSheet,
            row,
            groupColumn,
        );
        const rawName =
            rawNameCell === null || rawNameCell === undefined
                ? ""
                : String(rawNameCell);

        const cabinets = ScheduleParser.parseCabinets(
            ScheduleParser.getCellName(workSheet, row, groupColumn + 1),
        );

        const type =
            rawName.length === 0
                ? LessonTypeDto.NONE
                : time.includes("пара")
                  ? LessonTypeDto.DEFAULT
                  : LessonTypeDto.CUSTOM;

        // NOTE(review): only the first "\n" is removed here, exactly as
        // before; whether trimAll handles the remaining ones is not visible
        // from this file.
        const { name, teacherFullNames } = this.parseTeacherFullNames(
            trimAll(rawName.replace("\n", "")),
        );

        return new LessonDto(
            type,
            // For regular lessons the first five characters of the time text
            // (the "N пара" prefix with spaces removed) are dropped.
            LessonTimeDto.fromString(
                type === LessonTypeDto.DEFAULT ? time.substring(5) : time,
            ),
            name,
            cabinets,
            teacherFullNames,
        );
    }

    /**
     * Returns the parsed schedule of the configured group.
     *
     * @param forceCached when `true`, the downloader's cached workbook is used
     * if one exists; otherwise (or when nothing is cached) a download is
     * requested.
     * @returns the parse result; the previous result is returned as-is when
     * the download is not new, a previous result exists and caching is enabled.
     * @throws Error when the configured group is missing from the worksheet.
     */
    async getSchedule(
        forceCached: boolean = false,
    ): Promise<ScheduleParseResult> {
        let downloadData: XlsDownloaderResult | null = forceCached
            ? await this.xlsDownloader.getCachedXLS()
            : null;

        if (downloadData === null) {
            console.debug("Обновление кеша...");

            downloadData = await this.xlsDownloader.downloadXLS();

            if (
                !downloadData.new &&
                this.lastResult &&
                this.xlsDownloader.getCacheMode() !==
                    XlsDownloaderCacheMode.NONE
            ) {
                console.debug(
                    "Так как скачанный XLS не новый, присутствует уже готовый результат и кеширование не отключено...",
                );
                console.debug("будет возвращён предыдущий результат.");

                return this.lastResult;
            }
        }

        console.debug("Чтение кешированного XLS документа...");

        const workBook = XLSX.read(downloadData.fileData);
        const workSheet = workBook.Sheets[workBook.SheetNames[0]];

        const { groupSkeleton, daySkeletons } = this.parseSkeleton(workSheet);
        if (groupSkeleton === null)
            throw new Error(`Группа "${this.group}" не найдена в расписании!`);

        const group = new GroupDto(groupSkeleton.name);
        const lessonTimeColumn = (daySkeletons[0]?.column ?? 0) + 1;

        // The last skeleton only marks where the previous day ends.
        for (let dayIdx = 0; dayIdx < daySkeletons.length - 1; ++dayIdx) {
            const daySkeleton = daySkeletons[dayIdx];
            const nextDayRow = daySkeletons[dayIdx + 1].row;
            const day = new DayDto(daySkeleton.name);

            for (let row = daySkeleton.row; row < nextDayRow; ++row) {
                const lesson = this.parseLesson(
                    workSheet,
                    row,
                    lessonTimeColumn,
                    groupSkeleton.column,
                );
                if (lesson !== null) day.lessons.push(lesson);
            }

            day.fillIndices();
            group.days.push(day);
        }

        return (this.lastResult = {
            etag: downloadData.etag,
            group: group,
            affectedDays: this.getAffectedDays(
                this.lastResult?.group ?? null,
                group,
            ),
        });
    }

    /** Returns the most recent parse result, or `null` if nothing was parsed yet. */
    public getLastResult(): ScheduleParseResult | null {
        return this.lastResult;
    }

    /**
     * Lists the indices of days in `group` that differ from the day at the
     * same index in `cachedGroup`.
     *
     * @returns an empty list when there is no cached group to compare with.
     */
    private getAffectedDays(
        cachedGroup: GroupDto | null,
        group: GroupDto,
    ): Array<number> {
        const affectedDays: Array<number> = [];
        if (!cachedGroup) return affectedDays;

        group.days.forEach((day, dayIdx) => {
            if (!ScheduleParser.isSameDay(day, cachedGroup.days[dayIdx]))
                affectedDays.push(dayIdx);
        });

        return affectedDays;
    }
}

View File

@@ -0,0 +1,92 @@
import {
XlsDownloaderBase,
XlsDownloaderCacheMode,
XlsDownloaderResult,
} from "./xls-downloader.base";
import axios from "axios";
import { JSDOM } from "jsdom";
/**
 * Downloader that scrapes the schedule page for the workbook link, fetches
 * the workbook over HTTP and keeps the last download in memory according to
 * the configured cache mode.
 */
export class BasicXlsDownloader extends XlsDownloaderBase {
    /** Last stored download result, or `null` when nothing has been cached. */
    cache: XlsDownloaderResult | null = null;

    /**
     * Converts a binary response body into an ArrayBuffer holding exactly the
     * body bytes.
     *
     * A Node.js Buffer (or any typed-array view) may sit at a non-zero offset
     * inside a larger, possibly pooled, backing ArrayBuffer, so the backing
     * buffer must be sliced instead of being used directly.
     *
     * @throws Error when the body is neither an ArrayBuffer nor a view.
     */
    private static toArrayBuffer(data: unknown): ArrayBuffer {
        if (data instanceof ArrayBuffer) return data;

        if (ArrayBuffer.isView(data)) {
            return data.buffer.slice(
                data.byteOffset,
                data.byteOffset + data.byteLength,
            ) as ArrayBuffer;
        }

        throw new Error(
            "Не удалось получить excel файл!\nНеожиданный формат данных ответа.",
        );
    }

    /**
     * Fetches the schedule page and parses it into a DOM.
     *
     * @throws Error when the page responds with a status other than 200.
     */
    private async getDOM(): Promise<JSDOM> {
        const response = await axios.get(this.url);

        if (response.status !== 200) {
            throw new Error(
                `Не удалось получить данные с основной страницы!\nСтатус код: ${response.status}\n${response.statusText}`,
            );
        }

        return new JSDOM(response.data, {
            url: this.url,
            contentType: "text/html",
        });
    }

    /**
     * Extracts the workbook link and the update date text from the page.
     *
     * The first `div` inside the element with id `cont-i` is used: its first
     * anchor provides the link and its fourth `span` the update date.
     *
     * @throws Error when any of these elements is missing.
     */
    private parseData(dom: JSDOM): {
        downloadLink: string;
        updateDate: string;
    } {
        const scheduleBlock = dom.window.document.getElementById("cont-i");
        if (scheduleBlock === null)
            throw new Error("Не удалось найти блок расписаний!");

        const schedules = scheduleBlock.getElementsByTagName("div");
        if (schedules === null || schedules.length === 0)
            throw new Error("Не удалось найти строку с расписанием!");

        const poltavskaya = schedules[0];

        const link = poltavskaya.getElementsByTagName("a")[0];
        if (!link) throw new Error("Не удалось найти ссылку на расписание!");

        const updateDateSpan = poltavskaya.getElementsByTagName("span")[3];
        if (!updateDateSpan)
            throw new Error("Не удалось найти дату обновления расписания!");

        return {
            downloadLink: link.href,
            updateDate: (updateDateSpan.textContent ?? "").trimStart(),
        };
    }

    /**
     * Returns the cached download, or `null` when the cache is empty.
     *
     * The cached entry's `new` flag is overwritten on every call: `true` in
     * HARD mode, `false` otherwise.
     */
    public async getCachedXLS(): Promise<XlsDownloaderResult | null> {
        if (this.cache === null) return null;

        this.cache.new = this.cacheMode === XlsDownloaderCacheMode.HARD;

        return this.cache;
    }

    /**
     * Downloads the workbook, or returns the cached one in HARD mode.
     *
     * Unless caching is disabled the result replaces the cache. `new` is
     * `true` when caching is disabled, when the response carries no ETag
     * (the file cannot be proven unchanged), or when the ETag differs from
     * the cached one.
     *
     * @throws Error when the page structure is unexpected or a request does
     * not answer with status 200.
     */
    public async downloadXLS(): Promise<XlsDownloaderResult> {
        if (this.cacheMode === XlsDownloaderCacheMode.HARD) {
            const cached = await this.getCachedXLS();
            if (cached !== null) return cached;
        }

        const dom = await this.getDOM();
        const parsedPage = this.parseData(dom);

        const response = await axios.get(parsedPage.downloadLink, {
            responseType: "arraybuffer",
        });
        if (response.status !== 200) {
            throw new Error(
                `Не удалось получить excel файл!\nСтатус код: ${response.status}\n${response.statusText}`,
            );
        }

        const etag = response.headers["etag"];
        const isNew =
            this.cacheMode === XlsDownloaderCacheMode.NONE ||
            etag === undefined ||
            etag === null ||
            this.cache?.etag !== etag;

        const result: XlsDownloaderResult = {
            fileData: BasicXlsDownloader.toArrayBuffer(response.data),
            updateDate: parsedPage.updateDate,
            etag: etag,
            new: isNew,
        };

        if (this.cacheMode !== XlsDownloaderCacheMode.NONE) this.cache = result;

        return result;
    }
}

View File

@@ -0,0 +1,27 @@
/** Result of obtaining the schedule workbook, either downloaded or taken from cache. */
export type XlsDownloaderResult = {
// Binary contents of the workbook.
fileData: ArrayBuffer;
// Update date text accompanying the workbook.
updateDate: string;
// ETag associated with the workbook.
etag: string;
// Flag telling consumers whether this result should be treated as new data.
new: boolean;
};
/** Caching strategy used by an XLS downloader. */
export enum XlsDownloaderCacheMode {
    /** First member of the enum (value 0). */
    NONE = 0,
    /** Read the cache only if the etag has not changed. */
    SOFT = 1,
    /** Always read the cache, except when it is absent. */
    HARD = 2,
}
/**
 * Common contract for schedule workbook downloaders: stores the source URL
 * and the cache mode, and leaves the actual download/cache access to
 * subclasses.
 */
export abstract class XlsDownloaderBase {
    /** Address the downloader works with. */
    protected readonly url: string;
    /** Cache mode chosen at construction time. */
    protected readonly cacheMode: XlsDownloaderCacheMode;

    /**
     * @param url address the downloader works with.
     * @param cacheMode cache mode; defaults to `XlsDownloaderCacheMode.NONE`.
     */
    public constructor(
        url: string,
        cacheMode: XlsDownloaderCacheMode = XlsDownloaderCacheMode.NONE,
    ) {
        this.url = url;
        this.cacheMode = cacheMode;
    }

    /** Produces a download result. */
    public abstract downloadXLS(): Promise<XlsDownloaderResult>;

    /** Produces the cached result, or `null` when there is none. */
    public abstract getCachedXLS(): Promise<XlsDownloaderResult | null>;

    /** Returns the cache mode this downloader was created with. */
    public getCacheMode(): XlsDownloaderCacheMode {
        const mode = this.cacheMode;
        return mode;
    }
}

View File

@@ -0,0 +1,36 @@
import {
Controller,
Get,
HttpCode,
HttpStatus,
UseGuards,
} from "@nestjs/common";
import { AuthGuard } from "../auth/auth.guard";
import { ScheduleService } from "./schedule.service";
import { ScheduleDto } from "../dto/schedule.dto";
import { ResultDto } from "../utility/validation/class-validator.interceptor";
import {
ApiExtraModels,
ApiOkResponse,
ApiOperation,
refs,
} from "@nestjs/swagger";
/**
 * HTTP controller mounted at `api/v1/schedule`; every route is protected by
 * `AuthGuard`.
 */
@Controller("api/v1/schedule")
@UseGuards(AuthGuard)
export class ScheduleController {
constructor(private scheduleService: ScheduleService) {}
/**
 * `GET api/v1/schedule/get` — responds with status 200 and the schedule
 * produced by `ScheduleService.getSchedule()`.
 */
// The Swagger decorators below document the response as a ScheduleDto.
@ApiExtraModels(ScheduleDto)
@ApiOperation({ summary: "Получение расписания", tags: ["schedule"] })
@ApiOkResponse({
description: "Расписание получено успешно",
schema: refs(ScheduleDto)[0],
})
// NOTE(review): presumably makes the interceptor treat the response as a ScheduleDto — confirm against class-validator.interceptor.
@ResultDto(ScheduleDto)
@HttpCode(HttpStatus.OK)
@Get("get")
getSchedule(): Promise<ScheduleDto> {
return this.scheduleService.getSchedule();
}
}

View File

@@ -0,0 +1,13 @@
import { Module } from "@nestjs/common";
import { ScheduleService } from "./schedule.service";
import { ScheduleController } from "./schedule.controller";
import { UsersService } from "../users/users.service";
import { PrismaService } from "../prisma/prisma.service";
/**
 * Nest module of the schedule feature: registers `ScheduleService`,
 * `UsersService` and `PrismaService` as providers, exposes
 * `ScheduleController` and exports `ScheduleService` to importing modules.
 */
@Module({
imports: [],
// NOTE(review): UsersService and PrismaService are listed here directly rather than imported via their own modules — confirm this is intended.
providers: [ScheduleService, UsersService, PrismaService],
controllers: [ScheduleController],
exports: [ScheduleService],
})
export class ScheduleModule {}

View File

@@ -0,0 +1,43 @@
import { Injectable } from "@nestjs/common";
import {
ScheduleParser,
ScheduleParseResult,
} from "./internal/schedule-parser/schedule-parser";
import { BasicXlsDownloader } from "./internal/xls-downloader/basic-xls-downloader";
import { XlsDownloaderCacheMode } from "./internal/xls-downloader/xls-downloader.base";
import { ScheduleDto } from "../dto/schedule.dto";
/**
 * Serves the schedule of the configured group, re-requesting the workbook at
 * most once per cache lifetime and remembering which days changed last.
 */
@Injectable()
export class ScheduleService {
    /** Minutes after which the parser is asked to refresh instead of using its cache. */
    private static readonly CACHE_LIFETIME_MINUTES = 5;

    private readonly scheduleParser = new ScheduleParser(
        new BasicXlsDownloader(
            "https://politehnikum-eng.ru/index/raspisanie_zanjatij/0-409",
            XlsDownloaderCacheMode.SOFT,
        ),
        "ИС-214/23",
    );

    /** Moment of the last refresh attempt; the epoch means "never". */
    private lastCacheUpdate: Date = new Date(0);
    /** Day indices reported by the most recent parse that detected changes. */
    private lastChangedDays: Array<number> = [];

    /**
     * Returns the current schedule.
     *
     * When more than {@link ScheduleService.CACHE_LIFETIME_MINUTES} minutes
     * have passed since the last refresh, the parser is asked for fresh data;
     * otherwise it is asked to use the cached workbook.
     */
    async getSchedule(): Promise<ScheduleDto> {
        const now = new Date();

        // Elapsed time must be "now minus last update"; the reverse order is
        // always negative and would make the cache live forever.
        const minutesSinceUpdate =
            (now.valueOf() - this.lastCacheUpdate.valueOf()) / (60 * 1000);
        const cacheExpired =
            minutesSinceUpdate > ScheduleService.CACHE_LIFETIME_MINUTES;

        if (cacheExpired) this.lastCacheUpdate = now;

        const schedule = await this.scheduleParser.getSchedule(!cacheExpired);

        // Keep the previous list when the latest parse reports no changes.
        if (schedule.affectedDays.length !== 0)
            this.lastChangedDays = schedule.affectedDays;

        return {
            updatedAt: this.lastCacheUpdate,
            data: schedule.group,
            etag: schedule.etag,
            lastChangedDays: this.lastChangedDays,
        };
    }
}
}