mirror of
https://github.com/n08i40k/schedule-parser-next.git
synced 2025-12-06 09:47:46 +03:00
1.0.0
This commit is contained in:
295
src/schedule/internal/schedule-parser/schedule-parser.ts
Normal file
295
src/schedule/internal/schedule-parser/schedule-parser.ts
Normal file
@@ -0,0 +1,295 @@
|
||||
import {
|
||||
XlsDownloaderBase,
|
||||
XlsDownloaderCacheMode,
|
||||
XlsDownloaderResult,
|
||||
} from "../xls-downloader/xls-downloader.base";
|
||||
|
||||
import * as XLSX from "xlsx";
|
||||
import {
|
||||
DayDto,
|
||||
GroupDto,
|
||||
LessonDto,
|
||||
LessonTimeDto,
|
||||
LessonTypeDto,
|
||||
} from "../../../dto/schedule.dto";
|
||||
import { trimAll } from "../../../utility/string.util";
|
||||
|
||||
/** Position of a named cell inside the worksheet (zero-based row/column). */
type InternalId = {
  row: number;
  column: number;
  name: string;
};

/** A day header cell together with the lesson cells that belong to it. */
type InternalDay = InternalId & { lessons: InternalId[] };
|
||||
|
||||
/** Outcome of one parsing pass over the timetable spreadsheet. */
export type ScheduleParseResult = {
  /** ETag of the XLS file the result was produced from. */
  etag: string;
  /** Parsed timetable of the requested group. */
  group: GroupDto;
  /** Indices of days that differ from the previously parsed result. */
  affectedDays: number[];
};
|
||||
|
||||
/**
 * Turns the timetable spreadsheet provided by an {@link XlsDownloaderBase}
 * into DTOs for a single group and remembers the last parsed result so that
 * changed days can be reported.
 */
export class ScheduleParser {
  /** Matches the trailing "Surname I.O., Surname I.O. (N подгруппа)" list of a lesson cell. */
  private static readonly TEACHER_LIST_SUFFIX =
    /(?:[А-ЯЁ][а-яё]+\s[А-ЯЁ]\.[А-ЯЁ]\.(?:\s\([0-9] подгруппа\))?(?:,\s)?)+$/m;

  /** Matches one teacher entry inside the list found by TEACHER_LIST_SUFFIX. */
  private static readonly TEACHER_ENTRY =
    /(?:[А-ЯЁ][а-яё]+\s[А-ЯЁ]\.[А-ЯЁ]\.(?:\s\([0-9] подгруппа\))?)+/g;

  private lastResult: ScheduleParseResult | null = null;

  /**
   * @param xlsDownloader source of the XLS document.
   * @param group exact name of the group column to extract.
   */
  public constructor(
    private readonly xlsDownloader: XlsDownloaderBase,
    private readonly group: string,
  ) {}

  /**
   * Reads the raw value (`v`) of a worksheet cell.
   *
   * @returns the cell value, or `null` when the cell does not exist.
   */
  private static getCellName(
    worksheet: XLSX.Sheet,
    row: number,
    column: number,
  ): any | null {
    const cell = worksheet[XLSX.utils.encode_cell({ r: row, c: column })];
    return cell ? cell.v : null;
  }

  /**
   * Splits a lesson cell text into the lesson name and the teachers listed
   * at its end.
   *
   * @param lessonName cleaned-up text of the lesson cell.
   * @returns the name without the teacher suffix and the list of teachers;
   * when no teacher suffix is present the text is returned unchanged.
   */
  private parseTeacherFullNames(lessonName: string): {
    name: string;
    teacherFullNames: Array<string>;
  } {
    const suffix = ScheduleParser.TEACHER_LIST_SUFFIX.exec(lessonName);
    if (suffix === null) return { name: lessonName, teacherFullNames: [] };

    // String.prototype.match with a global regex returns every full match and
    // leaves lastIndex at 0, so sharing the static regex is safe.
    const teacherFullNames = (
      suffix[0].match(ScheduleParser.TEACHER_ENTRY) ?? []
    ).map((entry) => entry.trim());

    if (teacherFullNames.length === 0)
      return { name: lessonName, teacherFullNames: [] };

    return {
      name: lessonName.substring(0, suffix.index).trim(),
      teacherFullNames: teacherFullNames,
    };
  }

  /**
   * Locates the group column and the rows where each day starts.
   *
   * Column 0 holds the day names. The row directly above the first non-empty
   * day cell is treated as the header and is searched (from the third column
   * on) for the configured group. Scanning stops once one more entry has been
   * collected after the day starting with "Суббота"; that extra entry only
   * marks where Saturday ends.
   *
   * @returns the day skeletons and the group cell, or `null` for the group
   * when no header cell equals the configured group name.
   */
  parseSkeleton(worksheet: XLSX.Sheet): {
    groupSkeleton: InternalId | null;
    daySkeletons: Array<InternalDay>;
  } {
    const range = XLSX.utils.decode_range(worksheet["!ref"] || "");

    let isHeaderParsed = false;
    let group: InternalId | null = null;
    const days: Array<InternalDay> = [];

    for (let row = range.s.r + 1; row <= range.e.r; ++row) {
      const dayName = ScheduleParser.getCellName(worksheet, row, 0);
      if (!dayName) continue;

      if (!isHeaderParsed) {
        isHeaderParsed = true;

        // Group names live in the row right above the first day.
        const headerRow = row - 1;
        for (let column = range.s.c + 2; column <= range.e.c; ++column) {
          const groupName = ScheduleParser.getCellName(
            worksheet,
            headerRow,
            column,
          );
          if (!groupName || this.group !== groupName) continue;

          group = { row: headerRow, column: column, name: groupName };
          break;
        }
      }

      // Coerce to string so that a numeric cell cannot break startsWith below.
      days.push({ row: row, column: 0, name: String(dayName), lessons: [] });

      if (
        days.length > 2 &&
        days[days.length - 2].name.startsWith("Суббота")
      )
        break;
    }

    return { daySkeletons: days, groupSkeleton: group };
  }

  /**
   * Splits a cabinet cell into individual cabinet names.
   * Missing cells (`null`/`undefined`) yield an empty list.
   */
  private static parseCabinets(rawCabinets: unknown): Array<string> {
    if (rawCabinets == null) return [];

    return String(rawCabinets)
      .split(/\s+/)
      .filter((cabinet) => cabinet.length > 0);
  }

  /**
   * Builds a lesson from one worksheet row.
   *
   * @returns the lesson, or `null` when the row has no textual time cell.
   */
  private parseLessonRow(
    workSheet: XLSX.Sheet,
    row: number,
    lessonTimeColumn: number,
    groupColumn: number,
  ): LessonDto | null {
    const rawTime: unknown = ScheduleParser.getCellName(
      workSheet,
      row,
      lessonTimeColumn,
    );
    // Check the type first: a numeric cell has no string methods.
    if (typeof rawTime !== "string") return null;

    const time = rawTime.replace(/ /g, "");
    if (time.length === 0) return null;

    const rawName: unknown = ScheduleParser.getCellName(
      workSheet,
      row,
      groupColumn,
    );
    const lessonText =
      typeof rawName === "string" ? rawName : rawName ? String(rawName) : "";

    // The cabinet cell is the one right after the group cell.
    const cabinets = ScheduleParser.parseCabinets(
      ScheduleParser.getCellName(workSheet, row, groupColumn + 1),
    );

    const type =
      lessonText.length === 0
        ? LessonTypeDto.NONE
        : time.includes("пара")
          ? LessonTypeDto.DEFAULT
          : LessonTypeDto.CUSTOM;

    const { name, teacherFullNames } = this.parseTeacherFullNames(
      trimAll(lessonText.replace("\n", "")),
    );

    return new LessonDto(
      type,
      // Regular lessons carry a 5-character "<N>пара" prefix before the time.
      LessonTimeDto.fromString(
        type === LessonTypeDto.DEFAULT ? time.substring(5) : time,
      ),
      name,
      cabinets,
      teacherFullNames,
    );
  }

  /**
   * Downloads (or reuses) the XLS document and parses the group's timetable.
   *
   * @param forceCached when `true`, the downloader's cached document is used
   * if there is one; otherwise a download is performed.
   * @returns the parsed result; the previous result is returned as-is when a
   * download reports an unchanged document and caching is enabled.
   * @throws Error when the document has no sheets or the group is not found.
   */
  async getSchedule(
    forceCached: boolean = false,
  ): Promise<ScheduleParseResult> {
    let downloadData: XlsDownloaderResult | null = forceCached
      ? await this.xlsDownloader.getCachedXLS()
      : null;

    if (downloadData === null) {
      console.debug("Обновление кеша...");
      downloadData = await this.xlsDownloader.downloadXLS();

      if (
        !downloadData.new &&
        this.lastResult &&
        this.xlsDownloader.getCacheMode() !== XlsDownloaderCacheMode.NONE
      ) {
        console.debug(
          "Так как скачанный XLS не новый, присутствует уже готовый результат и кеширование не отключено...",
        );
        console.debug("будет возвращён предыдущий результат.");

        return this.lastResult;
      }
    }

    console.debug("Чтение кешированного XLS документа...");

    const workBook = XLSX.read(downloadData.fileData);
    const workSheet = workBook.Sheets[workBook.SheetNames[0]];
    if (!workSheet) throw new Error("XLS документ не содержит листов!");

    const { groupSkeleton, daySkeletons } = this.parseSkeleton(workSheet);
    if (groupSkeleton === null)
      throw new Error(
        `Не удалось найти группу "${this.group}" в расписании!`,
      );

    const group = new GroupDto(groupSkeleton.name);

    // The last skeleton only marks the end of the previous day.
    // NOTE(review): if nothing follows the last real day in column 0, that
    // day is dropped here - confirm against the actual sheet layout.
    for (let dayIdx = 0; dayIdx < daySkeletons.length - 1; ++dayIdx) {
      const daySkeleton = daySkeletons[dayIdx];
      const nextDayRow = daySkeletons[dayIdx + 1].row;
      const lessonTimeColumn = daySkeletons[0].column + 1;

      const day = new DayDto(daySkeleton.name);

      for (let row = daySkeleton.row; row < nextDayRow; ++row) {
        const lesson = this.parseLessonRow(
          workSheet,
          row,
          lessonTimeColumn,
          groupSkeleton.column,
        );
        if (lesson !== null) day.lessons.push(lesson);
      }

      day.fillIndices();
      group.days.push(day);
    }

    return (this.lastResult = {
      etag: downloadData.etag,
      group: group,
      affectedDays: this.getAffectedDays(
        this.lastResult?.group ?? null,
        group,
      ),
    });
  }

  /** @returns the result of the last successful parse, or `null`. */
  public getLastResult(): ScheduleParseResult | null {
    return this.lastResult;
  }

  /**
   * Compares a freshly parsed day with its cached counterpart.
   *
   * Days differ when the cached day is missing, the lesson counts differ, a
   * lesson name changed (including a lesson appearing or disappearing), or a
   * named lesson changed its time, cabinets or teachers.
   */
  private static isSameDay(
    current: DayDto,
    cached: DayDto | undefined,
  ): boolean {
    if (cached == null || cached.lessons.length !== current.lessons.length)
      return false;

    return current.lessons.every((lesson, lessonIdx) => {
      const cachedLesson = cached.lessons[lessonIdx];

      // A name change also covers a lesson that was removed or added.
      if (lesson.name !== cachedLesson.name) return false;
      // Details of empty slots are not compared.
      if (lesson.name.length === 0) return true;

      // NOTE(review): time values are compared with !==; this assumes
      // start/end are primitives - confirm against LessonTimeDto.
      return (
        lesson.time.start === cachedLesson.time.start &&
        lesson.time.end === cachedLesson.time.end &&
        lesson.cabinets.toString() === cachedLesson.cabinets.toString() &&
        lesson.teacherNames.toString() ===
          cachedLesson.teacherNames.toString()
      );
    });
  }

  /**
   * Lists the indices of days in `group` that differ from `cachedGroup`.
   *
   * @param cachedGroup previously parsed group, or `null` on the first parse.
   * @param group freshly parsed group.
   * @returns indices of changed days; empty when there is nothing to compare.
   */
  private getAffectedDays(
    cachedGroup: GroupDto | null,
    group: GroupDto,
  ): Array<number> {
    const affectedDays: Array<number> = [];

    if (!cachedGroup) return affectedDays;

    for (let dayIdx = 0; dayIdx < group.days.length; ++dayIdx) {
      if (
        !ScheduleParser.isSameDay(group.days[dayIdx], cachedGroup.days[dayIdx])
      )
        affectedDays.push(dayIdx);
    }

    return affectedDays;
  }
}
|
||||
92
src/schedule/internal/xls-downloader/basic-xls-downloader.ts
Normal file
92
src/schedule/internal/xls-downloader/basic-xls-downloader.ts
Normal file
@@ -0,0 +1,92 @@
|
||||
import {
|
||||
XlsDownloaderBase,
|
||||
XlsDownloaderCacheMode,
|
||||
XlsDownloaderResult,
|
||||
} from "./xls-downloader.base";
|
||||
import axios from "axios";
|
||||
import { JSDOM } from "jsdom";
|
||||
|
||||
/**
 * Downloader that scrapes the timetable page for the XLS link and fetches
 * the file over HTTP, optionally keeping the last download in memory.
 */
export class BasicXlsDownloader extends XlsDownloaderBase {
  /** Last downloaded document; only populated when caching is enabled. */
  cache: XlsDownloaderResult | null = null;

  /**
   * Fetches the timetable page and parses it into a DOM.
   *
   * @throws Error when the page does not answer with status 200.
   */
  private async getDOM(): Promise<JSDOM> {
    const response = await axios.get(this.url);

    if (response.status !== 200) {
      throw new Error(`Не удалось получить данные с основной страницы!
Статус код: ${response.status}
${response.statusText}`);
    }

    return new JSDOM(response.data, {
      url: this.url,
      contentType: "text/html",
    });
  }

  /**
   * Extracts the XLS link and the update date from the page.
   *
   * The first `div` inside the `#cont-i` element is used: its first `a`
   * provides the link and its fourth `span` the update date.
   *
   * @throws Error when any of the expected elements is missing.
   */
  private parseData(dom: JSDOM): {
    downloadLink: string;
    updateDate: string;
  } {
    const scheduleBlock = dom.window.document.getElementById("cont-i");
    if (scheduleBlock === null)
      throw new Error("Не удалось найти блок расписаний!");

    const schedules = scheduleBlock.getElementsByTagName("div");
    if (schedules === null || schedules.length === 0)
      throw new Error("Не удалось найти строку с расписанием!");

    const poltavskaya = schedules[0];

    const link = poltavskaya.getElementsByTagName("a")[0];
    if (!link) throw new Error("Не удалось найти ссылку на расписание!");

    const updateDateSpan = poltavskaya.getElementsByTagName("span")[3];
    if (!updateDateSpan || updateDateSpan.textContent === null)
      throw new Error("Не удалось найти дату обновления расписания!");

    return {
      downloadLink: link.href,
      updateDate: updateDateSpan.textContent.trimStart(),
    };
  }

  /**
   * Returns exactly the bytes of the response body as an ArrayBuffer.
   *
   * A typed-array view (such as a Node.js Buffer) may cover only part of its
   * backing ArrayBuffer, so the view's offset and length must be honoured.
   */
  private static toArrayBuffer(
    data: ArrayBuffer | ArrayBufferView,
  ): ArrayBuffer {
    if (!ArrayBuffer.isView(data)) return data;

    if (data.byteOffset === 0 && data.byteLength === data.buffer.byteLength)
      return data.buffer as ArrayBuffer;

    return data.buffer.slice(
      data.byteOffset,
      data.byteOffset + data.byteLength,
    ) as ArrayBuffer;
  }

  /**
   * @returns the cached document or `null` when nothing is cached. The
   * `new` flag of the cached document is set to `true` only in HARD mode.
   */
  public async getCachedXLS(): Promise<XlsDownloaderResult | null> {
    if (this.cache === null) return null;

    this.cache.new = this.cacheMode === XlsDownloaderCacheMode.HARD;

    return this.cache;
  }

  /**
   * Downloads the XLS document, or returns the cached one in HARD mode.
   *
   * @returns the document with `new` telling whether its ETag differs from
   * the cached one (always `true` when caching is disabled or the server
   * sent no ETag, since an unchanged file cannot be proven then).
   * @throws Error when the page or the file cannot be fetched or parsed.
   */
  public async downloadXLS(): Promise<XlsDownloaderResult> {
    if (this.cacheMode === XlsDownloaderCacheMode.HARD) {
      const cached = await this.getCachedXLS();
      if (cached !== null) return cached;
    }

    const dom = await this.getDOM();
    const parsedPage = this.parseData(dom);

    const response = await axios.get(parsedPage.downloadLink, {
      responseType: "arraybuffer",
    });
    if (response.status !== 200) {
      throw new Error(`Не удалось получить excel файл!
Статус код: ${response.status}
${response.statusText}`);
    }

    const etag = response.headers["etag"];

    const result: XlsDownloaderResult = {
      fileData: BasicXlsDownloader.toArrayBuffer(response.data),
      updateDate: parsedPage.updateDate,
      etag: etag,
      new:
        this.cacheMode === XlsDownloaderCacheMode.NONE ||
        etag === undefined ||
        this.cache?.etag !== etag,
    };

    if (this.cacheMode !== XlsDownloaderCacheMode.NONE) this.cache = result;

    return result;
  }
}
|
||||
27
src/schedule/internal/xls-downloader/xls-downloader.base.ts
Normal file
27
src/schedule/internal/xls-downloader/xls-downloader.base.ts
Normal file
@@ -0,0 +1,27 @@
|
||||
/** A downloaded (or cached) timetable spreadsheet with its metadata. */
export type XlsDownloaderResult = {
  /** Raw bytes of the XLS file. */
  fileData: ArrayBuffer;
  /** Update date as reported by the source of the file. */
  updateDate: string;
  /** ETag identifying this version of the file. */
  etag: string;
  /** Whether this document should be treated as changed since the cached one. */
  new: boolean;
};
|
||||
|
||||
/** Caching strategy used by an XLS downloader. */
export enum XlsDownloaderCacheMode {
  /** No caching. */
  NONE = 0,
  /** Read the cache only if the etag has not changed. */
  SOFT = 1,
  /** Always read the cache, except when it is absent. */
  HARD = 2,
}
|
||||
|
||||
/**
 * Common contract for timetable XLS downloaders: holds the source URL and
 * the cache mode, and leaves downloading and cache access to subclasses.
 */
export abstract class XlsDownloaderBase {
  /** Address the downloader works with. */
  protected readonly url: string;

  /** Caching strategy chosen at construction time. */
  protected readonly cacheMode: XlsDownloaderCacheMode;

  /**
   * @param url address the downloader works with.
   * @param cacheMode caching strategy; defaults to no caching.
   */
  public constructor(url: string, cacheMode = XlsDownloaderCacheMode.NONE) {
    this.url = url;
    this.cacheMode = cacheMode;
  }

  /** Obtains the XLS document. */
  public abstract downloadXLS(): Promise<XlsDownloaderResult>;

  /** Returns the cached XLS document, or `null` when there is none. */
  public abstract getCachedXLS(): Promise<XlsDownloaderResult | null>;

  /** @returns the caching strategy this downloader was created with. */
  public getCacheMode(): XlsDownloaderCacheMode {
    return this.cacheMode;
  }
}
|
||||
36
src/schedule/schedule.controller.ts
Normal file
36
src/schedule/schedule.controller.ts
Normal file
@@ -0,0 +1,36 @@
|
||||
import {
|
||||
Controller,
|
||||
Get,
|
||||
HttpCode,
|
||||
HttpStatus,
|
||||
UseGuards,
|
||||
} from "@nestjs/common";
|
||||
import { AuthGuard } from "../auth/auth.guard";
|
||||
import { ScheduleService } from "./schedule.service";
|
||||
import { ScheduleDto } from "../dto/schedule.dto";
|
||||
import { ResultDto } from "../utility/validation/class-validator.interceptor";
|
||||
import {
|
||||
ApiExtraModels,
|
||||
ApiOkResponse,
|
||||
ApiOperation,
|
||||
refs,
|
||||
} from "@nestjs/swagger";
|
||||
|
||||
/** HTTP entry point for the timetable, guarded by {@link AuthGuard}. */
@Controller("api/v1/schedule")
@UseGuards(AuthGuard)
export class ScheduleController {
  constructor(private readonly scheduleService: ScheduleService) {}

  /**
   * `GET api/v1/schedule/get` - returns the schedule produced by the
   * schedule service.
   */
  @ApiExtraModels(ScheduleDto)
  @ApiOperation({ summary: "Получение расписания", tags: ["schedule"] })
  @ApiOkResponse({
    description: "Расписание получено успешно",
    schema: refs(ScheduleDto)[0],
  })
  @ResultDto(ScheduleDto)
  @HttpCode(HttpStatus.OK)
  @Get("get")
  async getSchedule(): Promise<ScheduleDto> {
    const schedule = await this.scheduleService.getSchedule();
    return schedule;
  }
}
|
||||
13
src/schedule/schedule.module.ts
Normal file
13
src/schedule/schedule.module.ts
Normal file
@@ -0,0 +1,13 @@
|
||||
import { Module } from "@nestjs/common";
|
||||
import { ScheduleService } from "./schedule.service";
|
||||
import { ScheduleController } from "./schedule.controller";
|
||||
import { UsersService } from "../users/users.service";
|
||||
import { PrismaService } from "../prisma/prisma.service";
|
||||
|
||||
/**
 * Nest module wiring the schedule controller and service; the service is
 * exported for use by other modules.
 */
@Module({
  controllers: [ScheduleController],
  providers: [ScheduleService, UsersService, PrismaService],
  imports: [],
  exports: [ScheduleService],
})
export class ScheduleModule {}
|
||||
43
src/schedule/schedule.service.ts
Normal file
43
src/schedule/schedule.service.ts
Normal file
@@ -0,0 +1,43 @@
|
||||
import { Injectable } from "@nestjs/common";
|
||||
import {
|
||||
ScheduleParser,
|
||||
ScheduleParseResult,
|
||||
} from "./internal/schedule-parser/schedule-parser";
|
||||
import { BasicXlsDownloader } from "./internal/xls-downloader/basic-xls-downloader";
|
||||
import { XlsDownloaderCacheMode } from "./internal/xls-downloader/xls-downloader.base";
|
||||
import { ScheduleDto } from "../dto/schedule.dto";
|
||||
|
||||
/**
 * Serves the timetable of a fixed group, re-downloading the spreadsheet at
 * most once per cache lifetime and remembering which days changed last.
 */
@Injectable()
export class ScheduleService {
  /** How long a downloaded document is served before a refresh is attempted. */
  private static readonly CACHE_TTL_MS = 5 * 60 * 1000;

  private readonly scheduleParser = new ScheduleParser(
    new BasicXlsDownloader(
      "https://politehnikum-eng.ru/index/raspisanie_zanjatij/0-409",
      XlsDownloaderCacheMode.SOFT,
    ),
    "ИС-214/23",
  );

  /** Moment of the last refresh attempt; the epoch forces an initial refresh. */
  private lastCacheUpdate: Date = new Date(0);

  /** Days reported as changed by the most recent parse that found changes. */
  private lastChangedDays: Array<number> = [];

  /**
   * Returns the current schedule.
   *
   * When more than five minutes have passed since the last refresh the
   * parser is asked to download the document again; otherwise it is allowed
   * to reuse the cached one.
   *
   * @returns the schedule together with the refresh time, the document ETag
   * and the last non-empty list of changed days.
   */
  async getSchedule(): Promise<ScheduleDto> {
    const now = new Date();

    // Elapsed time is "now - last update"; the reversed subtraction is
    // always negative and would keep the cache alive forever.
    const cacheExpired =
      now.valueOf() - this.lastCacheUpdate.valueOf() >
      ScheduleService.CACHE_TTL_MS;

    if (cacheExpired) this.lastCacheUpdate = now;

    const schedule = await this.scheduleParser.getSchedule(!cacheExpired);

    // Keep the previous list when the latest parse reports no changes.
    if (schedule.affectedDays.length !== 0)
      this.lastChangedDays = schedule.affectedDays;

    return {
      updatedAt: this.lastCacheUpdate,
      data: schedule.group,
      etag: schedule.etag,
      lastChangedDays: this.lastChangedDays,
    };
  }
}
|
||||
Reference in New Issue
Block a user