Files
schedule-parser-next/src/schedule/internal/schedule-parser/v2-schedule-parser.ts
n08i40k 5fe5d56ca9 2.0.0
Я пока перечислю - умру.
Надо научиться писать changelog постепенно.
2024-10-19 02:12:37 +04:00

674 lines
17 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import { XlsDownloaderInterface } from "../xls-downloader/xls-downloader.interface";
import * as XLSX from "xlsx";
import { Range, WorkSheet } from "xlsx";
import { toNormalString, trimAll } from "../../../utility/string.util";
import { plainToClass, plainToInstance } from "class-transformer";
import * as objectHash from "object-hash";
import { V2LessonTimeDto } from "../../dto/v2/v2-lesson-time.dto";
import { V2LessonType } from "../../enum/v2-lesson-type.enum";
import { V2LessonSubGroupDto } from "../../dto/v2/v2-lesson-sub-group.dto";
import { V2LessonDto } from "../../dto/v2/v2-lesson.dto";
import { V2DayDto } from "../../dto/v2/v2-day.dto";
import { V2GroupDto } from "../../dto/v2/v2-group.dto";
import * as assert from "node:assert";
import { ScheduleReplacerService } from "../../schedule-replacer.service";
/**
 * Location and text of a single worksheet cell.
 */
type InternalId = {
    /** Row index of the cell. */
    row: number;

    /** Column index of the cell. */
    column: number;

    /** Text stored in the cell. */
    name: string;
};
/**
 * One row of the lesson-time column: when the lesson happens and which
 * worksheet cells the time entry occupies.
 */
type InternalTime = {
    /** Time span of the lesson. */
    timeRange: V2LessonTimeDto;

    /** Type of the lesson on this row. */
    lessonType: V2LessonType;

    /** Index of the lesson on this row. */
    defaultIndex?: number;

    /** Positions of the first and the last cell of the time entry. */
    xlsxRange: Range;
};
/**
 * Outcome of parsing one schedule file.
 */
export class V2ScheduleParseResult {
    /** ETag of the schedule. */
    etag: string;

    /** Identifier of the replaced schedule (ObjectId). */
    replacerId?: string;

    /** When the schedule was uploaded to the polytechnic's site. */
    uploadedAt: Date;

    /** When the schedule was downloaded from the polytechnic's site. */
    downloadedAt: Date;

    /**
     * Schedule of every group.
     * Entries are keyed by group name.
     */
    groups: V2GroupDto[];

    /**
     * Groups whose schedule changed since the file was last updated.
     * Entries are keyed by group name.
     */
    updatedGroups: number[][];
}
/**
 * Parses the polytechnic's XLS schedule into per-group DTOs.
 *
 * The last parse result is kept on the instance and is returned again while
 * the file's ETag (and the applied replacer, if any) stay the same.
 */
export class V2ScheduleParser {
    /** Result of the previous successful parse, or null before the first one. */
    private lastResult: V2ScheduleParseResult | null = null;

    /**
     * @param xlsDownloader - downloader used to fetch the schedule from the polytechnic's site
     * @param scheduleReplacerService - optional service used to substitute the schedule
     */
    public constructor(
        private readonly xlsDownloader: XlsDownloaderInterface,
        private readonly scheduleReplacerService?: ScheduleReplacerService,
    ) {}

    /**
     * Returns the merged range whose top-left cell is the given cell.
     * @param workSheet - xls sheet
     * @param topRow - row index of the starting cell
     * @param leftColumn - column index of the starting cell
     * @returns the matching merge range, or a single-cell range when the
     * cell does not start a merge
     * @private
     * @static
     */
    private static getMergeFromStart(
        workSheet: XLSX.WorkSheet,
        topRow: number,
        leftColumn: number,
    ): Range {
        // "!merges" may be missing (e.g. a sheet without merged cells);
        // iterating `undefined` would throw.
        const merges: Array<Range> = workSheet["!merges"] ?? [];
        const merge = merges.find(
            (range) => range.s.r === topRow && range.s.c === leftColumn,
        );

        return (
            merge ?? {
                s: { r: topRow, c: leftColumn },
                e: { r: topRow, c: leftColumn },
            }
        );
    }

    /**
     * Reads the formatted text (`w`) of a cell and passes it through
     * `toNormalString`.
     * @param worksheet - xls sheet
     * @param row - row index
     * @param column - column index
     * @returns the text of the cell, if present
     * @private
     * @static
     */
    private static getCellData(
        worksheet: XLSX.WorkSheet,
        row: number,
        column: number,
    ): string | null {
        const address = XLSX.utils.encode_cell({ r: row, c: column });
        const cell: XLSX.CellObject | null = worksheet[address];

        return toNormalString(cell?.w);
    }

    /**
     * Splits the text of a lesson cell into the lesson name and the trailing
     * list of teachers (each optionally followed by "(N подгруппа)").
     * @param lessonName - text of the cell
     * @returns the lesson name and the sub-groups (cabinets are left empty)
     * @throws Error when the trailing teacher list matched but no single
     * teacher could be extracted from it
     * @private
     * @static
     */
    private static parseNameAndSubGroups(lessonName: string): {
        name: string;
        subGroups: Array<V2LessonSubGroupDto>;
    } {
        // The whole trailing "Surname I. O. (N подгруппа), ..." list.
        const allRegex =
            /(?:[А-ЯЁ][а-яё]+\s[А-ЯЁ]\.\s?[А-ЯЁ]\.(?:\s?\([0-9]\s?подгруппа\))?(?:,\s)?)+$/gm;
        // A single teacher with an optional sub-group marker.
        const teacherAndSubGroupRegex =
            /(?:[А-ЯЁ][а-яё]+\s[А-ЯЁ]\.\s?[А-ЯЁ]\.(?:\s?\([0-9]\s?подгруппа\))?)+/gm;

        const allMatch = allRegex.exec(lessonName);

        // No teachers at the end of the text: the whole text is the name.
        if (allMatch === null) return { name: lessonName, subGroups: [] };

        const all: Array<string> = Array.from(
            allMatch[0].matchAll(teacherAndSubGroupRegex),
            (match) => match[0].trim(),
        );

        // Should be impossible: the outer regex is built from the inner one.
        if (all.length === 0) {
            throw new Error("Парадокс");
        }

        const subGroups = all.map((teacherAndSubGroup) => {
            const teacherRegex = /[А-ЯЁ][а-яё]+\s[А-ЯЁ]\.\s?[А-ЯЁ]\./g;
            const subGroupRegex = /\([0-9]\s?подгруппа\)/g;

            const teacherMatch = teacherRegex.exec(teacherAndSubGroup);
            if (teacherMatch === null) throw new Error("Парадокс");

            // Normalise "Surname I. O." to "Surname I.O.": keep the surname
            // with its separator and strip whitespace from the initials.
            // The separator is located with /\s/ because that is what the
            // regex above matched.
            const matchedFIO = teacherMatch[0];
            const initialsStart = matchedFIO.search(/\s/) + 1;
            const initials = matchedFIO
                .substring(initialsStart)
                .replaceAll(/\s/g, "");
            const teacherFIO = `${matchedFIO.substring(0, initialsStart)}${initials}`;

            // Character 1 of "(N подгруппа)" is the sub-group digit.
            const subGroupMatch = subGroupRegex.exec(teacherAndSubGroup);
            const subGroup = subGroupMatch
                ? Number.parseInt(subGroupMatch[0][1])
                : 1;

            return plainToInstance(V2LessonSubGroupDto, {
                teacher: teacherFIO,
                number: subGroup,
                cabinet: "",
            });
        });

        // With several teachers the numbers are re-derived. The assignments
        // are order-dependent: the second one reads the already updated
        // number of the first sub-group.
        if (subGroups.length > 1) {
            subGroups[0].number = subGroups[1].number === 2 ? 1 : 2;
            subGroups[1].number = subGroups[0].number === 1 ? 2 : 1;

            // NOTE(review): the third and later teachers get their zero-based
            // index as the number (the third teacher becomes 2) - confirm
            // this is intended.
            for (let index = 2; index < subGroups.length; ++index)
                subGroups[index].number = index;
        }

        return {
            name: lessonName.substring(0, allMatch.index).trim(),
            subGroups: subGroups,
        };
    }

    /**
     * Locates the group header cells and the day cells of the sheet.
     *
     * Day cells are looked up in column 0. Group names are read from the row
     * directly above the first non-empty cell of that column, starting two
     * columns to the right of the sheet's first column. Scanning stops once
     * the entry following "Суббота" has been recorded; that entry only marks
     * where Saturday ends.
     * @param workSheet - xls sheet
     * @returns positions and texts of the group cells and of the day cells
     * @private
     * @static
     */
    private static parseSkeleton(workSheet: XLSX.WorkSheet): {
        groupSkeletons: Array<InternalId>;
        daySkeletons: Array<InternalId>;
    } {
        const range = XLSX.utils.decode_range(workSheet["!ref"] || "");

        const groups: Array<InternalId> = [];
        const days: Array<InternalId> = [];
        let isHeaderParsed = false;

        for (let row = range.s.r + 1; row <= range.e.r; ++row) {
            const dayName = V2ScheduleParser.getCellData(workSheet, row, 0);
            if (!dayName) continue;

            if (!isHeaderParsed) {
                isHeaderParsed = true;

                const headerRow = row - 1;
                for (
                    let column = range.s.c + 2;
                    column <= range.e.c;
                    ++column
                ) {
                    const groupName = V2ScheduleParser.getCellData(
                        workSheet,
                        headerRow,
                        column,
                    );
                    if (!groupName) continue;

                    groups.push({
                        row: headerRow,
                        column: column,
                        name: groupName,
                    });
                }
            }

            // Until Saturday has been seen only "<day> DD.MM.YYYY"-like cells
            // are accepted; the first cell after Saturday is taken as is.
            const lastDay = days[days.length - 1];
            const mustLookLikeDay =
                lastDay === undefined || !lastDay.name.startsWith("Суббота");
            if (
                mustLookLikeDay &&
                !/[А-Яа-я]+\s(\d+)\.\d+\.\d+/.test(trimAll(dayName))
            )
                continue;

            days.push({
                row: row,
                column: 0,
                name: dayName,
            });

            if (
                days.length > 2 &&
                days[days.length - 2].name.startsWith("Суббота")
            )
                break;
        }

        return { daySkeletons: days, groupSkeletons: groups };
    }

    /**
     * Returns the downloader this parser was created with.
     * @returns the xls downloader
     */
    getXlsDownloader(): XlsDownloaderInterface {
        return this.xlsDownloader;
    }

    /**
     * Returns the current schedule, re-parsing the file only when its ETag or
     * the applied replacer changed since the previous call.
     * @returns the parsed schedule
     * @async
     */
    async getSchedule(): Promise<V2ScheduleParseResult> {
        // fetch(true) supplies etag/uploadedAt/requestedAt; fetch(false)
        // supplies the file data (presumably a metadata-only request vs. a
        // full download - confirm against the downloader).
        const headData = await this.xlsDownloader.fetch(true);
        this.xlsDownloader.verifyFetchResult(headData);
        assert(headData.type === "success");

        const replacer = this.scheduleReplacerService
            ? await this.scheduleReplacerService.getByEtag(headData.etag)
            : null;

        const cached = this.lastResult;
        if (
            cached &&
            cached.etag === headData.etag &&
            (!replacer || cached.replacerId === replacer.id)
        )
            return cached;

        // A replacer, when present, takes precedence over the downloaded file.
        const loadBuffer = async () => {
            if (replacer) return replacer.data;

            const downloadData = await this.xlsDownloader.fetch(false);
            this.xlsDownloader.verifyFetchResult(downloadData);
            assert(downloadData.type === "success");

            return downloadData.data;
        };

        const workBook = XLSX.read(await loadBuffer());
        const workSheet = workBook.Sheets[workBook.SheetNames[0]];

        const { groupSkeletons, daySkeletons } =
            V2ScheduleParser.parseSkeleton(workSheet);

        // NOTE: `groups` is an array used as a dictionary keyed by group
        // name; this shape is part of V2ScheduleParseResult and is kept.
        const groups: Array<V2GroupDto> = [];

        // Lesson times per day position. They are parsed while processing
        // the first group and reused for all other groups.
        const daysTimes: Array<Array<InternalTime>> = [];

        for (const groupSkeleton of groupSkeletons) {
            const group = new V2GroupDto();
            group.name = groupSkeleton.name;
            group.days = [];

            // The last day skeleton only marks where the last day ends.
            for (let dayIdx = 0; dayIdx < daySkeletons.length - 1; ++dayIdx) {
                const daySkeleton = daySkeletons[dayIdx];
                const day = V2ScheduleParser.createDay(daySkeleton);

                let dayTimes = daysTimes[dayIdx];
                if (dayTimes === undefined) {
                    dayTimes = V2ScheduleParser.parseDayTimes(
                        workSheet,
                        day.date,
                        daySkeleton.row,
                        daySkeletons[dayIdx + 1].row,
                        daySkeletons[0].column + 1,
                    );
                    daysTimes[dayIdx] = dayTimes;
                }

                for (const time of dayTimes) {
                    const lessons = V2ScheduleParser.parseLesson(
                        workSheet,
                        day,
                        dayTimes,
                        time,
                        groupSkeleton.column,
                    );
                    day.lessons.push(...lessons);
                }

                group.days.push(day);
            }

            groups[group.name] = group;
        }

        const updatedGroups = V2ScheduleParser.getUpdatedGroups(
            this.lastResult?.groups,
            groups,
        );

        // `updatedGroups` is keyed by group name, so its `length` is always
        // 0; emptiness has to be checked through its keys.
        const hasUpdatedGroups = Object.keys(updatedGroups).length > 0;

        return (this.lastResult = {
            downloadedAt: headData.requestedAt,
            uploadedAt: headData.uploadedAt,
            etag: headData.etag,
            replacerId: replacer?.id,
            groups: groups,
            updatedGroups: hasUpdatedGroups
                ? updatedGroups
                : (this.lastResult?.updatedGroups ?? []),
        });
    }

    /**
     * Creates an empty day from a day cell of the form "<name> DD.MM.YYYY".
     * @param daySkeleton - position and text of the day cell
     * @returns a day with name and date set and no lessons
     * @private
     * @static
     */
    private static createDay(daySkeleton: InternalId): V2DayDto {
        const day = new V2DayDto();

        const spaceIndex = daySkeleton.name.indexOf(" ");
        day.name = daySkeleton.name.substring(0, spaceIndex);

        // "DD.MM.YYYY" is rearranged to "MM.DD.YYYY" before parsing.
        // NOTE(review): Date.parse on this non-ISO format is
        // implementation-defined - confirm on the target runtime.
        const dateString = daySkeleton.name.substring(spaceIndex + 1);
        const parseableDateString = `${dateString.substring(3, 5)}.${dateString.substring(0, 2)}.${dateString.substring(6)}`;

        day.date = new Date(Date.parse(parseableDateString));
        day.lessons = [];

        return day;
    }

    /**
     * Parses the lesson-time column for the rows of one day.
     * @param workSheet - xls sheet
     * @param date - date of the day; start and end times are based on it
     * @param startRow - first row of the day (inclusive)
     * @param endRow - first row of the next day (exclusive)
     * @param column - index of the lesson-time column
     * @returns one entry per non-empty time cell
     * @throws Error when a time cell contains no "HH:MM-HH:MM" range
     * @private
     * @static
     */
    private static parseDayTimes(
        workSheet: XLSX.WorkSheet,
        date: Date,
        startRow: number,
        endRow: number,
        column: number,
    ): Array<InternalTime> {
        const dayTimes: Array<InternalTime> = [];

        for (let row = startRow; row < endRow; ++row) {
            const time = V2ScheduleParser.getCellData(
                workSheet,
                row,
                column,
            )?.replaceAll(/[\s\t\n\r]/g, "");
            if (!time) continue;

            // Cells mentioning "пара" are regular lessons; their first
            // character is taken as the lesson index.
            const lessonType = time.includes("пара")
                ? V2LessonType.DEFAULT
                : V2LessonType.ADDITIONAL;
            const defaultIndex =
                lessonType === V2LessonType.DEFAULT ? +time[0] : null;

            // Dots are accepted as hour/minute separators as well.
            const parseResult = /(\d+:\d+)-(\d+:\d+)/.exec(
                time.replaceAll(".", ":"),
            );
            if (!parseResult) {
                throw new Error("Не удалось узнать начало и конец пар!");
            }

            const timeRange = new V2LessonTimeDto();
            timeRange.start = new Date(date);
            timeRange.end = new Date(date);

            const [startHours, startMinutes] = parseResult[1].split(":");
            timeRange.start.setHours(+startHours);
            timeRange.start.setMinutes(+startMinutes);

            const [endHours, endMinutes] = parseResult[2].split(":");
            timeRange.end.setHours(+endHours);
            timeRange.end.setMinutes(+endMinutes);

            dayTimes.push({
                timeRange: timeRange,
                lessonType: lessonType,
                defaultIndex: defaultIndex,
                xlsxRange: V2ScheduleParser.getMergeFromStart(
                    workSheet,
                    row,
                    column,
                ),
            } as InternalTime);
        }

        return dayTimes;
    }

    /**
     * Parses the lesson of one group at one time slot.
     * @param workSheet - xls sheet
     * @param day - day being filled; its lessons are read to find the
     * previous lesson
     * @param dayTimes - all time slots of the day
     * @param time - time slot to parse
     * @param column - column of the group
     * @returns an empty list when the cell is empty, otherwise the lesson,
     * preceded by a break when the day already has lessons
     * @throws Error when there are fewer cabinets than sub-groups (other
     * than zero or exactly one cabinet)
     * @private
     * @static
     */
    private static parseLesson(
        workSheet: XLSX.Sheet,
        day: V2DayDto,
        dayTimes: Array<InternalTime>,
        time: InternalTime,
        column: number,
    ): Array<V2LessonDto> {
        const row = time.xlsxRange.s.r;

        // name
        const rawName = trimAll(
            V2ScheduleParser.getCellData(workSheet, row, column)?.replaceAll(
                /[\n\r]/g,
                "",
            ) ?? "",
        );
        if (rawName.length === 0) return [];

        const lesson = new V2LessonDto();
        lesson.type = time.lessonType;
        lesson.defaultRange =
            time.defaultIndex !== null
                ? [time.defaultIndex, time.defaultIndex]
                : null;

        lesson.time = new V2LessonTimeDto();
        lesson.time.start = time.timeRange.start;

        // A lesson cell merged over several rows lasts until the time slot
        // that ends on the same row as the merge.
        const range = V2ScheduleParser.getMergeFromStart(
            workSheet,
            row,
            column,
        );
        const endTime = dayTimes.find(
            (dayTime) => dayTime.xlsxRange.e.r === range.e.r,
        );
        lesson.time.end = endTime?.timeRange.end ?? time.timeRange.end;
        if (lesson.defaultRange !== null)
            lesson.defaultRange[1] = endTime?.defaultIndex ?? time.defaultIndex;

        // name and subGroups (cabinets are filled in below)
        const nameAndGroups = V2ScheduleParser.parseNameAndSubGroups(
            trimAll(rawName.replaceAll(/[\n\r]/g, "")),
        );
        lesson.name = nameAndGroups.name;
        lesson.subGroups = nameAndGroups.subGroups;

        // cabinets are read from the column to the right of the lesson
        const cabinets = V2ScheduleParser.parseCabinets(
            workSheet,
            row,
            column + 1,
        );
        const subGroups = lesson.subGroups;

        if (cabinets.length === 1) {
            // a single cabinet is shared by every sub-group
            for (const subGroup of subGroups) subGroup.cabinet = cabinets[0];
        } else if (cabinets.length === subGroups.length) {
            subGroups.forEach((subGroup, index) => {
                subGroup.cabinet = cabinets[index];
            });
        } else if (cabinets.length > subGroups.length) {
            // more cabinets than teachers: add placeholder sub-groups
            cabinets.forEach((cabinet, index) => {
                if (subGroups[index] === undefined) {
                    subGroups.push(
                        plainToInstance(V2LessonSubGroupDto, {
                            number: index + 1,
                            teacher: "Ошибка в расписании",
                            cabinet: cabinet,
                        } as V2LessonSubGroupDto),
                    );
                    return;
                }

                subGroups[index].cabinet = cabinet;
            });
        } else if (cabinets.length !== 0) {
            throw new Error("Разное кол-во кабинетов и подгрупп!");
        }

        const prevLesson = day.lessons?.length
            ? day.lessons[day.lessons.length - 1]
            : null;
        if (!prevLesson) return [lesson];

        // A break spans from the end of the previous lesson to the start of
        // this one.
        return [
            plainToInstance(V2LessonDto, {
                type: V2LessonType.BREAK,
                defaultRange: null,
                name: null,
                time: plainToInstance(V2LessonTimeDto, {
                    start: prevLesson.time.end,
                    end: lesson.time.start,
                } as V2LessonTimeDto),
                subGroups: [],
            } as V2LessonDto),
            lesson,
        ];
    }

    /**
     * Reads the cabinets listed in a cell.
     * @param workSheet - xls sheet
     * @param row - row index
     * @param column - column index
     * @returns the whitespace-separated cabinets, or an empty list when the
     * cell has no text
     * @private
     * @static
     */
    private static parseCabinets(
        workSheet: WorkSheet,
        row: number,
        column: number,
    ): Array<string> {
        const rawCabinets = V2ScheduleParser.getCellData(
            workSheet,
            row,
            column,
        );
        if (!rawCabinets) return [];

        // The capturing group keeps the separators in the split result;
        // they are expected to be dropped by the toNormalString filter
        // (presumably falsy for blank text - verify against the helper).
        return rawCabinets
            .split(/(\n|\s)/g)
            .filter((part) => toNormalString(part))
            .map((part) => part.replaceAll(/[\n\s\r]/g, " "));
    }

    /**
     * Compares the cached schedule with the current one day by day.
     * @param cachedGroups - groups of the previous parse, keyed by group name
     * @param currentGroups - groups of the current parse, keyed by group name
     * @returns for every cached group that still exists, the indices of the
     * days whose content changed, keyed by group name; an empty list when
     * there is no cache
     * @private
     * @static
     */
    private static getUpdatedGroups(
        cachedGroups: Array<V2GroupDto> | null,
        currentGroups: Array<V2GroupDto>,
    ): Array<Array<number>> {
        if (!cachedGroups) return [];

        const updatedGroups: Array<Array<number>> = [];

        for (const groupName of Object.keys(cachedGroups)) {
            const cachedGroup = cachedGroups[groupName];
            const currentGroup = currentGroups[groupName];

            // The group is no longer in the schedule: nothing to compare.
            if (!currentGroup) continue;

            const affectedGroupDays: Array<number> = [];

            currentGroup.days.forEach((currentDay, dayIdx) => {
                const cachedDay = cachedGroup.days[dayIdx];

                // A day missing from the cache counts as changed and is
                // never handed to the hasher.
                if (
                    cachedDay === undefined ||
                    objectHash.sha1(currentDay) !== objectHash.sha1(cachedDay)
                )
                    affectedGroupDays.push(dayIdx);
            });

            updatedGroups[groupName] = affectedGroupDays;
        }

        return updatedGroups;
    }
}