import { XlsDownloaderInterface } from "../xls-downloader/xls-downloader.interface"; import * as XLSX from "xlsx"; import { Range, WorkSheet } from "xlsx"; import { toNormalString, trimAll } from "../../../utility/string.util"; import { plainToClass, plainToInstance } from "class-transformer"; import * as objectHash from "object-hash"; import { V2LessonTimeDto } from "../../dto/v2/v2-lesson-time.dto"; import { V2LessonType } from "../../enum/v2-lesson-type.enum"; import { V2LessonSubGroupDto } from "../../dto/v2/v2-lesson-sub-group.dto"; import { V2LessonDto } from "../../dto/v2/v2-lesson.dto"; import { V2DayDto } from "../../dto/v2/v2-day.dto"; import { V2GroupDto } from "../../dto/v2/v2-group.dto"; import * as assert from "node:assert"; import { ScheduleReplacerService } from "../../schedule-replacer.service"; type InternalId = { /** * Индекс строки */ row: number; /** * Индекс столбца */ column: number; /** * Текст записи */ name: string; }; type InternalTime = { /** * Временной отрезок */ timeRange: V2LessonTimeDto; /** * Тип пары на этой строке */ lessonType: V2LessonType; /** * Индекс пары на этой строке */ defaultIndex?: number; /** * Позиции начальной и конечной записи */ xlsxRange: Range; }; export class V2ScheduleParseResult { /** * ETag расписания */ etag: string; /** * Идентификатор заменённого расписания (ObjectId) */ replacerId?: string; /** * Дата загрузки расписания на сайт политехникума */ uploadedAt: Date; /** * Дата загрузки расписания с сайта политехникума */ downloadedAt: Date; /** * Расписание групп в виде списка. * Ключ - название группы. */ groups: Array; /** * Список групп у которых было обновлено расписание с момента последнего обновления файла. * Ключ - название группы. */ updatedGroups: Array>; } export class V2ScheduleParser { private lastResult: V2ScheduleParseResult | null = null; /** * @param xlsDownloader - класс для загрузки расписания с сайта политехникума * @param scheduleReplacerService - сервис для подмены расписания */ public constructor( private readonly xlsDownloader: XlsDownloaderInterface, private readonly scheduleReplacerService?: ScheduleReplacerService, ) {} /** * Получает позиции начальной и конечной записи относительно начальной записи * @param workSheet - xls лист * @param topRow - индекс начальной строки * @param leftColumn - индекс начального столбца * @returns {Range} - позиции начальной и конечной записи * @private * @static */ private static getMergeFromStart( workSheet: XLSX.WorkSheet, topRow: number, leftColumn: number, ): Range { for (const range of workSheet["!merges"]) { if (topRow === range.s.r && leftColumn === range.s.c) return range; } return { s: { r: topRow, c: leftColumn }, e: { r: topRow, c: leftColumn }, }; } /** * Получает текст из требуемой записи * @param worksheet - xls лист * @param row - индекс строки * @param column - индекс столбца * @returns {string | null} - текст записи, если присутствует * @private * @static */ private static getCellData( worksheet: XLSX.WorkSheet, row: number, column: number, ): string | null { const cell: XLSX.CellObject | null = worksheet[XLSX.utils.encode_cell({ r: row, c: column })]; return toNormalString(cell?.w); } /** * Парсит информацию о паре исходя из текста в записи * @param lessonName - текст в записи * @returns {{ * name: string; * subGroups: Array; * }} - название пары и список подгрупп * @private * @static */ private static parseNameAndSubGroups(lessonName: string): { name: string; subGroups: Array; } { // хд const allRegex = /(?:[А-ЯЁ][а-яё]+\s[А-ЯЁ]\.\s?[А-ЯЁ]\.(?:\s?\([0-9]\s?подгруппа\))?(?:,\s)?)+$/gm; const teacherAndSubGroupRegex = /(?:[А-ЯЁ][а-яё]+\s[А-ЯЁ]\.\s?[А-ЯЁ]\.(?:\s?\([0-9]\s?подгруппа\))?)+/gm; const allMatch = allRegex.exec(lessonName); // если не ничё не найдено if (allMatch === null) return { name: lessonName, subGroups: [] }; const all: Array = []; let allInnerMatch: RegExpExecArray; while ( (allInnerMatch = teacherAndSubGroupRegex.exec(allMatch[0])) !== null ) { if (allInnerMatch.index === teacherAndSubGroupRegex.lastIndex) teacherAndSubGroupRegex.lastIndex++; all.push(allInnerMatch[0].trim()); } // парадокс if (all.length === 0) { throw new Error("Парадокс"); } const subGroups: Array = []; for (const teacherAndSubGroup of all) { const teacherRegex = /[А-ЯЁ][а-яё]+\s[А-ЯЁ]\.\s?[А-ЯЁ]\./g; const subGroupRegex = /\([0-9]\s?подгруппа\)/g; const teacherMatch = teacherRegex.exec(teacherAndSubGroup); if (teacherMatch === null) throw new Error("Парадокс"); let teacherFIO = teacherMatch[0]; const teacherSpaceIndex = teacherFIO.indexOf(" ") + 1; const teacherIO = teacherFIO .substring(teacherSpaceIndex) .replaceAll("s", ""); teacherFIO = `${teacherFIO.substring(0, teacherSpaceIndex)}${teacherIO}`; const subGroupMatch = subGroupRegex.exec(teacherAndSubGroup); const subGroup = subGroupMatch ? Number.parseInt(subGroupMatch[0][1]) : 1; subGroups.push( plainToClass(V2LessonSubGroupDto, { teacher: teacherFIO, number: subGroup, cabinet: "", }), ); } for (const index in subGroups) { if (subGroups.length === 1) { break; } // бляздец switch (index) { case "0": subGroups[index].number = subGroups[+index + 1].number === 2 ? 1 : 2; continue; case "1": subGroups[index].number = subGroups[+index - 1].number === 1 ? 2 : 1; continue; default: subGroups[index].number = +index; } } return { name: lessonName.substring(0, allMatch.index).trim(), subGroups: subGroups, }; } /** * Парсит информацию о группах и днях недели * @param workSheet - xls лист * @returns {{ * groupSkeletons: Array; * daySkeletons: Array; * }} - список с индексами и текстом записей групп и дней недели * @private * @static */ private static parseSkeleton(workSheet: XLSX.WorkSheet): { groupSkeletons: Array; daySkeletons: Array; } { const range = XLSX.utils.decode_range(workSheet["!ref"] || ""); let isHeaderParsed: boolean = false; const groups: Array = []; const days: Array = []; for (let row = range.s.r + 1; row <= range.e.r; ++row) { const dayName = V2ScheduleParser.getCellData(workSheet, row, 0); if (!dayName) continue; if (!isHeaderParsed) { isHeaderParsed = true; --row; for ( let column = range.s.c + 2; column <= range.e.c; ++column ) { const groupName = V2ScheduleParser.getCellData( workSheet, row, column, ); if (!groupName) continue; groups.push({ row: row, column: column, name: groupName }); } ++row; } if ( days.length == 0 || !days[days.length - 1].name.startsWith("Суббота") ) { const dayMonthIdx = /[А-Яа-я]+\s(\d+)\.\d+\.\d+/.exec( trimAll(dayName), ); if (dayMonthIdx === null) continue; } days.push({ row: row, column: 0, name: dayName, }); if ( days.length > 2 && days[days.length - 2].name.startsWith("Суббота") ) break; } return { daySkeletons: days, groupSkeletons: groups }; } /** * Возвращает текущий класс для скачивания xls файлов * @returns {XlsDownloaderInterface} - класс для скачивания xls файлов */ getXlsDownloader(): XlsDownloaderInterface { return this.xlsDownloader; } /** * Возвращает текущее расписание * @returns {V2ScheduleParseResult} - расписание * @async */ async getSchedule(): Promise { const headData = await this.xlsDownloader.fetch(true); this.xlsDownloader.verifyFetchResult(headData); assert(headData.type === "success"); const replacer = this.scheduleReplacerService ? await this.scheduleReplacerService.getByEtag(headData.etag) : null; if (this.lastResult && this.lastResult.etag === headData.etag) { if (!replacer) return this.lastResult; if (this.lastResult.replacerId === replacer.id) return this.lastResult; } const buffer = async () => { if (replacer) return replacer.data; const downloadData = await this.xlsDownloader.fetch(false); this.xlsDownloader.verifyFetchResult(downloadData); assert(downloadData.type === "success"); return downloadData.data; }; const workBook = XLSX.read(await buffer()); const workSheet = workBook.Sheets[workBook.SheetNames[0]]; const { groupSkeletons, daySkeletons } = V2ScheduleParser.parseSkeleton(workSheet); const groups: Array = []; const daysTimes: Array> = []; let daysTimesFilled = false; for (const groupSkeleton of groupSkeletons) { const group = new V2GroupDto(); group.name = groupSkeleton.name; group.days = []; for (let dayIdx = 0; dayIdx < daySkeletons.length - 1; ++dayIdx) { const daySkeleton = daySkeletons[dayIdx]; const day = new V2DayDto(); { const daySpaceIndex = daySkeleton.name.indexOf(" "); day.name = daySkeleton.name.substring(0, daySpaceIndex); const dateString = daySkeleton.name.substring( daySpaceIndex + 1, ); const parseableDateString = `${dateString.substring(3, 5)}.${dateString.substring(0, 2)}.${dateString.substring(6)}`; day.date = new Date(Date.parse(parseableDateString)); day.lessons = []; } const lessonTimeColumn = daySkeletons[0].column + 1; const rowDistance = daySkeletons[dayIdx + 1].row - daySkeleton.row; const dayTimes: Array = daysTimesFilled ? daysTimes[day.name] : []; if (!daysTimesFilled) { for ( let row = daySkeleton.row; row < daySkeleton.row + rowDistance; ++row ) { const time = V2ScheduleParser.getCellData( workSheet, row, lessonTimeColumn, )?.replaceAll(/[\s\t\n\r]/g, ""); if (!time) continue; // type const lessonType = time.includes("пара") ? V2LessonType.DEFAULT : V2LessonType.ADDITIONAL; const defaultIndex = lessonType === V2LessonType.DEFAULT ? +time[0] : null; // time const timeRange = new V2LessonTimeDto(); timeRange.start = new Date(day.date); timeRange.end = new Date(day.date); const timeString = time.replaceAll(".", ":"); const timeRegex = /(\d+:\d+)-(\d+:\d+)/g; const parseResult = timeRegex.exec(timeString); if (!parseResult) { throw new Error( "Не удалось узнать начало и конец пар!", ); } const startStrings = parseResult[1].split(":"); timeRange.start.setHours(+startStrings[0]); timeRange.start.setMinutes(+startStrings[1]); const endStrings = parseResult[2].split(":"); timeRange.end.setHours(+endStrings[0]); timeRange.end.setMinutes(+endStrings[1]); dayTimes.push({ timeRange: timeRange, lessonType: lessonType, defaultIndex: defaultIndex, xlsxRange: V2ScheduleParser.getMergeFromStart( workSheet, row, lessonTimeColumn, ), } as InternalTime); } daysTimes[day.name] = dayTimes; } for (const time of dayTimes) { // if (day.name === "Четверг" && group.name === "ИС-214/23") { // console.log("-------------------"); // console.log(groupSkeleton.column); // console.log(time.xlsxRange); // } const lessons = V2ScheduleParser.parseLesson( workSheet, day, dayTimes, time, groupSkeleton.column, ); for (const lesson of lessons) day.lessons.push(lesson); } group.days.push(day); } if (!daysTimesFilled) daysTimesFilled = true; groups[group.name] = group; } const updatedGroups = V2ScheduleParser.getUpdatedGroups( this.lastResult?.groups, groups, ); return (this.lastResult = { downloadedAt: headData.requestedAt, uploadedAt: headData.uploadedAt, etag: headData.etag, replacerId: replacer?.id, groups: groups, updatedGroups: updatedGroups.length === 0 ? (this.lastResult?.updatedGroups ?? []) : updatedGroups, }); } private static parseLesson( workSheet: XLSX.Sheet, day: V2DayDto, dayTimes: Array, time: InternalTime, column: number, ): Array { const row = time.xlsxRange.s.r; if (typeof column !== "number") { console.log(typeof column); console.log(column); } // name const rawName = trimAll( V2ScheduleParser.getCellData(workSheet, row, column)?.replaceAll( /[\n\r]/g, "", ) ?? "", ); if (rawName.length === 0) return []; const lesson = new V2LessonDto(); lesson.type = time.lessonType; lesson.defaultRange = time.defaultIndex !== null ? [time.defaultIndex, time.defaultIndex] : null; lesson.time = new V2LessonTimeDto(); lesson.time.start = time.timeRange.start; // check if multi-lesson const range = this.getMergeFromStart(workSheet, row, column); const endTime = dayTimes.filter((dayTime) => { return dayTime.xlsxRange.e.r === range.e.r; })[0]; lesson.time.end = endTime?.timeRange.end ?? time.timeRange.end; if (lesson.defaultRange !== null) lesson.defaultRange[1] = endTime?.defaultIndex ?? time.defaultIndex; // name and subGroups (subGroups unfilled) { const nameAndGroups = V2ScheduleParser.parseNameAndSubGroups( trimAll(rawName?.replaceAll(/[\n\r]/g, "") ?? ""), ); lesson.name = nameAndGroups.name; lesson.subGroups = nameAndGroups.subGroups; } // cabinets { const cabinets = V2ScheduleParser.parseCabinets( workSheet, row, column + 1, ); if (cabinets.length === 1) { for (const index in lesson.subGroups) lesson.subGroups[index].cabinet = cabinets[0]; } else if (cabinets.length === lesson.subGroups.length) { for (const index in lesson.subGroups) lesson.subGroups[index].cabinet = cabinets[index]; } else if (cabinets.length !== 0) { if (cabinets.length > lesson.subGroups.length) { for (const index in cabinets) { if (lesson.subGroups[index] === undefined) { lesson.subGroups.push( plainToInstance(V2LessonSubGroupDto, { number: +index + 1, teacher: "Ошибка в расписании", cabinet: cabinets[index], } as V2LessonSubGroupDto), ); continue; } lesson.subGroups[index].cabinet = cabinets[index]; } } else throw new Error("Разное кол-во кабинетов и подгрупп!"); } } const prevLesson = (day.lessons?.length ?? 0) === 0 ? null : day.lessons[day.lessons.length - 1]; if (!prevLesson) return [lesson]; return [ plainToInstance(V2LessonDto, { type: V2LessonType.BREAK, defaultRange: null, name: null, time: plainToInstance(V2LessonTimeDto, { start: prevLesson.time.end, end: lesson.time.start, } as V2LessonTimeDto), subGroups: [], } as V2LessonDto), lesson, ]; } private static parseCabinets( workSheet: WorkSheet, row: number, column: number, ) { const cabinets: Array = []; { const rawCabinets = V2ScheduleParser.getCellData( workSheet, row, column, ); if (rawCabinets) { const parts = rawCabinets.split(/(\n|\s)/g); for (const cabinet of parts) { if (!toNormalString(cabinet)) continue; cabinets.push(cabinet.replaceAll(/[\n\s\r]/g, " ")); } } } return cabinets; } private static getUpdatedGroups( cachedGroups: Array | null, currentGroups: Array, ): Array> { if (!cachedGroups) return []; const updatedGroups = []; for (const groupName in cachedGroups) { const cachedGroup = cachedGroups[groupName]; const currentGroup = currentGroups[groupName]; const affectedGroupDays: Array = []; for (const dayIdx in currentGroup.days) { if ( objectHash.sha1(currentGroup.days[dayIdx]) !== objectHash.sha1(cachedGroup.days[dayIdx]) ) affectedGroupDays.push(Number.parseInt(dayIdx)); } updatedGroups[groupName] = affectedGroupDays; } return updatedGroups; } }