feat(parser): add lesson types "course project" and "course project defense"

feat(parser): speed improvement, lesson type guessing and parsing of merged lesson cabinets
refactor(parser): improve readability
2025-12-06 17:57:47 +03:00 · 2025-05-27 02:06:13 +04:00 · 2025-05-27 02:03:54 +04:00 · 2025-05-26 21:12:23 +04:00
5 changed files with 323 additions and 261 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1258,15 +1258,6 @@ dependencies = [
 "slab",
 ]

-[[package]]
-name = "fuzzy-matcher"
-version = "0.3.7"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "54614a3312934d066701a80f20f15fa3b56d67ac7722b39eea5b4c9dd1d66c94"
-dependencies = [
- "thread_local",
-]
-
 [[package]]
 name = "gcc"
 version = "0.3.55"
@@ -2881,10 +2872,11 @@ dependencies = [
 "chrono",
 "criterion",
 "derive_more",
- "fuzzy-matcher",
 "regex",
+ "sentry",
 "serde",
 "serde_repr",
+ "strsim",
 "utoipa",
 ]

@@ -3383,16 +3375,6 @@ dependencies = [
 "syn 2.0.100",
 ]

-[[package]]
-name = "thread_local"
-version = "1.1.8"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8b9ef9bad013ada3808854ceac7b46812a6465ba368859a37e2100283d2d719c"
-dependencies = [
- "cfg-if",
- "once_cell",
-]
-
 [[package]]
 name = "time"
 version = "0.1.45"
--- a/schedule-parser/Cargo.toml
+++ b/schedule-parser/Cargo.toml
@@ -10,11 +10,12 @@ test-utils = []
 calamine = "0.26"
 chrono = { version = "0.4", features = ["serde"] }
 derive_more = { version = "2", features = ["full"] }
+sentry = "0.38"
 serde = { version = "1.0.219", features = ["derive"] }
 serde_repr = "0.1.20"
-fuzzy-matcher = "0.3.7"
 regex = "1.11.1"
 utoipa = { version = "5", features = ["chrono"] }
+strsim = "0.11.1"

 [dev-dependencies]
 criterion = "0.6"
--- a/schedule-parser/src/lib.rs
+++ b/schedule-parser/src/lib.rs
@@ -1,112 +1,21 @@
 use crate::LessonParseResult::{Lessons, Street};
 use crate::schema::LessonType::Break;
+use crate::schema::internal::{BoundariesCellInfo, DayCellInfo, GroupCellInfo};
 use crate::schema::{
    Day, ErrorCell, ErrorCellPos, Lesson, LessonBoundaries, LessonSubGroup, LessonType, ParseError,
    ParseResult, ScheduleEntry,
 };
+use crate::worksheet::WorkSheet;
 use calamine::{Reader, Xls, open_workbook_from_rs};
-use chrono::{DateTime, Duration, NaiveDateTime, Utc};
-use fuzzy_matcher::FuzzyMatcher;
-use fuzzy_matcher::skim::SkimMatcherV2;
+use chrono::{DateTime, Duration, NaiveDate, NaiveTime, Utc};
 use regex::Regex;
 use std::collections::HashMap;
 use std::io::Cursor;
-use std::ops::Deref;
 use std::sync::LazyLock;

 mod macros;
 pub mod schema;
-
-/// Data cell storing the group name.
-struct GroupCellInfo {
-    /// Column index.
-    column: u32,
-
-    /// Text in the cell.
-    name: String,
-}
-
-/// Data cell storing the line.
-struct DayCellInfo {
-    /// Line index.
-    row: u32,
-
-    /// Column index.
-    column: u32,
-
-    /// Day name.
-    name: String,
-
-    /// Date of the day.
-    date: DateTime<Utc>,
-}
-
-/// Data on the time of lessons from the second column of the schedule.
-struct BoundariesCellInfo {
-    /// Temporary segment of the lesson.
-    time_range: LessonBoundaries,
-
-    /// Type of lesson.
-    lesson_type: LessonType,
-
-    /// The lesson index.
-    default_index: Option<u32>,
-
-    /// The frame of the cell.
-    xls_range: ((u32, u32), (u32, u32)),
-}
-
-struct WorkSheet {
-    pub data: calamine::Range<calamine::Data>,
-    pub merges: Vec<calamine::Dimensions>,
-}
-
-impl Deref for WorkSheet {
-    type Target = calamine::Range<calamine::Data>;
-
-    fn deref(&self) -> &Self::Target {
-        &self.data
-    }
-}
-
-/// Getting a line from the required cell.
-fn get_string_from_cell(worksheet: &WorkSheet, row: u32, col: u32) -> Option<String> {
-    let cell_data = if let Some(data) = worksheet.get((row as usize, col as usize)) {
-        data.to_string()
-    } else {
-        return None;
-    };
-
-    if cell_data.trim().is_empty() {
-        return None;
-    }
-
-    static NL_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"[\n\r]+").unwrap());
-    static SP_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\s+").unwrap());
-
-    let trimmed_data = SP_RE
-        .replace_all(&NL_RE.replace_all(&cell_data, " "), " ")
-        .trim()
-        .to_string();
-
-    if trimmed_data.is_empty() {
-        None
-    } else {
-        Some(trimmed_data)
-    }
-}
-
-/// Obtaining the boundaries of the cell along its upper left coordinate.
-fn get_merge_from_start(worksheet: &WorkSheet, row: u32, column: u32) -> ((u32, u32), (u32, u32)) {
-    return match worksheet
-        .merges
-        .iter()
-        .find(|merge| merge.start.0 == row && merge.start.1 == column)
-    {
-        Some(merge) => (merge.start, (merge.end.0 + 1, merge.end.1 + 1)),
-        None => ((row, column), (row + 1, column + 1))
-    };
-}
+mod worksheet;

 /// Obtaining a "skeleton" schedule from the working sheet.
 fn parse_skeleton(
@@ -123,7 +32,7 @@ fn parse_skeleton(
    while row < worksheet_end.0 {
        row += 1;

-        let day_full_name = or_continue!(get_string_from_cell(&worksheet, row, 0));
+        let day_full_name = or_continue!(worksheet.get_string_from_cell(row, 0));

        // parse the groups row once the days column has been found
        if groups.is_empty() {
@@ -133,7 +42,7 @@ fn parse_skeleton(
            for column in (worksheet_start.1 + 2)..=worksheet_end.1 {
                groups.push(GroupCellInfo {
                    column,
-                    name: or_continue!(get_string_from_cell(&worksheet, row, column)),
+                    name: or_continue!(worksheet.get_string_from_cell(row, column)),
                });
            }

@@ -146,13 +55,12 @@ fn parse_skeleton(

            let name = day_full_name[..space_index].to_string();

-            let date_raw = day_full_name[space_index + 1..].to_string();
-            let date_add = format!("{} 00:00:00", date_raw);
+            let date_slice = &day_full_name[space_index + 1..];
+            let date = or_break!(NaiveDate::parse_from_str(date_slice, "%d.%m.%Y").ok())
+                .and_time(NaiveTime::default())
+                .and_utc();

-            let date =
-                or_break!(NaiveDateTime::parse_from_str(&*date_add, "%d.%m.%Y %H:%M:%S").ok());
-
-            (name, date.and_utc())
+            (name, date)
        };

        days.push(DayCellInfo {
@@ -178,103 +86,75 @@ enum LessonParseResult {
    Street(String),
 }

-trait StringInnerSlice {
-    /// Obtaining a line from the line on the initial and final index.
-    fn inner_slice(&self, from: usize, to: usize) -> Self;
-}
-
-impl StringInnerSlice for String {
-    fn inner_slice(&self, from: usize, to: usize) -> Self {
-        self.chars()
-            .take(from)
-            .chain(self.chars().skip(to))
-            .collect()
-    }
-}
-
 // noinspection GrazieInspection
 /// Obtaining a non-standard type of lesson by name.
-fn guess_lesson_type(name: &String) -> Option<(String, LessonType)> {
-    let map: HashMap<String, LessonType> = HashMap::from([
-        ("(консультация)".to_string(), LessonType::Consultation),
-        (
-            "самостоятельная работа".to_string(),
-            LessonType::IndependentWork,
-        ),
-        ("зачет".to_string(), LessonType::Exam),
-        ("зачет с оценкой".to_string(), LessonType::ExamWithGrade),
-        ("экзамен".to_string(), LessonType::ExamDefault),
-    ]);
+fn guess_lesson_type(text: &String) -> Option<LessonType> {
+    static MAP: LazyLock<HashMap<&str, LessonType>> = LazyLock::new(|| {
+        HashMap::from([
+            ("консультация", LessonType::Consultation),
+            ("самостоятельная работа", LessonType::IndependentWork),
+            ("зачет", LessonType::Exam),
+            ("зачет с оценкой", LessonType::ExamWithGrade),
+            ("экзамен", LessonType::ExamDefault),
+            ("курсовой проект", LessonType::CourseProject),
+            ("защита курсового проекта", LessonType::CourseProjectDefense),
+        ])
+    });

-    let matcher = SkimMatcherV2::default();
-    let name_lower = name.to_lowercase();
+    let name_lower = text.to_lowercase();

-    type SearchResult<'a> = (&'a LessonType, i64, Vec<usize>);
-
-    let mut search_results: Vec<SearchResult> = map
+    match MAP
        .iter()
-        .map(|entry| -> SearchResult {
-            if let Some((score, indices)) = matcher.fuzzy_indices(&*name_lower, entry.0) {
-                return (entry.1, score, indices);
-            }
-
-            (entry.1, 0, Vec::new())
-        })
-        .collect();
-    search_results.sort_by(|a, b| b.1.cmp(&a.1));
-
-    let guessed_type = search_results.first().unwrap();
-
-    if guessed_type.1 > 80 {
-        Some((
-            name.inner_slice(guessed_type.2[0], guessed_type.2[guessed_type.2.len() - 1]),
-            guessed_type.0.clone(),
-        ))
-    } else {
-        None
+        .map(|(text, lesson_type)| (lesson_type, strsim::levenshtein(text, &*name_lower)))
+        .filter(|x| x.1 <= 4)
+        .min_by_key(|(_, score)| *score)
+    {
+        None => None,
+        Some(v) => Some(v.0.clone()),
    }
 }

 /// Getting a lesson or street from a cell.
 fn parse_lesson(
    worksheet: &WorkSheet,
-    day: &mut Day,
+    day: &Day,
    day_boundaries: &Vec<BoundariesCellInfo>,
    lesson_boundaries: &BoundariesCellInfo,
-    column: u32,
+    group_column: u32,
 ) -> Result<LessonParseResult, ParseError> {
    let row = lesson_boundaries.xls_range.0.0;

-    let (name, lesson_type) = {
-        let full_name = match get_string_from_cell(&worksheet, row, column) {
+    let name = {
+        let cell_data = match worksheet.get_string_from_cell(row, group_column) {
            Some(x) => x,
            None => return Ok(Lessons(Vec::new())),
        };

        static OTHER_STREET_RE: LazyLock<Regex> =
-            LazyLock::new(|| Regex::new(r"^[А-Я][а-я]+,?\s?[0-9]+$").unwrap());
+            LazyLock::new(|| Regex::new(r"^[А-Я][а-я]+[,\s]\d+$").unwrap());

-        if OTHER_STREET_RE.is_match(&full_name) {
-            return Ok(Street(full_name));
+        if OTHER_STREET_RE.is_match(&cell_data) {
+            return Ok(Street(cell_data));
        }

-        match guess_lesson_type(&full_name) {
-            Some(x) => x,
-            None => (full_name, lesson_boundaries.lesson_type.clone()),
-        }
+        cell_data
    };

+    let cell_range = worksheet.get_merge_from_start(row, group_column);
+
    let (default_range, lesson_time) = {
-        let cell_range = get_merge_from_start(worksheet, row, column);
-        
        let end_time_arr = day_boundaries
            .iter()
            .filter(|time| time.xls_range.1.0 == cell_range.1.0)
            .collect::<Vec<&BoundariesCellInfo>>();

-        let end_time = end_time_arr
-            .first()
-            .ok_or(ParseError::LessonTimeNotFound(ErrorCellPos { row, column }))?;
+        let end_time =
+            end_time_arr
+                .first()
+                .ok_or(ParseError::LessonTimeNotFound(ErrorCellPos {
+                    row,
+                    column: group_column,
+                }))?;

        let range: Option<[u8; 2]> = if lesson_boundaries.default_index != None {
            let default = lesson_boundaries.default_index.unwrap() as u8;
@@ -291,10 +171,14 @@ fn parse_lesson(
        Ok((range, time))
    }?;

-    let (name, mut subgroups) = parse_name_and_subgroups(&name)?;
+    let (name, mut subgroups, lesson_type) = parse_name_and_subgroups(&name)?;

    {
-        let cabinets: Vec<String> = parse_cabinets(worksheet, row, column + 1);
+        let cabinets: Vec<String> = parse_cabinets(
+            worksheet,
+            (cell_range.0.0, cell_range.1.0),
+            group_column + 1,
+        );

        match cabinets.len() {
            // Если кабинетов нет, но есть подгруппы, назначаем им кабинет "??"
@@ -340,7 +224,7 @@ fn parse_lesson(
    };

    let lesson = Lesson {
-        lesson_type,
+        lesson_type: lesson_type.unwrap_or(lesson_boundaries.lesson_type.clone()),
        default_range,
        name: Some(name),
        time: lesson_time,
@@ -371,10 +255,12 @@ fn parse_lesson(
 }

 /// Obtaining a list of cabinets to the right of the lesson cell.
-fn parse_cabinets(worksheet: &WorkSheet, row: u32, column: u32) -> Vec<String> {
+fn parse_cabinets(worksheet: &WorkSheet, row_range: (u32, u32), column: u32) -> Vec<String> {
    let mut cabinets: Vec<String> = Vec::new();

-    if let Some(raw) = get_string_from_cell(&worksheet, row, column) {
+    for row in row_range.0..row_range.1 {
+        let raw = or_continue!(worksheet.get_string_from_cell(row, column));
+
        let clean = raw.replace("\n", " ");
        let parts: Vec<&str> = clean.split(" ").collect();

@@ -383,59 +269,117 @@ fn parse_cabinets(worksheet: &WorkSheet, row: u32, column: u32) -> Vec<String> {

            cabinets.push(clean_part);
        }
+
+        break;
    }

    cabinets
 }

+//noinspection GrazieInspection
 /// Getting the "pure" name of the lesson and list of teachers from the text of the lesson cell.
-fn parse_name_and_subgroups(name: &String) -> Result<(String, Vec<LessonSubGroup>), ParseError> {
-    static LESSON_RE: LazyLock<Regex> =
-        LazyLock::new(|| Regex::new(r"(?:[А-Я][а-я]+[А-Я]{2}(?:\([0-9][а-я]+\))?)+$").unwrap());
-    static TEACHER_RE: LazyLock<Regex> =
-        LazyLock::new(|| Regex::new(r"([А-Я][а-я]+)([А-Я])([А-Я])(?:\(([0-9])[а-я]+\))?").unwrap());
-    static CLEAN_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"[\s.,]+").unwrap());
-    static END_CLEAN_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"[.\s]+$").unwrap());
+fn parse_name_and_subgroups(
+    text: &String,
+) -> Result<(String, Vec<LessonSubGroup>, Option<LessonType>), ParseError> {
+    // Части названия пары:
+    // 1. Само название.
+    // 2. Список преподавателей и подгрупп.
+    // 3. "Модификатор" (чаще всего).
+    //
+    // Регулярное выражение для получения ФИО преподавателей и номеров подгрупп (aka. второй части).
+    // (?:[А-Я][а-я]+\s?(?:[А-Я][\s.]*){2}(?:\(\s*\d\s*[а-я\s]+\))?(?:[\s,]+)?)+[\s.]*
+    //
+    // Подробнее:
+    // (?:
+    //     [А-Я][а-я]+         - Фамилия.
+    //     \s?                 - Кто знает, будет ли там пробел.
+    //     (?:[А-Я][\s.]*){2}  - Имя и отчество с учётом случайных пробелов и точек.
+    //     (?:
+    //         \(              - Открытие подгруппы.
+    //         \s*             - Кто знает, будет ли там пробел.
+    //         \d              - Номер подгруппы.
+    //         \s*             - Кто знает, будет ли там пробел.
+    //         [а-я\s]+        - Слово "подгруппа" с учётом ошибок.
+    //         \)              - Закрытие подгруппы.
+    //     )?                  - Явное указание подгруппы может отсутствовать по понятным причинам.
+    //     (?:[\s,]+)?         - Разделители между отдельными частями.
+    // )+
+    // [\s.]*                  - Забираем с собой всякий мусор, что бы не передать его в третью часть.

-    let (teachers, lesson_name) = {
-        let clean_name = CLEAN_RE.replace_all(&name, "").to_string();
+    static NAMES_REGEX: LazyLock<Regex> = LazyLock::new(|| {
+        Regex::new(
+            r"(?:[А-Я][а-я]+\s?(?:[А-Я][\s.]*){2}(?:\(\s*\d\s*[а-я\s]+\))?(?:[\s,]+)?)+[\s.]*",
+        )
+        .unwrap()
+    });

-        if let Some(captures) = LESSON_RE.captures(&clean_name) {
+    // Очистка
+    static CLEAN_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"[\s\n\t]+").unwrap());
+
+    let text = CLEAN_RE
+        .replace(&text.replace(&[' ', '\t', '\n'], " "), " ")
+        .to_string();
+
+    let (lesson_name, mut subgroups, lesson_type) = match NAMES_REGEX.captures(&text) {
+        Some(captures) => {
            let capture = captures.get(0).unwrap();
-            let capture_str = capture.as_str().to_string();
-            let capture_name: String = capture_str.chars().take(5).collect();

-            (
-                END_CLEAN_RE.replace(&capture_str, "").to_string(),
-                END_CLEAN_RE
-                    .replace(&name[0..name.find(&*capture_name).unwrap()], "")
-                    .to_string(),
-            )
-        } else {
-            return Ok((END_CLEAN_RE.replace(&name, "").to_string(), Vec::new()));
+            let subgroups: Vec<LessonSubGroup> = {
+                let src = capture.as_str().replace(&[' ', '.'], "");
+
+                src.split(',')
+                    .map(|name| {
+                        let open_bracket_index = name.find('(');
+
+                        let subgroup_number = open_bracket_index.map_or(0, |index| {
+                            name[(index + 1)..(index + 2)].parse::<u8>().unwrap()
+                        });
+
+                        let teacher_name = {
+                            let name_end = open_bracket_index.unwrap_or_else(|| name.len());
+
+                            // The two initials are the last two Cyrillic letters (2 bytes each in UTF-8), hence the byte offsets 4 and 2.
+                            format!(
+                                "{} {}.{}.",
+                                name.get(..name_end - 4).unwrap(),
+                                name.get(name_end - 4..name_end - 2).unwrap(),
+                                name.get(name_end - 2..name_end).unwrap(),
+                            )
+                        };
+
+                        LessonSubGroup {
+                            number: subgroup_number,
+                            cabinet: None,
+                            teacher: teacher_name,
+                        }
+                    })
+                    .collect()
+            };
+
+            let name = text[..capture.start()].trim().to_string();
+            let extra = text[capture.end()..].trim().to_string();
+
+            let lesson_type = if extra.len() > 4 {
+                let result = guess_lesson_type(&extra);
+
+                #[cfg(not(debug_assertions))]
+                if result.is_none() {
+                    sentry::capture_message(
+                        &*format!("Не удалось угадать тип пары '{}'!", extra),
+                        sentry::Level::Warning,
+                    );
+                }
+
+                result
+            } else {
+                None
+            };
+
+            (name, subgroups, lesson_type)
        }
+        None => (text, Vec::new(), None),
    };

-    let mut subgroups: Vec<LessonSubGroup> = Vec::new();
-
-    let teacher_it = TEACHER_RE.captures_iter(&teachers);
-
-    for captures in teacher_it {
-        subgroups.push(LessonSubGroup {
-            number: match captures.get(4) {
-                Some(capture) => capture.as_str().to_string().parse::<u8>().unwrap(),
-                None => 0,
-            },
-            cabinet: None,
-            teacher: format!(
-                "{} {}.{}.",
-                captures.get(1).unwrap().as_str().to_string(),
-                captures.get(2).unwrap().as_str().to_string(),
-                captures.get(3).unwrap().as_str().to_string()
-            ),
-        });
-    }
-
    // фикс, если у кого-то отсутствует индекс подгруппы

    if subgroups.len() == 1 {
@@ -470,9 +414,15 @@ fn parse_name_and_subgroups(name: &String) -> Result<(String, Vec<LessonSubGroup
        subgroups.reverse()
    }

-    Ok((lesson_name, subgroups))
+    Ok((lesson_name, subgroups, lesson_type))
 }

+/// Getting the start and end of a lesson from a cell in the first column of a document.
+///
+/// # Arguments
+///
+/// * `cell_data`: text in cell.
+/// * `date`: date of the current day.
 fn parse_lesson_boundaries_cell(
    cell_data: &String,
    date: DateTime<Utc>,
@@ -503,28 +453,31 @@ fn parse_lesson_boundaries_cell(
    })
 }

-fn parse_day_boundaries_column(
+/// Parse the column of the document to obtain a list of day's lesson boundaries.
+///
+/// # Arguments
+///
+/// * `worksheet`: document.
+/// * `date`: date of the current day.
+/// * `row_range`: row boundaries of the current day.
+/// * `column`: column with the required data.
+fn parse_day_boundaries(
    worksheet: &WorkSheet,
-    day_markup: &DayCellInfo,
-    lesson_time_column: u32,
-    row_distance: u32,
+    date: DateTime<Utc>,
+    row_range: (u32, u32),
+    column: u32,
 ) -> Result<Vec<BoundariesCellInfo>, ParseError> {
    let mut day_times: Vec<BoundariesCellInfo> = Vec::new();

-    for row in day_markup.row..(day_markup.row + row_distance) {
-        let time_cell = if let Some(str) = get_string_from_cell(&worksheet, row, lesson_time_column)
-        {
+    for row in row_range.0..row_range.1 {
+        let time_cell = if let Some(str) = worksheet.get_string_from_cell(row, column) {
            str
        } else {
            continue;
        };

-        let lesson_time = parse_lesson_boundaries_cell(&time_cell, day_markup.date.clone()).ok_or(
-            ParseError::LessonBoundaries(ErrorCell::new(
-                row,
-                lesson_time_column,
-                time_cell.clone(),
-            )),
+        let lesson_time = parse_lesson_boundaries_cell(&time_cell, date.clone()).ok_or(
+            ParseError::LessonBoundaries(ErrorCell::new(row, column, time_cell.clone())),
        )?;

        // type
@@ -553,14 +506,20 @@ fn parse_day_boundaries_column(
            time_range: lesson_time,
            lesson_type,
            default_index,
-            xls_range: get_merge_from_start(&worksheet, row, lesson_time_column),
+            xls_range: worksheet.get_merge_from_start(row, column),
        });
    }

-    return Ok(day_times);
+    Ok(day_times)
 }

-fn parse_week_boundaries_column(
+/// Parse the column of the document to obtain a list of week's lesson boundaries.
+///
+/// # Arguments
+///
+/// * `worksheet`: document.
+/// * `week_markup`: markup of the current week.
+fn parse_week_boundaries(
    worksheet: &WorkSheet,
    week_markup: &Vec<DayCellInfo>,
 ) -> Result<Vec<Vec<BoundariesCellInfo>>, ParseError> {
@@ -572,16 +531,20 @@ fn parse_week_boundaries_column(
    for day_index in 0..week_markup.len() {
        let day_markup = &week_markup[day_index];

-        // Если текущий день не последнему, то индекс строки следующего дня минус индекс строки текущего дня.
-        // Если текущий день - последний, то индекс последней строки документа минус индекс строки текущего дня.
-        let row_distance = if day_index != week_markup.len() - 1 {
+        // Если текущий день не последний, то индекс строки следующего дня.
+        // Если текущий день - последний, то индекс последней строки документа.
+        let end_row = if day_index != week_markup.len() - 1 {
            week_markup[day_index + 1].row
        } else {
            worksheet_end_row
-        } - day_markup.row;
+        };

-        let day_boundaries =
-            parse_day_boundaries_column(&worksheet, day_markup, lesson_time_column, row_distance)?;
+        let day_boundaries = parse_day_boundaries(
+            &worksheet,
+            day_markup.date.clone(),
+            (day_markup.row, end_row),
+            lesson_time_column,
+        )?;

        result.push(day_boundaries);
    }
@@ -709,7 +672,7 @@ pub fn parse_xls(buffer: &Vec<u8>) -> Result<ParseResult, ParseError> {
    };

    let (week_markup, groups_markup) = parse_skeleton(&worksheet)?;
-    let week_boundaries = parse_week_boundaries_column(&worksheet, &week_markup)?;
+    let week_boundaries = parse_week_boundaries(&worksheet, &week_markup)?;

    let mut groups: HashMap<String, ScheduleEntry> = HashMap::new();

@@ -734,7 +697,7 @@ pub fn parse_xls(buffer: &Vec<u8>) -> Result<ParseResult, ParseError> {
            for lesson_boundaries in day_boundaries {
                match &mut parse_lesson(
                    &worksheet,
-                    &mut day,
+                    &day,
                    &day_boundaries,
                    &lesson_boundaries,
                    group_markup.column,
@@ -786,9 +749,17 @@ pub mod tests {
        assert!(result.groups.contains_key("ИС-214/23"));

        let group = result.groups.get("ИС-214/23").unwrap();
-        let thursday = group.days.get(3).unwrap();

+        let thursday = group.days.get(3).unwrap();
        assert_eq!(thursday.lessons.len(), 1);
-        assert_eq!(thursday.lessons[0].default_range.unwrap()[1], 3);
+
+        let lesson = &thursday.lessons[0];
+        assert_eq!(lesson.default_range.unwrap()[1], 3);
+        assert!(lesson.subgroups.is_some());
+
+        let subgroups = lesson.subgroups.as_ref().unwrap();
+        assert_eq!(subgroups.len(), 2);
+        assert_eq!(subgroups[0].cabinet, Some("44".to_string()));
+        assert_eq!(subgroups[1].cabinet, Some("43".to_string()));
    }
 }
--- a/schedule-parser/src/schema.rs
+++ b/schedule-parser/src/schema.rs
@@ -6,6 +6,50 @@ use std::collections::HashMap;
 use std::sync::Arc;
 use utoipa::ToSchema;

+pub(crate) mod internal {
+    use crate::schema::{LessonBoundaries, LessonType};
+    use chrono::{DateTime, Utc};
+
+    /// Data cell storing the group name.
+    pub struct GroupCellInfo {
+        /// Column index.
+        pub column: u32,
+
+        /// Text in the cell.
+        pub name: String,
+    }
+
+    /// Data cell storing the day.
+    pub struct DayCellInfo {
+        /// Row index.
+        pub row: u32,
+
+        /// Column index.
+        pub column: u32,
+
+        /// Day name.
+        pub name: String,
+
+        /// Date of the day.
+        pub date: DateTime<Utc>,
+    }
+
+    /// Data on the time of lessons from the second column of the schedule.
+    pub struct BoundariesCellInfo {
+        /// Time segment of the lesson.
+        pub time_range: LessonBoundaries,
+
+        /// Type of lesson.
+        pub lesson_type: LessonType,
+
+        /// The lesson index.
+        pub default_index: Option<u32>,
+
+        /// The frame of the cell.
+        pub xls_range: ((u32, u32), (u32, u32)),
+    }
+}
+
 /// The beginning and end of the lesson.
 #[derive(Clone, Hash, Debug, Serialize, Deserialize, ToSchema)]
 pub struct LessonBoundaries {
@@ -44,6 +88,12 @@ pub enum LessonType {

    /// Экзамен.
    ExamDefault,
+    
+    /// Курсовой проект.
+    CourseProject,
+    
+    /// Защита курсового проекта.
+    CourseProjectDefense,
 }

 #[derive(Clone, Hash, Debug, Serialize, Deserialize, ToSchema)]
--- a/schedule-parser/src/worksheet.rs
+++ b/schedule-parser/src/worksheet.rs
@@ -0,0 +1,58 @@
+use std::ops::Deref;
+use std::sync::LazyLock;
+use regex::Regex;
+
+/// XLS WorkSheet data.
+pub struct WorkSheet {
+    pub data: calamine::Range<calamine::Data>,
+    pub merges: Vec<calamine::Dimensions>,
+}
+
+impl Deref for WorkSheet {
+    type Target = calamine::Range<calamine::Data>;
+
+    fn deref(&self) -> &Self::Target {
+        &self.data
+    }
+}
+
+impl WorkSheet {
+    /// Getting the text of the required cell with whitespace collapsed; `None` if the cell is missing or blank.
+    pub fn get_string_from_cell(&self, row: u32, col: u32) -> Option<String> {
+        let cell_data = if let Some(data) = self.get((row as usize, col as usize)) {
+            data.to_string()
+        } else {
+            return None;
+        };
+
+        if cell_data.trim().is_empty() {
+            return None;
+        }
+
+        static NL_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"[\n\r]+").unwrap());
+        static SP_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\s+").unwrap());
+
+        let trimmed_data = SP_RE
+            .replace_all(&NL_RE.replace_all(&cell_data, " "), " ")
+            .trim()
+            .to_string();
+
+        if trimmed_data.is_empty() {
+            None
+        } else {
+            Some(trimmed_data)
+        }
+    }
+
+    /// Obtaining the boundaries of the cell along its upper left coordinate.
+    pub fn get_merge_from_start(&self, row: u32, column: u32) -> ((u32, u32), (u32, u32)) {
+        match self
+            .merges
+            .iter()
+            .find(|merge| merge.start.0 == row && merge.start.1 == column)
+        {
+            Some(merge) => (merge.start, (merge.end.0 + 1, merge.end.1 + 1)),
+            None => ((row, column), (row + 1, column + 1)),
+        }
+    }
+}
Author	SHA1	Message	Date
n08i40k	d23092a32a	feat(parser): add lesson types "course project" and "course project defense"	2025-05-27 02:06:13 +04:00
n08i40k	01bfa38969	feat(parser): speed improvement, lesson type guessing and parsing of merged lesson cabinets	2025-05-27 02:03:54 +04:00
n08i40k	851ec9225f	refactor(parser): improve readability	2025-05-26 21:12:23 +04:00