From af53b0310f3e180b62bd62b709c32850be87b209 Mon Sep 17 00:00:00 2001 From: WorldTeacher Date: Mon, 1 Sep 2025 14:32:05 +0200 Subject: [PATCH] refactor: update import statement and enhance word_to_semap function with AI parameter --- src/logic/wordparser.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/src/logic/wordparser.py b/src/logic/wordparser.py index e3741c9..4cba286 100644 --- a/src/logic/wordparser.py +++ b/src/logic/wordparser.py @@ -9,7 +9,7 @@ from bs4 import BeautifulSoup from docx import Document from src import LOG_DIR -from src.backend import Semester +from src.backend.semester import Semester from src.logic.openai import name_tester, run_shortener, semester_converter log = loguru.logger @@ -18,7 +18,6 @@ log.add(sys.stdout, level="INFO") log.add(f"{LOG_DIR}/application.log", rotation="1 MB", retention="10 days") - letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" @@ -111,6 +110,7 @@ class SemapDocument: else: self.title_suggestions = [] pass + @property def renameSemester(self) -> None: if ", Dauer" in self.semester: @@ -141,8 +141,8 @@ def word_docx_to_csv(path: str) -> list[pd.DataFrame]: text = text.replace("\n", "") row_data.append(text) - if text == "Ihr Fach:": - row_data.append(get_fach(path)) + # if text == "Ihr Fach:": + # row_data.append(get_fach(path)) data.append(row_data) df = pd.DataFrame(data) df.columns = df.iloc[0] @@ -265,7 +265,7 @@ def elsa_word_to_csv(path: str): return tuple_to_dict(data, doctype), doctype -def word_to_semap(word_path: str) -> SemapDocument: +def word_to_semap(word_path: str, ai: bool = True) -> SemapDocument: log.info("Parsing Word Document {}", word_path) semap = SemapDocument() df = word_docx_to_csv(word_path) @@ -286,8 +286,9 @@ def word_to_semap(word_path: str) -> SemapDocument: appdata = {keys[i]: keys[i + 1] for i in range(0, len(keys), 2)} semap.title = appdata["Veranstaltung:"] semap.semester = appdata["Semester:"] - semap.renameSemester - semap.nameSetter + if ai: + semap.renameSemester + semap.nameSetter books = df[2] booklist = [] @@ -309,7 +310,5 @@ def word_to_semap(word_path: str) -> SemapDocument: if __name__ == "__main__": - else_df = elsa_word_to_csv( - "C:/Users/aky547/Desktop/ELSA_Bestellung Scann Der Westen und der Rest.docx" - ) + else_df = word_to_semap("C:/Users/aky547/Desktop/semap/db/temp/tmpzsz_hgdr.docx") print(else_df)