From 80b96865e710327c03992248debbf10c266e2bab Mon Sep 17 00:00:00 2001 From: WorldTeacher Date: Fri, 25 Apr 2025 12:16:14 +0200 Subject: [PATCH] add SemapDocument and Book dataclasses, improve word document parsing --- src/__init__.py | 2 + src/backend/database.py | 4 +- src/logic/wordparser.py | 132 ++++++++++++++++++++++++++++++++++++++-- src/ui/userInterface.py | 68 ++++++++++----------- 4 files changed, 164 insertions(+), 42 deletions(-) diff --git a/src/__init__.py b/src/__init__.py index b1a49c8..517e85c 100644 --- a/src/__init__.py +++ b/src/__init__.py @@ -5,6 +5,8 @@ from loguru import logger as log from datetime import datetime settings = Config("config/config.yaml") +if not os.path.exists(settings.database.temp): + os.mkdir(settings.database.temp) from .utils.icon import Icon __version__ = "0.2.1" diff --git a/src/backend/database.py b/src/backend/database.py index 16c13a9..c8c7ccd 100644 --- a/src/backend/database.py +++ b/src/backend/database.py @@ -181,7 +181,7 @@ class Database: # log_message = f"Querying database with query {query}" if "INTO user" in query: log_message = f"Querying database with query {query}" - logger.debug(f"DB Query: {log_message}") + # logger.debug(f"DB Query: {log_message}") try: cursor.execute(query, args) rv = cursor.fetchall() @@ -487,7 +487,7 @@ class Database: str: The filename of the recreated file """ blob = self.getBlob(filename, app_id) - tempdir = self.database.tempdir + tempdir = self.database.temp tempdir = tempdir.replace("~", str(Path.home())) tempdir_path = Path(tempdir) if not os.path.exists(tempdir_path): diff --git a/src/logic/wordparser.py b/src/logic/wordparser.py index d160ccf..a1f94c2 100644 --- a/src/logic/wordparser.py +++ b/src/logic/wordparser.py @@ -1,12 +1,93 @@ import pandas as pd from docx import Document +from dataclasses import dataclass letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" -def word_docx_to_csv(path) -> pd.DataFrame: +@dataclass +class Book: + author: str = None + year: str = None + edition: str = None + title: str = None + location: str = None + publisher: str = None + signature: str = None + internal_notes: str = None + + @property + def has_signature(self) -> bool: + return self.signature is not None and self.signature != "" + + @property + def is_empty(self) -> bool: + return all( + [ + self.author == "", + self.year == "", + self.edition == "", + self.title == "", + self.location == "", + self.publisher == "", + self.signature == "", + self.internal_notes == "", + ] + ) + + def from_dict(self, data: dict): + for key, value in data.items(): + if value == "\u2002\u2002\u2002\u2002\u2002": + value = "" + + if key == "Autorenname(n):Nachname, Vorname": + self.author = value + elif key == "Jahr/Auflage": + self.year = value.split("/")[0] if "/" in value else value + self.edition = value.split("/")[1] if "/" in value else "" + elif key == "Titel": + self.title = value + elif key == "Ort und Verlag": + self.location = value.split(",")[0] if "," in value else value + self.publisher = value.split(",")[1] if "," in value else "" + elif key == "Standnummer": + self.signature = value + elif key == "Interne Vermerke": + self.internal_notes = value + + +@dataclass +class SemapDocument: + subject: str = None + phoneNumber: int = None + mail: str = None + title: str = None + semester: str = None + books: list[Book] = None + + @property + def renameSemester(self) -> None: + if self.semester is not None: + if "sommersemester" in self.semester.lower(): + year = self.semester.split(" ")[-1] + self.semester = f"SoSe {year}" + elif "wintersemester" in self.semester.lower(): + year = self.semester.split(" ")[-1] + self.semester = f"WiSe {year}" + + @property + def signatures(self) -> list[str]: + if self.books is not None: + return [book.signature for book in self.books if book.has_signature] + return [] + + +def word_docx_to_csv(path: str) -> list[pd.DataFrame]: doc = Document(path) tables = doc.tables + print("Tables: ", len(tables)) + + # print content of all tables m_data = [] for table in tables: @@ -24,8 +105,9 @@ def word_docx_to_csv(path) -> pd.DataFrame: m_data.append(df) - df = m_data[2] - return df + # for df[0, 1]: merge i and i+1 as key, value + + return m_data def makeDict(): @@ -122,6 +204,46 @@ def elsa_word_to_csv(path): return tuple_to_dict(data, doctype), doctype +def word_to_semap(word_path: str) -> SemapDocument: + semap = SemapDocument() + df = word_docx_to_csv(word_path) + apparatdata = df[0] + apparatdata = apparatdata.to_dict() + keys = list(apparatdata.keys()) + appdata = {keys[i]: keys[i + 1] for i in range(0, len(keys), 2)} + print(appdata.keys()) + semap.phoneNumber = appdata["Telefon:"] + semap.subject = appdata["Ihr Fach:"] + semap.mail = appdata["Mailadresse:"] + apparatdata = df[1] + apparatdata = apparatdata.to_dict() + keys = list(apparatdata.keys()) + appdata = {keys[i]: keys[i + 1] for i in range(0, len(keys), 2)} + semap.title = appdata["Veranstaltung:"] + semap.semester = appdata["Semester:"] + semap.renameSemester + books = df[2] + booklist = [] + for i in range(len(books)): + if books.iloc[i].isnull().all(): + continue + data = books.iloc[i].to_dict() + book = Book() + book.from_dict(data) + if book.is_empty: + continue + elif not book.has_signature: + continue + else: + booklist.append(book) + + semap.books = booklist + + return semap + + if __name__ == "__main__": - else_df = elsa_word_to_csv("C:/Users/aky547/Desktop/Antrag ELSA Schweitzer.docx") - # print(else_df) + else_df = word_to_semap( + "C:/Users/aky547/Desktop/SA 80 titelmeldung_SoSe2025 Burth.docx" + ) + print(else_df) diff --git a/src/ui/userInterface.py b/src/ui/userInterface.py index 68b61d3..e818390 100644 --- a/src/ui/userInterface.py +++ b/src/ui/userInterface.py @@ -25,6 +25,7 @@ from src.logic import ( BookData, csv_to_list, word_docx_to_csv, + word_to_semap, Prof, Apparat, ) @@ -52,6 +53,7 @@ from src.ui.widgets import ( ) from src.utils import SemesterDocument + valid_input = (0, 0, 0, 0, 0, 0) @@ -763,7 +765,6 @@ class Ui(Ui_Semesterapparat): return def check_availability(self): - def _update_progress(current, all_titles): self.avail_status.setText("{}/{}".format(current, all_titles)) @@ -911,7 +912,7 @@ class Ui(Ui_Semesterapparat): ).setToolTip("Das Medium wurde nicht im Apparat gefunden") # make table link clickable - #self.tableWidget_apparat_media.itemClicked.connect(self.open_link) + # self.tableWidget_apparat_media.itemClicked.connect(self.open_link) # self.tableWidget_apparat_media. def open_link(self, item): @@ -931,7 +932,7 @@ class Ui(Ui_Semesterapparat): link = __openLink(item.text()) if link is not None: webbrowser.open(link) - #os.system("start " + link) + # os.system("start " + link) return else: pass @@ -1004,11 +1005,11 @@ class Ui(Ui_Semesterapparat): app_id = self.active_apparat prof_id = self.db.getProfId(self.profdata) - def __open_dialog(signatures): + def __open_dialog(signatures: list[str]): dialog = QtWidgets.QDialog() frame = parsed_titles_ui() frame.setupUi(dialog) - dialog.show() + dialogger.show() frame.signatures = signatures frame.populate_table() frame.progressBar.setMaximum(len(signatures)) @@ -1069,12 +1070,8 @@ class Ui(Ui_Semesterapparat): bookdata=book, app_id=app_id, prof_id=prof_id ) if file_type == "docx": - data = word_docx_to_csv(file) - signatures = [ - i - for i in data["Standnummer"].values - if i != "\u2002\u2002\u2002\u2002\u2002" - ] + data = word_to_semap(file) + signatures = data.signatures data = __open_dialog(signatures) # if no data was returned, return if data == []: @@ -1105,21 +1102,20 @@ class Ui(Ui_Semesterapparat): # if app_id not in database, create apparat created = False if not self.db.checkApparatExistsById(app_id): + logger.info("Apparat does not exist, creating new apparat") # create apparat # #print("Creating apparat") if not self.btn_save_apparat(False): return created = True if self.document_list.rowCount() == 0: - # #print("No file selected") + logger.info("No file selected") self.tableWidget_apparate.setEnabled(True) self.tableWidget_apparate.setToolTip("") return else: # if file is selected, check for books in the file # #print("File selected") - file = self.document_list.item(self.document_list.currentRow(), 3).text() - file_type = self.document_list.item( self.document_list.currentRow(), 1 ).text() @@ -1129,8 +1125,10 @@ class Ui(Ui_Semesterapparat): file_name = self.document_list.item( self.document_list.currentRow(), 0 ).text() + logger.info("File selected: {}, {}", file_name, file_location) if file_location == "Database": file = recreateFile(file_name, app_id, file_type, open=False) + logger.debug("recreated file from database") else: if not created: self.add_files(prof_id) @@ -1144,13 +1142,11 @@ class Ui(Ui_Semesterapparat): signatures = csv_to_list(file) # add the data to the database if file_type == "docx": - data = word_docx_to_csv(file) - signatures = [ - i - for i in data["Standnummer"].values - if i != "\u2002\u2002\u2002\u2002\u2002" - ] - + data = word_to_semap(file) + logger.info("Converted data from semap file") + logger.debug("Got the data: {}", data) + signatures = data.signatures + logger.info("Got the signatures: {}", signatures) signatures = [i for i in signatures if i != ""] # logger.debug(signatures) # #print("starting thread") @@ -1377,7 +1373,7 @@ class Ui(Ui_Semesterapparat): reminder.exec() tableposition = self.tableWidget_apparate.currentRow() appnr = self.tableWidget_apparate.item(tableposition, 0).text() - if reminder.result() == QtWidgets.QDialog.DialogCode.Accepted: + if reminder.result() == QtWidgets.QDialogger.DialogCode.Accepted: data = reminder.return_message() # #print(data) self.db.addMessage( @@ -1407,14 +1403,16 @@ class Ui(Ui_Semesterapparat): dialog = CalendarEntry(messages=messages, date=selected_date) # append dialog to self.frame_2 self.calendarlayout.addWidget(dialog) - dialog.repaintSignal.connect(lambda: self.calendarWidget.reload(selected_date)) + dialogger.repaintSignal.connect( + lambda: self.calendarWidget.reload(selected_date) + ) def open_settings(self): # print(settings.dict()) settingsUI = Settings(self.active_user) settingsUI.exec() - if settingsUI.result() == QtWidgets.QDialog.DialogCode.Accepted: + if settingsUI.result() == QtWidgets.QDialogger.DialogCode.Accepted: settingsUI.save() # print(settings.dict()) @@ -1504,7 +1502,7 @@ class Ui(Ui_Semesterapparat): self.confirm_popup("Keine weiteren Apparate vorhanden", title="Fehler") return (None, None) dialog = QtWidgets.QDialog() - dialog.setWindowTitle(title) + dialogger.setWindowTitle(title) # add a label to the dialog label = QtWidgets.QLabel() label.setText(message) @@ -1523,12 +1521,12 @@ class Ui(Ui_Semesterapparat): cancel_button = QtWidgets.QPushButton("Abbrechen") layout.addWidget(okay_button) layout.addWidget(cancel_button) - okay_button.clicked.connect(dialog.accept) - cancel_button.clicked.connect(dialog.reject) + okay_button.clicked.connect(dialogger.accept) + cancel_button.clicked.connect(dialogger.reject) - dialog.setLayout(layout) + dialogger.setLayout(layout) - return dialog.exec(), self.db.getApparatId( + return dialogger.exec(), self.db.getApparatId( self.db.getApparatNameByAppNr(drpdwn.currentText()) ) @@ -1567,7 +1565,7 @@ class Ui(Ui_Semesterapparat): widget.setWindowTitle("Metadaten") bookedit.populate_fields(data) widget.exec() - if widget.result() == QtWidgets.QDialog.DialogCode.Accepted: + if widget.result() == QtWidgets.QDialogger.DialogCode.Accepted: data = bookedit.get_data() # #print(data) self.db.updateBookdata(bookdata=data, book_id=book_id) @@ -1626,7 +1624,7 @@ class Ui(Ui_Semesterapparat): framework = ApparatExtendDialog() framework.exec() # return data from dialog if ok is pressed - if framework.result() == QtWidgets.QDialog.DialogCode.Accepted: + if framework.result() == QtWidgets.QDialogger.DialogCode.Accepted: data = framework.get_data() # #print(data) # return data @@ -1750,10 +1748,10 @@ def launch_gui(): elif ui.lresult == 0: warning_dialog = QtWidgets.QMessageBox() - warning_dialog.setIcon(QtWidgets.QMessageBox.Icon.Warning) - warning_dialog.setText("Invalid username or password. Please try again.") - warning_dialog.setWindowTitle("Login Failed") - warning_dialog.exec() + warning_dialogger.setIcon(QtWidgets.QMessageBox.Icon.Warning) + warning_dialogger.setText("Invalid username or password. Please try again.") + warning_dialogger.setWindowTitle("Login Failed") + warning_dialogger.exec() atexit.register(tempdelete)