add SemapDocument and Book dataclasses, improve word document parsing

This commit is contained in:
2025-04-25 12:16:14 +02:00
parent da0e9e0725
commit 80b96865e7
4 changed files with 164 additions and 42 deletions

View File

@@ -5,6 +5,8 @@ from loguru import logger as log
from datetime import datetime
settings = Config("config/config.yaml")
if not os.path.exists(settings.database.temp):
os.mkdir(settings.database.temp)
from .utils.icon import Icon
__version__ = "0.2.1"

View File

@@ -181,7 +181,7 @@ class Database:
# log_message = f"Querying database with query {query}"
if "INTO user" in query:
log_message = f"Querying database with query {query}"
logger.debug(f"DB Query: {log_message}")
# logger.debug(f"DB Query: {log_message}")
try:
cursor.execute(query, args)
rv = cursor.fetchall()
@@ -487,7 +487,7 @@ class Database:
str: The filename of the recreated file
"""
blob = self.getBlob(filename, app_id)
tempdir = self.database.tempdir
tempdir = self.database.temp
tempdir = tempdir.replace("~", str(Path.home()))
tempdir_path = Path(tempdir)
if not os.path.exists(tempdir_path):

View File

@@ -1,12 +1,93 @@
import pandas as pd
from docx import Document
from dataclasses import dataclass
letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
def word_docx_to_csv(path) -> pd.DataFrame:
@dataclass
class Book:
    """One title row taken from the book table of a request document.

    Every field defaults to ``None`` (never populated). ``from_dict`` stores
    ``""`` for cells that were present but blank.
    """

    author: str = None
    year: str = None
    edition: str = None
    title: str = None
    location: str = None
    publisher: str = None
    signature: str = None
    internal_notes: str = None

    @property
    def has_signature(self) -> bool:
        """Return True when a non-empty signature is set."""
        return self.signature is not None and self.signature != ""

    @property
    def is_empty(self) -> bool:
        """Return True when no field carries a value.

        Both ``None`` (field never set) and ``""`` (blank cell) count as
        empty, so a freshly constructed ``Book()`` is empty as well.
        """
        fields = (
            self.author,
            self.year,
            self.edition,
            self.title,
            self.location,
            self.publisher,
            self.signature,
            self.internal_notes,
        )
        return all(value is None or value == "" for value in fields)

    @staticmethod
    def _clean(value) -> str:
        """Normalise a raw table cell to a string.

        ``None``, float NaN and whitespace-only text (including the
        en-space placeholder ``"\\u2002" * 5`` used for blank form fields)
        become ``""``. Any other non-string value is converted with ``str``.
        """
        # NaN is the only float that is unequal to itself.
        if value is None or (isinstance(value, float) and value != value):
            return ""
        text = value if isinstance(value, str) else str(value)
        # str.strip() also removes Unicode spaces such as U+2002.
        return text if text.strip() else ""

    def from_dict(self, data: dict) -> None:
        """Populate the fields from a mapping keyed by the form's column labels.

        Unknown keys are ignored. ``"Jahr/Auflage"`` is split at the first
        ``/`` into year and edition, ``"Ort und Verlag"`` at the first ``,``
        into location and publisher; the parts are stripped of surrounding
        whitespace and the second part is ``""`` when the separator is absent.

        Args:
            data: Mapping of column label to cell value.
        """
        for key, raw in data.items():
            value = self._clean(raw)
            if key == "Autorenname(n):Nachname, Vorname":
                self.author = value
            elif key == "Jahr/Auflage":
                year, _, edition = value.partition("/")
                self.year = year.strip()
                self.edition = edition.strip()
            elif key == "Titel":
                self.title = value
            elif key == "Ort und Verlag":
                location, _, publisher = value.partition(",")
                self.location = location.strip()
                self.publisher = publisher.strip()
            elif key == "Standnummer":
                self.signature = value
            elif key == "Interne Vermerke":
                self.internal_notes = value
@dataclass
class SemapDocument:
    """Data read from a request document: contact details, course and books."""

    subject: str = None
    # NOTE(review): annotated as int, but the parser in this module assigns
    # the raw value read from the document — confirm the intended type.
    phoneNumber: int = None
    mail: str = None
    title: str = None
    semester: str = None
    # Quoted so the annotation is not evaluated when the class is created.
    books: "list[Book]" = None

    @property
    def renameSemester(self) -> None:
        """Shorten ``semester`` in place to ``"SoSe <year>"`` / ``"WiSe <year>"``.

        This is a property with a side effect: merely accessing
        ``doc.renameSemester`` rewrites ``doc.semester``. The year is the last
        whitespace-separated token of the current value. Values that are not
        strings, are blank, or mention neither "sommersemester" nor
        "wintersemester" (case-insensitive) are left untouched.
        """
        if not isinstance(self.semester, str):
            return
        # split() without an argument ignores leading/trailing/repeated
        # whitespace, so "Sommersemester 2025 " still yields "2025".
        tokens = self.semester.split()
        if not tokens:
            return
        year = tokens[-1]
        lowered = self.semester.lower()
        if "sommersemester" in lowered:
            self.semester = f"SoSe {year}"
        elif "wintersemester" in lowered:
            self.semester = f"WiSe {year}"

    @property
    def signatures(self) -> list[str]:
        """Return the signatures of all books that have one (``[]`` if no books)."""
        if self.books is None:
            return []
        return [book.signature for book in self.books if book.has_signature]
def word_docx_to_csv(path: str) -> list[pd.DataFrame]:
doc = Document(path)
tables = doc.tables
print("Tables: ", len(tables))
# print content of all tables
m_data = []
for table in tables:
@@ -24,8 +105,9 @@ def word_docx_to_csv(path) -> pd.DataFrame:
m_data.append(df)
df = m_data[2]
return df
# for df[0, 1]: merge i and i+1 as key, value
return m_data
def makeDict():
@@ -122,6 +204,46 @@ def elsa_word_to_csv(path):
return tuple_to_dict(data, doctype), doctype
def _label_value_pairs(table: pd.DataFrame) -> dict:
    """Pair consecutive column labels of *table* as ``{label: value}``.

    Presumably the header tables are parsed so that form labels and their
    values alternate in the column labels (label, value, label, value, ...)
    — verify against ``word_docx_to_csv``.
    """
    keys = list(table.to_dict())
    # Stop one short of the end so a trailing label without a partner
    # cannot raise IndexError.
    return {keys[i]: keys[i + 1] for i in range(0, len(keys) - 1, 2)}


def word_to_semap(word_path: str) -> SemapDocument:
    """Parse a Word request document into a ``SemapDocument``.

    The first table supplies phone number, subject and mail address, the
    second the course title and semester, the third the list of books. Only
    books that carry a signature are kept.

    Args:
        word_path: Path of the ``.docx`` file to read.

    Returns:
        The populated ``SemapDocument``.

    Raises:
        IndexError: If the document contains fewer than three tables.
        KeyError: If one of the expected form labels is missing.
    """
    tables = word_docx_to_csv(word_path)
    if len(tables) < 3:
        raise IndexError(
            f"expected at least 3 tables in {word_path!r}, found {len(tables)}"
        )

    semap = SemapDocument()

    contact = _label_value_pairs(tables[0])
    semap.phoneNumber = contact["Telefon:"]
    semap.subject = contact["Ihr Fach:"]
    semap.mail = contact["Mailadresse:"]

    course = _label_value_pairs(tables[1])
    semap.title = course["Veranstaltung:"]
    semap.semester = course["Semester:"]
    # renameSemester is a property whose getter rewrites semap.semester in
    # place; the bare attribute access is what triggers it.
    semap.renameSemester

    booklist = []
    for _, row in tables[2].iterrows():
        if row.isnull().all():
            continue
        book = Book()
        book.from_dict(row.to_dict())
        if book.is_empty or not book.has_signature:
            continue
        booklist.append(book)
    semap.books = booklist
    return semap
if __name__ == "__main__":
    # Manual smoke test against two sample documents on the developer's
    # machine: parse the ELSA request (result not shown), then parse the
    # Semesterapparat request and print the resulting document.
    elsa_sample = "C:/Users/aky547/Desktop/Antrag ELSA Schweitzer.docx"
    semap_sample = "C:/Users/aky547/Desktop/SA 80 titelmeldung_SoSe2025 Burth.docx"

    elsa_word_to_csv(elsa_sample)
    parsed_semap = word_to_semap(semap_sample)
    print(parsed_semap)

View File

@@ -25,6 +25,7 @@ from src.logic import (
BookData,
csv_to_list,
word_docx_to_csv,
word_to_semap,
Prof,
Apparat,
)
@@ -52,6 +53,7 @@ from src.ui.widgets import (
)
from src.utils import SemesterDocument
valid_input = (0, 0, 0, 0, 0, 0)
@@ -763,7 +765,6 @@ class Ui(Ui_Semesterapparat):
return
def check_availability(self):
def _update_progress(current, all_titles):
self.avail_status.setText("{}/{}".format(current, all_titles))
@@ -911,7 +912,7 @@ class Ui(Ui_Semesterapparat):
).setToolTip("Das Medium wurde nicht im Apparat gefunden")
# make table link clickable
#self.tableWidget_apparat_media.itemClicked.connect(self.open_link)
# self.tableWidget_apparat_media.itemClicked.connect(self.open_link)
# self.tableWidget_apparat_media.
def open_link(self, item):
@@ -931,7 +932,7 @@ class Ui(Ui_Semesterapparat):
link = __openLink(item.text())
if link is not None:
webbrowser.open(link)
#os.system("start " + link)
# os.system("start " + link)
return
else:
pass
@@ -1004,11 +1005,11 @@ class Ui(Ui_Semesterapparat):
app_id = self.active_apparat
prof_id = self.db.getProfId(self.profdata)
def __open_dialog(signatures):
def __open_dialog(signatures: list[str]):
dialog = QtWidgets.QDialog()
frame = parsed_titles_ui()
frame.setupUi(dialog)
dialog.show()
dialogger.show()
frame.signatures = signatures
frame.populate_table()
frame.progressBar.setMaximum(len(signatures))
@@ -1069,12 +1070,8 @@ class Ui(Ui_Semesterapparat):
bookdata=book, app_id=app_id, prof_id=prof_id
)
if file_type == "docx":
data = word_docx_to_csv(file)
signatures = [
i
for i in data["Standnummer"].values
if i != "\u2002\u2002\u2002\u2002\u2002"
]
data = word_to_semap(file)
signatures = data.signatures
data = __open_dialog(signatures)
# if no data was returned, return
if data == []:
@@ -1105,21 +1102,20 @@ class Ui(Ui_Semesterapparat):
# if app_id not in database, create apparat
created = False
if not self.db.checkApparatExistsById(app_id):
logger.info("Apparat does not exist, creating new apparat")
# create apparat
# #print("Creating apparat")
if not self.btn_save_apparat(False):
return
created = True
if self.document_list.rowCount() == 0:
# #print("No file selected")
logger.info("No file selected")
self.tableWidget_apparate.setEnabled(True)
self.tableWidget_apparate.setToolTip("")
return
else:
# if file is selected, check for books in the file
# #print("File selected")
file = self.document_list.item(self.document_list.currentRow(), 3).text()
file_type = self.document_list.item(
self.document_list.currentRow(), 1
).text()
@@ -1129,8 +1125,10 @@ class Ui(Ui_Semesterapparat):
file_name = self.document_list.item(
self.document_list.currentRow(), 0
).text()
logger.info("File selected: {}, {}", file_name, file_location)
if file_location == "Database":
file = recreateFile(file_name, app_id, file_type, open=False)
logger.debug("recreated file from database")
else:
if not created:
self.add_files(prof_id)
@@ -1144,13 +1142,11 @@ class Ui(Ui_Semesterapparat):
signatures = csv_to_list(file)
# add the data to the database
if file_type == "docx":
data = word_docx_to_csv(file)
signatures = [
i
for i in data["Standnummer"].values
if i != "\u2002\u2002\u2002\u2002\u2002"
]
data = word_to_semap(file)
logger.info("Converted data from semap file")
logger.debug("Got the data: {}", data)
signatures = data.signatures
logger.info("Got the signatures: {}", signatures)
signatures = [i for i in signatures if i != ""]
# logger.debug(signatures)
# #print("starting thread")
@@ -1377,7 +1373,7 @@ class Ui(Ui_Semesterapparat):
reminder.exec()
tableposition = self.tableWidget_apparate.currentRow()
appnr = self.tableWidget_apparate.item(tableposition, 0).text()
if reminder.result() == QtWidgets.QDialog.DialogCode.Accepted:
if reminder.result() == QtWidgets.QDialogger.DialogCode.Accepted:
data = reminder.return_message()
# #print(data)
self.db.addMessage(
@@ -1407,14 +1403,16 @@ class Ui(Ui_Semesterapparat):
dialog = CalendarEntry(messages=messages, date=selected_date)
# append dialog to self.frame_2
self.calendarlayout.addWidget(dialog)
dialog.repaintSignal.connect(lambda: self.calendarWidget.reload(selected_date))
dialogger.repaintSignal.connect(
lambda: self.calendarWidget.reload(selected_date)
)
def open_settings(self):
# print(settings.dict())
settingsUI = Settings(self.active_user)
settingsUI.exec()
if settingsUI.result() == QtWidgets.QDialog.DialogCode.Accepted:
if settingsUI.result() == QtWidgets.QDialogger.DialogCode.Accepted:
settingsUI.save()
# print(settings.dict())
@@ -1504,7 +1502,7 @@ class Ui(Ui_Semesterapparat):
self.confirm_popup("Keine weiteren Apparate vorhanden", title="Fehler")
return (None, None)
dialog = QtWidgets.QDialog()
dialog.setWindowTitle(title)
dialogger.setWindowTitle(title)
# add a label to the dialog
label = QtWidgets.QLabel()
label.setText(message)
@@ -1523,12 +1521,12 @@ class Ui(Ui_Semesterapparat):
cancel_button = QtWidgets.QPushButton("Abbrechen")
layout.addWidget(okay_button)
layout.addWidget(cancel_button)
okay_button.clicked.connect(dialog.accept)
cancel_button.clicked.connect(dialog.reject)
okay_button.clicked.connect(dialogger.accept)
cancel_button.clicked.connect(dialogger.reject)
dialog.setLayout(layout)
dialogger.setLayout(layout)
return dialog.exec(), self.db.getApparatId(
return dialogger.exec(), self.db.getApparatId(
self.db.getApparatNameByAppNr(drpdwn.currentText())
)
@@ -1567,7 +1565,7 @@ class Ui(Ui_Semesterapparat):
widget.setWindowTitle("Metadaten")
bookedit.populate_fields(data)
widget.exec()
if widget.result() == QtWidgets.QDialog.DialogCode.Accepted:
if widget.result() == QtWidgets.QDialogger.DialogCode.Accepted:
data = bookedit.get_data()
# #print(data)
self.db.updateBookdata(bookdata=data, book_id=book_id)
@@ -1626,7 +1624,7 @@ class Ui(Ui_Semesterapparat):
framework = ApparatExtendDialog()
framework.exec()
# return data from dialog if ok is pressed
if framework.result() == QtWidgets.QDialog.DialogCode.Accepted:
if framework.result() == QtWidgets.QDialogger.DialogCode.Accepted:
data = framework.get_data()
# #print(data)
# return data
@@ -1750,10 +1748,10 @@ def launch_gui():
elif ui.lresult == 0:
warning_dialog = QtWidgets.QMessageBox()
warning_dialog.setIcon(QtWidgets.QMessageBox.Icon.Warning)
warning_dialog.setText("Invalid username or password. Please try again.")
warning_dialog.setWindowTitle("Login Failed")
warning_dialog.exec()
warning_dialogger.setIcon(QtWidgets.QMessageBox.Icon.Warning)
warning_dialogger.setText("Invalid username or password. Please try again.")
warning_dialogger.setWindowTitle("Login Failed")
warning_dialogger.exec()
atexit.register(tempdelete)