add SemapDocument and Book dataclasses, improve word document parsing

This commit is contained in:
2025-04-25 12:16:14 +02:00
parent da0e9e0725
commit 80b96865e7
4 changed files with 164 additions and 42 deletions

View File

@@ -5,6 +5,8 @@ from loguru import logger as log
from datetime import datetime from datetime import datetime
settings = Config("config/config.yaml") settings = Config("config/config.yaml")
if not os.path.exists(settings.database.temp):
os.mkdir(settings.database.temp)
from .utils.icon import Icon from .utils.icon import Icon
__version__ = "0.2.1" __version__ = "0.2.1"

View File

@@ -181,7 +181,7 @@ class Database:
# log_message = f"Querying database with query {query}" # log_message = f"Querying database with query {query}"
if "INTO user" in query: if "INTO user" in query:
log_message = f"Querying database with query {query}" log_message = f"Querying database with query {query}"
logger.debug(f"DB Query: {log_message}") # logger.debug(f"DB Query: {log_message}")
try: try:
cursor.execute(query, args) cursor.execute(query, args)
rv = cursor.fetchall() rv = cursor.fetchall()
@@ -487,7 +487,7 @@ class Database:
str: The filename of the recreated file str: The filename of the recreated file
""" """
blob = self.getBlob(filename, app_id) blob = self.getBlob(filename, app_id)
tempdir = self.database.tempdir tempdir = self.database.temp
tempdir = tempdir.replace("~", str(Path.home())) tempdir = tempdir.replace("~", str(Path.home()))
tempdir_path = Path(tempdir) tempdir_path = Path(tempdir)
if not os.path.exists(tempdir_path): if not os.path.exists(tempdir_path):

View File

@@ -1,12 +1,93 @@
import pandas as pd import pandas as pd
from docx import Document from docx import Document
from dataclasses import dataclass
letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
def word_docx_to_csv(path) -> pd.DataFrame: @dataclass
class Book:
    """One literature entry taken from a row of the Word form's book table.

    All fields start out as ``None`` and are filled by ``from_dict``.
    """

    author: str = None
    year: str = None
    edition: str = None
    title: str = None
    location: str = None
    publisher: str = None
    signature: str = None
    internal_notes: str = None

    @staticmethod
    def _clean(value) -> str:
        """Return ``value`` as trimmed text; missing values become ``""``.

        ``str.strip`` also removes the en-space run (``"\\u2002" * 5``) that
        marks an unfilled form field, so that placeholder collapses to ``""``.
        """
        if value is None:
            return ""
        # NaN is the only float that differs from itself; it marks a null cell.
        if isinstance(value, float) and value != value:
            return ""
        return str(value).strip()

    @property
    def has_signature(self) -> bool:
        """Whether a non-empty signature is set."""
        return self.signature is not None and self.signature != ""

    @property
    def is_empty(self) -> bool:
        """Whether no field holds a value.

        Both ``None`` (never set) and ``""`` (set but blank) count as empty,
        so a book built from a partial row is still recognised as empty.
        """
        fields = (
            self.author,
            self.year,
            self.edition,
            self.title,
            self.location,
            self.publisher,
            self.signature,
            self.internal_notes,
        )
        return all(value is None or value == "" for value in fields)

    def from_dict(self, data: dict):
        """Fill the fields from a mapping keyed by the form's column headers.

        Unknown keys are ignored. Values are trimmed; non-text and missing
        values are converted instead of raising.

        Args:
            data: Mapping of column header to cell content for one table row.
        """
        for key, raw in data.items():
            value = self._clean(raw)
            if key == "Autorenname(n):Nachname, Vorname":
                self.author = value
            elif key == "Jahr/Auflage":
                # "year/edition"; without a slash the whole text is the year.
                year, _, edition = value.partition("/")
                self.year = year.strip()
                self.edition = edition.strip()
            elif key == "Titel":
                self.title = value
            elif key == "Ort und Verlag":
                # "location, publisher"; text after the first comma is kept
                # whole so publisher names containing commas are not cut off.
                location, _, publisher = value.partition(",")
                self.location = location.strip()
                self.publisher = publisher.strip()
            elif key == "Standnummer":
                self.signature = value
            elif key == "Interne Vermerke":
                self.internal_notes = value
@dataclass
class SemapDocument:
    """Contents of a semester-apparatus Word form: contact, course and books."""

    subject: str = None
    # NOTE(review): word_to_semap stores the raw table text here, so the value
    # is presumably a string rather than an int; confirm before relying on it.
    phoneNumber: int = None
    mail: str = None
    title: str = None
    semester: str = None
    # Forward reference keeps this class importable on its own.
    books: list["Book"] = None

    @property
    def renameSemester(self) -> None:
        """Shorten ``semester`` in place, e.g. "Sommersemester 2025" -> "SoSe 2025".

        This stays a property because callers trigger it by plain attribute
        access (``semap.renameSemester``). It always evaluates to ``None``.
        Text naming neither semester type is left unchanged.
        """
        if not self.semester:
            return
        lowered = self.semester.lower()
        if "sommersemester" in lowered:
            prefix = "SoSe"
        elif "wintersemester" in lowered:
            prefix = "WiSe"
        else:
            return
        # Split on any whitespace so trailing blanks or tabs cannot produce an
        # empty "year" part; the last token is taken as the year.
        year = self.semester.split()[-1]
        self.semester = f"{prefix} {year}"

    @property
    def signatures(self) -> list[str]:
        """Signatures of all books that have one; ``[]`` when no books are set."""
        if self.books is None:
            return []
        return [book.signature for book in self.books if book.has_signature]
def word_docx_to_csv(path: str) -> list[pd.DataFrame]:
doc = Document(path) doc = Document(path)
tables = doc.tables tables = doc.tables
print("Tables: ", len(tables))
# print content of all tables
m_data = [] m_data = []
for table in tables: for table in tables:
@@ -24,8 +105,9 @@ def word_docx_to_csv(path) -> pd.DataFrame:
m_data.append(df) m_data.append(df)
df = m_data[2] # for df[0, 1]: merge i and i+1 as key, value
return df
return m_data
def makeDict(): def makeDict():
@@ -122,6 +204,46 @@ def elsa_word_to_csv(path):
return tuple_to_dict(data, doctype), doctype return tuple_to_dict(data, doctype), doctype
def _header_pairs(table: pd.DataFrame) -> dict:
    """Pair a form table's column labels as ``{label: value}``.

    The labels alternate between a field name and its value. They are read
    from ``table.columns`` rather than ``to_dict()`` because a dict collapses
    duplicate labels (e.g. two blank values) and would shift the pairing.
    A trailing label without a partner is ignored.
    """
    labels = list(table.columns)
    return dict(zip(labels[::2], labels[1::2]))


def word_to_semap(word_path: str) -> SemapDocument:
    """Parse a semester-apparatus Word form into a ``SemapDocument``.

    The first table supplies the contact data, the second the course data and
    the third the book rows. Only books with a signature are kept.

    Args:
        word_path: Path of the ``.docx`` file to read.

    Returns:
        The populated document, with ``semester`` already abbreviated.

    Raises:
        IndexError: If the document yields fewer than three tables.
        KeyError: If an expected field label is missing from a header table.
    """
    tables = word_docx_to_csv(word_path)
    semap = SemapDocument()

    contact = _header_pairs(tables[0])
    semap.phoneNumber = contact["Telefon:"]
    semap.subject = contact["Ihr Fach:"]
    semap.mail = contact["Mailadresse:"]

    course = _header_pairs(tables[1])
    semap.title = course["Veranstaltung:"]
    semap.semester = course["Semester:"]
    # Property access performs the in-place abbreviation of the semester.
    semap.renameSemester

    booklist = []
    for _, row in tables[2].iterrows():
        if row.isnull().all():
            continue
        book = Book()
        book.from_dict(row.to_dict())
        # A book without a signature is dropped; this also covers empty rows.
        if book.has_signature:
            booklist.append(book)
    semap.books = booklist
    return semap
if __name__ == "__main__": if __name__ == "__main__":
else_df = elsa_word_to_csv("C:/Users/aky547/Desktop/Antrag ELSA Schweitzer.docx") else_df = word_to_semap(
# print(else_df) "C:/Users/aky547/Desktop/SA 80 titelmeldung_SoSe2025 Burth.docx"
)
print(else_df)

View File

@@ -25,6 +25,7 @@ from src.logic import (
BookData, BookData,
csv_to_list, csv_to_list,
word_docx_to_csv, word_docx_to_csv,
word_to_semap,
Prof, Prof,
Apparat, Apparat,
) )
@@ -52,6 +53,7 @@ from src.ui.widgets import (
) )
from src.utils import SemesterDocument from src.utils import SemesterDocument
valid_input = (0, 0, 0, 0, 0, 0) valid_input = (0, 0, 0, 0, 0, 0)
@@ -763,7 +765,6 @@ class Ui(Ui_Semesterapparat):
return return
def check_availability(self): def check_availability(self):
def _update_progress(current, all_titles): def _update_progress(current, all_titles):
self.avail_status.setText("{}/{}".format(current, all_titles)) self.avail_status.setText("{}/{}".format(current, all_titles))
@@ -911,7 +912,7 @@ class Ui(Ui_Semesterapparat):
).setToolTip("Das Medium wurde nicht im Apparat gefunden") ).setToolTip("Das Medium wurde nicht im Apparat gefunden")
# make table link clickable # make table link clickable
#self.tableWidget_apparat_media.itemClicked.connect(self.open_link) # self.tableWidget_apparat_media.itemClicked.connect(self.open_link)
# self.tableWidget_apparat_media. # self.tableWidget_apparat_media.
def open_link(self, item): def open_link(self, item):
@@ -931,7 +932,7 @@ class Ui(Ui_Semesterapparat):
link = __openLink(item.text()) link = __openLink(item.text())
if link is not None: if link is not None:
webbrowser.open(link) webbrowser.open(link)
#os.system("start " + link) # os.system("start " + link)
return return
else: else:
pass pass
@@ -1004,11 +1005,11 @@ class Ui(Ui_Semesterapparat):
app_id = self.active_apparat app_id = self.active_apparat
prof_id = self.db.getProfId(self.profdata) prof_id = self.db.getProfId(self.profdata)
def __open_dialog(signatures): def __open_dialog(signatures: list[str]):
dialog = QtWidgets.QDialog() dialog = QtWidgets.QDialog()
frame = parsed_titles_ui() frame = parsed_titles_ui()
frame.setupUi(dialog) frame.setupUi(dialog)
dialog.show() dialogger.show()
frame.signatures = signatures frame.signatures = signatures
frame.populate_table() frame.populate_table()
frame.progressBar.setMaximum(len(signatures)) frame.progressBar.setMaximum(len(signatures))
@@ -1069,12 +1070,8 @@ class Ui(Ui_Semesterapparat):
bookdata=book, app_id=app_id, prof_id=prof_id bookdata=book, app_id=app_id, prof_id=prof_id
) )
if file_type == "docx": if file_type == "docx":
data = word_docx_to_csv(file) data = word_to_semap(file)
signatures = [ signatures = data.signatures
i
for i in data["Standnummer"].values
if i != "\u2002\u2002\u2002\u2002\u2002"
]
data = __open_dialog(signatures) data = __open_dialog(signatures)
# if no data was returned, return # if no data was returned, return
if data == []: if data == []:
@@ -1105,21 +1102,20 @@ class Ui(Ui_Semesterapparat):
# if app_id not in database, create apparat # if app_id not in database, create apparat
created = False created = False
if not self.db.checkApparatExistsById(app_id): if not self.db.checkApparatExistsById(app_id):
logger.info("Apparat does not exist, creating new apparat")
# create apparat # create apparat
# #print("Creating apparat") # #print("Creating apparat")
if not self.btn_save_apparat(False): if not self.btn_save_apparat(False):
return return
created = True created = True
if self.document_list.rowCount() == 0: if self.document_list.rowCount() == 0:
# #print("No file selected") logger.info("No file selected")
self.tableWidget_apparate.setEnabled(True) self.tableWidget_apparate.setEnabled(True)
self.tableWidget_apparate.setToolTip("") self.tableWidget_apparate.setToolTip("")
return return
else: else:
# if file is selected, check for books in the file # if file is selected, check for books in the file
# #print("File selected") # #print("File selected")
file = self.document_list.item(self.document_list.currentRow(), 3).text()
file_type = self.document_list.item( file_type = self.document_list.item(
self.document_list.currentRow(), 1 self.document_list.currentRow(), 1
).text() ).text()
@@ -1129,8 +1125,10 @@ class Ui(Ui_Semesterapparat):
file_name = self.document_list.item( file_name = self.document_list.item(
self.document_list.currentRow(), 0 self.document_list.currentRow(), 0
).text() ).text()
logger.info("File selected: {}, {}", file_name, file_location)
if file_location == "Database": if file_location == "Database":
file = recreateFile(file_name, app_id, file_type, open=False) file = recreateFile(file_name, app_id, file_type, open=False)
logger.debug("recreated file from database")
else: else:
if not created: if not created:
self.add_files(prof_id) self.add_files(prof_id)
@@ -1144,13 +1142,11 @@ class Ui(Ui_Semesterapparat):
signatures = csv_to_list(file) signatures = csv_to_list(file)
# add the data to the database # add the data to the database
if file_type == "docx": if file_type == "docx":
data = word_docx_to_csv(file) data = word_to_semap(file)
signatures = [ logger.info("Converted data from semap file")
i logger.debug("Got the data: {}", data)
for i in data["Standnummer"].values signatures = data.signatures
if i != "\u2002\u2002\u2002\u2002\u2002" logger.info("Got the signatures: {}", signatures)
]
signatures = [i for i in signatures if i != ""] signatures = [i for i in signatures if i != ""]
# logger.debug(signatures) # logger.debug(signatures)
# #print("starting thread") # #print("starting thread")
@@ -1377,7 +1373,7 @@ class Ui(Ui_Semesterapparat):
reminder.exec() reminder.exec()
tableposition = self.tableWidget_apparate.currentRow() tableposition = self.tableWidget_apparate.currentRow()
appnr = self.tableWidget_apparate.item(tableposition, 0).text() appnr = self.tableWidget_apparate.item(tableposition, 0).text()
if reminder.result() == QtWidgets.QDialog.DialogCode.Accepted: if reminder.result() == QtWidgets.QDialogger.DialogCode.Accepted:
data = reminder.return_message() data = reminder.return_message()
# #print(data) # #print(data)
self.db.addMessage( self.db.addMessage(
@@ -1407,14 +1403,16 @@ class Ui(Ui_Semesterapparat):
dialog = CalendarEntry(messages=messages, date=selected_date) dialog = CalendarEntry(messages=messages, date=selected_date)
# append dialog to self.frame_2 # append dialog to self.frame_2
self.calendarlayout.addWidget(dialog) self.calendarlayout.addWidget(dialog)
dialog.repaintSignal.connect(lambda: self.calendarWidget.reload(selected_date)) dialogger.repaintSignal.connect(
lambda: self.calendarWidget.reload(selected_date)
)
def open_settings(self): def open_settings(self):
# print(settings.dict()) # print(settings.dict())
settingsUI = Settings(self.active_user) settingsUI = Settings(self.active_user)
settingsUI.exec() settingsUI.exec()
if settingsUI.result() == QtWidgets.QDialog.DialogCode.Accepted: if settingsUI.result() == QtWidgets.QDialogger.DialogCode.Accepted:
settingsUI.save() settingsUI.save()
# print(settings.dict()) # print(settings.dict())
@@ -1504,7 +1502,7 @@ class Ui(Ui_Semesterapparat):
self.confirm_popup("Keine weiteren Apparate vorhanden", title="Fehler") self.confirm_popup("Keine weiteren Apparate vorhanden", title="Fehler")
return (None, None) return (None, None)
dialog = QtWidgets.QDialog() dialog = QtWidgets.QDialog()
dialog.setWindowTitle(title) dialogger.setWindowTitle(title)
# add a label to the dialog # add a label to the dialog
label = QtWidgets.QLabel() label = QtWidgets.QLabel()
label.setText(message) label.setText(message)
@@ -1523,12 +1521,12 @@ class Ui(Ui_Semesterapparat):
cancel_button = QtWidgets.QPushButton("Abbrechen") cancel_button = QtWidgets.QPushButton("Abbrechen")
layout.addWidget(okay_button) layout.addWidget(okay_button)
layout.addWidget(cancel_button) layout.addWidget(cancel_button)
okay_button.clicked.connect(dialog.accept) okay_button.clicked.connect(dialogger.accept)
cancel_button.clicked.connect(dialog.reject) cancel_button.clicked.connect(dialogger.reject)
dialog.setLayout(layout) dialogger.setLayout(layout)
return dialog.exec(), self.db.getApparatId( return dialogger.exec(), self.db.getApparatId(
self.db.getApparatNameByAppNr(drpdwn.currentText()) self.db.getApparatNameByAppNr(drpdwn.currentText())
) )
@@ -1567,7 +1565,7 @@ class Ui(Ui_Semesterapparat):
widget.setWindowTitle("Metadaten") widget.setWindowTitle("Metadaten")
bookedit.populate_fields(data) bookedit.populate_fields(data)
widget.exec() widget.exec()
if widget.result() == QtWidgets.QDialog.DialogCode.Accepted: if widget.result() == QtWidgets.QDialogger.DialogCode.Accepted:
data = bookedit.get_data() data = bookedit.get_data()
# #print(data) # #print(data)
self.db.updateBookdata(bookdata=data, book_id=book_id) self.db.updateBookdata(bookdata=data, book_id=book_id)
@@ -1626,7 +1624,7 @@ class Ui(Ui_Semesterapparat):
framework = ApparatExtendDialog() framework = ApparatExtendDialog()
framework.exec() framework.exec()
# return data from dialog if ok is pressed # return data from dialog if ok is pressed
if framework.result() == QtWidgets.QDialog.DialogCode.Accepted: if framework.result() == QtWidgets.QDialogger.DialogCode.Accepted:
data = framework.get_data() data = framework.get_data()
# #print(data) # #print(data)
# return data # return data
@@ -1750,10 +1748,10 @@ def launch_gui():
elif ui.lresult == 0: elif ui.lresult == 0:
warning_dialog = QtWidgets.QMessageBox() warning_dialog = QtWidgets.QMessageBox()
warning_dialog.setIcon(QtWidgets.QMessageBox.Icon.Warning) warning_dialogger.setIcon(QtWidgets.QMessageBox.Icon.Warning)
warning_dialog.setText("Invalid username or password. Please try again.") warning_dialogger.setText("Invalid username or password. Please try again.")
warning_dialog.setWindowTitle("Login Failed") warning_dialogger.setWindowTitle("Login Failed")
warning_dialog.exec() warning_dialogger.exec()
atexit.register(tempdelete) atexit.register(tempdelete)