add SemapDocument and Book dataclasses, improve word document parsing
This commit is contained in:
@@ -5,6 +5,8 @@ from loguru import logger as log
|
||||
from datetime import datetime
|
||||
|
||||
settings = Config("config/config.yaml")

# Make sure the configured temp directory exists before anything uses it.
# ``makedirs(..., exist_ok=True)`` also creates missing parent directories and
# removes the check-then-create race of ``exists()`` followed by ``mkdir()``.
# A leading "~" is expanded first: the database code substitutes the home
# directory for "~" in this same setting, so the value may be home-relative.
os.makedirs(os.path.expanduser(settings.database.temp), exist_ok=True)
|
||||
from .utils.icon import Icon
|
||||
|
||||
__version__ = "0.2.1"
|
||||
|
||||
@@ -181,7 +181,7 @@ class Database:
|
||||
# log_message = f"Querying database with query {query}"
|
||||
if "INTO user" in query:
|
||||
log_message = f"Querying database with query {query}"
|
||||
logger.debug(f"DB Query: {log_message}")
|
||||
# logger.debug(f"DB Query: {log_message}")
|
||||
try:
|
||||
cursor.execute(query, args)
|
||||
rv = cursor.fetchall()
|
||||
@@ -487,7 +487,7 @@ class Database:
|
||||
str: The filename of the recreated file
|
||||
"""
|
||||
blob = self.getBlob(filename, app_id)
|
||||
tempdir = self.database.tempdir
|
||||
tempdir = self.database.temp
|
||||
tempdir = tempdir.replace("~", str(Path.home()))
|
||||
tempdir_path = Path(tempdir)
|
||||
if not os.path.exists(tempdir_path):
|
||||
|
||||
@@ -1,12 +1,93 @@
|
||||
import pandas as pd
|
||||
from docx import Document
|
||||
from dataclasses import dataclass
|
||||
|
||||
# ASCII letters, upper-case A-Z followed by lower-case a-z.
letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
|
||||
|
||||
|
||||
def word_docx_to_csv(path) -> pd.DataFrame:
|
||||
@dataclass
class Book:
    """One row of the book table of a Semesterapparat request form.

    Every field holds the text taken from the form. A field is ``None`` until
    it has been populated and ``""`` when the corresponding cell was blank.
    """

    author: str = None
    year: str = None
    edition: str = None
    title: str = None
    location: str = None
    publisher: str = None
    signature: str = None
    internal_notes: str = None

    @property
    def has_signature(self) -> bool:
        """Return True when a non-empty signature is set."""
        return self.signature is not None and self.signature != ""

    @property
    def is_empty(self) -> bool:
        """Return True when no field carries any content.

        Both ``None`` (never populated) and ``""`` (blank cell) count as
        empty, so a freshly created ``Book()`` and a book built from a row
        that lacks some columns are classified correctly.
        """
        values = (
            self.author,
            self.year,
            self.edition,
            self.title,
            self.location,
            self.publisher,
            self.signature,
            self.internal_notes,
        )
        return all(value is None or value == "" for value in values)

    @staticmethod
    def _split_pair(value: str, separator: str) -> tuple[str, str]:
        """Split a combined cell into its first two parts, whitespace-trimmed.

        Returns ``(value, "")`` when the separator does not occur. Parts after
        the second one are ignored.
        """
        parts = value.split(separator)
        first = parts[0].strip()
        second = parts[1].strip() if len(parts) > 1 else ""
        return first, second

    def from_dict(self, data: dict) -> None:
        """Populate the book in place from a mapping of column header to text.

        Only the German column headers handled below are recognised; any other
        key is ignored. Combined cells are split: "Jahr/Auflage" into year and
        edition, "Ort und Verlag" into location and publisher.

        Args:
            data: Mapping of form column header to the cell's text.
        """
        for key, value in data.items():
            # Untouched form fields consist only of en spaces (U+2002). Treat
            # those, any other whitespace-only text and a missing value as
            # blank so they never count as content.
            if value is None or (isinstance(value, str) and not value.strip()):
                value = ""

            if key == "Autorenname(n):Nachname, Vorname":
                self.author = value
            elif key == "Jahr/Auflage":
                self.year, self.edition = self._split_pair(value, "/")
            elif key == "Titel":
                self.title = value
            elif key == "Ort und Verlag":
                # Trimming matters here: "Berlin, Springer" would otherwise
                # produce the publisher " Springer" with a leading space.
                self.location, self.publisher = self._split_pair(value, ",")
            elif key == "Standnummer":
                self.signature = value
            elif key == "Interne Vermerke":
                self.internal_notes = value
|
||||
|
||||
|
||||
@dataclass
class SemapDocument:
    """Data extracted from a Semesterapparat request document.

    All fields default to ``None`` until they are filled in by a parser.
    """

    subject: str = None
    # NOTE(review): annotated as int, but the parser in this file assigns the
    # raw text read from the document - confirm which type is intended.
    phoneNumber: int = None
    mail: str = None
    title: str = None
    semester: str = None
    # Quoted so the annotation is not evaluated when the class is created.
    books: "list[Book]" = None

    def rename_semester(self) -> None:
        """Abbreviate the semester name in place.

        "Sommersemester <year>" becomes "SoSe <year>" and
        "Wintersemester <year>" becomes "WiSe <year>" (matched
        case-insensitively). Any other value, including ``None``, is left
        untouched. The year is the last whitespace-separated token, so
        surrounding or repeated whitespace does not produce an empty year.
        """
        if self.semester is None:
            return

        lowered = self.semester.lower()
        if "sommersemester" in lowered:
            prefix = "SoSe"
        elif "wintersemester" in lowered:
            prefix = "WiSe"
        else:
            return

        # The text contains a semester keyword, so there is at least one token.
        year = self.semester.split()[-1]
        self.semester = f"{prefix} {year}"

    @property
    def renameSemester(self) -> None:
        """Legacy attribute-style trigger for :meth:`rename_semester`.

        Existing code performs the rename by merely accessing this attribute,
        so it has to remain a property. New code should call
        ``rename_semester()`` instead.
        """
        self.rename_semester()

    @property
    def signatures(self) -> list[str]:
        """Return the signatures of all books that have one.

        Returns an empty list when no books have been set.
        """
        if self.books is None:
            return []
        return [book.signature for book in self.books if book.has_signature]
|
||||
|
||||
|
||||
def word_docx_to_csv(path: str) -> list[pd.DataFrame]:
|
||||
doc = Document(path)
|
||||
tables = doc.tables
|
||||
print("Tables: ", len(tables))
|
||||
|
||||
# print content of all tables
|
||||
|
||||
m_data = []
|
||||
for table in tables:
|
||||
@@ -24,8 +105,9 @@ def word_docx_to_csv(path) -> pd.DataFrame:
|
||||
|
||||
m_data.append(df)
|
||||
|
||||
df = m_data[2]
|
||||
return df
|
||||
# for df[0, 1]: merge i and i+1 as key, value
|
||||
|
||||
return m_data
|
||||
|
||||
|
||||
def makeDict():
|
||||
@@ -122,6 +204,46 @@ def elsa_word_to_csv(path):
|
||||
return tuple_to_dict(data, doctype), doctype
|
||||
|
||||
|
||||
def _header_pairs(frame) -> dict:
    """Map each even-positioned column label of *frame* to the label after it.

    The form tables carry their data in the header row as alternating
    "label, value" cells. The labels are read from ``frame.columns`` rather
    than ``frame.to_dict()`` because a dict silently drops duplicate labels
    (e.g. two blank values), which would shift every following pair. A
    trailing label without a partner is ignored instead of raising.
    """
    labels = list(frame.columns)
    return dict(zip(labels[::2], labels[1::2]))


def word_to_semap(word_path: str) -> SemapDocument:
    """Parse a Semesterapparat request form (.docx) into a SemapDocument.

    The tables returned by ``word_docx_to_csv`` are read by position: table 0
    holds the contact data, table 1 the course data and table 2 one book per
    row. Only books that have a signature are kept.

    Args:
        word_path: Path of the Word document to parse.

    Returns:
        The populated SemapDocument.

    Raises:
        IndexError: If the document yields fewer than three tables.
        KeyError: If one of the expected form labels is missing.
    """
    frames = word_docx_to_csv(word_path)
    if len(frames) < 3:
        raise IndexError(
            f"expected at least 3 tables in {word_path!r}, found {len(frames)}"
        )

    semap = SemapDocument()

    contact = _header_pairs(frames[0])
    semap.phoneNumber = contact["Telefon:"]
    semap.subject = contact["Ihr Fach:"]
    semap.mail = contact["Mailadresse:"]

    course = _header_pairs(frames[1])
    semap.title = course["Veranstaltung:"]
    semap.semester = course["Semester:"]
    # ``renameSemester`` is a property whose access abbreviates the semester
    # name in place; the returned value carries no information.
    _ = semap.renameSemester

    collected = []
    for _, row in frames[2].iterrows():
        if row.isnull().all():
            continue
        book = Book()
        book.from_dict(row.to_dict())
        # An empty book never has a signature, so this single check also
        # filters out rows without any content.
        if book.has_signature:
            collected.append(book)
    semap.books = collected

    return semap
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Manual smoke test: parse one ELSA request and one Semesterapparat form
    # from fixed local paths and print the parsed Semesterapparat document.
    elsa_path = "C:/Users/aky547/Desktop/Antrag ELSA Schweitzer.docx"
    semap_path = "C:/Users/aky547/Desktop/SA 80 titelmeldung_SoSe2025 Burth.docx"

    # The ELSA result is not printed; the call only exercises the parser.
    elsa_result = elsa_word_to_csv(elsa_path)

    semap_result = word_to_semap(semap_path)
    print(semap_result)
|
||||
|
||||
@@ -25,6 +25,7 @@ from src.logic import (
|
||||
BookData,
|
||||
csv_to_list,
|
||||
word_docx_to_csv,
|
||||
word_to_semap,
|
||||
Prof,
|
||||
Apparat,
|
||||
)
|
||||
@@ -52,6 +53,7 @@ from src.ui.widgets import (
|
||||
)
|
||||
from src.utils import SemesterDocument
|
||||
|
||||
|
||||
valid_input = (0, 0, 0, 0, 0, 0)
|
||||
|
||||
|
||||
@@ -763,7 +765,6 @@ class Ui(Ui_Semesterapparat):
|
||||
return
|
||||
|
||||
def check_availability(self):
|
||||
|
||||
def _update_progress(current, all_titles):
|
||||
self.avail_status.setText("{}/{}".format(current, all_titles))
|
||||
|
||||
@@ -911,7 +912,7 @@ class Ui(Ui_Semesterapparat):
|
||||
).setToolTip("Das Medium wurde nicht im Apparat gefunden")
|
||||
|
||||
# make table link clickable
|
||||
#self.tableWidget_apparat_media.itemClicked.connect(self.open_link)
|
||||
# self.tableWidget_apparat_media.itemClicked.connect(self.open_link)
|
||||
# self.tableWidget_apparat_media.
|
||||
|
||||
def open_link(self, item):
|
||||
@@ -931,7 +932,7 @@ class Ui(Ui_Semesterapparat):
|
||||
link = __openLink(item.text())
|
||||
if link is not None:
|
||||
webbrowser.open(link)
|
||||
#os.system("start " + link)
|
||||
# os.system("start " + link)
|
||||
return
|
||||
else:
|
||||
pass
|
||||
@@ -1004,11 +1005,11 @@ class Ui(Ui_Semesterapparat):
|
||||
app_id = self.active_apparat
|
||||
prof_id = self.db.getProfId(self.profdata)
|
||||
|
||||
def __open_dialog(signatures):
|
||||
def __open_dialog(signatures: list[str]):
|
||||
dialog = QtWidgets.QDialog()
|
||||
frame = parsed_titles_ui()
|
||||
frame.setupUi(dialog)
|
||||
dialog.show()
|
||||
dialogger.show()
|
||||
frame.signatures = signatures
|
||||
frame.populate_table()
|
||||
frame.progressBar.setMaximum(len(signatures))
|
||||
@@ -1069,12 +1070,8 @@ class Ui(Ui_Semesterapparat):
|
||||
bookdata=book, app_id=app_id, prof_id=prof_id
|
||||
)
|
||||
if file_type == "docx":
|
||||
data = word_docx_to_csv(file)
|
||||
signatures = [
|
||||
i
|
||||
for i in data["Standnummer"].values
|
||||
if i != "\u2002\u2002\u2002\u2002\u2002"
|
||||
]
|
||||
data = word_to_semap(file)
|
||||
signatures = data.signatures
|
||||
data = __open_dialog(signatures)
|
||||
# if no data was returned, return
|
||||
if data == []:
|
||||
@@ -1105,21 +1102,20 @@ class Ui(Ui_Semesterapparat):
|
||||
# if app_id not in database, create apparat
|
||||
created = False
|
||||
if not self.db.checkApparatExistsById(app_id):
|
||||
logger.info("Apparat does not exist, creating new apparat")
|
||||
# create apparat
|
||||
# #print("Creating apparat")
|
||||
if not self.btn_save_apparat(False):
|
||||
return
|
||||
created = True
|
||||
if self.document_list.rowCount() == 0:
|
||||
# #print("No file selected")
|
||||
logger.info("No file selected")
|
||||
self.tableWidget_apparate.setEnabled(True)
|
||||
self.tableWidget_apparate.setToolTip("")
|
||||
return
|
||||
else:
|
||||
# if file is selected, check for books in the file
|
||||
# #print("File selected")
|
||||
file = self.document_list.item(self.document_list.currentRow(), 3).text()
|
||||
|
||||
file_type = self.document_list.item(
|
||||
self.document_list.currentRow(), 1
|
||||
).text()
|
||||
@@ -1129,8 +1125,10 @@ class Ui(Ui_Semesterapparat):
|
||||
file_name = self.document_list.item(
|
||||
self.document_list.currentRow(), 0
|
||||
).text()
|
||||
logger.info("File selected: {}, {}", file_name, file_location)
|
||||
if file_location == "Database":
|
||||
file = recreateFile(file_name, app_id, file_type, open=False)
|
||||
logger.debug("recreated file from database")
|
||||
else:
|
||||
if not created:
|
||||
self.add_files(prof_id)
|
||||
@@ -1144,13 +1142,11 @@ class Ui(Ui_Semesterapparat):
|
||||
signatures = csv_to_list(file)
|
||||
# add the data to the database
|
||||
if file_type == "docx":
|
||||
data = word_docx_to_csv(file)
|
||||
signatures = [
|
||||
i
|
||||
for i in data["Standnummer"].values
|
||||
if i != "\u2002\u2002\u2002\u2002\u2002"
|
||||
]
|
||||
|
||||
data = word_to_semap(file)
|
||||
logger.info("Converted data from semap file")
|
||||
logger.debug("Got the data: {}", data)
|
||||
signatures = data.signatures
|
||||
logger.info("Got the signatures: {}", signatures)
|
||||
signatures = [i for i in signatures if i != ""]
|
||||
# logger.debug(signatures)
|
||||
# #print("starting thread")
|
||||
@@ -1377,7 +1373,7 @@ class Ui(Ui_Semesterapparat):
|
||||
reminder.exec()
|
||||
tableposition = self.tableWidget_apparate.currentRow()
|
||||
appnr = self.tableWidget_apparate.item(tableposition, 0).text()
|
||||
if reminder.result() == QtWidgets.QDialog.DialogCode.Accepted:
|
||||
if reminder.result() == QtWidgets.QDialogger.DialogCode.Accepted:
|
||||
data = reminder.return_message()
|
||||
# #print(data)
|
||||
self.db.addMessage(
|
||||
@@ -1407,14 +1403,16 @@ class Ui(Ui_Semesterapparat):
|
||||
dialog = CalendarEntry(messages=messages, date=selected_date)
|
||||
# append dialog to self.frame_2
|
||||
self.calendarlayout.addWidget(dialog)
|
||||
dialog.repaintSignal.connect(lambda: self.calendarWidget.reload(selected_date))
|
||||
dialogger.repaintSignal.connect(
|
||||
lambda: self.calendarWidget.reload(selected_date)
|
||||
)
|
||||
|
||||
def open_settings(self):
|
||||
# print(settings.dict())
|
||||
settingsUI = Settings(self.active_user)
|
||||
settingsUI.exec()
|
||||
|
||||
if settingsUI.result() == QtWidgets.QDialog.DialogCode.Accepted:
|
||||
if settingsUI.result() == QtWidgets.QDialogger.DialogCode.Accepted:
|
||||
settingsUI.save()
|
||||
# print(settings.dict())
|
||||
|
||||
@@ -1504,7 +1502,7 @@ class Ui(Ui_Semesterapparat):
|
||||
self.confirm_popup("Keine weiteren Apparate vorhanden", title="Fehler")
|
||||
return (None, None)
|
||||
dialog = QtWidgets.QDialog()
|
||||
dialog.setWindowTitle(title)
|
||||
dialogger.setWindowTitle(title)
|
||||
# add a label to the dialog
|
||||
label = QtWidgets.QLabel()
|
||||
label.setText(message)
|
||||
@@ -1523,12 +1521,12 @@ class Ui(Ui_Semesterapparat):
|
||||
cancel_button = QtWidgets.QPushButton("Abbrechen")
|
||||
layout.addWidget(okay_button)
|
||||
layout.addWidget(cancel_button)
|
||||
okay_button.clicked.connect(dialog.accept)
|
||||
cancel_button.clicked.connect(dialog.reject)
|
||||
okay_button.clicked.connect(dialogger.accept)
|
||||
cancel_button.clicked.connect(dialogger.reject)
|
||||
|
||||
dialog.setLayout(layout)
|
||||
dialogger.setLayout(layout)
|
||||
|
||||
return dialog.exec(), self.db.getApparatId(
|
||||
return dialogger.exec(), self.db.getApparatId(
|
||||
self.db.getApparatNameByAppNr(drpdwn.currentText())
|
||||
)
|
||||
|
||||
@@ -1567,7 +1565,7 @@ class Ui(Ui_Semesterapparat):
|
||||
widget.setWindowTitle("Metadaten")
|
||||
bookedit.populate_fields(data)
|
||||
widget.exec()
|
||||
if widget.result() == QtWidgets.QDialog.DialogCode.Accepted:
|
||||
if widget.result() == QtWidgets.QDialogger.DialogCode.Accepted:
|
||||
data = bookedit.get_data()
|
||||
# #print(data)
|
||||
self.db.updateBookdata(bookdata=data, book_id=book_id)
|
||||
@@ -1626,7 +1624,7 @@ class Ui(Ui_Semesterapparat):
|
||||
framework = ApparatExtendDialog()
|
||||
framework.exec()
|
||||
# return data from dialog if ok is pressed
|
||||
if framework.result() == QtWidgets.QDialog.DialogCode.Accepted:
|
||||
if framework.result() == QtWidgets.QDialogger.DialogCode.Accepted:
|
||||
data = framework.get_data()
|
||||
# #print(data)
|
||||
# return data
|
||||
@@ -1750,10 +1748,10 @@ def launch_gui():
|
||||
|
||||
elif ui.lresult == 0:
|
||||
warning_dialog = QtWidgets.QMessageBox()
|
||||
warning_dialog.setIcon(QtWidgets.QMessageBox.Icon.Warning)
|
||||
warning_dialog.setText("Invalid username or password. Please try again.")
|
||||
warning_dialog.setWindowTitle("Login Failed")
|
||||
warning_dialog.exec()
|
||||
warning_dialogger.setIcon(QtWidgets.QMessageBox.Icon.Warning)
|
||||
warning_dialogger.setText("Invalid username or password. Please try again.")
|
||||
warning_dialogger.setWindowTitle("Login Failed")
|
||||
warning_dialogger.exec()
|
||||
atexit.register(tempdelete)
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user