Update logging, update docuprint, and add new UI for generating documents

This commit is contained in:
2025-05-09 11:57:18 +02:00
parent f7ea6f5d34
commit 468e8674ab
16 changed files with 843 additions and 187 deletions

View File

@@ -4,10 +4,11 @@ from bs4 import BeautifulSoup
# import sleep_and_retry decorator to retry requests
from ratelimit import limits, sleep_and_retry
from typing import Union, Any
from typing import Union, Any, Literal, Optional
from src.logic.dataclass import BookData
from src.transformers import ARRAYData, BibTeXData, COinSData, RDSData, RISData
from src.transformers.transformers import RDS_AVAIL_DATA, RDS_GENERIC_DATA
import sys
from loguru import logger as log
@@ -105,6 +106,7 @@ class WebRequest:
def get_data(self) -> Union[list[str], None]:
links = self.get_book_links(self.ppn)
logger.debug(f"Links: {links}")
return_data: list[str] = []
for link in links:
result: str = self.search(link) # type:ignore
# in result search for class col-xs-12 rds-dl RDS_LOCATION
@@ -116,9 +118,9 @@ class WebRequest:
item_location = location.find(
"div", class_="col-xs-12 col-md-7 col-lg-8 rds-dl-panel"
).text.strip()
logger.debug(f"Item location: {item_location}")
if self.use_any:
pre_tag = soup.find_all("pre")
return_data: list[str] = []
if pre_tag:
for tag in pre_tag:
data = tag.text.strip()
@@ -142,9 +144,9 @@ class WebRequest:
logger.error(
f"Signature {self.signature} not found in {item_location}"
)
return_data = []
# return_data = []
return return_data
return return_data
def get_data_elsa(self):
links = self.get_book_links(self.ppn)
@@ -225,7 +227,15 @@ class BibTextTransformer:
self.data = line
return self
def return_data(self, option: Any = None) -> Union[BookData, None]:
def return_data(
self, option: Any = None
) -> Union[
Optional[BookData],
Optional[RDS_GENERIC_DATA],
Optional[RDS_AVAIL_DATA],
None,
dict[str, Union[RDS_AVAIL_DATA, RDS_GENERIC_DATA]],
]:
"""Return Data to caller.
Args:

View File

@@ -3,6 +3,8 @@ from docx import Document
from dataclasses import dataclass
import sys
from loguru import logger as log
from src.backend import Semester
from typing import Union, Any
logger = log
logger.remove()
@@ -51,8 +53,9 @@ class Book:
]
)
def from_dict(self, data: dict):
def from_dict(self, data: dict[str, Any]):
for key, value in data.items():
value = value.strip()
if value == "\u2002\u2002\u2002\u2002\u2002":
value = ""
@@ -78,18 +81,21 @@ class SemapDocument:
phoneNumber: int = None
mail: str = None
title: str = None
semester: str = None
semester: Union[str, Semester] = None
books: list[Book] = None
eternal: bool = False
personName: str = None
personTitle: str = None
@property
def renameSemester(self) -> None:
if self.semester is not None:
if "sommersemester" in self.semester.lower():
year = self.semester.split(" ")[-1]
self.semester = f"SoSe {year}"
elif "wintersemester" in self.semester.lower():
year = self.semester.split(" ")[-1]
self.semester = f"WiSe {year}"
if ", Dauer" in self.semester:
self.semester = self.semester.split(",")[0]
self.eternal = True
self.semester = Semester().from_string(self.semester)
else:
logger.warning("Semester {} is not valid", self.semester)
self.semester = None
@property
def signatures(self) -> list[str]:
@@ -181,7 +187,7 @@ def tuple_to_dict(tlist: tuple, type: str) -> dict:
return ret
def elsa_word_to_csv(path):
def elsa_word_to_csv(path: str):
doc = Document(path)
# # print all lines in doc
doctype = [para.text for para in doc.paragraphs if para.text != ""][-1]
@@ -192,18 +198,18 @@ def elsa_word_to_csv(path):
}
tables = doc.tables
m_data = []
m_data: list[pd.DataFrame] = []
for table in tables:
data = []
data: list[list[str]] = []
for row in table.rows:
row_data = []
row_data: list[str] = []
for cell in row.cells:
text = cell.text
text = text.replace("\n", "")
text = text.replace("\u2002", "")
row_data.append(text)
data.append(row_data)
df = pd.DataFrame(data)
df = pd.DataFrame(data)
df.columns = df.iloc[0]
df = df.iloc[1:]
m_data.append(df)
@@ -222,11 +228,15 @@ def word_to_semap(word_path: str) -> SemapDocument:
df = word_docx_to_csv(word_path)
apparatdata = df[0]
apparatdata = apparatdata.to_dict()
keys = list(apparatdata.keys())
appdata = {keys[i]: keys[i + 1] for i in range(0, len(keys), 2)}
semap.phoneNumber = appdata["Telefon:"]
semap.subject = appdata["Ihr Fach:"]
semap.mail = appdata["Mailadresse:"]
semap.personName = ",".join(appdata["Ihr Name und Titel:"].split(",")[:-1])
semap.personTitle = ",".join(appdata["Ihr Name und Titel:"].split(",")[-1:]).strip()
apparatdata = df[1]
apparatdata = apparatdata.to_dict()
keys = list(apparatdata.keys())
@@ -255,6 +265,7 @@ def word_to_semap(word_path: str) -> SemapDocument:
if __name__ == "__main__":
else_df = word_to_semap(
"C:/Users/aky547/Desktop/SA 80 titelmeldung_SoSe2025 Burth.docx"
else_df = elsa_word_to_csv(
"C:/Users/aky547/Desktop/ELSA_Bestellung Scann Der Westen und der Rest.docx"
)
print(else_df)