add logger

This commit is contained in:
2025-04-28 10:18:07 +02:00
parent b4c6169649
commit 7abe3d8cc0

View File

@@ -1,6 +1,22 @@
import pandas as pd import pandas as pd
from docx import Document from docx import Document
from dataclasses import dataclass from dataclasses import dataclass
import sys
from loguru import logger as log
# Logging setup (loguru). `logger` and `log` are two names bound to the same
# logger object, so every sink registered below receives every record
# regardless of which name is used to emit it.
logger = log
# Clear the handlers registered so far so that only the sinks added below
# are active (per the loguru docs this includes the default stderr handler).
logger.remove()
# Parser-specific log file, rotated weekly.
logger.add("logs/wordparser.log", rotation="1 week", enqueue=True)
# Application-wide log file, rotated daily; rotated files are zip-compressed.
logger.add(
    "logs/application.log",
    rotation="1 day",
    compression="zip",
    enqueue=True,
)
# Also echo log records to standard output.
logger.add(sys.stdout)
letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
@@ -85,10 +101,6 @@ class SemapDocument:
def word_docx_to_csv(path: str) -> list[pd.DataFrame]: def word_docx_to_csv(path: str) -> list[pd.DataFrame]:
doc = Document(path) doc = Document(path)
tables = doc.tables tables = doc.tables
print("Tables: ", len(tables))
# print content of all tables
m_data = [] m_data = []
for table in tables: for table in tables:
data = [] data = []
@@ -205,13 +217,13 @@ def elsa_word_to_csv(path):
def word_to_semap(word_path: str) -> SemapDocument: def word_to_semap(word_path: str) -> SemapDocument:
logger.info("Parsing Word Document {}", word_path)
semap = SemapDocument() semap = SemapDocument()
df = word_docx_to_csv(word_path) df = word_docx_to_csv(word_path)
apparatdata = df[0] apparatdata = df[0]
apparatdata = apparatdata.to_dict() apparatdata = apparatdata.to_dict()
keys = list(apparatdata.keys()) keys = list(apparatdata.keys())
appdata = {keys[i]: keys[i + 1] for i in range(0, len(keys), 2)} appdata = {keys[i]: keys[i + 1] for i in range(0, len(keys), 2)}
print(appdata.keys())
semap.phoneNumber = appdata["Telefon:"] semap.phoneNumber = appdata["Telefon:"]
semap.subject = appdata["Ihr Fach:"] semap.subject = appdata["Ihr Fach:"]
semap.mail = appdata["Mailadresse:"] semap.mail = appdata["Mailadresse:"]
@@ -236,7 +248,7 @@ def word_to_semap(word_path: str) -> SemapDocument:
continue continue
else: else:
booklist.append(book) booklist.append(book)
logger.info("Found {} books", len(booklist))
semap.books = booklist semap.books = booklist
return semap return semap
@@ -246,4 +258,3 @@ if __name__ == "__main__":
else_df = word_to_semap( else_df = word_to_semap(
"C:/Users/aky547/Desktop/SA 80 titelmeldung_SoSe2025 Burth.docx" "C:/Users/aky547/Desktop/SA 80 titelmeldung_SoSe2025 Burth.docx"
) )
print(else_df)