add logger

This commit is contained in:
2025-04-28 10:18:07 +02:00
parent b4c6169649
commit 7abe3d8cc0

View File

@@ -1,6 +1,22 @@
import pandas as pd
from docx import Document
from dataclasses import dataclass
import sys
from loguru import logger as log
# Module-wide loguru logger. ``log`` is the name bound by the import above and
# ``logger`` is the name the functions in this module call; both refer to the
# same object, so all configuration below goes through ``logger`` only.
logger = log

# Remove every handler registered so far so that only the sinks configured
# below receive records.
# NOTE(review): this runs at import time and acts on the process-wide loguru
# logger, not just this module -- confirm that importing this file is meant
# to reset logging for the whole application.
logger.remove()

# Parser log file, rotated weekly.
logger.add("logs/wordparser.log", rotation="1 week", enqueue=True)

# Application log file, rotated daily; rotated files are zip-compressed.
logger.add(
    "logs/application.log",
    rotation="1 day",
    compression="zip",
    enqueue=True,
)

# Console sink.
logger.add(sys.stdout)

# ASCII letters: uppercase A-Z followed by lowercase a-z.
letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
@@ -85,10 +101,6 @@ class SemapDocument:
def word_docx_to_csv(path: str) -> list[pd.DataFrame]:
doc = Document(path)
tables = doc.tables
print("Tables: ", len(tables))
# print content of all tables
m_data = []
for table in tables:
data = []
@@ -205,13 +217,13 @@ def elsa_word_to_csv(path):
def word_to_semap(word_path: str) -> SemapDocument:
logger.info("Parsing Word Document {}", word_path)
semap = SemapDocument()
df = word_docx_to_csv(word_path)
apparatdata = df[0]
apparatdata = apparatdata.to_dict()
keys = list(apparatdata.keys())
appdata = {keys[i]: keys[i + 1] for i in range(0, len(keys), 2)}
print(appdata.keys())
semap.phoneNumber = appdata["Telefon:"]
semap.subject = appdata["Ihr Fach:"]
semap.mail = appdata["Mailadresse:"]
@@ -236,7 +248,7 @@ def word_to_semap(word_path: str) -> SemapDocument:
continue
else:
booklist.append(book)
logger.info("Found {} books", len(booklist))
semap.books = booklist
return semap
@@ -246,4 +258,3 @@ if __name__ == "__main__":
else_df = word_to_semap(
"C:/Users/aky547/Desktop/SA 80 titelmeldung_SoSe2025 Burth.docx"
)
print(else_df)