add logger
This commit is contained in:
@@ -1,6 +1,22 @@
|
|||||||
import pandas as pd
|
import pandas as pd
|
||||||
from docx import Document
|
from docx import Document
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
|
import sys
|
||||||
|
from loguru import logger as log
|
||||||
|
|
||||||
|
logger = log
|
||||||
|
logger.remove()
|
||||||
|
logger.add("logs/wordparser.log", rotation="1 week", enqueue=True)
|
||||||
|
log.add(
|
||||||
|
f"logs/application.log",
|
||||||
|
rotation="1 day",
|
||||||
|
compression="zip",
|
||||||
|
enqueue=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
# logger.add(sys.stderr, format="{time} {level} {message}", level="INFO")
|
||||||
|
logger.add(sys.stdout)
|
||||||
|
|
||||||
|
|
||||||
letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
|
letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
|
||||||
|
|
||||||
@@ -85,10 +101,6 @@ class SemapDocument:
|
|||||||
def word_docx_to_csv(path: str) -> list[pd.DataFrame]:
|
def word_docx_to_csv(path: str) -> list[pd.DataFrame]:
|
||||||
doc = Document(path)
|
doc = Document(path)
|
||||||
tables = doc.tables
|
tables = doc.tables
|
||||||
print("Tables: ", len(tables))
|
|
||||||
|
|
||||||
# print content of all tables
|
|
||||||
|
|
||||||
m_data = []
|
m_data = []
|
||||||
for table in tables:
|
for table in tables:
|
||||||
data = []
|
data = []
|
||||||
@@ -205,13 +217,13 @@ def elsa_word_to_csv(path):
|
|||||||
|
|
||||||
|
|
||||||
def word_to_semap(word_path: str) -> SemapDocument:
|
def word_to_semap(word_path: str) -> SemapDocument:
|
||||||
|
logger.info("Parsing Word Document {}", word_path)
|
||||||
semap = SemapDocument()
|
semap = SemapDocument()
|
||||||
df = word_docx_to_csv(word_path)
|
df = word_docx_to_csv(word_path)
|
||||||
apparatdata = df[0]
|
apparatdata = df[0]
|
||||||
apparatdata = apparatdata.to_dict()
|
apparatdata = apparatdata.to_dict()
|
||||||
keys = list(apparatdata.keys())
|
keys = list(apparatdata.keys())
|
||||||
appdata = {keys[i]: keys[i + 1] for i in range(0, len(keys), 2)}
|
appdata = {keys[i]: keys[i + 1] for i in range(0, len(keys), 2)}
|
||||||
print(appdata.keys())
|
|
||||||
semap.phoneNumber = appdata["Telefon:"]
|
semap.phoneNumber = appdata["Telefon:"]
|
||||||
semap.subject = appdata["Ihr Fach:"]
|
semap.subject = appdata["Ihr Fach:"]
|
||||||
semap.mail = appdata["Mailadresse:"]
|
semap.mail = appdata["Mailadresse:"]
|
||||||
@@ -236,7 +248,7 @@ def word_to_semap(word_path: str) -> SemapDocument:
|
|||||||
continue
|
continue
|
||||||
else:
|
else:
|
||||||
booklist.append(book)
|
booklist.append(book)
|
||||||
|
logger.info("Found {} books", len(booklist))
|
||||||
semap.books = booklist
|
semap.books = booklist
|
||||||
|
|
||||||
return semap
|
return semap
|
||||||
@@ -246,4 +258,3 @@ if __name__ == "__main__":
|
|||||||
else_df = word_to_semap(
|
else_df = word_to_semap(
|
||||||
"C:/Users/aky547/Desktop/SA 80 titelmeldung_SoSe2025 Burth.docx"
|
"C:/Users/aky547/Desktop/SA 80 titelmeldung_SoSe2025 Burth.docx"
|
||||||
)
|
)
|
||||||
print(else_df)
|
|
||||||
|
|||||||
Reference in New Issue
Block a user