more AI optimizations, reworked logger

This commit is contained in:
2025-10-09 12:35:15 +02:00
parent 7e07bdea0c
commit 3cc6e793d2
22 changed files with 186 additions and 320 deletions

View File

@@ -1,20 +1,13 @@
import sys
import zipfile
from typing import Any
import fitz # PyMuPDF
import loguru
import pandas as pd
from bs4 import BeautifulSoup
from docx import Document
from src import LOG_DIR
from src.logic.dataclass import Book, SemapDocument
log = loguru.logger
log.remove()
log.add(sys.stdout, level="INFO")
log.add(f"{LOG_DIR}/application.log", rotation="1 MB", retention="10 days")
from src.shared.logging import log
def word_docx_to_csv(path: str) -> list[pd.DataFrame]:
@@ -50,7 +43,6 @@ def get_fach(path: str) -> str:
soup = BeautifulSoup(xml_data, "xml")
# text we need is in <w:p w14:paraId="12456A32" ... > -> w:r -> w:t
paragraphs = soup.find_all("w:p")
names = []
for para in paragraphs:
para_id = para.get("w14:paraId")
if para_id == "12456A32":