more AI optimizations, reworked logger
This commit is contained in:
@@ -1,20 +1,13 @@
|
||||
import sys
|
||||
import zipfile
|
||||
from typing import Any
|
||||
|
||||
import fitz # PyMuPDF
|
||||
import loguru
|
||||
import pandas as pd
|
||||
from bs4 import BeautifulSoup
|
||||
from docx import Document
|
||||
|
||||
from src import LOG_DIR
|
||||
from src.logic.dataclass import Book, SemapDocument
|
||||
|
||||
log = loguru.logger
|
||||
log.remove()
|
||||
log.add(sys.stdout, level="INFO")
|
||||
log.add(f"{LOG_DIR}/application.log", rotation="1 MB", retention="10 days")
|
||||
from src.shared.logging import log
|
||||
|
||||
|
||||
def word_docx_to_csv(path: str) -> list[pd.DataFrame]:
|
||||
@@ -50,7 +43,6 @@ def get_fach(path: str) -> str:
|
||||
soup = BeautifulSoup(xml_data, "xml")
|
||||
# text we need is in <w:p w14:paraId="12456A32" ... > -> w:r -> w:t
|
||||
paragraphs = soup.find_all("w:p")
|
||||
names = []
|
||||
for para in paragraphs:
|
||||
para_id = para.get("w14:paraId")
|
||||
if para_id == "12456A32":
|
||||
|
||||
Reference in New Issue
Block a user