more AI optimizations, reworked logger
@@ -1,28 +1,17 @@
 import re
 import sys
 import xml.etree.ElementTree as ET
 from dataclasses import dataclass, field
 from datetime import datetime
 from enum import Enum
-from typing import Dict, Iterable, List, Optional, Tuple
+from typing import Dict, Iterable, List, Optional, Tuple, Union
 
-import loguru
 import requests
 from requests.adapters import HTTPAdapter
 
-from src import LOG_DIR
+# centralized logging used via src.shared.logging
 from src.logic.dataclass import BookData
+from src.shared.logging import log
 
-log = loguru.logger
-log.remove()
-log.add(sys.stdout, level="INFO")
-log.add(f"{LOG_DIR}/application.log", rotation="1 MB", retention="10 days")
-
-log.add(
-    f"{LOG_DIR}/{datetime.now().strftime('%Y-%m-%d')}.log",
-    rotation="1 day",
-    retention="1 month",
-)
+log  # ensure imported logger is referenced
 
 
 # -----------------------
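The centralized module these files now import is not part of this diff. A minimal sketch of what src/shared/logging.py presumably contains, assuming it simply relocates the per-module setup deleted above (the sink, rotation, and retention values are carried over from the removed lines, not confirmed):

    # Hypothetical src/shared/logging.py -- not shown in this commit.
    import sys

    import loguru

    from src import LOG_DIR

    # Single shared logger instance; importing modules just do
    # "from src.shared.logging import log".
    log = loguru.logger
    log.remove()
    log.add(sys.stdout, level="INFO")
    log.add(f"{LOG_DIR}/application.log", rotation="1 MB", retention="10 days")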
@@ -186,7 +175,9 @@ def parse_echoed_request(root: ET.Element) -> Optional[EchoedSearchRequest]:
     )
 
 
-def parse_search_retrieve_response(xml_str: str) -> SearchRetrieveResponse:
+def parse_search_retrieve_response(
+    xml_str: Union[str, bytes],
+) -> SearchRetrieveResponse:
     root = ET.fromstring(xml_str)
 
     # Root is zs:searchRetrieveResponse
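Widening the parameter to Union[str, bytes] matters because ET.fromstring treats the two differently: with bytes the parser honors the encoding declared in the XML prolog, while a str that still carries an encoding declaration is rejected outright. This is presumably the "encoding edge cases" the Api.get hunk below refers to. A small self-contained check (the sample document is illustrative):

    import xml.etree.ElementTree as ET

    raw = '<?xml version="1.0" encoding="ISO-8859-1"?><r>\xe4</r>'.encode("iso-8859-1")
    assert ET.fromstring(raw).text == "\xe4"  # bytes: prolog encoding is honored
    try:
        ET.fromstring(raw.decode("iso-8859-1"))  # str with an encoding declaration
    except ValueError:
        pass  # ElementTree refuses unicode input that declares an encoding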
@@ -598,12 +589,12 @@ class Api:
             "Accept-Charset": "latin1,utf-8;q=0.7,*;q=0.3",
         }
-        # Use persistent session and set timeouts to avoid hanging
-        response = self._session.get(url, headers=headers, timeout=(3.05, 20))
-        if response.status_code != 200:
-            raise Exception(f"Error fetching data from SWB: {response.status_code}")
-        # extract top-level response (decode to text for the XML parser)
-        response = parse_search_retrieve_response(response.text)
-        return response.records
+        resp = self._session.get(url, headers=headers, timeout=(3.05, 60))
+        if resp.status_code != 200:
+            raise Exception(f"Error fetching data from SWB: {resp.status_code}")
+        # Parse using raw bytes (original behavior) to preserve encoding edge cases
+        sr = parse_search_retrieve_response(resp.content)
+        return sr.records
 
     def getBooks(self, query_args: Iterable[str]) -> List[BookData]:
         records: List[Record] = self.get(query_args)
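Besides raising the read timeout from 20 s to 60 s, the timeout=(3.05, 60) tuple is requests' (connect, read) form: up to 3.05 s to establish the connection, then up to 60 s between bytes from the server. A sketch of how the two failure modes surface (the URL is illustrative):

    import requests

    try:
        requests.get("https://example.org", timeout=(3.05, 60))
    except requests.exceptions.ConnectTimeout:
        ...  # no TCP connection established within 3.05 s
    except requests.exceptions.ReadTimeout:
        ...  # connected, but the server stalled for more than 60 s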
@@ -18,16 +18,8 @@ from __future__ import annotations
 
 import datetime
 import re
-import sys
 
-import loguru
-
-from src import LOG_DIR
-
-log = loguru.logger
-log.remove()
-log.add(sys.stdout, level="INFO")
-log.add(f"{LOG_DIR}/application.log", rotation="1 MB", retention="10 days")
+from src.shared.logging import log
 
 
 class Semester:
@@ -124,21 +116,22 @@ class Semester:
     # ------------------------------------------------------------------
     # Comparison helpers
     # ------------------------------------------------------------------
-    def isPastSemester(self, other: "Semester") -> bool:
-        if self.year < other.year:
+    def isPastSemester(self, current: "Semester") -> bool:
+        log.debug(f"Comparing {self} < {current}")
+        if self.year < current.year:
             return True
-        if self.year == other.year:
+        if self.year == current.year:
             return (
-                self.semester == "WiSe" and other.semester == "SoSe"
+                self.semester == "WiSe" and current.semester == "SoSe"
             )  # WiSe before next SoSe
         return False
 
-    def isFutureSemester(self, other: "Semester") -> bool:
-        if self.year > other.year:
+    def isFutureSemester(self, current: "Semester") -> bool:
+        if self.year > current.year:
             return True
-        if self.year == other.year:
+        if self.year == current.year:
             return (
-                self.semester == "SoSe" and other.semester == "WiSe"
+                self.semester == "SoSe" and current.semester == "WiSe"
             )  # SoSe after WiSe of same year
         return False
 
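The renamed comparison only makes sense under one year convention: returning True for equal years when self is "WiSe" and current is "SoSe" implies a winter semester is labeled by the calendar year it ends in (WiSe 2023/24 stored as 2024), matching the "WiSe before next SoSe" comment. A standalone sketch of the same ordering under that assumption:

    def is_past(year_a: int, term_a: str, year_b: int, term_b: str) -> bool:
        # Mirrors Semester.isPastSemester: WiSe <year> ends before SoSe <year>.
        if year_a < year_b:
            return True
        if year_a == year_b:
            return term_a == "WiSe" and term_b == "SoSe"
        return False

    assert is_past(2023, "SoSe", 2024, "SoSe")      # strictly earlier year
    assert is_past(2024, "WiSe", 2024, "SoSe")      # WiSe 2023/24 ends before SoSe 2024
    assert not is_past(2024, "SoSe", 2024, "WiSe")  # SoSe 2024 is not in the past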
@@ -1,23 +1,16 @@
 import sys
 from typing import Any, Optional, Union
 
-import loguru
 import requests
 from bs4 import BeautifulSoup
 
 # import sleep_and_retry decorator to retry requests
 from ratelimit import limits, sleep_and_retry
 
-from src import LOG_DIR
 from src.logic.dataclass import BookData
+from src.shared.logging import log
 from src.transformers import ARRAYData, BibTeXData, COinSData, RDSData, RISData
 from src.transformers.transformers import RDS_AVAIL_DATA, RDS_GENERIC_DATA
 
-log = loguru.logger
-log.remove()
-log.add(sys.stdout, level="INFO")
-log.add(f"{LOG_DIR}/application.log", rotation="1 MB", retention="10 days")
-
 # logger.add(sys.stderr, format="{time} {level} {message}", level="INFO")
 
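The ratelimit imports kept above are normally stacked as decorators on the request helper. A sketch with illustrative limits (the call budget is not taken from this codebase):

    import requests
    from ratelimit import limits, sleep_and_retry

    @sleep_and_retry               # block (sleep) until the rate window reopens
    @limits(calls=10, period=60)   # at most 10 calls per 60-second window
    def fetch(url: str) -> bytes:
        return requests.get(url, timeout=(3.05, 60)).content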
@@ -1,20 +1,13 @@
-import sys
 import zipfile
 from typing import Any
 
 import fitz  # PyMuPDF
-import loguru
 import pandas as pd
 from bs4 import BeautifulSoup
 from docx import Document
 
-from src import LOG_DIR
 from src.logic.dataclass import Book, SemapDocument
-
-log = loguru.logger
-log.remove()
-log.add(sys.stdout, level="INFO")
-log.add(f"{LOG_DIR}/application.log", rotation="1 MB", retention="10 days")
+from src.shared.logging import log
 
 
 def word_docx_to_csv(path: str) -> list[pd.DataFrame]:
@@ -50,7 +43,6 @@ def get_fach(path: str) -> str:
     soup = BeautifulSoup(xml_data, "xml")
     # text we need is in <w:p w14:paraId="12456A32" ... > -> w:r -> w:t
     paragraphs = soup.find_all("w:p")
-    names = []
     for para in paragraphs:
         para_id = para.get("w14:paraId")
         if para_id == "12456A32":
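For context on the truncated get_fach body: xml_data is presumably read straight out of the .docx container, which is an ordinary zip archive (hence the zipfile import above). A sketch of that step; the helper name is hypothetical:

    import zipfile

    def read_document_xml(path: str) -> bytes:
        # A .docx file is a zip archive; the main body lives in word/document.xml.
        with zipfile.ZipFile(path) as zf:
            return zf.read("word/document.xml")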