Refactor code structure for improved readability and maintainability

This commit is contained in:
2025-12-10 13:47:34 +01:00
parent 67f967aa09
commit bcb96213ee
73 changed files with 4307 additions and 1315 deletions

View File

@@ -48,7 +48,8 @@ class Catalogue:
log.info(f"Searching for term: {searchterm}")
links = self.get_book_links(searchterm)
print(links)
# debug: links
# print(links)
for elink in links:
result = self.search(elink)
# in result search for class col-xs-12 rds-dl RDS_LOCATION
@@ -174,7 +175,8 @@ class Catalogue:
for link in links:
result = self.search(link)
soup = BeautifulSoup(result, "html.parser")
print(link)
# debug: link
# print(link)
ppn = link.split("/")[-1]
if ppn and regex.match(r"^\d{8,10}[X\d]?$", ppn):
return ppn
@@ -266,7 +268,8 @@ class Catalogue:
# Find the signature for the entry whose location mentions "Semesterapparat"
for g in groups:
print(g)
# debug: group contents
# print(g)
loc = g.get("location", "").lower()
if "semesterapparat" in loc:
signature = g.get("signature")

View File

@@ -8,7 +8,10 @@ from bs4 import BeautifulSoup
from ratelimit import limits, sleep_and_retry
from src.core.models import BookData
from src.shared.logging import log
from src.shared.logging import log, get_bloat_logger, preview
# bloat logger for large/raw HTTP responses
bloat = get_bloat_logger()
from src.transformers import ARRAYData, BibTeXData, COinSData, RDSData, RISData
from src.transformers.transformers import RDS_AVAIL_DATA, RDS_GENERIC_DATA
@@ -18,7 +21,6 @@ from src.transformers.transformers import RDS_AVAIL_DATA, RDS_GENERIC_DATA
API_URL = "https://rds.ibs-bw.de/phfreiburg/opac/RDSIndexrecord/{}/"
PPN_URL = "https://rds.ibs-bw.de/phfreiburg/opac/RDSIndex/Search?type0%5B%5D=allfields&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=au&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=ti&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=ct&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=isn&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=ta&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=co&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=py&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=pp&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=pu&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=si&lookfor0%5B%5D={}&join=AND&bool0%5B%5D=AND&type0%5B%5D=zr&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=cc&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND"
BASE = "https://rds.ibs-bw.de"
#
TITLE = "RDS_TITLE"
SIGNATURE = "RDS_SIGNATURE"
EDITION = "RDS_EDITION"
@@ -55,7 +57,7 @@ class WebRequest:
@property
def use_any_book(self):
"""use any book that matches the search term"""
"""Use any book that matches the search term"""
self.use_any = True
log.info("Using any book")
return self
@@ -68,6 +70,7 @@ class WebRequest:
return self
def get_ppn(self, signature: str) -> "WebRequest":
"""Take a book signature as input and set the PPN."""
self.signature = signature
if "+" in signature:
signature = signature.replace("+", "%2B")
@@ -109,7 +112,7 @@ class WebRequest:
def get_data(self) -> Optional[list[str]]:
links = self.get_book_links(self.ppn)
log.debug(f"Links: {links}")
bloat.debug("Links (preview): {}", preview(links, 500))
return_data: list[str] = []
for link in links:
result: str = self.search(link) # type:ignore
@@ -128,40 +131,36 @@ class WebRequest:
data = tag.text.strip()
return_data.append(data)
return return_data
else:
log.error("No <pre> tag found")
log.error("No <pre> tag found")
return return_data
item_location = location.find(
"div",
class_="col-xs-12 col-md-7 col-lg-8 rds-dl-panel",
).text.strip()
log.debug(f"Item location: {item_location}")
if self.use_any:
pre_tag = soup.find_all("pre")
if pre_tag:
for tag in pre_tag:
data = tag.text.strip()
return_data.append(data)
return return_data
else:
item_location = location.find(
"div", class_="col-xs-12 col-md-7 col-lg-8 rds-dl-panel"
).text.strip()
log.debug(f"Item location: {item_location}")
if self.use_any:
pre_tag = soup.find_all("pre")
if pre_tag:
for tag in pre_tag:
data = tag.text.strip()
return_data.append(data)
return return_data
else:
log.error("No <pre> tag found")
raise ValueError("No <pre> tag found")
elif f"Semesterapparat-{self.apparat}" in item_location:
pre_tag = soup.find_all("pre")
return_data = []
if pre_tag:
for tag in pre_tag:
data = tag.text.strip()
return_data.append(data)
return return_data
else:
log.error("No <pre> tag found")
return return_data
else:
log.error(
f"Signature {self.signature} not found in {item_location}"
)
# return_data = []
log.error("No <pre> tag found")
raise ValueError("No <pre> tag found")
if f"Semesterapparat-{self.apparat}" in item_location:
pre_tag = soup.find_all("pre")
return_data = []
if pre_tag:
for tag in pre_tag:
data = tag.text.strip()
return_data.append(data)
return return_data
log.error("No <pre> tag found")
return return_data
log.error(
f"Signature {self.signature} not found in {item_location}",
)
# return_data = []
return return_data
@@ -182,9 +181,8 @@ class WebRequest:
data = tag.text.strip()
return_data.append(data)
return return_data
else:
log.error("No <pre> tag found")
return return_data
log.error("No <pre> tag found")
return return_data
class BibTextTransformer:
@@ -213,7 +211,7 @@ class BibTextTransformer:
# self.bookdata = BookData(**self.data)
def use_signature(self, signature: str) -> "BibTextTransformer":
"""use the exact signature to search for the book"""
"""Use the exact signature to search for the book"""
self.signature = signature
return self
@@ -251,7 +249,8 @@ class BibTextTransformer:
return self
def return_data(
self, option: Any = None
self,
option: Any = None,
) -> Union[
Optional[BookData],
Optional[RDS_GENERIC_DATA],
@@ -266,6 +265,7 @@ class BibTextTransformer:
Returns:
BookData: a dataclass containing data about the book
"""
if self.data is None:
return None
@@ -311,4 +311,4 @@ if __name__ == "__main__":
link = "CU 8500 K64"
data = WebRequest(71).get_ppn(link).get_data()
bib = BibTextTransformer("ARRAY").get_data().return_data()
log.debug(bib)
bloat.debug("Bib (preview): {}", preview(bib, 1000))

View File

@@ -5,7 +5,7 @@ from pyzotero import zotero
from src import settings
from src.services.webrequest import BibTextTransformer, WebRequest
from src.shared.logging import log
from src.shared.logging import log, get_bloat_logger, preview
@dataclass
@@ -215,7 +215,8 @@ class ZoteroController:
def createItem(self, item) -> Optional[str]:
resp = self.zot.create_items([item]) # type: ignore
if "successful" in resp.keys():
log.debug(resp)
bloat = get_bloat_logger()
bloat.debug("Zotero create_items response (preview): {}", preview(resp, 1000))
return resp["successful"]["0"]["key"]
else:
return None
@@ -229,7 +230,8 @@ class ZoteroController:
break
def createHGSection(self, book: Book, data: dict) -> Optional[str]:
log.debug(book)
bloat = get_bloat_logger()
bloat.debug("Zotero Book payload (preview): {}", preview(book.to_dict(), 1000))
chapter = BookSection()
chapter.assign(book)
chapter.pages = data["pages"]
@@ -247,7 +249,7 @@ class ZoteroController:
]
chapter.creators += authors
log.debug(chapter.to_dict())
bloat.debug("Zotero Chapter payload (preview): {}", preview(chapter.to_dict(), 1000))
return self.createItem(chapter.to_dict())
pass