Refactor code structure for improved readability and maintainability
This commit is contained in:
@@ -48,7 +48,8 @@ class Catalogue:
|
||||
log.info(f"Searching for term: {searchterm}")
|
||||
|
||||
links = self.get_book_links(searchterm)
|
||||
print(links)
|
||||
# debug: links
|
||||
# print(links)
|
||||
for elink in links:
|
||||
result = self.search(elink)
|
||||
# in result search for class col-xs-12 rds-dl RDS_LOCATION
|
||||
@@ -174,7 +175,8 @@ class Catalogue:
|
||||
for link in links:
|
||||
result = self.search(link)
|
||||
soup = BeautifulSoup(result, "html.parser")
|
||||
print(link)
|
||||
# debug: link
|
||||
# print(link)
|
||||
ppn = link.split("/")[-1]
|
||||
if ppn and regex.match(r"^\d{8,10}[X\d]?$", ppn):
|
||||
return ppn
|
||||
@@ -266,7 +268,8 @@ class Catalogue:
|
||||
|
||||
# Find the signature for the entry whose location mentions "Semesterapparat"
|
||||
for g in groups:
|
||||
print(g)
|
||||
# debug: group contents
|
||||
# print(g)
|
||||
loc = g.get("location", "").lower()
|
||||
if "semesterapparat" in loc:
|
||||
signature = g.get("signature")
|
||||
|
||||
@@ -8,7 +8,10 @@ from bs4 import BeautifulSoup
|
||||
from ratelimit import limits, sleep_and_retry
|
||||
|
||||
from src.core.models import BookData
|
||||
from src.shared.logging import log
|
||||
from src.shared.logging import log, get_bloat_logger, preview
|
||||
|
||||
# bloat logger for large/raw HTTP responses
|
||||
bloat = get_bloat_logger()
|
||||
from src.transformers import ARRAYData, BibTeXData, COinSData, RDSData, RISData
|
||||
from src.transformers.transformers import RDS_AVAIL_DATA, RDS_GENERIC_DATA
|
||||
|
||||
@@ -18,7 +21,6 @@ from src.transformers.transformers import RDS_AVAIL_DATA, RDS_GENERIC_DATA
|
||||
API_URL = "https://rds.ibs-bw.de/phfreiburg/opac/RDSIndexrecord/{}/"
|
||||
PPN_URL = "https://rds.ibs-bw.de/phfreiburg/opac/RDSIndex/Search?type0%5B%5D=allfields&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=au&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=ti&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=ct&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=isn&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=ta&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=co&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=py&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=pp&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=pu&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=si&lookfor0%5B%5D={}&join=AND&bool0%5B%5D=AND&type0%5B%5D=zr&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=cc&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND"
|
||||
BASE = "https://rds.ibs-bw.de"
|
||||
#
|
||||
TITLE = "RDS_TITLE"
|
||||
SIGNATURE = "RDS_SIGNATURE"
|
||||
EDITION = "RDS_EDITION"
|
||||
@@ -55,7 +57,7 @@ class WebRequest:
|
||||
|
||||
@property
|
||||
def use_any_book(self):
|
||||
"""use any book that matches the search term"""
|
||||
"""Use any book that matches the search term"""
|
||||
self.use_any = True
|
||||
log.info("Using any book")
|
||||
return self
|
||||
@@ -68,6 +70,7 @@ class WebRequest:
|
||||
return self
|
||||
|
||||
def get_ppn(self, signature: str) -> "WebRequest":
|
||||
"""Take a book signature as input and set the PPN."""
|
||||
self.signature = signature
|
||||
if "+" in signature:
|
||||
signature = signature.replace("+", "%2B")
|
||||
@@ -109,7 +112,7 @@ class WebRequest:
|
||||
|
||||
def get_data(self) -> Optional[list[str]]:
|
||||
links = self.get_book_links(self.ppn)
|
||||
log.debug(f"Links: {links}")
|
||||
bloat.debug("Links (preview): {}", preview(links, 500))
|
||||
return_data: list[str] = []
|
||||
for link in links:
|
||||
result: str = self.search(link) # type:ignore
|
||||
@@ -128,40 +131,36 @@ class WebRequest:
|
||||
data = tag.text.strip()
|
||||
return_data.append(data)
|
||||
return return_data
|
||||
else:
|
||||
log.error("No <pre> tag found")
|
||||
log.error("No <pre> tag found")
|
||||
return return_data
|
||||
item_location = location.find(
|
||||
"div",
|
||||
class_="col-xs-12 col-md-7 col-lg-8 rds-dl-panel",
|
||||
).text.strip()
|
||||
log.debug(f"Item location: {item_location}")
|
||||
if self.use_any:
|
||||
pre_tag = soup.find_all("pre")
|
||||
if pre_tag:
|
||||
for tag in pre_tag:
|
||||
data = tag.text.strip()
|
||||
return_data.append(data)
|
||||
return return_data
|
||||
else:
|
||||
item_location = location.find(
|
||||
"div", class_="col-xs-12 col-md-7 col-lg-8 rds-dl-panel"
|
||||
).text.strip()
|
||||
log.debug(f"Item location: {item_location}")
|
||||
if self.use_any:
|
||||
pre_tag = soup.find_all("pre")
|
||||
if pre_tag:
|
||||
for tag in pre_tag:
|
||||
data = tag.text.strip()
|
||||
return_data.append(data)
|
||||
return return_data
|
||||
else:
|
||||
log.error("No <pre> tag found")
|
||||
raise ValueError("No <pre> tag found")
|
||||
elif f"Semesterapparat-{self.apparat}" in item_location:
|
||||
pre_tag = soup.find_all("pre")
|
||||
return_data = []
|
||||
if pre_tag:
|
||||
for tag in pre_tag:
|
||||
data = tag.text.strip()
|
||||
return_data.append(data)
|
||||
return return_data
|
||||
else:
|
||||
log.error("No <pre> tag found")
|
||||
return return_data
|
||||
else:
|
||||
log.error(
|
||||
f"Signature {self.signature} not found in {item_location}"
|
||||
)
|
||||
# return_data = []
|
||||
log.error("No <pre> tag found")
|
||||
raise ValueError("No <pre> tag found")
|
||||
if f"Semesterapparat-{self.apparat}" in item_location:
|
||||
pre_tag = soup.find_all("pre")
|
||||
return_data = []
|
||||
if pre_tag:
|
||||
for tag in pre_tag:
|
||||
data = tag.text.strip()
|
||||
return_data.append(data)
|
||||
return return_data
|
||||
log.error("No <pre> tag found")
|
||||
return return_data
|
||||
log.error(
|
||||
f"Signature {self.signature} not found in {item_location}",
|
||||
)
|
||||
# return_data = []
|
||||
|
||||
return return_data
|
||||
|
||||
@@ -182,9 +181,8 @@ class WebRequest:
|
||||
data = tag.text.strip()
|
||||
return_data.append(data)
|
||||
return return_data
|
||||
else:
|
||||
log.error("No <pre> tag found")
|
||||
return return_data
|
||||
log.error("No <pre> tag found")
|
||||
return return_data
|
||||
|
||||
|
||||
class BibTextTransformer:
|
||||
@@ -213,7 +211,7 @@ class BibTextTransformer:
|
||||
# self.bookdata = BookData(**self.data)
|
||||
|
||||
def use_signature(self, signature: str) -> "BibTextTransformer":
|
||||
"""use the exact signature to search for the book"""
|
||||
"""Use the exact signature to search for the book"""
|
||||
self.signature = signature
|
||||
return self
|
||||
|
||||
@@ -251,7 +249,8 @@ class BibTextTransformer:
|
||||
return self
|
||||
|
||||
def return_data(
|
||||
self, option: Any = None
|
||||
self,
|
||||
option: Any = None,
|
||||
) -> Union[
|
||||
Optional[BookData],
|
||||
Optional[RDS_GENERIC_DATA],
|
||||
@@ -266,6 +265,7 @@ class BibTextTransformer:
|
||||
|
||||
Returns:
|
||||
BookData: a dataclass containing data about the book
|
||||
|
||||
"""
|
||||
if self.data is None:
|
||||
return None
|
||||
@@ -311,4 +311,4 @@ if __name__ == "__main__":
|
||||
link = "CU 8500 K64"
|
||||
data = WebRequest(71).get_ppn(link).get_data()
|
||||
bib = BibTextTransformer("ARRAY").get_data().return_data()
|
||||
log.debug(bib)
|
||||
bloat.debug("Bib (preview): {}", preview(bib, 1000))
|
||||
|
||||
@@ -5,7 +5,7 @@ from pyzotero import zotero
|
||||
|
||||
from src import settings
|
||||
from src.services.webrequest import BibTextTransformer, WebRequest
|
||||
from src.shared.logging import log
|
||||
from src.shared.logging import log, get_bloat_logger, preview
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -215,7 +215,8 @@ class ZoteroController:
|
||||
def createItem(self, item) -> Optional[str]:
|
||||
resp = self.zot.create_items([item]) # type: ignore
|
||||
if "successful" in resp.keys():
|
||||
log.debug(resp)
|
||||
bloat = get_bloat_logger()
|
||||
bloat.debug("Zotero create_items response (preview): {}", preview(resp, 1000))
|
||||
return resp["successful"]["0"]["key"]
|
||||
else:
|
||||
return None
|
||||
@@ -229,7 +230,8 @@ class ZoteroController:
|
||||
break
|
||||
|
||||
def createHGSection(self, book: Book, data: dict) -> Optional[str]:
|
||||
log.debug(book)
|
||||
bloat = get_bloat_logger()
|
||||
bloat.debug("Zotero Book payload (preview): {}", preview(book.to_dict(), 1000))
|
||||
chapter = BookSection()
|
||||
chapter.assign(book)
|
||||
chapter.pages = data["pages"]
|
||||
@@ -247,7 +249,7 @@ class ZoteroController:
|
||||
]
|
||||
chapter.creators += authors
|
||||
|
||||
log.debug(chapter.to_dict())
|
||||
bloat.debug("Zotero Chapter payload (preview): {}", preview(chapter.to_dict(), 1000))
|
||||
return self.createItem(chapter.to_dict())
|
||||
pass
|
||||
|
||||
|
||||
Reference in New Issue
Block a user