Refactor code structure for improved readability and maintainability
This commit is contained in:
@@ -8,7 +8,10 @@ from bs4 import BeautifulSoup
|
||||
from ratelimit import limits, sleep_and_retry
|
||||
|
||||
from src.core.models import BookData
|
||||
from src.shared.logging import log
|
||||
from src.shared.logging import log, get_bloat_logger, preview
|
||||
|
||||
# bloat logger for large/raw HTTP responses
|
||||
bloat = get_bloat_logger()
|
||||
from src.transformers import ARRAYData, BibTeXData, COinSData, RDSData, RISData
|
||||
from src.transformers.transformers import RDS_AVAIL_DATA, RDS_GENERIC_DATA
|
||||
|
||||
@@ -18,7 +21,6 @@ from src.transformers.transformers import RDS_AVAIL_DATA, RDS_GENERIC_DATA
|
||||
API_URL = "https://rds.ibs-bw.de/phfreiburg/opac/RDSIndexrecord/{}/"
|
||||
PPN_URL = "https://rds.ibs-bw.de/phfreiburg/opac/RDSIndex/Search?type0%5B%5D=allfields&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=au&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=ti&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=ct&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=isn&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=ta&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=co&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=py&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=pp&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=pu&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=si&lookfor0%5B%5D={}&join=AND&bool0%5B%5D=AND&type0%5B%5D=zr&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=cc&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND"
|
||||
BASE = "https://rds.ibs-bw.de"
|
||||
#
|
||||
TITLE = "RDS_TITLE"
|
||||
SIGNATURE = "RDS_SIGNATURE"
|
||||
EDITION = "RDS_EDITION"
|
||||
@@ -55,7 +57,7 @@ class WebRequest:
|
||||
|
||||
@property
|
||||
def use_any_book(self):
|
||||
"""use any book that matches the search term"""
|
||||
"""Use any book that matches the search term"""
|
||||
self.use_any = True
|
||||
log.info("Using any book")
|
||||
return self
|
||||
@@ -68,6 +70,7 @@ class WebRequest:
|
||||
return self
|
||||
|
||||
def get_ppn(self, signature: str) -> "WebRequest":
|
||||
"""Take a book signature as input and set the PPN."""
|
||||
self.signature = signature
|
||||
if "+" in signature:
|
||||
signature = signature.replace("+", "%2B")
|
||||
@@ -109,7 +112,7 @@ class WebRequest:
|
||||
|
||||
def get_data(self) -> Optional[list[str]]:
|
||||
links = self.get_book_links(self.ppn)
|
||||
log.debug(f"Links: {links}")
|
||||
bloat.debug("Links (preview): {}", preview(links, 500))
|
||||
return_data: list[str] = []
|
||||
for link in links:
|
||||
result: str = self.search(link) # type:ignore
|
||||
@@ -128,40 +131,36 @@ class WebRequest:
|
||||
data = tag.text.strip()
|
||||
return_data.append(data)
|
||||
return return_data
|
||||
else:
|
||||
log.error("No <pre> tag found")
|
||||
log.error("No <pre> tag found")
|
||||
return return_data
|
||||
item_location = location.find(
|
||||
"div",
|
||||
class_="col-xs-12 col-md-7 col-lg-8 rds-dl-panel",
|
||||
).text.strip()
|
||||
log.debug(f"Item location: {item_location}")
|
||||
if self.use_any:
|
||||
pre_tag = soup.find_all("pre")
|
||||
if pre_tag:
|
||||
for tag in pre_tag:
|
||||
data = tag.text.strip()
|
||||
return_data.append(data)
|
||||
return return_data
|
||||
else:
|
||||
item_location = location.find(
|
||||
"div", class_="col-xs-12 col-md-7 col-lg-8 rds-dl-panel"
|
||||
).text.strip()
|
||||
log.debug(f"Item location: {item_location}")
|
||||
if self.use_any:
|
||||
pre_tag = soup.find_all("pre")
|
||||
if pre_tag:
|
||||
for tag in pre_tag:
|
||||
data = tag.text.strip()
|
||||
return_data.append(data)
|
||||
return return_data
|
||||
else:
|
||||
log.error("No <pre> tag found")
|
||||
raise ValueError("No <pre> tag found")
|
||||
elif f"Semesterapparat-{self.apparat}" in item_location:
|
||||
pre_tag = soup.find_all("pre")
|
||||
return_data = []
|
||||
if pre_tag:
|
||||
for tag in pre_tag:
|
||||
data = tag.text.strip()
|
||||
return_data.append(data)
|
||||
return return_data
|
||||
else:
|
||||
log.error("No <pre> tag found")
|
||||
return return_data
|
||||
else:
|
||||
log.error(
|
||||
f"Signature {self.signature} not found in {item_location}"
|
||||
)
|
||||
# return_data = []
|
||||
log.error("No <pre> tag found")
|
||||
raise ValueError("No <pre> tag found")
|
||||
if f"Semesterapparat-{self.apparat}" in item_location:
|
||||
pre_tag = soup.find_all("pre")
|
||||
return_data = []
|
||||
if pre_tag:
|
||||
for tag in pre_tag:
|
||||
data = tag.text.strip()
|
||||
return_data.append(data)
|
||||
return return_data
|
||||
log.error("No <pre> tag found")
|
||||
return return_data
|
||||
log.error(
|
||||
f"Signature {self.signature} not found in {item_location}",
|
||||
)
|
||||
# return_data = []
|
||||
|
||||
return return_data
|
||||
|
||||
@@ -182,9 +181,8 @@ class WebRequest:
|
||||
data = tag.text.strip()
|
||||
return_data.append(data)
|
||||
return return_data
|
||||
else:
|
||||
log.error("No <pre> tag found")
|
||||
return return_data
|
||||
log.error("No <pre> tag found")
|
||||
return return_data
|
||||
|
||||
|
||||
class BibTextTransformer:
|
||||
@@ -213,7 +211,7 @@ class BibTextTransformer:
|
||||
# self.bookdata = BookData(**self.data)
|
||||
|
||||
def use_signature(self, signature: str) -> "BibTextTransformer":
|
||||
"""use the exact signature to search for the book"""
|
||||
"""Use the exact signature to search for the book"""
|
||||
self.signature = signature
|
||||
return self
|
||||
|
||||
@@ -251,7 +249,8 @@ class BibTextTransformer:
|
||||
return self
|
||||
|
||||
def return_data(
|
||||
self, option: Any = None
|
||||
self,
|
||||
option: Any = None,
|
||||
) -> Union[
|
||||
Optional[BookData],
|
||||
Optional[RDS_GENERIC_DATA],
|
||||
@@ -266,6 +265,7 @@ class BibTextTransformer:
|
||||
|
||||
Returns:
|
||||
BookData: a dataclass containing data about the book
|
||||
|
||||
"""
|
||||
if self.data is None:
|
||||
return None
|
||||
@@ -311,4 +311,4 @@ if __name__ == "__main__":
|
||||
link = "CU 8500 K64"
|
||||
data = WebRequest(71).get_ppn(link).get_data()
|
||||
bib = BibTextTransformer("ARRAY").get_data().return_data()
|
||||
log.debug(bib)
|
||||
bloat.debug("Bib (preview): {}", preview(bib, 1000))
|
||||
|
||||
Reference in New Issue
Block a user