This commit is contained in:
@@ -4,7 +4,7 @@ from bs4 import BeautifulSoup
|
||||
|
||||
# import sleep_and_retry decorator to retry requests
|
||||
from ratelimit import limits, sleep_and_retry
|
||||
|
||||
from typing import Union, Any
|
||||
from src.logic.dataclass import BookData
|
||||
|
||||
from src.transformers import ARRAYData, BibTeXData, COinSData, RDSData, RISData
|
||||
@@ -61,14 +61,14 @@ class WebRequest:
|
||||
logger.info("Using any book")
|
||||
return self
|
||||
|
||||
def set_apparat(self, apparat):
|
||||
def set_apparat(self, apparat: int):
|
||||
self.apparat = apparat
|
||||
if int(self.apparat) < 10:
|
||||
self.apparat = f"0{self.apparat}"
|
||||
logger.info(f"Set apparat to {self.apparat}")
|
||||
return self
|
||||
|
||||
def get_ppn(self, signature):
|
||||
def get_ppn(self, signature: str):
|
||||
self.signature = signature
|
||||
if "+" in signature:
|
||||
signature = signature.replace("+", "%2B")
|
||||
@@ -79,15 +79,15 @@ class WebRequest:
|
||||
|
||||
@sleep_and_retry
|
||||
@limits(calls=RATE_LIMIT, period=RATE_PERIOD)
|
||||
def search_book(self, searchterm: str):
|
||||
def search_book(self, searchterm: str) -> str:
|
||||
response = requests.get(PPN_URL.format(searchterm), timeout=self.timeout)
|
||||
return response.text
|
||||
|
||||
def get_book_links(self, searchterm: str):
|
||||
response = self.search_book(searchterm)
|
||||
def get_book_links(self, searchterm: str) -> list[str]:
|
||||
response: str = self.search_book(searchterm) # type:ignore
|
||||
soup = BeautifulSoup(response, "html.parser")
|
||||
links = soup.find_all("a", class_="title getFull")
|
||||
res = []
|
||||
res: list[str] = []
|
||||
for link in links:
|
||||
res.append(BASE + link["href"])
|
||||
return res
|
||||
@@ -102,10 +102,11 @@ class WebRequest:
|
||||
logger.error(f"Request failed: {e}")
|
||||
return None
|
||||
|
||||
def get_data(self):
|
||||
def get_data(self) -> Union[list[str], None]:
|
||||
links = self.get_book_links(self.ppn)
|
||||
logger.debug(f"Links: {links}")
|
||||
for link in links:
|
||||
result = self.search(link)
|
||||
result: str = self.search(link) # type:ignore
|
||||
# in result search for class col-xs-12 rds-dl RDS_LOCATION
|
||||
# if found, return text of href
|
||||
soup = BeautifulSoup(result, "html.parser")
|
||||
@@ -117,7 +118,7 @@ class WebRequest:
|
||||
).text.strip()
|
||||
if self.use_any:
|
||||
pre_tag = soup.find_all("pre")
|
||||
return_data = []
|
||||
return_data: list[str] = []
|
||||
if pre_tag:
|
||||
for tag in pre_tag:
|
||||
data = tag.text.strip()
|
||||
@@ -126,7 +127,7 @@ class WebRequest:
|
||||
else:
|
||||
logger.error("No <pre> tag found")
|
||||
raise ValueError("No <pre> tag found")
|
||||
if f"Semesterapparat-{self.apparat}" in item_location:
|
||||
elif f"Semesterapparat-{self.apparat}" in item_location:
|
||||
pre_tag = soup.find_all("pre")
|
||||
return_data = []
|
||||
if pre_tag:
|
||||
@@ -137,6 +138,13 @@ class WebRequest:
|
||||
else:
|
||||
logger.error("No <pre> tag found")
|
||||
return return_data
|
||||
else:
|
||||
logger.error(
|
||||
f"Signature {self.signature} not found in {item_location}"
|
||||
)
|
||||
return_data = []
|
||||
|
||||
return return_data
|
||||
|
||||
def get_data_elsa(self):
|
||||
links = self.get_book_links(self.ppn)
|
||||
@@ -184,7 +192,7 @@ class BibTextTransformer:
|
||||
self.signature = signature
|
||||
return self
|
||||
|
||||
def get_data(self, data: list):
|
||||
def get_data(self, data: Union[list[str]] = None) -> "BibTextTransformer":
|
||||
RIS_IDENT = "TY -"
|
||||
ARRAY_IDENT = "[kid]"
|
||||
COinS_IDENT = "ctx_ver"
|
||||
@@ -217,7 +225,7 @@ class BibTextTransformer:
|
||||
self.data = line
|
||||
return self
|
||||
|
||||
def return_data(self, option=None) -> BookData:
|
||||
def return_data(self, option: Any = None) -> Union[BookData, None]:
|
||||
"""Return Data to caller.
|
||||
|
||||
Args:
|
||||
@@ -239,7 +247,7 @@ class BibTextTransformer:
|
||||
return RISData().transform(self.data)
|
||||
case "RDS":
|
||||
return RDSData().transform(self.data).return_data(option)
|
||||
case None:
|
||||
case _:
|
||||
return None
|
||||
|
||||
# if self.mode == "ARRAY":
|
||||
@@ -256,7 +264,7 @@ class BibTextTransformer:
|
||||
|
||||
def cover(isbn):
|
||||
test_url = f"https://www.buchhandel.de/cover/{isbn}/{isbn}-cover-m.jpg"
|
||||
# print(test_url)
|
||||
# logger.debug(test_url)
|
||||
data = requests.get(test_url, stream=True)
|
||||
return data.content
|
||||
|
||||
@@ -266,8 +274,8 @@ def get_content(soup, css_class):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
# print("main")
|
||||
# logger.debug("main")
|
||||
link = "CU 8500 K64"
|
||||
data = WebRequest(71).get_ppn(link).get_data()
|
||||
bib = BibTextTransformer("ARRAY").get_data().return_data()
|
||||
print(bib)
|
||||
logger.debug(bib)
|
||||
|
||||
Reference in New Issue
Block a user