implement rate limit and retry to prevent ratelimits
This commit is contained in:
@@ -27,19 +27,25 @@ HEADERS = {
|
||||
(KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36",
|
||||
"Accept-Language": "en-US, en;q=0.5",
|
||||
}
|
||||
|
||||
RATE_LIMIT = 20
|
||||
RATE_PERIOD = 30
|
||||
|
||||
class WebRequest:
|
||||
def __init__(self, appnr) -> None:
|
||||
def __init__(self) -> None:
|
||||
"""Request data from the web, and format it depending on the mode."""
|
||||
self.apparat = appnr
|
||||
if int(self.apparat) < 10:
|
||||
self.apparat = f"0{self.apparat}"
|
||||
self.apparat = None
|
||||
|
||||
self.signature = None
|
||||
self.ppn = None
|
||||
self.data = None
|
||||
self.timeout = 5
|
||||
logger.log_info("Initialized WebRequest")
|
||||
|
||||
def set_apparat(self, apparat):
|
||||
self.apparat = apparat
|
||||
if int(self.apparat) < 10:
|
||||
self.apparat = f"0{self.apparat}"
|
||||
return self
|
||||
|
||||
def get_ppn(self, signature):
|
||||
self.signature = signature
|
||||
@@ -49,7 +55,8 @@ class WebRequest:
|
||||
signature = signature.split("/")[-1]
|
||||
self.ppn = signature
|
||||
return self
|
||||
|
||||
@sleep_and_retry
|
||||
@limits(calls=RATE_LIMIT, period=RATE_PERIOD)
|
||||
def search_book(self, searchterm: str):
|
||||
response = requests.get(PPN_URL.format(searchterm), timeout=self.timeout)
|
||||
return response.text
|
||||
@@ -62,16 +69,17 @@ class WebRequest:
|
||||
for link in links:
|
||||
res.append(BASE + link["href"])
|
||||
return res
|
||||
|
||||
@sleep_and_retry
|
||||
@limits(calls=RATE_LIMIT, period=RATE_PERIOD)
|
||||
def search(self, link: str):
|
||||
response = requests.get(link, timeout=self.timeout)
|
||||
return response.text
|
||||
|
||||
def get_data(
|
||||
self
|
||||
):
|
||||
try:
|
||||
response = requests.get(link, timeout=self.timeout)
|
||||
return response.text
|
||||
except requests.exceptions.RequestException as e:
|
||||
logger.log_error(f"Request failed: {e}")
|
||||
return None
|
||||
def get_data(self):
|
||||
links = self.get_book_links(self.ppn)
|
||||
print(links)
|
||||
for link in links:
|
||||
result = self.search(link)
|
||||
# in result search for class col-xs-12 rds-dl RDS_LOCATION
|
||||
@@ -95,6 +103,27 @@ class WebRequest:
|
||||
logger.log_error("No <pre> tag found")
|
||||
return return_data
|
||||
|
||||
def get_data_elsa(self):
|
||||
links = self.get_book_links(self.ppn)
|
||||
for link in links:
|
||||
result = self.search(link)
|
||||
# in result search for class col-xs-12 rds-dl RDS_LOCATION
|
||||
# if found, return text of href
|
||||
soup = BeautifulSoup(result, "html.parser")
|
||||
locations = soup.find_all("div", class_="col-xs-12 rds-dl RDS_LOCATION")
|
||||
if locations:
|
||||
for location in locations:
|
||||
pre_tag = soup.find_all("pre")
|
||||
return_data = []
|
||||
if pre_tag:
|
||||
for tag in pre_tag:
|
||||
data = tag.text.strip()
|
||||
return_data.append(data)
|
||||
return return_data
|
||||
else:
|
||||
logger.log_error("No <pre> tag found")
|
||||
return return_data
|
||||
|
||||
|
||||
class BibTextTransformer:
|
||||
"""Transforms data from the web into a BibText format.
|
||||
@@ -114,7 +143,6 @@ class BibTextTransformer:
|
||||
if mode not in self.valid_modes:
|
||||
logger.log_error(f"Mode {mode} not valid")
|
||||
raise ValueError(f"Mode {mode} not valid")
|
||||
# # print(self.field)
|
||||
self.data = None
|
||||
# self.bookdata = BookData(**self.data)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user