implement rate limiting and retry to avoid hitting rate limits

WorldTeacher
2024-10-10 14:37:54 +02:00
parent ed30d4d0cc
commit 7e2adc5416
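Note on the approach: the throttling in this commit relies on the ratelimit package's decorator pattern. limits(calls=RATE_LIMIT, period=RATE_PERIOD) raises a RateLimitException once more than 20 calls are made inside a 30-second window, and the outer sleep_and_retry catches that exception and sleeps until the window resets before retrying the call. Below is a minimal sketch of the pattern in isolation; the fetch_page helper and the URL are hypothetical, only RATE_LIMIT/RATE_PERIOD mirror the new constants in the diff.

import requests
from ratelimit import limits, sleep_and_retry

RATE_LIMIT = 20    # at most 20 calls ...
RATE_PERIOD = 30   # ... per 30-second window

@sleep_and_retry                                 # on RateLimitException: sleep until the window resets, retry
@limits(calls=RATE_LIMIT, period=RATE_PERIOD)    # raises RateLimitException past the limit
def fetch_page(url: str) -> str:
    # Hypothetical helper, not part of the repo; it shows the same decorator
    # stack this commit puts on WebRequest.search and WebRequest.search_book.
    return requests.get(url, timeout=5).text

Calls past the 20th within any 30-second window block instead of erroring, which is what keeps the scraper under the remote server's rate limit.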


@@ -27,19 +27,25 @@ HEADERS = {
 (KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36",
     "Accept-Language": "en-US, en;q=0.5",
 }
+RATE_LIMIT = 20
+RATE_PERIOD = 30
 class WebRequest:
-    def __init__(self, appnr) -> None:
+    def __init__(self) -> None:
         """Request data from the web, and format it depending on the mode."""
-        self.apparat = appnr
-        if int(self.apparat) < 10:
-            self.apparat = f"0{self.apparat}"
+        self.apparat = None
         self.signature = None
         self.ppn = None
         self.data = None
         self.timeout = 5
         logger.log_info("Initialized WebRequest")
+    def set_apparat(self, apparat):
+        self.apparat = apparat
+        if int(self.apparat) < 10:
+            self.apparat = f"0{self.apparat}"
+        return self
     def get_ppn(self, signature):
         self.signature = signature
@@ -49,7 +55,8 @@ class WebRequest:
         signature = signature.split("/")[-1]
         self.ppn = signature
         return self
+    @sleep_and_retry
+    @limits(calls=RATE_LIMIT, period=RATE_PERIOD)
     def search_book(self, searchterm: str):
         response = requests.get(PPN_URL.format(searchterm), timeout=self.timeout)
         return response.text
@@ -62,16 +69,17 @@ class WebRequest:
         for link in links:
             res.append(BASE + link["href"])
         return res
+    @sleep_and_retry
+    @limits(calls=RATE_LIMIT, period=RATE_PERIOD)
     def search(self, link: str):
-        response = requests.get(link, timeout=self.timeout)
-        return response.text
-    def get_data(
-        self
-    ):
+        try:
+            response = requests.get(link, timeout=self.timeout)
+            return response.text
+        except requests.exceptions.RequestException as e:
+            logger.log_error(f"Request failed: {e}")
+            return None
+    def get_data(self):
         links = self.get_book_links(self.ppn)
         print(links)
         for link in links:
             result = self.search(link)
             # in result search for class col-xs-12 rds-dl RDS_LOCATION
@@ -95,6 +103,27 @@ class WebRequest:
                         logger.log_error("No <pre> tag found")
         return return_data
+    def get_data_elsa(self):
+        links = self.get_book_links(self.ppn)
+        for link in links:
+            result = self.search(link)
+            # in result search for class col-xs-12 rds-dl RDS_LOCATION
+            # if found, return text of href
+            soup = BeautifulSoup(result, "html.parser")
+            locations = soup.find_all("div", class_="col-xs-12 rds-dl RDS_LOCATION")
+            if locations:
+                for location in locations:
+                    pre_tag = soup.find_all("pre")
+                    return_data = []
+                    if pre_tag:
+                        for tag in pre_tag:
+                            data = tag.text.strip()
+                            return_data.append(data)
+                        return return_data
+                    else:
+                        logger.log_error("No <pre> tag found")
+        return return_data
 class BibTextTransformer:
     """Transforms data from the web into a BibText format.
@@ -114,7 +143,6 @@ class BibTextTransformer:
         if mode not in self.valid_modes:
             logger.log_error(f"Mode {mode} not valid")
             raise ValueError(f"Mode {mode} not valid")
-        # # print(self.field)
         self.data = None
         # self.bookdata = BookData(**self.data)
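For orientation, a hypothetical call site for the reworked WebRequest (the apparat number and signature below are made-up values): set_apparat() and get_ppn() both return self so calls chain, search()/search_book() are now throttled to 20 requests per 30 seconds, and search() returns None instead of raising when a request fails.

req = WebRequest().set_apparat(7)   # single digits are zero-padded, e.g. 7 -> "07"
req.get_ppn("XYZ 123/456")          # made-up signature; resolves it to a PPN
data = req.get_data()               # throttled lookups; entries may be missing if requests failed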