Implement rate limiting and retry to avoid hitting rate limits
This commit is contained in:
@@ -27,20 +27,26 @@ HEADERS = {
|
|||||||
(KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36",
|
(KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36",
|
||||||
"Accept-Language": "en-US, en;q=0.5",
|
"Accept-Language": "en-US, en;q=0.5",
|
||||||
}
|
}
|
||||||
|
RATE_LIMIT = 20
|
||||||
|
RATE_PERIOD = 30
|
||||||
|
|
||||||
class WebRequest:
|
class WebRequest:
|
||||||
def __init__(self, appnr) -> None:
|
def __init__(self) -> None:
|
||||||
"""Request data from the web, and format it depending on the mode."""
|
"""Request data from the web, and format it depending on the mode."""
|
||||||
self.apparat = appnr
|
self.apparat = None
|
||||||
if int(self.apparat) < 10:
|
|
||||||
self.apparat = f"0{self.apparat}"
|
|
||||||
self.signature = None
|
self.signature = None
|
||||||
self.ppn = None
|
self.ppn = None
|
||||||
self.data = None
|
self.data = None
|
||||||
self.timeout = 5
|
self.timeout = 5
|
||||||
logger.log_info("Initialized WebRequest")
|
logger.log_info("Initialized WebRequest")
|
||||||
|
|
||||||
|
def set_apparat(self, apparat):
|
||||||
|
self.apparat = apparat
|
||||||
|
if int(self.apparat) < 10:
|
||||||
|
self.apparat = f"0{self.apparat}"
|
||||||
|
return self
|
||||||
|
|
||||||
def get_ppn(self, signature):
|
def get_ppn(self, signature):
|
||||||
self.signature = signature
|
self.signature = signature
|
||||||
if "+" in signature:
|
if "+" in signature:
|
||||||
@@ -49,7 +55,8 @@ class WebRequest:
|
|||||||
signature = signature.split("/")[-1]
|
signature = signature.split("/")[-1]
|
||||||
self.ppn = signature
|
self.ppn = signature
|
||||||
return self
|
return self
|
||||||
|
@sleep_and_retry
|
||||||
|
@limits(calls=RATE_LIMIT, period=RATE_PERIOD)
|
||||||
def search_book(self, searchterm: str):
|
def search_book(self, searchterm: str):
|
||||||
response = requests.get(PPN_URL.format(searchterm), timeout=self.timeout)
|
response = requests.get(PPN_URL.format(searchterm), timeout=self.timeout)
|
||||||
return response.text
|
return response.text
|
||||||
@@ -62,16 +69,17 @@ class WebRequest:
|
|||||||
for link in links:
|
for link in links:
|
||||||
res.append(BASE + link["href"])
|
res.append(BASE + link["href"])
|
||||||
return res
|
return res
|
||||||
|
@sleep_and_retry
|
||||||
|
@limits(calls=RATE_LIMIT, period=RATE_PERIOD)
|
||||||
def search(self, link: str):
|
def search(self, link: str):
|
||||||
response = requests.get(link, timeout=self.timeout)
|
try:
|
||||||
return response.text
|
response = requests.get(link, timeout=self.timeout)
|
||||||
|
return response.text
|
||||||
def get_data(
|
except requests.exceptions.RequestException as e:
|
||||||
self
|
logger.log_error(f"Request failed: {e}")
|
||||||
):
|
return None
|
||||||
|
def get_data(self):
|
||||||
links = self.get_book_links(self.ppn)
|
links = self.get_book_links(self.ppn)
|
||||||
print(links)
|
|
||||||
for link in links:
|
for link in links:
|
||||||
result = self.search(link)
|
result = self.search(link)
|
||||||
# in result search for class col-xs-12 rds-dl RDS_LOCATION
|
# in result search for class col-xs-12 rds-dl RDS_LOCATION
|
||||||
@@ -95,6 +103,27 @@ class WebRequest:
|
|||||||
logger.log_error("No <pre> tag found")
|
logger.log_error("No <pre> tag found")
|
||||||
return return_data
|
return return_data
|
||||||
|
|
||||||
|
def get_data_elsa(self):
|
||||||
|
links = self.get_book_links(self.ppn)
|
||||||
|
for link in links:
|
||||||
|
result = self.search(link)
|
||||||
|
# in result search for class col-xs-12 rds-dl RDS_LOCATION
|
||||||
|
# if found, return text of href
|
||||||
|
soup = BeautifulSoup(result, "html.parser")
|
||||||
|
locations = soup.find_all("div", class_="col-xs-12 rds-dl RDS_LOCATION")
|
||||||
|
if locations:
|
||||||
|
for location in locations:
|
||||||
|
pre_tag = soup.find_all("pre")
|
||||||
|
return_data = []
|
||||||
|
if pre_tag:
|
||||||
|
for tag in pre_tag:
|
||||||
|
data = tag.text.strip()
|
||||||
|
return_data.append(data)
|
||||||
|
return return_data
|
||||||
|
else:
|
||||||
|
logger.log_error("No <pre> tag found")
|
||||||
|
return return_data
|
||||||
|
|
||||||
|
|
||||||
class BibTextTransformer:
|
class BibTextTransformer:
|
||||||
"""Transforms data from the web into a BibText format.
|
"""Transforms data from the web into a BibText format.
|
||||||
@@ -114,7 +143,6 @@ class BibTextTransformer:
|
|||||||
if mode not in self.valid_modes:
|
if mode not in self.valid_modes:
|
||||||
logger.log_error(f"Mode {mode} not valid")
|
logger.log_error(f"Mode {mode} not valid")
|
||||||
raise ValueError(f"Mode {mode} not valid")
|
raise ValueError(f"Mode {mode} not valid")
|
||||||
# # print(self.field)
|
|
||||||
self.data = None
|
self.data = None
|
||||||
# self.bookdata = BookData(**self.data)
|
# self.bookdata = BookData(**self.data)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user