Refactor WebRequest class: improve location handling and logging for Semesterapparat
This commit is contained in:
@@ -1,17 +1,18 @@
|
|||||||
|
import sys
|
||||||
|
from typing import Any, Optional, Union
|
||||||
|
|
||||||
|
import loguru
|
||||||
import requests
|
import requests
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
|
||||||
# import sleep_and_retry decorator to retry requests
|
# import sleep_and_retry decorator to retry requests
|
||||||
from ratelimit import limits, sleep_and_retry
|
from ratelimit import limits, sleep_and_retry
|
||||||
from typing import Union, Any, Optional
|
|
||||||
from src.logic.dataclass import BookData
|
|
||||||
|
|
||||||
|
from src import LOG_DIR
|
||||||
|
from src.logic.dataclass import BookData
|
||||||
from src.transformers import ARRAYData, BibTeXData, COinSData, RDSData, RISData
|
from src.transformers import ARRAYData, BibTeXData, COinSData, RDSData, RISData
|
||||||
from src.transformers.transformers import RDS_AVAIL_DATA, RDS_GENERIC_DATA
|
from src.transformers.transformers import RDS_AVAIL_DATA, RDS_GENERIC_DATA
|
||||||
import loguru
|
|
||||||
import sys
|
|
||||||
from src import LOG_DIR
|
|
||||||
log = loguru.logger
|
log = loguru.logger
|
||||||
log.remove()
|
log.remove()
|
||||||
log.add(sys.stdout, level="INFO")
|
log.add(sys.stdout, level="INFO")
|
||||||
@@ -20,7 +21,6 @@ log.add(f"{LOG_DIR}/application.log", rotation="1 MB", retention="10 days")
|
|||||||
# logger.add(sys.stderr, format="{time} {level} {message}", level="INFO")
|
# logger.add(sys.stderr, format="{time} {level} {message}", level="INFO")
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
API_URL = "https://rds.ibs-bw.de/phfreiburg/opac/RDSIndexrecord/{}/"
|
API_URL = "https://rds.ibs-bw.de/phfreiburg/opac/RDSIndexrecord/{}/"
|
||||||
PPN_URL = "https://rds.ibs-bw.de/phfreiburg/opac/RDSIndex/Search?type0%5B%5D=allfields&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=au&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=ti&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=ct&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=isn&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=ta&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=co&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=py&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=pp&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=pu&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=si&lookfor0%5B%5D={}&join=AND&bool0%5B%5D=AND&type0%5B%5D=zr&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=cc&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND"
|
PPN_URL = "https://rds.ibs-bw.de/phfreiburg/opac/RDSIndex/Search?type0%5B%5D=allfields&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=au&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=ti&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=ct&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=isn&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=ta&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=co&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=py&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=pp&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=pu&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=si&lookfor0%5B%5D={}&join=AND&bool0%5B%5D=AND&type0%5B%5D=zr&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=cc&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND"
|
||||||
BASE = "https://rds.ibs-bw.de"
|
BASE = "https://rds.ibs-bw.de"
|
||||||
@@ -111,21 +111,8 @@ class WebRequest:
|
|||||||
locations = soup.find_all("div", class_="col-xs-12 rds-dl RDS_LOCATION")
|
locations = soup.find_all("div", class_="col-xs-12 rds-dl RDS_LOCATION")
|
||||||
if locations:
|
if locations:
|
||||||
for location in locations:
|
for location in locations:
|
||||||
item_location = location.find(
|
if "1. OG Semesterapparat" in location.text:
|
||||||
"div", class_="col-xs-12 col-md-7 col-lg-8 rds-dl-panel"
|
log.success("Found Semesterapparat, adding entry")
|
||||||
).text.strip()
|
|
||||||
log.debug(f"Item location: {item_location}")
|
|
||||||
if self.use_any:
|
|
||||||
pre_tag = soup.find_all("pre")
|
|
||||||
if pre_tag:
|
|
||||||
for tag in pre_tag:
|
|
||||||
data = tag.text.strip()
|
|
||||||
return_data.append(data)
|
|
||||||
return return_data
|
|
||||||
else:
|
|
||||||
log.error("No <pre> tag found")
|
|
||||||
raise ValueError("No <pre> tag found")
|
|
||||||
elif f"Semesterapparat-{self.apparat}" in item_location:
|
|
||||||
pre_tag = soup.find_all("pre")
|
pre_tag = soup.find_all("pre")
|
||||||
return_data = []
|
return_data = []
|
||||||
if pre_tag:
|
if pre_tag:
|
||||||
@@ -137,10 +124,36 @@ class WebRequest:
|
|||||||
log.error("No <pre> tag found")
|
log.error("No <pre> tag found")
|
||||||
return return_data
|
return return_data
|
||||||
else:
|
else:
|
||||||
log.error(
|
item_location = location.find(
|
||||||
f"Signature {self.signature} not found in {item_location}"
|
"div", class_="col-xs-12 col-md-7 col-lg-8 rds-dl-panel"
|
||||||
)
|
).text.strip()
|
||||||
# return_data = []
|
log.debug(f"Item location: {item_location}")
|
||||||
|
if self.use_any:
|
||||||
|
pre_tag = soup.find_all("pre")
|
||||||
|
if pre_tag:
|
||||||
|
for tag in pre_tag:
|
||||||
|
data = tag.text.strip()
|
||||||
|
return_data.append(data)
|
||||||
|
return return_data
|
||||||
|
else:
|
||||||
|
log.error("No <pre> tag found")
|
||||||
|
raise ValueError("No <pre> tag found")
|
||||||
|
elif f"Semesterapparat-{self.apparat}" in item_location:
|
||||||
|
pre_tag = soup.find_all("pre")
|
||||||
|
return_data = []
|
||||||
|
if pre_tag:
|
||||||
|
for tag in pre_tag:
|
||||||
|
data = tag.text.strip()
|
||||||
|
return_data.append(data)
|
||||||
|
return return_data
|
||||||
|
else:
|
||||||
|
log.error("No <pre> tag found")
|
||||||
|
return return_data
|
||||||
|
else:
|
||||||
|
log.error(
|
||||||
|
f"Signature {self.signature} not found in {item_location}"
|
||||||
|
)
|
||||||
|
# return_data = []
|
||||||
|
|
||||||
return return_data
|
return return_data
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user