Refactor WebRequest class: improve location handling and logging for Semesterapparat

This commit is contained in:
2025-09-03 10:38:59 +02:00
parent f6ab64a8ee
commit 09ec304637

View File

@@ -1,17 +1,18 @@
import sys
from typing import Any, Optional, Union
import loguru
import requests
from bs4 import BeautifulSoup
# import sleep_and_retry decorator to retry requests
from ratelimit import limits, sleep_and_retry
from typing import Union, Any, Optional
from src.logic.dataclass import BookData
from src import LOG_DIR
from src.logic.dataclass import BookData
from src.transformers import ARRAYData, BibTeXData, COinSData, RDSData, RISData
from src.transformers.transformers import RDS_AVAIL_DATA, RDS_GENERIC_DATA
import loguru
import sys
from src import LOG_DIR
log = loguru.logger
log.remove()
log.add(sys.stdout, level="INFO")
@@ -20,7 +21,6 @@ log.add(f"{LOG_DIR}/application.log", rotation="1 MB", retention="10 days")
# logger.add(sys.stderr, format="{time} {level} {message}", level="INFO")
API_URL = "https://rds.ibs-bw.de/phfreiburg/opac/RDSIndexrecord/{}/"
PPN_URL = "https://rds.ibs-bw.de/phfreiburg/opac/RDSIndex/Search?type0%5B%5D=allfields&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=au&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=ti&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=ct&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=isn&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=ta&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=co&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=py&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=pp&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=pu&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=si&lookfor0%5B%5D={}&join=AND&bool0%5B%5D=AND&type0%5B%5D=zr&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=cc&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND"
BASE = "https://rds.ibs-bw.de"
@@ -111,21 +111,8 @@ class WebRequest:
locations = soup.find_all("div", class_="col-xs-12 rds-dl RDS_LOCATION")
if locations:
for location in locations:
item_location = location.find(
"div", class_="col-xs-12 col-md-7 col-lg-8 rds-dl-panel"
).text.strip()
log.debug(f"Item location: {item_location}")
if self.use_any:
pre_tag = soup.find_all("pre")
if pre_tag:
for tag in pre_tag:
data = tag.text.strip()
return_data.append(data)
return return_data
else:
log.error("No <pre> tag found")
raise ValueError("No <pre> tag found")
elif f"Semesterapparat-{self.apparat}" in item_location:
if "1. OG Semesterapparat" in location.text:
log.success("Found Semesterapparat, adding entry")
pre_tag = soup.find_all("pre")
return_data = []
if pre_tag:
@@ -137,10 +124,36 @@ class WebRequest:
log.error("No <pre> tag found")
return return_data
else:
log.error(
f"Signature {self.signature} not found in {item_location}"
)
# return_data = []
item_location = location.find(
"div", class_="col-xs-12 col-md-7 col-lg-8 rds-dl-panel"
).text.strip()
log.debug(f"Item location: {item_location}")
if self.use_any:
pre_tag = soup.find_all("pre")
if pre_tag:
for tag in pre_tag:
data = tag.text.strip()
return_data.append(data)
return return_data
else:
log.error("No <pre> tag found")
raise ValueError("No <pre> tag found")
elif f"Semesterapparat-{self.apparat}" in item_location:
pre_tag = soup.find_all("pre")
return_data = []
if pre_tag:
for tag in pre_tag:
data = tag.text.strip()
return_data.append(data)
return return_data
else:
log.error("No <pre> tag found")
return return_data
else:
log.error(
f"Signature {self.signature} not found in {item_location}"
)
# return_data = []
return return_data