Add a custom class that requests data from our web catalogue and returns Book dataclass instances
This commit is contained in:
75
src/logic/catalogue.py
Normal file
75
src/logic/catalogue.py
Normal file
@@ -0,0 +1,75 @@
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
from src import config
|
||||
from src.schemas import Book
|
||||
|
||||
# Search URL template for the catalogue; the single ``{}`` placeholder is filled
# with the search term via ``str.format``. The fixed query string carries
# ``type=AllFields``, ``limit=10`` and ``sort=py desc, title`` (URL-encoded).
URL = "https://rds.ibs-bw.de/phfreiburg/opac/RDSIndex/Search?lookfor={}+&type=AllFields&limit=10&sort=py+desc%2C+title"
|
||||
# Scheme and host that get prefixed to the ``href`` values scraped from the
# result page to build the detail-page links.
BASE = "https://rds.ibs-bw.de"
|
||||
|
||||
|
||||
class Catalogue:
    """Scraper for the library web catalogue that turns search hits into ``Book`` objects.

    The class downloads the search result page for a term, follows the
    detail links found on it and extracts title, PPN, signature and ISBN
    from the first record held by ``config.institution_name``.
    """

    def __init__(self, timeout=5):
        """Create a catalogue client.

        Args:
            timeout: Value forwarded as ``timeout=`` to every ``requests.get``
                call (seconds, per the Requests documentation).
        """
        self.timeout = timeout

    def search_book(self, searchterm: str) -> str:
        """Return the HTML of the catalogue result page for *searchterm*.

        NOTE(review): the term is inserted into the URL template unchanged;
        no escaping is applied here, so callers that pass characters such as
        ``&`` or ``#`` should encode them first.

        Args:
            searchterm: Text to put into the ``lookfor`` query parameter.

        Returns:
            The response body as text.
        """
        return self.search(URL.format(searchterm))

    def search(self, link: str) -> str:
        """Download *link* and return the response body as text.

        Args:
            link: Absolute URL to fetch.

        Returns:
            The response body as text. The HTTP status code is not checked.
        """
        response = requests.get(link, timeout=self.timeout)
        return response.text

    def get_book_links(self, searchterm: str) -> list[str]:
        """Return the absolute detail-page URLs of all hits for *searchterm*.

        Args:
            searchterm: Text to search the catalogue for.

        Returns:
            One URL per ``<a class="title getFull">`` element on the result
            page, in page order. Anchors without an ``href`` are skipped.
        """
        soup = BeautifulSoup(self.search_book(searchterm), "html.parser")
        anchors = soup.find_all("a", class_="title getFull")
        # An anchor without href cannot be followed; skip it instead of
        # failing the whole search with a KeyError.
        return [BASE + anchor["href"] for anchor in anchors if anchor.get("href")]

    @staticmethod
    def _text_after(label, hops, clean=True):
        """Return the text of the *hops*-th ``div`` following *label*, or ``None``.

        Args:
            label: Element to start from, or ``None`` if it was not found.
            hops: Number of ``find_next("div")`` steps to take from *label*;
                ``0`` reads the text of *label* itself.
            clean: When true, remove newlines and strip surrounding
                whitespace; when false, return the text untouched.

        Returns:
            The extracted text, or ``None`` if *label* or any element on the
            way is missing.
        """
        node = label
        for _ in range(hops):
            if node is None:
                return None
            node = node.find_next("div")
        if node is None:
            return None
        if not clean:
            return node.text
        return node.text.replace("\n", "").strip()

    def get_book(self, searchterm: str) -> "Book | bool":
        """Return the first hit for *searchterm* that the institution holds.

        Each detail page is fetched in result order. A page counts as a match
        when its ``RDS_LOCATION`` block contains ``config.institution_name``.

        Args:
            searchterm: Text to search the catalogue for.

        Returns:
            A ``Book`` built from the first matching detail page, or ``False``
            when no hit matches. Fields whose markup is missing from the page
            are set to ``None`` instead of raising.
        """
        for link in self.get_book_links(searchterm):
            soup = BeautifulSoup(self.search(link), "html.parser")

            location = soup.find("div", class_="col-xs-12 rds-dl RDS_LOCATION")
            if location is None or config.institution_name not in location.text:
                continue

            title = self._text_after(soup.find("div", class_="headline text"), 0)
            # The PPN value sits in the div right after its label.
            ppn = self._text_after(
                soup.find(
                    "div", class_="col-xs-12 col-md-5 col-lg-4 rds-dl-head RDS_PPN"
                ),
                1,
            )
            # Signature and ISBN values are two divs after their anchor div.
            signature = self._text_after(
                soup.find("div", class_="col-xs-12 rds-dl RDS_SIGNATURE"), 2
            )
            # The ISBN text is passed on exactly as found in the page.
            isbn = self._text_after(
                soup.find(
                    "div", class_="col-xs-12 col-md-5 col-lg-4 rds-dl-head RDS_ISBN"
                ),
                2,
                clean=False,
            )
            return Book(
                title=title, ppn=ppn, signature=signature, isbn=isbn, link=link
            )
        return False
|
||||
Reference in New Issue
Block a user