Add Catalogue class for book searching and connection handling
This commit is contained in:
101
src/backend/catalogue.py
Normal file
101
src/backend/catalogue.py
Normal file
@@ -0,0 +1,101 @@
|
|||||||
|
import requests
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
from src.logic import BookData as Book
|
||||||
|
|
||||||
|
from datetime import datetime
|
||||||
|
import sys
|
||||||
|
import loguru
|
||||||
|
from src import LOG_DIR
|
||||||
|
# Pre-built "advanced search" URL for the PH Freiburg RDS OPAC.  The single
# `{}` placeholder is the all-fields search term, filled in via
# URL.format(searchterm); every other search field is deliberately left empty.
# NOTE(review): format() does not URL-encode the term — assumed safe for the
# plain-word terms used so far; confirm before passing terms with '&' or spaces.
URL = "https://rds.ibs-bw.de/phfreiburg/opac/RDSIndex/Search?type0%5B%5D=allfields&lookfor0%5B%5D={}&join=AND&bool0%5B%5D=AND&type0%5B%5D=au&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=ti&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=ct&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=isn&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=ta&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=co&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=py&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=pp&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=pu&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=si&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=zr&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=cc&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND"

# Host prefix used to turn the relative record links scraped from a result
# page into absolute URLs (see Catalogue.get_book_links).
BASE = "https://rds.ibs-bw.de"
|
||||||
|
|
||||||
|
# --- Module-level logging configuration (runs once, at import time) ---------
log = loguru.logger

# Drop loguru's default stderr sink so only the sinks configured below emit.
# Must run before the log.add() calls.
log.remove()

# Console sink: INFO and above to stdout.
log.add(sys.stdout, level="INFO")

# Rolling application log: rotated at 1 MB, old files kept for 10 days.
log.add(f"{LOG_DIR}/application.log", rotation="1 MB", retention="10 days")

# Additional per-day log file, named after the date this module was imported;
# rotated daily and kept for one month.
log.add(
    f"{LOG_DIR}/{datetime.now().strftime('%Y-%m-%d')}.log",
    rotation="1 day",
    retention="1 month",
)
|
||||||
|
class Catalogue:
    """Client for the PH Freiburg RDS OPAC catalogue.

    Verifies internet connectivity on construction, then offers helpers to
    run searches and scrape book metadata from the full-record pages.
    """

    def __init__(self, timeout=5):
        """Create a catalogue client.

        Args:
            timeout: Per-request timeout in seconds used for all HTTP calls.

        Raises:
            ConnectionError: If the connectivity probe fails.
        """
        self.timeout = timeout
        if not self.check_connection():
            log.error("No internet connection available.")
            raise ConnectionError("No internet connection available.")

    def check_connection(self):
        """Probe internet connectivity with a single GET to google.com.

        Returns:
            bool: True if the probe answered HTTP 200, False otherwise.
            (Fix: the original fell off the end and returned None implicitly
            on a non-200 status and on a request exception.)
        """
        try:
            response = requests.get("https://www.google.com", timeout=self.timeout)
        except requests.exceptions.RequestException as e:
            log.error(f"Could not connect to google.com: {e}")
            return False
        return response.status_code == 200

    def search_book(self, searchterm: str):
        """Run an all-fields catalogue search; return the result-page HTML."""
        response = requests.get(URL.format(searchterm), timeout=self.timeout)
        return response.text

    def search(self, link: str):
        """Fetch an arbitrary catalogue URL; return the raw HTML body."""
        response = requests.get(link, timeout=self.timeout)
        return response.text

    def get_book_links(self, searchterm: str):
        """Return absolute URLs of all full-record links on the result page.

        Args:
            searchterm: Term passed to the all-fields search.

        Returns:
            list[str]: BASE-prefixed hrefs of every ``a.title.getFull`` anchor.
        """
        soup = BeautifulSoup(self.search_book(searchterm), "html.parser")
        anchors = soup.find_all("a", class_="title getFull")
        return [BASE + anchor["href"] for anchor in anchors]

    def get_book(self, searchterm: str):
        """Find the first search hit shelved in the "1. OG Semesterapparat".

        Fetches each full-record page in result order and inspects its
        RDS_LOCATION entries; the first record whose location matches is
        returned as a Book.

        Args:
            searchterm: Term passed to the all-fields search.

        Returns:
            Book if a matching record is found, otherwise False
            (kept for backward compatibility with existing callers).
        """
        log.info(f"Searching for term: {searchterm}")
        for link in self.get_book_links(searchterm):
            soup = BeautifulSoup(self.search(link), "html.parser")
            locations = soup.find_all("div", class_="col-xs-12 rds-dl RDS_LOCATION")
            for location in locations:
                # Fix: was a pointless f-string with no placeholder.
                if "1. OG Semesterapparat" in location.text:
                    return self._extract_book(soup, link)
        return False

    def _extract_book(self, soup, link):
        """Build a Book from a parsed full-record page (helper for get_book).

        Each field is scraped defensively: a missing element yields None
        instead of an AttributeError.
        """
        # Fix: the original called .text on soup.find(...) unconditionally,
        # crashing with AttributeError whenever the headline div was absent.
        headline = soup.find("div", class_="headline text")
        title = headline.text.replace("\n", "").strip() if headline else None

        # The PPN value lives in the div following its RDS_PPN label div.
        ppn = soup.find(
            "div", class_="col-xs-12 col-md-5 col-lg-4 rds-dl-head RDS_PPN"
        )
        if ppn:
            ppn = ppn.find_next("div").text.replace("\n", "").strip()
        else:
            ppn = None

        # The signature value sits two divs after its RDS_SIGNATURE label.
        signature = soup.find("div", class_="col-xs-12 rds-dl RDS_SIGNATURE")
        if signature:
            signature = (
                signature.find_next("div")
                .find_next("div")
                .text.replace("\n", "")
                .strip()
            )

        # The ISBN value sits two divs after its RDS_ISBN label.
        isbn = soup.find(
            "div", class_="col-xs-12 col-md-5 col-lg-4 rds-dl-head RDS_ISBN"
        )
        isbn = isbn.find_next("div").find_next("div").text if isbn else None

        return Book(title=title, ppn=ppn, signature=signature, isbn=isbn, link=link)
|
||||||
Reference in New Issue
Block a user