Add Catalogue class for book searching and connection handling

This commit is contained in the following branches:
2025-09-03 10:41:40 +02:00
parent bf419ec3bf
commit 0ac5051aef

101
src/backend/catalogue.py Normal file
View File

@@ -0,0 +1,101 @@
import requests
from bs4 import BeautifulSoup
from src.logic import BookData as Book
from datetime import datetime
import sys
import loguru
from src import LOG_DIR
# All-fields OPAC search template for the PH Freiburg RDS catalogue.
# The single `{}` placeholder receives the raw search term via str.format
# (NOTE(review): the term is inserted without URL-encoding — confirm terms
# with spaces/special characters are handled upstream).
URL = "https://rds.ibs-bw.de/phfreiburg/opac/RDSIndex/Search?type0%5B%5D=allfields&lookfor0%5B%5D={}&join=AND&bool0%5B%5D=AND&type0%5B%5D=au&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=ti&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=ct&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=isn&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=ta&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=co&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=py&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=pp&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=pu&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=si&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=zr&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=cc&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND"
# Host prefix used to turn relative result links into absolute URLs.
BASE = "https://rds.ibs-bw.de"
# Module-level loguru logger: one stdout sink at INFO plus two rotating
# file sinks under LOG_DIR (a cumulative application log and a daily log).
log = loguru.logger
log.remove()  # drop loguru's default stderr sink before installing our own
log.add(sys.stdout, level="INFO")
log.add(f"{LOG_DIR}/application.log", rotation="1 MB", retention="10 days")
log.add(
    f"{LOG_DIR}/{datetime.now().strftime('%Y-%m-%d')}.log",
    rotation="1 day",
    retention="1 month",
)
class Catalogue:
    """Client for the PH Freiburg RDS OPAC catalogue.

    Performs all-fields keyword searches and scrapes detail pages for
    books shelved at a given location (default: "1. OG Semesterapparat").
    """

    def __init__(self, timeout: int = 5):
        """Create the client and verify internet connectivity.

        Args:
            timeout: per-request timeout in seconds for all HTTP calls.

        Raises:
            ConnectionError: if the connectivity probe fails.
        """
        self.timeout = timeout
        if not self.check_connection():
            log.error("No internet connection available.")
            raise ConnectionError("No internet connection available.")

    def check_connection(self) -> bool:
        """Probe google.com and return True iff it answers with HTTP 200.

        Fix: the original returned None implicitly on a non-200 status and
        on request errors; this now returns an explicit bool in all paths.
        """
        try:
            response = requests.get("https://www.google.com", timeout=self.timeout)
            return response.status_code == 200
        except requests.exceptions.RequestException as e:
            log.error(f"Could not connect to google.com: {e}")
            return False

    def search_book(self, searchterm: str) -> str:
        """Run an all-fields catalogue search and return the raw HTML."""
        response = requests.get(URL.format(searchterm), timeout=self.timeout)
        return response.text

    def search(self, link: str) -> str:
        """Fetch an arbitrary catalogue page and return the raw HTML."""
        response = requests.get(link, timeout=self.timeout)
        return response.text

    def get_book_links(self, searchterm: str) -> list:
        """Return absolute detail-page URLs for all results of *searchterm*."""
        soup = BeautifulSoup(self.search_book(searchterm), "html.parser")
        anchors = soup.find_all("a", class_="title getFull")
        return [BASE + anchor["href"] for anchor in anchors]

    def get_book(self, searchterm: str, location: str = "1. OG Semesterapparat"):
        """Return the first result shelved at *location*, or False.

        Generalization: *location* is now a parameter; its default is the
        previously hard-coded value, so existing callers are unaffected.

        Args:
            searchterm: free-text search term.
            location: substring that must appear in a result's
                RDS_LOCATION div for the result to match.

        Returns:
            A Book for the first matching result, or False if no result
            is shelved at *location* (preserves the original contract).
        """
        log.info(f"Searching for term: {searchterm}")
        for link in self.get_book_links(searchterm):
            soup = BeautifulSoup(self.search(link), "html.parser")
            for loc in soup.find_all("div", class_="col-xs-12 rds-dl RDS_LOCATION"):
                if location in loc.text:
                    return self._parse_detail_page(soup, link)
        return False

    @staticmethod
    def _parse_detail_page(soup, link: str):
        """Extract title/ppn/signature/isbn from a detail-page soup.

        Each optional field stays None when its div is absent, matching
        the original behavior.
        """
        title = (
            soup.find("div", class_="headline text").text.replace("\n", "").strip()
        )
        ppn = soup.find(
            "div", class_="col-xs-12 col-md-5 col-lg-4 rds-dl-head RDS_PPN"
        )
        # The value lives in the div that follows the PPN heading div.
        ppn = ppn.find_next("div").text.replace("\n", "").strip() if ppn else None
        signature = soup.find("div", class_="col-xs-12 rds-dl RDS_SIGNATURE")
        if signature:
            signature = (
                signature.find_next("div")
                .find_next("div")
                .text.replace("\n", "")
                .strip()
            )
        isbn = soup.find(
            "div", class_="col-xs-12 col-md-5 col-lg-4 rds-dl-head RDS_ISBN"
        )
        isbn = isbn.find_next("div").find_next("div").text if isbn else None
        return Book(title=title, ppn=ppn, signature=signature, isbn=isbn, link=link)