def get_book_with_data(self, searchterm: str) -> "Book | None":
    """Look up a book and enrich it with details from its record page.

    The book is first resolved through ``self.get_book``. Its ``link`` is
    then fetched via ``self.search`` and the returned HTML is parsed for
    three extra fields, which are written onto the book object:

    * ``signature`` - text of the ``rds-dl-panel`` div nested inside the
      ``RDS_SIGNATURE`` div.
    * ``isbn`` - text of the panel div that follows the ``RDS_ISBN`` div.
    * ``pages`` - first run of digits in the panel div that follows the
      ``RDS_SCOPE`` div; if the text contains no digits, the raw text is
      kept.

    A field whose marker or value panel is missing from the page is set to
    ``None`` instead of raising.

    Args:
        searchterm: Term passed unchanged to ``self.get_book``.

    Returns:
        The enriched book, or ``None`` when ``self.get_book`` yields
        nothing.
    """
    book = self.get_book(searchterm)
    if not book:
        return None

    # Request the page behind book.link and parse it for additional data.
    soup = BeautifulSoup(self.search(book.link), "html.parser")

    # BeautifulSoup matches a multi-class string against the exact value of
    # the class attribute, so this has to mirror the page markup verbatim.
    value_panel_class = "col-xs-12 col-md-7 col-lg-8 rds-dl-panel"

    def _text_or_none(element):
        """Return the stripped text of *element*, or None if it is absent."""
        if element is None:
            return None
        return element.get_text(strip=True)

    def _sibling_panel_text(marker_class):
        """Return the text of the value panel following the marker div."""
        marker = soup.find("div", class_=marker_class)
        if marker is None:
            return None
        return _text_or_none(
            marker.find_next_sibling("div", class_=value_panel_class)
        )

    # Signature: the value panel is a descendant of the RDS_SIGNATURE div.
    signature_el = soup.find("div", class_="RDS_SIGNATURE")
    if signature_el is None:
        book.signature = None
    else:
        book.signature = _text_or_none(
            signature_el.find("div", class_="rds-dl-panel")
        )

    # ISBN and page count: the value panel is a sibling of the marker div.
    book.isbn = _sibling_panel_text("RDS_ISBN")

    pages = _sibling_panel_text("RDS_SCOPE")
    if pages is not None:
        # Grab the first number in the string as the page count.
        match = regex.search(r"(\d+)", pages)
        if match:
            pages = match.group(1)
    book.pages = pages

    return book