Merge pull request 'feat: get additional data from catalogue:' (#9) from dev into main
Reviewed-on: #9
This commit was merged in pull request #9.
This commit is contained in:
@@ -4,6 +4,8 @@ import regex
|
|||||||
import requests
|
import requests
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
from .schemas.bookdata import BookData as Book
|
||||||
|
|
||||||
URL = "https://rds.ibs-bw.de/phfreiburg/opac/RDSIndex/Search?type0%5B%5D=allfields&lookfor0%5B%5D={}&join=AND&bool0%5B%5D=AND&type0%5B%5D=au&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=ti&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=ct&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=isn&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=ta&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=co&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=py&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=pp&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=pu&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=si&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=zr&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=cc&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND"
|
URL = "https://rds.ibs-bw.de/phfreiburg/opac/RDSIndex/Search?type0%5B%5D=allfields&lookfor0%5B%5D={}&join=AND&bool0%5B%5D=AND&type0%5B%5D=au&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=ti&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=ct&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=isn&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=ta&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=co&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=py&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=pp&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=pu&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=si&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=zr&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=cc&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND"
|
||||||
BASE = "https://rds.ibs-bw.de"
|
BASE = "https://rds.ibs-bw.de"
|
||||||
|
|
||||||
@@ -156,6 +158,46 @@ class Catalogue:
|
|||||||
edition=edition,
|
edition=edition,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def get_book_with_data(self, searchterm: str) -> Book | None:
    """Look up a book and enrich it with data from its detail page.

    The book is first resolved via ``self.get_book(searchterm)``. If that
    yields a book, the page behind ``book.link`` is fetched with
    ``self.search`` and parsed as HTML to fill in three extra attributes:

    * ``book.signature`` - text of the ``rds-dl-panel`` div inside the
      ``RDS_SIGNATURE`` div.
    * ``book.isbn`` - text of the value panel that follows the ``RDS_ISBN``
      heading div.
    * ``book.pages`` - first run of digits in the value panel that follows
      the ``RDS_SCOPE`` heading div; if the text has no digits, the whole
      text is kept.

    Any attribute whose element is missing from the page is set to ``None``
    instead of raising.

    Args:
        searchterm: Term passed unchanged to ``self.get_book``.

    Returns:
        The enriched book, or ``None`` if ``self.get_book`` found nothing.
    """
    book = self.get_book(searchterm)
    if not book:
        return None

    # Request the page behind book.link and parse it for additional data.
    result = self.search(book.link)
    soup = BeautifulSoup(result, "html.parser")

    def _sibling_panel_text(head_class: str) -> str | None:
        """Return the text of the value panel following the given heading div."""
        head = soup.find("div", class_=head_class)
        if not head:
            return None
        panel = head.find_next_sibling(
            "div", class_="col-xs-12 col-md-7 col-lg-8 rds-dl-panel"
        )
        # The heading can exist without a value panel next to it.
        if not panel:
            return None
        return panel.get_text(strip=True)

    # Signature: the value panel is nested inside the RDS_SIGNATURE div
    # rather than being a sibling of it.
    signature = None
    signature_el = soup.find("div", class_="RDS_SIGNATURE")
    if signature_el:
        signature_panel = signature_el.find("div", class_="rds-dl-panel")
        if signature_panel:
            signature = signature_panel.get_text(strip=True)
    book.signature = signature

    # ISBN: value panel is the sibling following the RDS_ISBN heading.
    book.isbn = _sibling_panel_text("RDS_ISBN")

    # Pages: value panel follows the RDS_SCOPE heading; keep only the first
    # number in the text when there is one.
    pages = _sibling_panel_text("RDS_SCOPE")
    if pages is not None:
        match = regex.search(r"(\d+)", pages)
        if match:
            pages = match.group(1)
    book.pages = pages

    return book
|
||||||
|
|
||||||
def get(self, ppn: str) -> Book | None:
|
def get(self, ppn: str) -> Book | None:
|
||||||
# based on PPN, get title, people, edition, year, language, pages, isbn,
|
# based on PPN, get title, people, edition, year, language, pages, isbn,
|
||||||
link = f"https://rds.ibs-bw.de/phfreiburg/opac/RDSIndexrecord/{ppn}"
|
link = f"https://rds.ibs-bw.de/phfreiburg/opac/RDSIndexrecord/{ppn}"
|
||||||
|
|||||||
Reference in New Issue
Block a user