dev #10
@@ -44,7 +44,6 @@ class Catalogue:
|
||||
|
||||
def get_book(self, searchterm: str):
|
||||
links = self.get_book_links(searchterm)
|
||||
print(links)
|
||||
for elink in links:
|
||||
result = self.search(elink)
|
||||
# in result search for class col-xs-12 rds-dl RDS_LOCATION
|
||||
@@ -56,12 +55,14 @@ class Catalogue:
|
||||
title = title_el.get_text(strip=True) if title_el else None
|
||||
|
||||
ppn_el = soup.find(
|
||||
"div", class_="col-xs-12 col-md-5 col-lg-4 rds-dl-head RDS_PPN"
|
||||
"div",
|
||||
class_="col-xs-12 col-md-5 col-lg-4 rds-dl-head RDS_PPN",
|
||||
)
|
||||
# in ppn_el, get text of div col-xs-12 col-md-7 col-lg-8 rds-dl-panel
|
||||
ppn = (
|
||||
ppn_el.find_next_sibling(
|
||||
"div", class_="col-xs-12 col-md-7 col-lg-8 rds-dl-panel"
|
||||
"div",
|
||||
class_="col-xs-12 col-md-7 col-lg-8 rds-dl-panel",
|
||||
).get_text(strip=True)
|
||||
if ppn_el
|
||||
else None
|
||||
@@ -69,18 +70,21 @@ class Catalogue:
|
||||
|
||||
# get edition text at div class col-xs-12 col-md-5 col-lg-4 rds-dl-head RDS_EDITION
|
||||
edition_el = soup.find(
|
||||
"div", class_="col-xs-12 col-md-5 col-lg-4 rds-dl-head RDS_EDITION"
|
||||
"div",
|
||||
class_="col-xs-12 col-md-5 col-lg-4 rds-dl-head RDS_EDITION",
|
||||
)
|
||||
edition = (
|
||||
edition_el.find_next_sibling(
|
||||
"div", class_="col-xs-12 col-md-7 col-lg-8 rds-dl-panel"
|
||||
"div",
|
||||
class_="col-xs-12 col-md-7 col-lg-8 rds-dl-panel",
|
||||
).get_text(strip=True)
|
||||
if edition_el
|
||||
else None
|
||||
)
|
||||
|
||||
authors = soup.find_all(
|
||||
"div", class_="col-xs-12 col-md-5 col-lg-4 rds-dl-head RDS_PERSON"
|
||||
"div",
|
||||
class_="col-xs-12 col-md-5 col-lg-4 rds-dl-head RDS_PERSON",
|
||||
)
|
||||
author = None
|
||||
if authors:
|
||||
@@ -88,7 +92,8 @@ class Catalogue:
|
||||
author_names = []
|
||||
for author in authors:
|
||||
panel = author.find_next_sibling(
|
||||
"div", class_="col-xs-12 col-md-7 col-lg-8 rds-dl-panel"
|
||||
"div",
|
||||
class_="col-xs-12 col-md-7 col-lg-8 rds-dl-panel",
|
||||
)
|
||||
if panel:
|
||||
links = panel.find_all("a")
|
||||
@@ -105,7 +110,7 @@ class Catalogue:
|
||||
groups = []
|
||||
cur = {}
|
||||
for node in panel.select(
|
||||
"div.rds-dl.RDS_SIGNATURE, div.rds-dl.RDS_STATUS, div.rds-dl.RDS_LOCATION, div.col-xs-12.space"
|
||||
"div.rds-dl.RDS_SIGNATURE, div.rds-dl.RDS_STATUS, div.rds-dl.RDS_LOCATION, div.col-xs-12.space",
|
||||
):
|
||||
classes = node.get("class", [])
|
||||
# Separator between entries
|
||||
@@ -147,16 +152,15 @@ class Catalogue:
|
||||
author=author,
|
||||
edition=edition,
|
||||
)
|
||||
else:
|
||||
return Book(
|
||||
title=title,
|
||||
ppn=ppn,
|
||||
signature=signature,
|
||||
library_location=loc.split("\n\n")[-1],
|
||||
link=elink,
|
||||
author=author,
|
||||
edition=edition,
|
||||
)
|
||||
return Book(
|
||||
title=title,
|
||||
ppn=ppn,
|
||||
signature=signature,
|
||||
library_location=loc.split("\n\n")[-1],
|
||||
link=elink,
|
||||
author=author,
|
||||
edition=edition,
|
||||
)
|
||||
|
||||
def get_book_with_data(self, searchterm: str) -> Book | None:
|
||||
book = self.get_book(searchterm)
|
||||
@@ -168,19 +172,18 @@ class Catalogue:
|
||||
# from div col-xs-12 rds-dl RDS_SIGNATURE get signature (second div in this div)
|
||||
signature = None
|
||||
signature_el = soup.find("div", class_="RDS_SIGNATURE")
|
||||
print(signature_el)
|
||||
if signature_el:
|
||||
signature = signature_el.find("div", class_="rds-dl-panel").get_text(
|
||||
strip=True
|
||||
strip=True,
|
||||
)
|
||||
print(signature)
|
||||
book.signature = signature
|
||||
# from div col-xs-12 col-md-5 col-lg-4 rds-dl-head RDS_ISBN get isbn (second div in this div)
|
||||
isbn = None
|
||||
isbn_el = soup.find("div", class_="RDS_ISBN")
|
||||
if isbn_el:
|
||||
isbn = isbn_el.find_next_sibling(
|
||||
"div", class_="col-xs-12 col-md-7 col-lg-8 rds-dl-panel"
|
||||
"div",
|
||||
class_="col-xs-12 col-md-7 col-lg-8 rds-dl-panel",
|
||||
).get_text(strip=True)
|
||||
book.isbn = isbn
|
||||
# from div col-xs-12 col-md-5 col-lg-4 rds-dl-head RDS_SCOPE get pages (second div in this div)
|
||||
@@ -188,7 +191,8 @@ class Catalogue:
|
||||
pages_el = soup.find("div", class_="RDS_SCOPE")
|
||||
if pages_el:
|
||||
pages = pages_el.find_next_sibling(
|
||||
"div", class_="col-xs-12 col-md-7 col-lg-8 rds-dl-panel"
|
||||
"div",
|
||||
class_="col-xs-12 col-md-7 col-lg-8 rds-dl-panel",
|
||||
).get_text(strip=True)
|
||||
# regex match to get pages by grabbing the first number in the string
|
||||
match = regex.search(r"(\d+)", pages)
|
||||
@@ -210,7 +214,6 @@ class Catalogue:
|
||||
for link in links:
|
||||
result = self.search(link)
|
||||
soup = BeautifulSoup(result, "html.parser")
|
||||
print(link)
|
||||
ppn = link.split("/")[-1]
|
||||
if ppn and regex.match(r"^\d{8,10}[X\d]?$", ppn):
|
||||
return ppn
|
||||
@@ -239,19 +242,20 @@ class Catalogue:
|
||||
links = self.get_book_links(f"kid:{link}")
|
||||
author = None
|
||||
for link in links:
|
||||
# print(link)
|
||||
result = self.search(link)
|
||||
soup = BeautifulSoup(result, "html.parser")
|
||||
# get all authors, return them as a string separated by ;
|
||||
authors = soup.find_all(
|
||||
"div", class_="col-xs-12 col-md-5 col-lg-4 rds-dl-head RDS_PERSON"
|
||||
"div",
|
||||
class_="col-xs-12 col-md-5 col-lg-4 rds-dl-head RDS_PERSON",
|
||||
)
|
||||
if authors:
|
||||
# get the names of the a href links in the div col-xs-12 col-md-7 col-lg-8 rds-dl-panel
|
||||
author_names = []
|
||||
for author in authors:
|
||||
panel = author.find_next_sibling(
|
||||
"div", class_="col-xs-12 col-md-7 col-lg-8 rds-dl-panel"
|
||||
"div",
|
||||
class_="col-xs-12 col-md-7 col-lg-8 rds-dl-panel",
|
||||
)
|
||||
if panel:
|
||||
links = panel.find_all("a")
|
||||
@@ -272,7 +276,7 @@ class Catalogue:
|
||||
groups = []
|
||||
cur = {}
|
||||
for node in panel.select(
|
||||
"div.rds-dl.RDS_SIGNATURE, div.rds-dl.RDS_STATUS, div.rds-dl.RDS_LOCATION, div.col-xs-12.space"
|
||||
"div.rds-dl.RDS_SIGNATURE, div.rds-dl.RDS_STATUS, div.rds-dl.RDS_LOCATION, div.col-xs-12.space",
|
||||
):
|
||||
classes = node.get("class", [])
|
||||
# Separator between entries
|
||||
@@ -302,14 +306,12 @@ class Catalogue:
|
||||
|
||||
# Find the signature for the entry whose location mentions "Semesterapparat"
|
||||
for g in groups:
|
||||
print(g)
|
||||
loc = g.get("location", "").lower()
|
||||
if "semesterapparat" in loc:
|
||||
signature = g.get("signature")
|
||||
return signature
|
||||
else:
|
||||
signature = g.get("signature")
|
||||
return signature
|
||||
signature = g.get("signature")
|
||||
return signature
|
||||
print("No signature found")
|
||||
return signature
|
||||
|
||||
|
||||
Reference in New Issue
Block a user