chore(codebase): import fixes, restructuring
This commit is contained in:
@@ -4,7 +4,7 @@ import regex
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
from src.logic import BookData as Book
|
||||
from src.core.models import BookData as Book
|
||||
from src.shared.logging import log
|
||||
|
||||
URL = "https://rds.ibs-bw.de/phfreiburg/opac/RDSIndex/Search?type0%5B%5D=allfields&lookfor0%5B%5D={}&join=AND&bool0%5B%5D=AND&type0%5B%5D=au&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=ti&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=ct&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=isn&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=ta&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=co&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=py&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=pp&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=pu&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=si&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=zr&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=cc&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND"
|
||||
@@ -60,12 +60,14 @@ class Catalogue:
|
||||
title = title_el.get_text(strip=True) if title_el else None
|
||||
|
||||
ppn_el = soup.find(
|
||||
"div", class_="col-xs-12 col-md-5 col-lg-4 rds-dl-head RDS_PPN"
|
||||
"div",
|
||||
class_="col-xs-12 col-md-5 col-lg-4 rds-dl-head RDS_PPN",
|
||||
)
|
||||
# in ppn_el, get text of div col-xs-12 col-md-7 col-lg-8 rds-dl-panel
|
||||
ppn = (
|
||||
ppn_el.find_next_sibling(
|
||||
"div", class_="col-xs-12 col-md-7 col-lg-8 rds-dl-panel"
|
||||
"div",
|
||||
class_="col-xs-12 col-md-7 col-lg-8 rds-dl-panel",
|
||||
).get_text(strip=True)
|
||||
if ppn_el
|
||||
else None
|
||||
@@ -73,18 +75,21 @@ class Catalogue:
|
||||
|
||||
# get edition text at div class col-xs-12 col-md-5 col-lg-4 rds-dl-head RDS_EDITION
|
||||
edition_el = soup.find(
|
||||
"div", class_="col-xs-12 col-md-5 col-lg-4 rds-dl-head RDS_EDITION"
|
||||
"div",
|
||||
class_="col-xs-12 col-md-5 col-lg-4 rds-dl-head RDS_EDITION",
|
||||
)
|
||||
edition = (
|
||||
edition_el.find_next_sibling(
|
||||
"div", class_="col-xs-12 col-md-7 col-lg-8 rds-dl-panel"
|
||||
"div",
|
||||
class_="col-xs-12 col-md-7 col-lg-8 rds-dl-panel",
|
||||
).get_text(strip=True)
|
||||
if edition_el
|
||||
else None
|
||||
)
|
||||
|
||||
authors = soup.find_all(
|
||||
"div", class_="col-xs-12 col-md-5 col-lg-4 rds-dl-head RDS_PERSON"
|
||||
"div",
|
||||
class_="col-xs-12 col-md-5 col-lg-4 rds-dl-head RDS_PERSON",
|
||||
)
|
||||
author = None
|
||||
if authors:
|
||||
@@ -92,7 +97,8 @@ class Catalogue:
|
||||
author_names = []
|
||||
for author in authors:
|
||||
panel = author.find_next_sibling(
|
||||
"div", class_="col-xs-12 col-md-7 col-lg-8 rds-dl-panel"
|
||||
"div",
|
||||
class_="col-xs-12 col-md-7 col-lg-8 rds-dl-panel",
|
||||
)
|
||||
if panel:
|
||||
links = panel.find_all("a")
|
||||
@@ -109,7 +115,7 @@ class Catalogue:
|
||||
groups = []
|
||||
cur = {}
|
||||
for node in panel.select(
|
||||
"div.rds-dl.RDS_SIGNATURE, div.rds-dl.RDS_STATUS, div.rds-dl.RDS_LOCATION, div.col-xs-12.space"
|
||||
"div.rds-dl.RDS_SIGNATURE, div.rds-dl.RDS_STATUS, div.rds-dl.RDS_LOCATION, div.col-xs-12.space",
|
||||
):
|
||||
classes = node.get("class", [])
|
||||
# Separator between entries
|
||||
@@ -151,16 +157,15 @@ class Catalogue:
|
||||
author=author,
|
||||
edition=edition,
|
||||
)
|
||||
else:
|
||||
return Book(
|
||||
title=title,
|
||||
ppn=ppn,
|
||||
signature=signature,
|
||||
library_location=loc.split("\n\n")[-1],
|
||||
link=elink,
|
||||
author=author,
|
||||
edition=edition,
|
||||
)
|
||||
return Book(
|
||||
title=title,
|
||||
ppn=ppn,
|
||||
signature=signature,
|
||||
library_location=loc.split("\n\n")[-1],
|
||||
link=elink,
|
||||
author=author,
|
||||
edition=edition,
|
||||
)
|
||||
|
||||
def get(self, ppn: str) -> Book | None:
|
||||
# based on PPN, get title, people, edition, year, language, pages, isbn,
|
||||
@@ -208,14 +213,16 @@ class Catalogue:
|
||||
soup = BeautifulSoup(result, "html.parser")
|
||||
# get all authors, return them as a string seperated by ;
|
||||
authors = soup.find_all(
|
||||
"div", class_="col-xs-12 col-md-5 col-lg-4 rds-dl-head RDS_PERSON"
|
||||
"div",
|
||||
class_="col-xs-12 col-md-5 col-lg-4 rds-dl-head RDS_PERSON",
|
||||
)
|
||||
if authors:
|
||||
# get the names of the a href links in the div col-xs-12 col-md-7 col-lg-8 rds-dl-panel
|
||||
author_names = []
|
||||
for author in authors:
|
||||
panel = author.find_next_sibling(
|
||||
"div", class_="col-xs-12 col-md-7 col-lg-8 rds-dl-panel"
|
||||
"div",
|
||||
class_="col-xs-12 col-md-7 col-lg-8 rds-dl-panel",
|
||||
)
|
||||
if panel:
|
||||
links = panel.find_all("a")
|
||||
@@ -236,7 +243,7 @@ class Catalogue:
|
||||
groups = []
|
||||
cur = {}
|
||||
for node in panel.select(
|
||||
"div.rds-dl.RDS_SIGNATURE, div.rds-dl.RDS_STATUS, div.rds-dl.RDS_LOCATION, div.col-xs-12.space"
|
||||
"div.rds-dl.RDS_SIGNATURE, div.rds-dl.RDS_STATUS, div.rds-dl.RDS_LOCATION, div.col-xs-12.space",
|
||||
):
|
||||
classes = node.get("class", [])
|
||||
# Separator between entries
|
||||
@@ -271,9 +278,8 @@ class Catalogue:
|
||||
if "semesterapparat" in loc:
|
||||
signature = g.get("signature")
|
||||
return signature
|
||||
else:
|
||||
signature = g.get("signature")
|
||||
return signature
|
||||
signature = g.get("signature")
|
||||
return signature
|
||||
print("No signature found")
|
||||
return signature
|
||||
|
||||
|
||||
Reference in New Issue
Block a user