chore(codebase): import fixes, restructuring

This commit is contained in:
2025-12-01 14:24:24 +01:00
parent 6523ad655c
commit 05289ef244
58 changed files with 2692 additions and 6574 deletions

View File

@@ -4,7 +4,7 @@ import regex
import requests
from bs4 import BeautifulSoup
from src.logic import BookData as Book
from src.core.models import BookData as Book
from src.shared.logging import log
URL = "https://rds.ibs-bw.de/phfreiburg/opac/RDSIndex/Search?type0%5B%5D=allfields&lookfor0%5B%5D={}&join=AND&bool0%5B%5D=AND&type0%5B%5D=au&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=ti&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=ct&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=isn&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=ta&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=co&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=py&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=pp&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=pu&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=si&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=zr&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=cc&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND"
@@ -60,12 +60,14 @@ class Catalogue:
title = title_el.get_text(strip=True) if title_el else None
ppn_el = soup.find(
"div", class_="col-xs-12 col-md-5 col-lg-4 rds-dl-head RDS_PPN"
"div",
class_="col-xs-12 col-md-5 col-lg-4 rds-dl-head RDS_PPN",
)
# from the sibling that follows ppn_el, get the text of div col-xs-12 col-md-7 col-lg-8 rds-dl-panel
ppn = (
ppn_el.find_next_sibling(
"div", class_="col-xs-12 col-md-7 col-lg-8 rds-dl-panel"
"div",
class_="col-xs-12 col-md-7 col-lg-8 rds-dl-panel",
).get_text(strip=True)
if ppn_el
else None
@@ -73,18 +75,21 @@ class Catalogue:
# get edition text at div class col-xs-12 col-md-5 col-lg-4 rds-dl-head RDS_EDITION
edition_el = soup.find(
"div", class_="col-xs-12 col-md-5 col-lg-4 rds-dl-head RDS_EDITION"
"div",
class_="col-xs-12 col-md-5 col-lg-4 rds-dl-head RDS_EDITION",
)
edition = (
edition_el.find_next_sibling(
"div", class_="col-xs-12 col-md-7 col-lg-8 rds-dl-panel"
"div",
class_="col-xs-12 col-md-7 col-lg-8 rds-dl-panel",
).get_text(strip=True)
if edition_el
else None
)
authors = soup.find_all(
"div", class_="col-xs-12 col-md-5 col-lg-4 rds-dl-head RDS_PERSON"
"div",
class_="col-xs-12 col-md-5 col-lg-4 rds-dl-head RDS_PERSON",
)
author = None
if authors:
@@ -92,7 +97,8 @@ class Catalogue:
author_names = []
for author in authors:
panel = author.find_next_sibling(
"div", class_="col-xs-12 col-md-7 col-lg-8 rds-dl-panel"
"div",
class_="col-xs-12 col-md-7 col-lg-8 rds-dl-panel",
)
if panel:
links = panel.find_all("a")
@@ -109,7 +115,7 @@ class Catalogue:
groups = []
cur = {}
for node in panel.select(
"div.rds-dl.RDS_SIGNATURE, div.rds-dl.RDS_STATUS, div.rds-dl.RDS_LOCATION, div.col-xs-12.space"
"div.rds-dl.RDS_SIGNATURE, div.rds-dl.RDS_STATUS, div.rds-dl.RDS_LOCATION, div.col-xs-12.space",
):
classes = node.get("class", [])
# Separator between entries
@@ -151,16 +157,15 @@ class Catalogue:
author=author,
edition=edition,
)
else:
return Book(
title=title,
ppn=ppn,
signature=signature,
library_location=loc.split("\n\n")[-1],
link=elink,
author=author,
edition=edition,
)
return Book(
title=title,
ppn=ppn,
signature=signature,
library_location=loc.split("\n\n")[-1],
link=elink,
author=author,
edition=edition,
)
def get(self, ppn: str) -> Book | None:
# based on PPN, get title, people, edition, year, language, pages, isbn,
@@ -208,14 +213,16 @@ class Catalogue:
soup = BeautifulSoup(result, "html.parser")
# get all authors, return them as a string separated by ;
authors = soup.find_all(
"div", class_="col-xs-12 col-md-5 col-lg-4 rds-dl-head RDS_PERSON"
"div",
class_="col-xs-12 col-md-5 col-lg-4 rds-dl-head RDS_PERSON",
)
if authors:
# get the names of the a href links in the div col-xs-12 col-md-7 col-lg-8 rds-dl-panel
author_names = []
for author in authors:
panel = author.find_next_sibling(
"div", class_="col-xs-12 col-md-7 col-lg-8 rds-dl-panel"
"div",
class_="col-xs-12 col-md-7 col-lg-8 rds-dl-panel",
)
if panel:
links = panel.find_all("a")
@@ -236,7 +243,7 @@ class Catalogue:
groups = []
cur = {}
for node in panel.select(
"div.rds-dl.RDS_SIGNATURE, div.rds-dl.RDS_STATUS, div.rds-dl.RDS_LOCATION, div.col-xs-12.space"
"div.rds-dl.RDS_SIGNATURE, div.rds-dl.RDS_STATUS, div.rds-dl.RDS_LOCATION, div.col-xs-12.space",
):
classes = node.get("class", [])
# Separator between entries
@@ -271,9 +278,8 @@ class Catalogue:
if "semesterapparat" in loc:
signature = g.get("signature")
return signature
else:
signature = g.get("signature")
return signature
signature = g.get("signature")
return signature
print("No signature found")
return signature