Refactor and enhance type hints across multiple modules
- Updated the `from_tuple` method in `Prof` class to specify return type. - Added type hints for various methods in `LehmannsClient`, `OpenAI`, `WebRequest`, and `ZoteroController` classes to improve code clarity and type safety. - Modified `pdf_to_csv` function to return a string instead of a DataFrame. - Enhanced error handling and type hints in `wordparser` and `xmlparser` modules. - Removed unused UI file `Ui_medianadder.ts`. - Improved the layout and structure of the `semesterapparat_ui` to enhance user experience. - Updated file picker to support `.doc` files in addition to `.docx`. - Added unique item handling in `Ui` class to prevent duplicates in apparat list. - General code cleanup and consistency improvements across various files.
This commit is contained in:
@@ -134,10 +134,10 @@ class LehmannsClient:
|
||||
enriched.append(r)
|
||||
continue
|
||||
|
||||
soup = BeautifulSoup(html, "html.parser")
|
||||
soup = BeautifulSoup(html, "html.parser") # type: ignore
|
||||
|
||||
# Pages
|
||||
pages_node = soup.select_one(
|
||||
pages_node = soup.select_one( # type: ignore
|
||||
"span.book-meta.meta-seiten[itemprop='numberOfPages'], "
|
||||
"span.book-meta.meta-seiten[itemprop='numberofpages'], "
|
||||
".meta-seiten [itemprop='numberOfPages'], "
|
||||
@@ -151,7 +151,7 @@ class LehmannsClient:
|
||||
r.pages = f"{m.group(0)} Seiten"
|
||||
|
||||
# Availability via li.availability-3
|
||||
avail_li = soup.select_one("li.availability-3")
|
||||
avail_li = soup.select_one("li.availability-3") # type: ignore
|
||||
if avail_li:
|
||||
avail_text = " ".join(
|
||||
avail_li.get_text(" ", strip=True).split()
|
||||
@@ -200,12 +200,12 @@ class LehmannsClient:
|
||||
if not a:
|
||||
continue
|
||||
url = urljoin(BASE, a["href"].strip())
|
||||
base_title = (block.select_one(".title [itemprop='name']") or a).get_text(
|
||||
base_title = (block.select_one(".title [itemprop='name']") or a).get_text( # type: ignore
|
||||
strip=True
|
||||
)
|
||||
|
||||
# Alternative headline => extend title
|
||||
alt_tag = block.select_one(".description[itemprop='alternativeHeadline']")
|
||||
alt_tag = block.select_one(".description[itemprop='alternativeHeadline']") # type: ignore
|
||||
alternative_headline = alt_tag.get_text(strip=True) if alt_tag else None
|
||||
title = (
|
||||
f"{base_title} : {alternative_headline}"
|
||||
@@ -216,7 +216,7 @@ class LehmannsClient:
|
||||
|
||||
# Authors from .author
|
||||
authors: list[str] = []
|
||||
author_div = block.select_one("div.author")
|
||||
author_div = block.select_one("div.author") # type: ignore
|
||||
if author_div:
|
||||
t = author_div.get_text(" ", strip=True)
|
||||
t = re.sub(r"^\s*von\s+", "", t, flags=re.I)
|
||||
@@ -228,7 +228,7 @@ class LehmannsClient:
|
||||
# Media + format
|
||||
media_type = None
|
||||
book_format = None
|
||||
type_text = block.select_one(".type")
|
||||
type_text = block.select_one(".type") # type: ignore
|
||||
if type_text:
|
||||
t = type_text.get_text(" ", strip=True)
|
||||
m = re.search(r"\b(Buch|eBook|Hörbuch)\b", t)
|
||||
@@ -240,7 +240,7 @@ class LehmannsClient:
|
||||
|
||||
# Year
|
||||
year = None
|
||||
y = block.select_one("[itemprop='copyrightYear']")
|
||||
y = block.select_one("[itemprop='copyrightYear']") # type: ignore
|
||||
if y:
|
||||
try:
|
||||
year = int(y.get_text(strip=True))
|
||||
@@ -249,7 +249,7 @@ class LehmannsClient:
|
||||
|
||||
# Edition
|
||||
edition = None
|
||||
ed = block.select_one("[itemprop='bookEdition']")
|
||||
ed = block.select_one("[itemprop='bookEdition']") # type: ignore
|
||||
if ed:
|
||||
m = re.search(r"\d+", ed.get_text(strip=True))
|
||||
if m:
|
||||
@@ -257,15 +257,15 @@ class LehmannsClient:
|
||||
|
||||
# Publisher
|
||||
publisher = None
|
||||
pub = block.select_one(
|
||||
pub = block.select_one( # type: ignore
|
||||
".publisherprop [itemprop='name']"
|
||||
) or block.select_one(".publisher [itemprop='name']")
|
||||
) or block.select_one(".publisher [itemprop='name']") # type: ignore
|
||||
if pub:
|
||||
publisher = pub.get_text(strip=True)
|
||||
|
||||
# ISBN-13
|
||||
isbn13 = None
|
||||
isbn_tag = block.select_one(".isbn [itemprop='isbn'], [itemprop='isbn']")
|
||||
isbn_tag = block.select_one(".isbn [itemprop='isbn'], [itemprop='isbn']") # type: ignore
|
||||
if isbn_tag:
|
||||
digits = re.sub(r"[^0-9Xx]", "", isbn_tag.get_text(strip=True))
|
||||
m = re.search(r"(97[89]\d{10})", digits)
|
||||
@@ -288,7 +288,7 @@ class LehmannsClient:
|
||||
|
||||
# Image (best-effort)
|
||||
image = None
|
||||
left_img = block.find_previous("img")
|
||||
left_img = block.find_previous("img") # type: ignore
|
||||
if left_img and left_img.get("src"):
|
||||
image = urljoin(BASE, left_img["src"])
|
||||
|
||||
|
||||
Reference in New Issue
Block a user