Refactor and enhance type hints across multiple modules

- Updated the `from_tuple` method in `Prof` class to specify return type.
- Added type hints for various methods in `LehmannsClient`, `OpenAI`, `WebRequest`, and `ZoteroController` classes to improve code clarity and type safety.
- Modified `pdf_to_csv` function to return a string instead of a DataFrame.
- Enhanced error handling and type hints in `wordparser` and `xmlparser` modules.
- Removed unused UI file `Ui_medianadder.ts`.
- Improved the layout and structure of the `semesterapparat_ui` to enhance user experience.
- Updated file picker to support `.doc` files in addition to `.docx`.
- Added unique item handling in `Ui` class to prevent duplicates in apparat list.
- General code cleanup and consistency improvements across various files.
2025-10-21 09:09:54 +02:00
parent 560d8285b5
commit 0406fe4f6f
26 changed files with 437 additions and 396 deletions
--- a/src/logic/lehmannsapi.py
+++ b/src/logic/lehmannsapi.py
@@ -134,10 +134,10 @@ class LehmannsClient:
                        enriched.append(r)
                    continue

-                soup = BeautifulSoup(html, "html.parser")
+                soup = BeautifulSoup(html, "html.parser")  # type: ignore

                # Pages
-                pages_node = soup.select_one(
+                pages_node = soup.select_one(  # type: ignore
                    "span.book-meta.meta-seiten[itemprop='numberOfPages'], "
                    "span.book-meta.meta-seiten[itemprop='numberofpages'], "
                    ".meta-seiten [itemprop='numberOfPages'], "
@@ -151,7 +151,7 @@ class LehmannsClient:
                        r.pages = f"{m.group(0)} Seiten"

                # Availability via li.availability-3
-                avail_li = soup.select_one("li.availability-3")
+                avail_li = soup.select_one("li.availability-3")  # type: ignore
                if avail_li:
                    avail_text = " ".join(
                        avail_li.get_text(" ", strip=True).split()
@@ -200,12 +200,12 @@ class LehmannsClient:
            if not a:
                continue
            url = urljoin(BASE, a["href"].strip())
-            base_title = (block.select_one(".title [itemprop='name']") or a).get_text(
+            base_title = (block.select_one(".title [itemprop='name']") or a).get_text(  # type: ignore
                strip=True
            )

            # Alternative headline => extend title
-            alt_tag = block.select_one(".description[itemprop='alternativeHeadline']")
+            alt_tag = block.select_one(".description[itemprop='alternativeHeadline']")  # type: ignore
            alternative_headline = alt_tag.get_text(strip=True) if alt_tag else None
            title = (
                f"{base_title} : {alternative_headline}"
@@ -216,7 +216,7 @@ class LehmannsClient:

            # Authors from .author
            authors: list[str] = []
-            author_div = block.select_one("div.author")
+            author_div = block.select_one("div.author")  # type: ignore
            if author_div:
                t = author_div.get_text(" ", strip=True)
                t = re.sub(r"^\s*von\s+", "", t, flags=re.I)
@@ -228,7 +228,7 @@ class LehmannsClient:
            # Media + format
            media_type = None
            book_format = None
-            type_text = block.select_one(".type")
+            type_text = block.select_one(".type")  # type: ignore
            if type_text:
                t = type_text.get_text(" ", strip=True)
                m = re.search(r"\b(Buch|eBook|Hörbuch)\b", t)
@@ -240,7 +240,7 @@ class LehmannsClient:

            # Year
            year = None
-            y = block.select_one("[itemprop='copyrightYear']")
+            y = block.select_one("[itemprop='copyrightYear']")  # type: ignore
            if y:
                try:
                    year = int(y.get_text(strip=True))
@@ -249,7 +249,7 @@ class LehmannsClient:

            # Edition
            edition = None
-            ed = block.select_one("[itemprop='bookEdition']")
+            ed = block.select_one("[itemprop='bookEdition']")  # type: ignore
            if ed:
                m = re.search(r"\d+", ed.get_text(strip=True))
                if m:
@@ -257,15 +257,15 @@ class LehmannsClient:

            # Publisher
            publisher = None
-            pub = block.select_one(
+            pub = block.select_one(  # type: ignore
                ".publisherprop [itemprop='name']"
-            ) or block.select_one(".publisher [itemprop='name']")
+            ) or block.select_one(".publisher [itemprop='name']")  # type: ignore
            if pub:
                publisher = pub.get_text(strip=True)

            # ISBN-13
            isbn13 = None
-            isbn_tag = block.select_one(".isbn [itemprop='isbn'], [itemprop='isbn']")
+            isbn_tag = block.select_one(".isbn [itemprop='isbn'], [itemprop='isbn']")  # type: ignore
            if isbn_tag:
                digits = re.sub(r"[^0-9Xx]", "", isbn_tag.get_text(strip=True))
                m = re.search(r"(97[89]\d{10})", digits)
@@ -288,7 +288,7 @@ class LehmannsClient:

            # Image (best-effort)
            image = None
-            left_img = block.find_previous("img")
+            left_img = block.find_previous("img")  # type: ignore
            if left_img and left_img.get("src"):
                image = urljoin(BASE, left_img["src"])