Refactor and enhance type hints across multiple modules

- Updated the `from_tuple` method in the `Prof` class to specify its return type.
- Added type hints for various methods in the `LehmannsClient`, `OpenAI`, `WebRequest`, and `ZoteroController` classes to improve code clarity and type safety.
- Modified the `pdf_to_csv` function to return a string instead of a DataFrame.
- Enhanced error handling and type hints in the `wordparser` and `xmlparser` modules.
- Removed the unused UI file `Ui_medianadder.ts`.
- Improved the layout and structure of `semesterapparat_ui` to enhance the user experience.
- Updated the file picker to support `.doc` files in addition to `.docx`.
- Added unique-item handling in the `Ui` class to prevent duplicates in the apparat list.
- General code cleanup and consistency improvements across various files.
2025-10-21 09:09:54 +02:00
parent 560d8285b5
commit 0406fe4f6f
26 changed files with 437 additions and 396 deletions
--- a/src/logic/wordparser.py
+++ b/src/logic/wordparser.py
@@ -1,5 +1,5 @@
 import zipfile
-from typing import Any
+from typing import Any, Optional

 import fitz  # PyMuPDF
 import pandas as pd
@@ -35,7 +35,7 @@ def word_docx_to_csv(path: str) -> list[pd.DataFrame]:
    return m_data


-def get_fach(path: str) -> str:
+def get_fach(path: str) -> Optional[str]:
    document = zipfile.ZipFile(path)
    xml_data = document.read("word/document.xml")
    document.close()
@@ -49,10 +49,12 @@ def get_fach(path: str) -> str:
            # get the data in the w:t
            for run in para.find_all("w:r"):
                data = run.find("w:t")
-                return data.contents[0]
+                if data and data.contents:
+                    return data.contents[0]
+    return None


-def makeDict():
+def makeDict() -> dict[str, Optional[str]]:
    return {
        "work_author": None,
        "section_author": None,
@@ -70,8 +72,8 @@ def makeDict():
    }


-def tuple_to_dict(tlist: tuple, type: str) -> dict:
-    ret = []
+def tuple_to_dict(tlist: tuple, type: str) -> list[dict[str, Optional[str]]]:
+    ret: list[dict[str, Optional[str]]] = []
    for line in tlist:
        data = makeDict()
        if type == "Monografien":
@@ -111,7 +113,7 @@ def tuple_to_dict(tlist: tuple, type: str) -> dict:
    return ret


-def elsa_word_to_csv(path: str):
+def elsa_word_to_csv(path: str) -> tuple[list[dict[str, Optional[str]]], str]:
    doc = Document(path)
    # # print all lines in doc
    doctype = [para.text for para in doc.paragraphs if para.text != ""][-1]