Refactor and enhance type hints across multiple modules

- Updated the `from_tuple` method in the `Prof` class to specify its return type.
- Added type hints for various methods in `LehmannsClient`, `OpenAI`, `WebRequest`, and `ZoteroController` classes to improve code clarity and type safety.
- Modified the `pdf_to_csv` function to return a string instead of a DataFrame.
- Enhanced error handling and type hints in `wordparser` and `xmlparser` modules.
- Removed unused UI file `Ui_medianadder.ts`.
- Improved the layout and structure of the `semesterapparat_ui` to enhance user experience.
- Updated file picker to support `.doc` files in addition to `.docx`.
- Added unique item handling in the `Ui` class to prevent duplicates in the apparat list.
- General code cleanup and consistency improvements across various files.
This commit is contained in:
2025-10-21 09:09:54 +02:00
parent 560d8285b5
commit 0406fe4f6f
26 changed files with 437 additions and 396 deletions

View File

@@ -1,5 +1,5 @@
import zipfile
from typing import Any
from typing import Any, Optional
import fitz # PyMuPDF
import pandas as pd
@@ -35,7 +35,7 @@ def word_docx_to_csv(path: str) -> list[pd.DataFrame]:
return m_data
def get_fach(path: str) -> str:
def get_fach(path: str) -> Optional[str]:
document = zipfile.ZipFile(path)
xml_data = document.read("word/document.xml")
document.close()
@@ -49,10 +49,12 @@ def get_fach(path: str) -> str:
# get the data in the w:t
for run in para.find_all("w:r"):
data = run.find("w:t")
return data.contents[0]
if data and data.contents:
return data.contents[0]
return None
def makeDict():
def makeDict() -> dict[str, Optional[str]]:
return {
"work_author": None,
"section_author": None,
@@ -70,8 +72,8 @@ def makeDict():
}
def tuple_to_dict(tlist: tuple, type: str) -> dict:
ret = []
def tuple_to_dict(tlist: tuple, type: str) -> list[dict[str, Optional[str]]]:
ret: list[dict[str, Optional[str]]] = []
for line in tlist:
data = makeDict()
if type == "Monografien":
@@ -111,7 +113,7 @@ def tuple_to_dict(tlist: tuple, type: str) -> dict:
return ret
def elsa_word_to_csv(path: str):
def elsa_word_to_csv(path: str) -> tuple[list[dict[str, Optional[str]]], str]:
doc = Document(path)
# # print all lines in doc
doctype = [para.text for para in doc.paragraphs if para.text != ""][-1]