Refactor and enhance type hints across multiple modules
- Updated the `from_tuple` method in `Prof` class to specify return type. - Added type hints for various methods in `LehmannsClient`, `OpenAI`, `WebRequest`, and `ZoteroController` classes to improve code clarity and type safety. - Modified `pdf_to_csv` function to return a string instead of a DataFrame. - Enhanced error handling and type hints in `wordparser` and `xmlparser` modules. - Removed unused UI file `Ui_medianadder.ts`. - Improved the layout and structure of the `semesterapparat_ui` to enhance user experience. - Updated file picker to support `.doc` files in addition to `.docx`. - Added unique item handling in `Ui` class to prevent duplicates in apparat list. - General code cleanup and consistency improvements across various files.
This commit is contained in:
@@ -1,5 +1,5 @@
|
||||
import zipfile
|
||||
from typing import Any
|
||||
from typing import Any, Optional
|
||||
|
||||
import fitz # PyMuPDF
|
||||
import pandas as pd
|
||||
@@ -35,7 +35,7 @@ def word_docx_to_csv(path: str) -> list[pd.DataFrame]:
|
||||
return m_data
|
||||
|
||||
|
||||
def get_fach(path: str) -> str:
|
||||
def get_fach(path: str) -> Optional[str]:
|
||||
document = zipfile.ZipFile(path)
|
||||
xml_data = document.read("word/document.xml")
|
||||
document.close()
|
||||
@@ -49,10 +49,12 @@ def get_fach(path: str) -> str:
|
||||
# get the data in the w:t
|
||||
for run in para.find_all("w:r"):
|
||||
data = run.find("w:t")
|
||||
return data.contents[0]
|
||||
if data and data.contents:
|
||||
return data.contents[0]
|
||||
return None
|
||||
|
||||
|
||||
def makeDict():
|
||||
def makeDict() -> dict[str, Optional[str]]:
|
||||
return {
|
||||
"work_author": None,
|
||||
"section_author": None,
|
||||
@@ -70,8 +72,8 @@ def makeDict():
|
||||
}
|
||||
|
||||
|
||||
def tuple_to_dict(tlist: tuple, type: str) -> dict:
|
||||
ret = []
|
||||
def tuple_to_dict(tlist: tuple, type: str) -> list[dict[str, Optional[str]]]:
|
||||
ret: list[dict[str, Optional[str]]] = []
|
||||
for line in tlist:
|
||||
data = makeDict()
|
||||
if type == "Monografien":
|
||||
@@ -111,7 +113,7 @@ def tuple_to_dict(tlist: tuple, type: str) -> dict:
|
||||
return ret
|
||||
|
||||
|
||||
def elsa_word_to_csv(path: str):
|
||||
def elsa_word_to_csv(path: str) -> tuple[list[dict[str, Optional[str]]], str]:
|
||||
doc = Document(path)
|
||||
# # print all lines in doc
|
||||
doctype = [para.text for para in doc.paragraphs if para.text != ""][-1]
|
||||
|
||||
Reference in New Issue
Block a user