From 0406fe4f6f04c4171517ed87406e5986c883170f Mon Sep 17 00:00:00 2001 From: WorldTeacher Date: Tue, 21 Oct 2025 09:09:54 +0200 Subject: [PATCH] Refactor and enhance type hints across multiple modules - Updated the `from_tuple` method in `Prof` class to specify return type. - Added type hints for various methods in `LehmannsClient`, `OpenAI`, `WebRequest`, and `ZoteroController` classes to improve code clarity and type safety. - Modified `pdf_to_csv` function to return a string instead of a DataFrame. - Enhanced error handling and type hints in `wordparser` and `xmlparser` modules. - Removed unused UI file `Ui_medianadder.ts`. - Improved the layout and structure of the `semesterapparat_ui` to enhance user experience. - Updated file picker to support `.doc` files in addition to `.docx`. - Added unique item handling in `Ui` class to prevent duplicates in apparat list. - General code cleanup and consistency improvements across various files. --- src/__init__.py | 4 +- src/backend/catalogue.py | 8 +- src/backend/database.py | 4 +- src/logic/SRU.py | 10 +- src/logic/c_sort.py | 51 ++--- src/logic/constants.py | 360 +++++++++++++++---------------- src/logic/dataclass.py | 3 +- src/logic/lehmannsapi.py | 26 +-- src/logic/openai.py | 37 ++-- src/logic/pdfparser.py | 3 +- src/logic/semester.py | 14 ++ src/logic/settings.py | 2 +- src/logic/webrequest.py | 14 +- src/logic/wordparser.py | 16 +- src/logic/xmlparser.py | 4 +- src/logic/zotero.py | 43 ++-- src/ui/dialogs/Ui_medianadder.ts | 4 - src/ui/dialogs/docuprint.py | 6 +- src/ui/semesterapparat_ui.ui | 122 +++++------ src/ui/semesterapparat_ui_ui.py | 27 ++- src/ui/userInterface.py | 32 ++- src/ui/widgets/filepicker.py | 2 +- src/utils/blob.py | 2 +- src/utils/documentation.py | 13 +- src/utils/pickles.py | 4 +- src/utils/richtext.py | 22 +- 26 files changed, 437 insertions(+), 396 deletions(-) delete mode 100644 src/ui/dialogs/Ui_medianadder.ts diff --git a/src/__init__.py b/src/__init__.py index 1062a81..93fbfbc 100644 --- a/src/__init__.py +++ b/src/__init__.py @@ -3,6 +3,8 @@ __author__ = "Alexander Kirchner" __all__ = ["__version__", "__author__", "Icon", "settings"] import os +from pathlib import Path +from typing import Union from appdirs import AppDirs @@ -18,7 +20,7 @@ if not os.path.exists(CONFIG_DIR): # type: ignore settings = Config(f"{CONFIG_DIR}/config.yaml") -DATABASE_DIR = ( # type: ignore +DATABASE_DIR: Union[Path, str] = ( # type: ignore app.user_config_dir if settings.database.path is None else settings.database.path # type: ignore ) if not os.path.exists(DATABASE_DIR): # type: ignore diff --git a/src/backend/catalogue.py b/src/backend/catalogue.py index be10401..c9f1a82 100644 --- a/src/backend/catalogue.py +++ b/src/backend/catalogue.py @@ -1,3 +1,5 @@ +from typing import List + import regex import requests from bs4 import BeautifulSoup @@ -33,13 +35,13 @@ class Catalogue: response = requests.get(link, timeout=self.timeout) return response.text - def get_book_links(self, searchterm: str): + def get_book_links(self, searchterm: str) -> List[str]: response = self.search_book(searchterm) soup = BeautifulSoup(response, "html.parser") links = soup.find_all("a", class_="title getFull") - res = [] + res: List[str] = [] for link in links: - res.append(BASE + link["href"]) + res.append(BASE + link["href"]) # type: ignore return res def get_book(self, searchterm: str): diff --git a/src/backend/database.py b/src/backend/database.py index 99a945a..a31651f 100644 --- a/src/backend/database.py +++ b/src/backend/database.py @@ -144,7 +144,7 @@ class Database: self.create_tables() self.insertSubjects() - def getElsaMediaID(self, work_author, signature, pages): + def getElsaMediaID(self, work_author: str, signature: str, pages: str): query = ( "SELECT id FROM elsa_media WHERE work_author=? AND signature=? AND pages=?" ) @@ -160,7 +160,7 @@ class Database: query = "SELECT type FROM elsa_media WHERE id=?" return self.query_db(query, (id,), one=True)[0] - def get_db_contents(self) -> Union[List[Tuple], None]: + def get_db_contents(self) -> Union[List[Tuple[Any]], None]: """ Get the contents of the diff --git a/src/logic/SRU.py b/src/logic/SRU.py index d31e582..a838e40 100644 --- a/src/logic/SRU.py +++ b/src/logic/SRU.py @@ -86,7 +86,7 @@ def _text(elem: Optional[ET.Element]) -> str: return (elem.text or "") if elem is not None else "" -def _req_text(parent: ET.Element, path: str) -> str: +def _req_text(parent: ET.Element, path: str) -> Optional[str]: el = parent.find(path, NS) if el is None or el.text is None: return None @@ -98,7 +98,7 @@ def parse_marc_record(record_el: ET.Element) -> MarcRecord: record_el is the element (default ns MARC in your sample) """ # leader - leader_text = _req_text(record_el, "marc:leader") + leader_text = _req_text(record_el, "marc:leader") or "" # controlfields controlfields: List[ControlField] = [] @@ -124,8 +124,8 @@ def parse_marc_record(record_el: ET.Element) -> MarcRecord: def parse_record(zs_record_el: ET.Element) -> Record: - recordSchema = _req_text(zs_record_el, "zs:recordSchema") - recordPacking = _req_text(zs_record_el, "zs:recordPacking") + recordSchema = _req_text(zs_record_el, "zs:recordSchema") or "" + recordPacking = _req_text(zs_record_el, "zs:recordPacking") or "" # recordData contains a MARC with default MARC namespace in your sample recordData_el = zs_record_el.find("zs:recordData", NS) @@ -140,7 +140,7 @@ def parse_record(zs_record_el: ET.Element) -> Record: marc_record = parse_marc_record(marc_record_el) - recordPosition = int(_req_text(zs_record_el, "zs:recordPosition")) + recordPosition = int(_req_text(zs_record_el, "zs:recordPosition") or "0") return Record( recordSchema=recordSchema, recordPacking=recordPacking, diff --git a/src/logic/c_sort.py b/src/logic/c_sort.py index ae0285d..aafbc80 100644 --- a/src/logic/c_sort.py +++ b/src/logic/c_sort.py @@ -1,36 +1,4 @@ -def parse_semester(semester: str): - """ - Parses the semester string into a sortable format. - Returns a tuple of (year, type), where type is 0 for SoSe and 1 for WiSe. - """ - if semester.startswith("SoSe"): - return int(semester.split()[1]), 0 - elif semester.startswith("WiSe"): - year_part = semester.split()[1] - start_year, _ = map(int, year_part.split("/")) - return start_year, 1 - else: - raise ValueError(f"Invalid semester format: {semester}") - - -def custom_sort(entries): - """ - Sorts the list of tuples based on the custom schema. - - :param entries: List of tuples in the format (str, int, int). - :return: Sorted list of tuples. - """ - return sorted( - entries, - key=lambda entry: ( - parse_semester(entry[0]), # Sort by semester parsed as (year, type) - entry[1], # Then by the second element of the tuple - entry[2], # Finally by the third element of the tuple - ), - ) - - -def parse_semester(semester: str): +def parse_semester(semester: str) -> tuple[int, int]: """ Parses the semester string into a sortable format. Returns a tuple of (year, type), where type is 0 for SoSe and 1 for WiSe. @@ -48,6 +16,23 @@ def parse_semester(semester: str): raise ValueError(f"Invalid semester format: {semester}") +def custom_sort(entries) -> list: + """ + Sorts the list of tuples based on the custom schema. + + :param entries: List of tuples in the format (str, int, int). + :return: Sorted list of tuples. + """ + return sorted( + entries, + key=lambda entry: ( + parse_semester(entry[0]), # Sort by semester parsed as (year, type) + entry[1], # Then by the second element of the tuple + entry[2], # Finally by the third element of the tuple + ), + ) + + def sort_semesters_list(semesters: list) -> list: """ Sorts a list of semester strings based on year and type. diff --git a/src/logic/constants.py b/src/logic/constants.py index d910e26..94f0916 100644 --- a/src/logic/constants.py +++ b/src/logic/constants.py @@ -30,184 +30,184 @@ PROF_TITLES = [ ] SEMAP_MEDIA_ACCOUNTS = { - "1": "1008000055", - "2": "1008000188", - "3": "1008000211", - "4": "1008000344", - "5": "1008000477", - "6": "1008000500", - "7": "1008000633", - "8": "1008000766", - "9": "1008000899", - "10": "1008000922", - "11": "1008001044", - "12": "1008001177", - "13": "1008001200", - "14": "1008001333", - "15": "1008001466", - "16": "1008001599", - "17": "1008001622", - "18": "1008001755", - "19": "1008001888", - "20": "1008001911", - "21": "1008002033", - "22": "1008002166", - "23": "1008002299", - "24": "1008002322", - "25": "1008002455", - "26": "1008002588", - "27": "1008002611", - "28": "1008002744", - "29": "1008002877", - "30": "1008002900", - "31": "1008003022", - "32": "1008003155", - "33": "1008003288", - "34": "1008003311", - "35": "1008003444", - "36": "1008003577", - "37": "1008003600", - "38": "1008003733", - "39": "1008003866", - "40": "1008003999", - "41": "1008004011", - "42": "1008004144", - "43": "1008004277", - "44": "1008004300", - "45": "1008004433", - "46": "1008004566", - "47": "1008004699", - "48": "1008004722", - "49": "1008004855", - "50": "1008004988", - "51": "1008005000", - "52": "1008005133", - "53": "1008005266", - "54": "1008005399", - "55": "1008005422", - "56": "1008005555", - "57": "1008005688", - "58": "1008005711", - "59": "1008005844", - "60": "1008005977", - "61": "1008006099", - "62": "1008006122", - "63": "1008006255", - "64": "1008006388", - "65": "1008006411", - "66": "1008006544", - "67": "1008006677", - "68": "1008006700", - "69": "1008006833", - "70": "1008006966", - "71": "1008007088", - "72": "1008007111", - "73": "1008007244", - "74": "1008007377", - "75": "1008007400", - "76": "1008007533", - "77": "1008007666", - "78": "1008007799", - "79": "1008007822", - "80": "1008007955", - "81": "1008008077", - "82": "1008008100", - "83": "1008008233", - "84": "1008008366", - "85": "1008008499", - "86": "1008008522", - "87": "1008008655", - "88": "1008008788", - "89": "1008008811", - "90": "1008008944", - "91": "1008009066", - "92": "1008009199", - "93": "1008009222", - "94": "1008009355", - "95": "1008009488", - "96": "1008009511", - "97": "1008009644", - "98": "1008009777", - "99": "1008009800", - "100": "1008009933", - "101": "1008010022", - "102": "1008010155", - "103": "1008010288", - "104": "1008010311", - "105": "1008010444", - "106": "1008010577", - "107": "1008010600", - "108": "1008010733", - "109": "1008010866", - "110": "1008010999", - "111": "1008011011", - "112": "1008011144", - "113": "1008011277", - "114": "1008011300", - "115": "1008011433", - "116": "1008011566", - "117": "1008011699", - "118": "1008011722", - "119": "1008011855", - "120": "1008011988", - "121": "1008012000", - "122": "1008012133", - "123": "1008012266", - "124": "1008012399", - "125": "1008012422", - "126": "1008012555", - "127": "1008012688", - "128": "1008012711", - "129": "1008012844", - "130": "1008012977", - "131": "1008013099", - "132": "1008013122", - "133": "1008013255", - "134": "1008013388", - "135": "1008013411", - "136": "1008013544", - "137": "1008013677", - "138": "1008013700", - "139": "1008013833", - "140": "1008013966", - "141": "1008014088", - "142": "1008014111", - "143": "1008014244", - "144": "1008014377", - "145": "1008014400", - "146": "1008014533", - "147": "1008014666", - "148": "1008014799", - "149": "1008014822", - "150": "1008014955", - "151": "1008015077", - "152": "1008015100", - "153": "1008015233", - "154": "1008015366", - "155": "1008015499", - "156": "1008015522", - "157": "1008015655", - "158": "1008015788", - "159": "1008015811", - "160": "1008015944", - "161": "1008016066", - "162": "1008016199", - "163": "1008016222", - "164": "1008016355", - "165": "1008016488", - "166": "1008016511", - "167": "1008016644", - "168": "1008016777", - "169": "1008016800", - "170": "1008016933", - "171": "1008017055", - "172": "1008017188", - "173": "1008017211", - "174": "1008017344", - "175": "1008017477", - "176": "1008017500", - "177": "1008017633", - "178": "1008017766", - "179": "1008017899", - "180": "1008017922", + 1: "1008000055", + 2: "1008000188", + 3: "1008000211", + 4: "1008000344", + 5: "1008000477", + 6: "1008000500", + 7: "1008000633", + 8: "1008000766", + 9: "1008000899", + 10: "1008000922", + 11: "1008001044", + 12: "1008001177", + 13: "1008001200", + 14: "1008001333", + 15: "1008001466", + 16: "1008001599", + 17: "1008001622", + 18: "1008001755", + 19: "1008001888", + 20: "1008001911", + 21: "1008002033", + 22: "1008002166", + 23: "1008002299", + 24: "1008002322", + 25: "1008002455", + 26: "1008002588", + 27: "1008002611", + 28: "1008002744", + 29: "1008002877", + 30: "1008002900", + 31: "1008003022", + 32: "1008003155", + 33: "1008003288", + 34: "1008003311", + 35: "1008003444", + 36: "1008003577", + 37: "1008003600", + 38: "1008003733", + 39: "1008003866", + 40: "1008003999", + 41: "1008004011", + 42: "1008004144", + 43: "1008004277", + 44: "1008004300", + 45: "1008004433", + 46: "1008004566", + 47: "1008004699", + 48: "1008004722", + 49: "1008004855", + 50: "1008004988", + 51: "1008005000", + 52: "1008005133", + 53: "1008005266", + 54: "1008005399", + 55: "1008005422", + 56: "1008005555", + 57: "1008005688", + 58: "1008005711", + 59: "1008005844", + 60: "1008005977", + 61: "1008006099", + 62: "1008006122", + 63: "1008006255", + 64: "1008006388", + 65: "1008006411", + 66: "1008006544", + 67: "1008006677", + 68: "1008006700", + 69: "1008006833", + 70: "1008006966", + 71: "1008007088", + 72: "1008007111", + 73: "1008007244", + 74: "1008007377", + 75: "1008007400", + 76: "1008007533", + 77: "1008007666", + 78: "1008007799", + 79: "1008007822", + 80: "1008007955", + 81: "1008008077", + 82: "1008008100", + 83: "1008008233", + 84: "1008008366", + 85: "1008008499", + 86: "1008008522", + 87: "1008008655", + 88: "1008008788", + 89: "1008008811", + 90: "1008008944", + 91: "1008009066", + 92: "1008009199", + 93: "1008009222", + 94: "1008009355", + 95: "1008009488", + 96: "1008009511", + 97: "1008009644", + 98: "1008009777", + 99: "1008009800", + 100: "1008009933", + 101: "1008010022", + 102: "1008010155", + 103: "1008010288", + 104: "1008010311", + 105: "1008010444", + 106: "1008010577", + 107: "1008010600", + 108: "1008010733", + 109: "1008010866", + 110: "1008010999", + 111: "1008011011", + 112: "1008011144", + 113: "1008011277", + 114: "1008011300", + 115: "1008011433", + 116: "1008011566", + 117: "1008011699", + 118: "1008011722", + 119: "1008011855", + 120: "1008011988", + 121: "1008012000", + 122: "1008012133", + 123: "1008012266", + 124: "1008012399", + 125: "1008012422", + 126: "1008012555", + 127: "1008012688", + 128: "1008012711", + 129: "1008012844", + 130: "1008012977", + 131: "1008013099", + 132: "1008013122", + 133: "1008013255", + 134: "1008013388", + 135: "1008013411", + 136: "1008013544", + 137: "1008013677", + 138: "1008013700", + 139: "1008013833", + 140: "1008013966", + 141: "1008014088", + 142: "1008014111", + 143: "1008014244", + 144: "1008014377", + 145: "1008014400", + 146: "1008014533", + 147: "1008014666", + 148: "1008014799", + 149: "1008014822", + 150: "1008014955", + 151: "1008015077", + 152: "1008015100", + 153: "1008015233", + 154: "1008015366", + 155: "1008015499", + 156: "1008015522", + 157: "1008015655", + 158: "1008015788", + 159: "1008015811", + 160: "1008015944", + 161: "1008016066", + 162: "1008016199", + 163: "1008016222", + 164: "1008016355", + 165: "1008016488", + 166: "1008016511", + 167: "1008016644", + 168: "1008016777", + 169: "1008016800", + 170: "1008016933", + 171: "1008017055", + 172: "1008017188", + 173: "1008017211", + 174: "1008017344", + 175: "1008017477", + 176: "1008017500", + 177: "1008017633", + 178: "1008017766", + 179: "1008017899", + 180: "1008017922", } diff --git a/src/logic/dataclass.py b/src/logic/dataclass.py index 01a95c2..ffe22f0 100644 --- a/src/logic/dataclass.py +++ b/src/logic/dataclass.py @@ -37,7 +37,7 @@ class Prof: self._title = value # add function that sets the data from a tuple - def from_tuple(self, data: tuple[Union[str, int], ...]): + def from_tuple(self, data: tuple[Union[str, int], ...]) -> "Prof": setattr(self, "id", data[0]) setattr(self, "_title", data[1]) setattr(self, "firstname", data[2]) @@ -222,6 +222,7 @@ class Subjects(Enum): for i in cls: if i.name == name: return i.id - 1 + return None @dataclass diff --git a/src/logic/lehmannsapi.py b/src/logic/lehmannsapi.py index 4be1495..e17d164 100644 --- a/src/logic/lehmannsapi.py +++ b/src/logic/lehmannsapi.py @@ -134,10 +134,10 @@ class LehmannsClient: enriched.append(r) continue - soup = BeautifulSoup(html, "html.parser") + soup = BeautifulSoup(html, "html.parser") # type: ignore # Pages - pages_node = soup.select_one( + pages_node = soup.select_one( # type: ignore "span.book-meta.meta-seiten[itemprop='numberOfPages'], " "span.book-meta.meta-seiten[itemprop='numberofpages'], " ".meta-seiten [itemprop='numberOfPages'], " @@ -151,7 +151,7 @@ class LehmannsClient: r.pages = f"{m.group(0)} Seiten" # Availability via li.availability-3 - avail_li = soup.select_one("li.availability-3") + avail_li = soup.select_one("li.availability-3") # type: ignore if avail_li: avail_text = " ".join( avail_li.get_text(" ", strip=True).split() @@ -200,12 +200,12 @@ class LehmannsClient: if not a: continue url = urljoin(BASE, a["href"].strip()) - base_title = (block.select_one(".title [itemprop='name']") or a).get_text( + base_title = (block.select_one(".title [itemprop='name']") or a).get_text( # type: ignore strip=True ) # Alternative headline => extend title - alt_tag = block.select_one(".description[itemprop='alternativeHeadline']") + alt_tag = block.select_one(".description[itemprop='alternativeHeadline']") # type: ignore alternative_headline = alt_tag.get_text(strip=True) if alt_tag else None title = ( f"{base_title} : {alternative_headline}" @@ -216,7 +216,7 @@ class LehmannsClient: # Authors from .author authors: list[str] = [] - author_div = block.select_one("div.author") + author_div = block.select_one("div.author") # type: ignore if author_div: t = author_div.get_text(" ", strip=True) t = re.sub(r"^\s*von\s+", "", t, flags=re.I) @@ -228,7 +228,7 @@ class LehmannsClient: # Media + format media_type = None book_format = None - type_text = block.select_one(".type") + type_text = block.select_one(".type") # type: ignore if type_text: t = type_text.get_text(" ", strip=True) m = re.search(r"\b(Buch|eBook|Hörbuch)\b", t) @@ -240,7 +240,7 @@ class LehmannsClient: # Year year = None - y = block.select_one("[itemprop='copyrightYear']") + y = block.select_one("[itemprop='copyrightYear']") # type: ignore if y: try: year = int(y.get_text(strip=True)) @@ -249,7 +249,7 @@ class LehmannsClient: # Edition edition = None - ed = block.select_one("[itemprop='bookEdition']") + ed = block.select_one("[itemprop='bookEdition']") # type: ignore if ed: m = re.search(r"\d+", ed.get_text(strip=True)) if m: @@ -257,15 +257,15 @@ class LehmannsClient: # Publisher publisher = None - pub = block.select_one( + pub = block.select_one( # type: ignore ".publisherprop [itemprop='name']" - ) or block.select_one(".publisher [itemprop='name']") + ) or block.select_one(".publisher [itemprop='name']") # type: ignore if pub: publisher = pub.get_text(strip=True) # ISBN-13 isbn13 = None - isbn_tag = block.select_one(".isbn [itemprop='isbn'], [itemprop='isbn']") + isbn_tag = block.select_one(".isbn [itemprop='isbn'], [itemprop='isbn']") # type: ignore if isbn_tag: digits = re.sub(r"[^0-9Xx]", "", isbn_tag.get_text(strip=True)) m = re.search(r"(97[89]\d{10})", digits) @@ -288,7 +288,7 @@ class LehmannsClient: # Image (best-effort) image = None - left_img = block.find_previous("img") + left_img = block.find_previous("img") # type: ignore if left_img and left_img.get("src"): image = urljoin(BASE, left_img["src"]) diff --git a/src/logic/openai.py b/src/logic/openai.py index 4fda61d..715be68 100644 --- a/src/logic/openai.py +++ b/src/logic/openai.py @@ -1,10 +1,12 @@ -from openai import OpenAI -from src import settings import json +from typing import Any + +from openai import OpenAI + +from src import settings - -def init_client(): +def init_client() -> OpenAI: """Initialize the OpenAI client with the API key and model from settings.""" global client, model, api_key if not settings.openAI.api_key: @@ -16,9 +18,11 @@ def init_client(): api_key = settings.openAI.api_key client = OpenAI(api_key=api_key) return client -def run_shortener(title:str, length:int): + + +def run_shortener(title: str, length: int) -> list[dict[str, Any]]: client = init_client() - response = client.responses.create( + response = client.responses.create( # type: ignore model=model, instructions="""you are a sentence shortener. The next message will contain the string to shorten and the length limit. You need to shorten the string to be under the length limit, while keeping as much detail as possible. The result may NOT be longer than the length limit. @@ -27,27 +31,28 @@ based on that, please reply only the shortened string. Give me 5 choices. if the ) answers = response.output_text return eval(answers) # type: ignore - #answers are strings in json format, so we need to convert them to a list of dicts + # answers are strings in json format, so we need to convert them to a list of dicts -def name_tester(name: str): +def name_tester(name: str) -> dict: client = init_client() - response = client.responses.create( - model = model, + response = client.responses.create( # type: ignore + model=model, instructions="""you are a name tester, You are given a name and will have to split the name into first name, last name, and if present the title. Return the name in a json format with the keys "title", "first_name", "last_name". If no title is present, set title to none. Do NOt return the answer in a codeblock, use a pure json string. Assume the names are in the usual german naming scheme""", - input = f'{{"name":"{name}"}}' + input=f'{{"name":"{name}"}}', ) answers = response.output_text return json.loads(answers) -def semester_converter(semester:str): + +def semester_converter(semester: str) -> str: client = init_client() - response = client.responses.create( - model = model, + response = client.responses.create( # type: ignore + model=model, instructions="""you are a semester converter. You will be given a string. Convert this into a string like this: SoSe YY or WiSe YY/YY+1. Do not return the answer in a codeblock, use a pure string.""", - input = semester + input=semester, ) answers = response.output_text - return answers \ No newline at end of file + return answers diff --git a/src/logic/pdfparser.py b/src/logic/pdfparser.py index 07c9409..de5e87a 100644 --- a/src/logic/pdfparser.py +++ b/src/logic/pdfparser.py @@ -1,10 +1,9 @@ # add depend path to system path -import pandas as pd from pdfquery import PDFQuery -def pdf_to_csv(path: str) -> pd.DataFrame: +def pdf_to_csv(path: str) -> str: """ Extracts the data from a pdf file and returns it as a pandas dataframe """ diff --git a/src/logic/semester.py b/src/logic/semester.py index 0a529ca..08e2b03 100644 --- a/src/logic/semester.py +++ b/src/logic/semester.py @@ -232,3 +232,17 @@ if __name__ == "__main__": # print("generate_missing:", [str(s) for s in chain]) # Parsing demo --------------------------------------------------------- + examples = [ + "SoSe 6", + "WiSe 6/7", + "WiSe 6", + "SoSe 23", + "WiSe 23/24", + "WiSe 24", + "WiSe 99/00", + "SoSe 00", + "WiSe 100/101", # test large year + ] + for ex in examples: + parsed = Semester.from_string(ex) + print(f"'{ex}' → {parsed} ({parsed.year=}, {parsed.semester=})") diff --git a/src/logic/settings.py b/src/logic/settings.py index 3b4754b..2cab463 100644 --- a/src/logic/settings.py +++ b/src/logic/settings.py @@ -13,7 +13,7 @@ class Settings: default_apps: bool = True custom_applications: list[dict] = field(default_factory=list) - def save_settings(self): + def save_settings(self) -> None: """Save the settings to the config file.""" with open("config.yaml", "w") as f: yaml.dump(self.__dict__, f) diff --git a/src/logic/webrequest.py b/src/logic/webrequest.py index cdded76..acd93b4 100644 --- a/src/logic/webrequest.py +++ b/src/logic/webrequest.py @@ -51,14 +51,14 @@ class WebRequest: log.info("Using any book") return self - def set_apparat(self, apparat: int): + def set_apparat(self, apparat: int) -> "WebRequest": self.apparat = apparat if int(self.apparat) < 10: self.apparat = f"0{self.apparat}" log.info(f"Set apparat to {self.apparat}") return self - def get_ppn(self, signature: str): + def get_ppn(self, signature: str) -> "WebRequest": self.signature = signature if "+" in signature: signature = signature.replace("+", "%2B") @@ -90,7 +90,7 @@ class WebRequest: @sleep_and_retry @limits(calls=RATE_LIMIT, period=RATE_PERIOD) - def search(self, link: str): + def search(self, link: str) -> Optional[str]: try: response = requests.get(link, timeout=self.timeout) return response.text @@ -98,7 +98,7 @@ class WebRequest: log.error(f"Request failed: {e}") return None - def get_data(self) -> Union[list[str], None]: + def get_data(self) -> Optional[list[str]]: links = self.get_book_links(self.ppn) log.debug(f"Links: {links}") return_data: list[str] = [] @@ -156,7 +156,7 @@ class WebRequest: return return_data - def get_data_elsa(self): + def get_data_elsa(self) -> Optional[list[str]]: links = self.get_book_links(self.ppn) for link in links: result = self.search(link) @@ -197,12 +197,12 @@ class BibTextTransformer: self.data = None # self.bookdata = BookData(**self.data) - def use_signature(self, signature: str): + def use_signature(self, signature: str) -> "BibTextTransformer": """use the exact signature to search for the book""" self.signature = signature return self - def get_data(self, data: Union[list[str]] = None) -> "BibTextTransformer": + def get_data(self, data: Optional[list[str]] = None) -> "BibTextTransformer": RIS_IDENT = "TY -" ARRAY_IDENT = "[kid]" COinS_IDENT = "ctx_ver" diff --git a/src/logic/wordparser.py b/src/logic/wordparser.py index a548b48..f3030cb 100644 --- a/src/logic/wordparser.py +++ b/src/logic/wordparser.py @@ -1,5 +1,5 @@ import zipfile -from typing import Any +from typing import Any, Optional import fitz # PyMuPDF import pandas as pd @@ -35,7 +35,7 @@ def word_docx_to_csv(path: str) -> list[pd.DataFrame]: return m_data -def get_fach(path: str) -> str: +def get_fach(path: str) -> Optional[str]: document = zipfile.ZipFile(path) xml_data = document.read("word/document.xml") document.close() @@ -49,10 +49,12 @@ def get_fach(path: str) -> str: # get the data in the w:t for run in para.find_all("w:r"): data = run.find("w:t") - return data.contents[0] + if data and data.contents: + return data.contents[0] + return None -def makeDict(): +def makeDict() -> dict[str, Optional[str]]: return { "work_author": None, "section_author": None, @@ -70,8 +72,8 @@ def makeDict(): } -def tuple_to_dict(tlist: tuple, type: str) -> dict: - ret = [] +def tuple_to_dict(tlist: tuple, type: str) -> list[dict[str, Optional[str]]]: + ret: list[dict[str, Optional[str]]] = [] for line in tlist: data = makeDict() if type == "Monografien": @@ -111,7 +113,7 @@ def tuple_to_dict(tlist: tuple, type: str) -> dict: return ret -def elsa_word_to_csv(path: str): +def elsa_word_to_csv(path: str) -> tuple[list[dict[str, Optional[str]]], str]: doc = Document(path) # # print all lines in doc doctype = [para.text for para in doc.paragraphs if para.text != ""][-1] diff --git a/src/logic/xmlparser.py b/src/logic/xmlparser.py index e16471f..a53fb76 100644 --- a/src/logic/xmlparser.py +++ b/src/logic/xmlparser.py @@ -56,8 +56,8 @@ def eml_parser(path: str) -> XMLMailSubmission: return parse_xml_submission(xml_content) -def eml_to_semap(path: str) -> SemapDocument: - submission = eml_parser(path) +def eml_to_semap(xml_mail: XMLMailSubmission) -> SemapDocument: + submission = eml_parser(xml_mail) semap_doc = SemapDocument( # prof=Prof(name=submission.name, lastname=submission.lastname, email=submission.email), apparat=Apparat(name=submission.app_name, subject=submission.subject), diff --git a/src/logic/zotero.py b/src/logic/zotero.py index 6c2de8b..e5847d1 100644 --- a/src/logic/zotero.py +++ b/src/logic/zotero.py @@ -1,4 +1,5 @@ from dataclasses import dataclass +from typing import Optional from pyzotero import zotero @@ -12,11 +13,11 @@ class Creator: lastName: str = None creatorType: str = "author" - def from_dict(self, data: dict): + def from_dict(self, data: dict) -> None: for key, value in data.items(): setattr(self, key, value) - def from_string(self, data: str): + def from_string(self, data: str) -> "Creator": if "," in data: self.firstName = data.split(",")[1] self.lastName = data.split(",")[0] @@ -56,7 +57,7 @@ class Book: rights: str = None extra: str = None - def to_dict(self): + def to_dict(self) -> dict: ret = {} for key, value in self.__dict__.items(): if value: @@ -95,14 +96,14 @@ class BookSection: collections = list relations = dict - def to_dict(self): + def to_dict(self) -> dict: ret = {} for key, value in self.__dict__.items(): if value: ret[key] = value return ret - def assign(self, book): + def assign(self, book) -> None: for key, value in book.__dict__.items(): if key in self.__dict__.keys(): try: @@ -142,14 +143,14 @@ class JournalArticle: collections = list relations = dict - def to_dict(self): + def to_dict(self) -> dict: ret = {} for key, value in self.__dict__.items(): if value: ret[key] = value return ret - def assign(self, book: dict): + def assign(self, book: dict) -> None: for key, value in book.__dict__.items(): if key in self.__dict__.keys(): try: @@ -164,15 +165,15 @@ class ZoteroController: def __init__(self): if self.zoterocfg.library_id is None: return - self.zot = zotero.Zotero( + self.zot = zotero.Zotero( # type: ignore self.zoterocfg.library_id, self.zoterocfg.library_type, self.zoterocfg.api_key, ) - def get_books(self): + def get_books(self) -> list: ret = [] - items = self.zot.top() + items = self.zot.top() # type: ignore for item in items: if item["data"]["itemType"] == "book": ret.append(item) @@ -180,7 +181,7 @@ class ZoteroController: # create item in zotero # item is a part of a book - def __get_data(self, isbn): + def __get_data(self, isbn) -> dict: web = WebRequest() web.get_ppn(isbn) data = web.get_data_elsa() @@ -190,7 +191,7 @@ class ZoteroController: return book # # #print(zot.item_template("bookSection")) - def createBook(self, isbn): + def createBook(self, isbn) -> Book: book = self.__get_data(isbn) bookdata = Book() @@ -209,23 +210,23 @@ class ZoteroController: bookdata.creators = authors return bookdata - def createItem(self, item): - resp = self.zot.create_items([item]) + def createItem(self, item) -> Optional[str]: + resp = self.zot.create_items([item]) # type: ignore if "successful" in resp.keys(): # #print(resp["successful"]["0"]["key"]) return resp["successful"]["0"]["key"] else: return None - def deleteItem(self, key): + def deleteItem(self, key) -> None: items = self.zot.items() for item in items: if item["key"] == key: - self.zot.delete_item(item) + self.zot.delete_item(item) # type: ignore # #print(item) break - def createHGSection(self, book: Book, data: dict): + def createHGSection(self, book: Book, data: dict) -> Optional[str]: chapter = BookSection() chapter.assign(book) chapter.pages = data["pages"] @@ -247,7 +248,7 @@ class ZoteroController: return self.createItem(chapter.to_dict()) pass - def createBookSection(self, book: Book, data: dict): + def createBookSection(self, book: Book, data: dict) -> Optional[str]: chapter = BookSection() chapter.assign(book) chapter.pages = data["pages"] @@ -258,7 +259,7 @@ class ZoteroController: return self.createItem(chapter.to_dict()) # chapter.creators - def createJournalArticle(self, journal, article): + def createJournalArticle(self, journal, article) -> Optional[str]: # #print(type(article)) journalarticle = JournalArticle() journalarticle.assign(journal) @@ -279,8 +280,8 @@ class ZoteroController: return self.createItem(journalarticle.to_dict()) - def get_citation(self, item): - title = self.zot.item( + def get_citation(self, item) -> str: + title = self.zot.item( # type: ignore item, content="bib", style="deutsche-gesellschaft-fur-psychologie", diff --git a/src/ui/dialogs/Ui_medianadder.ts b/src/ui/dialogs/Ui_medianadder.ts deleted file mode 100644 index 6401616..0000000 --- a/src/ui/dialogs/Ui_medianadder.ts +++ /dev/null @@ -1,4 +0,0 @@ - - - - diff --git a/src/ui/dialogs/docuprint.py b/src/ui/dialogs/docuprint.py index 3ffaacd..750fc10 100644 --- a/src/ui/dialogs/docuprint.py +++ b/src/ui/dialogs/docuprint.py @@ -110,10 +110,10 @@ class DocumentPrintDialog(QtWidgets.QDialog, Ui_Dialog): def on_pushButton_clicked(self): apparats: list[tuple[int, str]] = [] apps = self.db.getAllAparats(0) - apps = natsorted(apps, key=lambda x: x[4], reverse=True) + apps = natsorted(apps, key=lambda x: x.appnr, reverse=True) for app in apps: - prof = self.db.getProfById(app[2]) - data = (app[4], f"{prof.lastname} ({app[1]})") + prof = self.db.getProfById(app.prof_id) + data = (app.appnr, f"{prof.lastname} ({app.name})") apparats.append(data) SemesterDocument( semester=self.semester.value, diff --git a/src/ui/semesterapparat_ui.ui b/src/ui/semesterapparat_ui.ui index 615f845..2138d35 100644 --- a/src/ui/semesterapparat_ui.ui +++ b/src/ui/semesterapparat_ui.ui @@ -1349,7 +1349,7 @@ Die Apparatsdetails werden aus dem Dokument gelesen und eingetragen -Einige Angaben müssen ggf angepasst werden +Die gewünschten Medien werden automatisch in die Medienliste eingetragen, evtl. unvollständig, da eBooks nicht erfasst werden könnenEinige Angaben müssen ggf angepasst werden Daten aus Dokument @@ -1618,72 +1618,72 @@ Einige Angaben müssen ggf angepasst werden Admin - + - 10 - 30 - 47 - 22 + 0 + 0 + 1251 + 711 - - Aktion: + + QFrame::StyledPanel - - - - - 60 - 30 - 181 - 22 - - - - - Nutzer anlegen - - - - - Nutzer bearbeiten - - - - - Lehrperson bearbeiten - - - - - Medien bearbeiten - - - - - - - 10 - 70 - 570 - 291 - - - - - false - - - - GroupBox - - - true - - - false + + QFrame::Raised + + + + + Aktion: + + + + + + + + Nutzer anlegen + + + + + Nutzer bearbeiten + + + + + Lehrperson bearbeiten + + + + + Medien bearbeiten + + + + + + + + + false + + + + GroupBox + + + true + + + false + + + + diff --git a/src/ui/semesterapparat_ui_ui.py b/src/ui/semesterapparat_ui_ui.py index be2d9e9..adfe069 100644 --- a/src/ui/semesterapparat_ui_ui.py +++ b/src/ui/semesterapparat_ui_ui.py @@ -638,24 +638,37 @@ class Ui_MainWindow(object): self.tabWidget.addTab(self.elsatab, "") self.admin = QWidget() self.admin.setObjectName(u"admin") - self.label_21 = QLabel(self.admin) + self.frame = QFrame(self.admin) + self.frame.setObjectName(u"frame") + self.frame.setGeometry(QRect(0, 0, 1251, 711)) + self.frame.setFrameShape(QFrame.StyledPanel) + self.frame.setFrameShadow(QFrame.Raised) + self.formLayout_2 = QFormLayout(self.frame) + self.formLayout_2.setObjectName(u"formLayout_2") + self.label_21 = QLabel(self.frame) self.label_21.setObjectName(u"label_21") - self.label_21.setGeometry(QRect(10, 30, 47, 22)) - self.select_action_box = QComboBox(self.admin) + + self.formLayout_2.setWidget(0, QFormLayout.ItemRole.LabelRole, self.label_21) + + self.select_action_box = QComboBox(self.frame) self.select_action_box.addItem("") self.select_action_box.addItem("") self.select_action_box.addItem("") self.select_action_box.addItem("") self.select_action_box.setObjectName(u"select_action_box") - self.select_action_box.setGeometry(QRect(60, 30, 181, 22)) - self.admin_action = QGroupBox(self.admin) + + self.formLayout_2.setWidget(0, QFormLayout.ItemRole.FieldRole, self.select_action_box) + + self.admin_action = QGroupBox(self.frame) self.admin_action.setObjectName(u"admin_action") - self.admin_action.setGeometry(QRect(10, 70, 570, 291)) font5 = QFont() font5.setBold(False) self.admin_action.setFont(font5) self.admin_action.setFlat(True) self.admin_action.setCheckable(False) + + self.formLayout_2.setWidget(1, QFormLayout.ItemRole.FieldRole, self.admin_action) + self.tabWidget.addTab(self.admin, "") self.gridLayout.addWidget(self.tabWidget, 0, 0, 1, 1) @@ -963,7 +976,7 @@ class Ui_MainWindow(object): " hinzuf\u00fcgen", None)) #if QT_CONFIG(tooltip) self.btn_extract_data_from_document.setToolTip(QCoreApplication.translate("MainWindow", u"Die Apparatsdetails werden aus dem Dokument gelesen und eingetragen\n" -"Einige Angaben m\u00fcssen ggf angepasst werden", None)) +"Die gew\u00fcnschten Medien werden automatisch in die Medienliste eingetragen, evtl. unvollst\u00e4ndig, da eBooks nicht erfasst werden k\u00f6nnenEinige Angaben m\u00fcssen ggf angepasst werden", None)) #endif // QT_CONFIG(tooltip) self.btn_extract_data_from_document.setText(QCoreApplication.translate("MainWindow", u"Daten aus Dokument\n" "\u00fcbernehmen", None)) diff --git a/src/ui/userInterface.py b/src/ui/userInterface.py index a5c0b8f..efac025 100644 --- a/src/ui/userInterface.py +++ b/src/ui/userInterface.py @@ -373,7 +373,7 @@ class Ui(QtWidgets.QMainWindow, Ui_Semesterapparat): self.setWidget(UpdateSignatures()) self.admin_action.setTitle("Medien bearbeiten") else: - self.hideWidget() + # self.hideWidget() self.admin_action.setTitle("") def toggleButton(self, button: QtWidgets.QCheckBox): @@ -1224,12 +1224,14 @@ class Ui(QtWidgets.QMainWindow, Ui_Semesterapparat): signatures = csv_to_list(file) # add the data to the database return signatures - if file_type == "docx": + if file_type in ("docx", "doc"): data = word_to_semap(file) log.info("Converted data from semap file") log.debug("Got the data: {}", data) return data + else: + raise ValueError("Dateityp wird nicht unterstützt") def import_data_from_document(self): global valid_input @@ -1241,6 +1243,7 @@ class Ui(QtWidgets.QMainWindow, Ui_Semesterapparat): self.prof_mail.setText(data.mail) self.prof_tel_nr.setText(str(data.phoneNumber).replace("-", "")) + self.app_name.setText(data.title) if len(data.title_suggestions) > 0: # create a dialog that has a dropdown with the suggestions, and oc and cancel button. on ok return the selected text and set it as title dialog = QtWidgets.QDialog() @@ -1271,6 +1274,7 @@ class Ui(QtWidgets.QMainWindow, Ui_Semesterapparat): self.app_name.setText(dropdown.currentText().split(" [")[0].strip()) else: self.app_name.setText("CHANGEME") + # self.app_name.setText(data.title) subjects = self.db.getSubjects() subjects = [subject[1] for subject in subjects] @@ -1287,8 +1291,10 @@ class Ui(QtWidgets.QMainWindow, Ui_Semesterapparat): if data.eternal: self.check_eternal_app.setChecked(True) self.validate_semester() + if data.books != []: + self.btn_check_file_threaded(data) - def btn_check_file_threaded(self): + def btn_check_file_threaded(self, c_document: Optional[SemapDocument] = None): for runner in self.bookGrabber: if not runner.isRunning(): runner.deleteLater() @@ -1335,7 +1341,10 @@ class Ui(QtWidgets.QMainWindow, Ui_Semesterapparat): prof_id = self.db.getProfId(self.profdata) # log.debug("Prof ID is None", prof_id) - document = self.extract_document_data() + document = None + + if c_document is None or not isinstance(c_document, SemapDocument): + document = self.extract_document_data() if document is None: log.error("Document is None") elif isinstance(document, SemapDocument): @@ -1410,7 +1419,7 @@ class Ui(QtWidgets.QMainWindow, Ui_Semesterapparat): ) prof.title = self.prof_title.text() apparat = Apparat( - appnr=self.active_apparat, + appnr=int(self.drpdwn_app_nr.currentText()), name=self.app_name.text(), created_semester=self.generateSemester(), eternal=1 if self.check_eternal_app.isChecked() else 0, @@ -1433,7 +1442,8 @@ class Ui(QtWidgets.QMainWindow, Ui_Semesterapparat): return appdata = self.db.getAllAparats() # merge self.appdata and appdata, remove duplicates - self.apparats = list(set(self.apparats + appdata)) + + self.apparats = self.__uniques(self.apparats, appdata) self.apparats = natsorted(self.apparats, key=lambda x: x[4], reverse=True) self.update_apparat_list() @@ -1452,6 +1462,16 @@ class Ui(QtWidgets.QMainWindow, Ui_Semesterapparat): self.__clear_fields() return True + def __uniques(self, list1, list2): + seen = set() + unique_list = [] + for item in list1 + list2: + identifier = (item.appnr, item.name) + if identifier not in seen: + seen.add(identifier) + unique_list.append(item) + return unique_list + def send_mail_preview(self): pass diff --git a/src/ui/widgets/filepicker.py b/src/ui/widgets/filepicker.py index e8a899b..3aaefb5 100644 --- a/src/ui/widgets/filepicker.py +++ b/src/ui/widgets/filepicker.py @@ -21,7 +21,7 @@ class FilePicker: files, _ = filepicker.getOpenFileNames( caption="Open file", dir=self.last_path, - filter="Unterstützte Dateien (*.docx *.csv *.eml );;Word (*.docx);;CSV Files (*.csv);;Mail (*.eml)", + filter="Unterstützte Dateien (*.docx *.doc *.csv *.eml );;Word (*.docx *.doc);;CSV Files (*.csv);;Mail (*.eml)", ) if files: self.last_path = files[0] diff --git a/src/utils/blob.py b/src/utils/blob.py index f2c8070..d991b67 100644 --- a/src/utils/blob.py +++ b/src/utils/blob.py @@ -1,4 +1,4 @@ -def create_blob(file: str): +def create_blob(file: str) -> bytes: """ Creates a blob from a file. """ diff --git a/src/utils/documentation.py b/src/utils/documentation.py index 88789e5..9caa9be 100644 --- a/src/utils/documentation.py +++ b/src/utils/documentation.py @@ -1,9 +1,10 @@ -import os -from pyramid.config import Configurator -from wsgiref.simple_server import WSGIRequestHandler -from src import LOG_DIR import logging +import os +from wsgiref.simple_server import WSGIRequestHandler +from pyramid.config import Configurator + +from src import LOG_DIR log_path = os.path.join(LOG_DIR, "web_documentation.log") @@ -31,7 +32,7 @@ class QuietHandler(WSGIRequestHandler): pass -def website(): +def website() -> object: config = Configurator() # Set up static file serving from the 'site/' directory @@ -40,4 +41,4 @@ def website(): ) app = config.make_wsgi_app() - return app + return app # type: ignore diff --git a/src/utils/pickles.py b/src/utils/pickles.py index e943a0d..a35c206 100644 --- a/src/utils/pickles.py +++ b/src/utils/pickles.py @@ -2,9 +2,9 @@ import pickle from typing import Any -def load_pickle(data: Any): +def load_pickle(data: Any) -> Any: return pickle.loads(data) -def dump_pickle(data: Any): +def dump_pickle(data: Any) -> bytes: return pickle.dumps(data) diff --git a/src/utils/richtext.py b/src/utils/richtext.py index 9b26489..bd163cd 100644 --- a/src/utils/richtext.py +++ b/src/utils/richtext.py @@ -16,7 +16,7 @@ logger = log font = "Cascadia Mono" -def print_document(file: str): +def print_document(file: str) -> None: # send document to printer as attachment of email import smtplib from email.mime.application import MIMEApplication @@ -98,7 +98,7 @@ class SemesterDocument: self.filename = filename if full: log.info("Full document generation") - self.cleanup() + self.cleanup log.info("Cleanup done") self.make_document() log.info("Document created") @@ -221,15 +221,15 @@ class SemesterDocument: self.create_sorted_table() - def save_document(self, name): + def save_document(self, name: str) -> None: # Save the document self.doc.save(name) - def create_pdf(self): + def create_pdf(self) -> None: # Save the document import comtypes.client - word = comtypes.client.CreateObject("Word.Application") + word = comtypes.client.CreateObject("Word.Application") # type: ignore self.save_document(self.filename + ".docx") docpath = os.path.abspath(self.filename + ".docx") doc = word.Documents.Open(docpath) @@ -240,13 +240,13 @@ class SemesterDocument: log.debug("PDF saved") @property - def cleanup(self): + def cleanup(self) -> None: if os.path.exists(f"{self.filename}.docx"): os.remove(f"{self.filename}.docx") os.remove(f"{self.filename}.pdf") @property - def send(self): + def send(self) -> None: print_document(self.filename + ".pdf") log.debug("Document sent to printer") @@ -309,11 +309,11 @@ class SemapSchilder: self.doc.save(f"{self.filename}.docx") log.debug(f"Document saved as {self.filename}.docx") - def create_pdf(self): + def create_pdf(self) -> None: # Save the document import comtypes.client - word = comtypes.client.CreateObject("Word.Application") + word = comtypes.client.CreateObject("Word.Application") # type: ignore self.save_document() docpath = os.path.abspath(f"{self.filename}.docx") doc = word.Documents.Open(docpath) @@ -323,14 +323,14 @@ class SemapSchilder: word.Quit() log.debug("PDF saved") - def cleanup(self): + def cleanup(self) -> None: if os.path.exists(f"{self.filename}.docx"): os.remove(f"{self.filename}.docx") if os.path.exists(f"{self.filename}.pdf"): os.remove(f"{self.filename}.pdf") @property - def send(self): + def send(self) -> None: print_document(self.filename + ".pdf") log.debug("Document sent to printer") -- 2.49.1