Files
SemesterapparatsManager/src/logic/xmlparser.py
WorldTeacher 0406fe4f6f Refactor and enhance type hints across multiple modules
- Updated the `from_tuple` method in `Prof` class to specify return type.
- Added type hints for various methods in `LehmannsClient`, `OpenAI`, `WebRequest`, and `ZoteroController` classes to improve code clarity and type safety.
- Modified `pdf_to_csv` function to return a string instead of a DataFrame.
- Enhanced error handling and type hints in `wordparser` and `xmlparser` modules.
- Removed unused UI file `Ui_medianadder.ts`.
- Improved the layout and structure of the `semesterapparat_ui` to enhance user experience.
- Updated file picker to support `.doc` files in addition to `.docx`.
- Added unique item handling in `Ui` class to prevent duplicates in apparat list.
- General code cleanup and consistency improvements across various files.
2025-10-21 09:09:54 +02:00

68 lines
2.6 KiB
Python

import xml.etree.ElementTree as ET
from src.logic.dataclass import Apparat, BookData, SemapDocument, XMLMailSubmission
from src.logic.semester import Semester
def parse_xml_submission(xml_string: str) -> XMLMailSubmission:
    """Parse the XML payload of a mail submission into an ``XMLMailSubmission``.

    The document is expected to contain a ``<static>`` element whose children
    carry the submitter data (``name``, ``lastname``, ``title``, ``telno``,
    ``mail``, ``apparatsname``, ``subject``, ``semester``) and a ``<books>``
    element with one child element per book (``authorname``, ``year``,
    ``title``, ``signature``).

    A book's ``year`` text may carry the edition after a slash
    (``"<year>/<edition>"``); it is split into the two ``BookData`` fields.
    A book without a ``year`` gets ``None`` for both fields, and a missing
    ``<books>`` element yields an empty book list.

    The ``<static>`` element and its ``telno`` and ``semester`` entries are
    required: parsing fails with an exception when they are missing or
    malformed (``telno`` must be parseable by ``int``; ``semester`` must
    consist of two whitespace-separated tokens, term first and numeric year
    second).

    Args:
        xml_string: The XML document as text.

    Returns:
        The populated submission.

    Raises:
        xml.etree.ElementTree.ParseError: If ``xml_string`` is not
            well-formed XML.
    """
    # NOTE(review): xml.etree.ElementTree is not hardened against maliciously
    # constructed XML. This input originates from mail, so consider a hardened
    # parser if the sender is not trusted.
    root = ET.fromstring(xml_string)

    static_info = {child.tag: child.text for child in root.find("static")}

    # Compare against None explicitly: an Element without children is falsy,
    # so a plain truth test would confuse "empty" with "absent".
    books_element = root.find("books")
    book_elements = books_element if books_element is not None else []

    books_info = []
    for book_element in book_elements:
        details = {detail.tag: detail.text for detail in book_element}

        # An empty or absent <year> has no text; guard before looking for "/".
        raw_year = details.get("year")
        if raw_year and "/" in raw_year:
            year, edition = raw_year.split("/")[:2]
        else:
            year, edition = raw_year, None

        books_info.append(
            BookData(
                author=details.get("authorname"),
                year=year,
                edition=edition,
                title=details.get("title"),
                signature=details.get("signature"),
            )
        )

    submission = XMLMailSubmission()
    submission.name = static_info.get("name")
    submission.lastname = static_info.get("lastname")
    submission.title = static_info.get("title")
    # NOTE(review): int() discards leading zeros and rejects separators such
    # as "-" or "/"; confirm that an integer is the intended representation.
    submission.telno = int(static_info.get("telno"))
    submission.email = static_info.get("mail")
    submission.app_name = static_info.get("apparatsname")
    submission.subject = static_info.get("subject")

    # First token is the term, second token is the year.
    semester_parts = static_info.get("semester").split()
    submission.semester = Semester(
        semester=semester_parts[0],
        year=int(semester_parts[1]),
    )
    submission.books = books_info
    return submission
def eml_parser(path: str) -> XMLMailSubmission:
    """Load an EML file and parse the XML carried in its body.

    Everything up to the first blank line is treated as the mail header block
    and discarded; the remainder is printed and handed to
    ``parse_xml_submission``.

    Args:
        path: Location of the EML file, read as UTF-8 text.

    Returns:
        The submission parsed from the mail body.

    Raises:
        IndexError: If the file contains no blank line separating the
            headers from the body.
    """
    with open(path, "r", encoding="utf-8") as eml_file:
        raw_mail = eml_file.read()

    # maxsplit=1 keeps any blank lines inside the body intact.
    body = raw_mail.split("\n\n", 1)[1]

    print("EML content loaded, parsing XML...")
    print(body)
    return parse_xml_submission(body)
def eml_to_semap(xml_mail: "XMLMailSubmission | str") -> SemapDocument:
    """Build a ``SemapDocument`` from a mail submission.

    Args:
        xml_mail: Either an already parsed ``XMLMailSubmission`` or the path
            of an EML file, which is then loaded through ``eml_parser``.

    Returns:
        A document holding the submission's apparat (name and subject),
        semester and books.
    """
    # The parameter is annotated as a parsed submission, yet it used to be
    # passed unconditionally to eml_parser(), which opens it as a file path.
    # Accept both forms so that either kind of caller works.
    if isinstance(xml_mail, XMLMailSubmission):
        submission = xml_mail
    else:
        submission = eml_parser(xml_mail)

    # NOTE: the submitter (name, lastname, email) is not transferred to the
    # document; only apparat, semester and books are populated here.
    return SemapDocument(
        apparat=Apparat(name=submission.app_name, subject=submission.subject),
        semester=submission.semester,
        books=submission.books,
    )