Files
SemesterapparatsManager/src/parsers/xml_parser.py

68 lines
2.6 KiB
Python

import xml.etree.ElementTree as ET
from src.core.models import Apparat, BookData, SemapDocument, XMLMailSubmission
from src.core.semester import Semester
def parse_xml_submission(xml_string: str) -> XMLMailSubmission:
"""
Parse an XML string representing a mail submission and return an XMLMailSubmission object.
"""
submission = XMLMailSubmission()
root = ET.fromstring(xml_string)
static_data = root.find("static")
static_info = {child.tag: child.text for child in static_data}
books = root.find("books")
books_info = []
for book in books:
book_details = {detail.tag: detail.text for detail in book}
book = BookData(
author=book_details.get("authorname"),
year=book_details.get("year").split("/")[0]
if "/" in book_details.get("year")
else book_details.get("year"),
edition=book_details.get("year").split("/")[1]
if "/" in book_details.get("year")
else None,
title=book_details.get("title"),
signature=book_details.get("signature"),
)
books_info.append(book)
# Extract static data
submission.name = static_info.get("name")
submission.lastname = static_info.get("lastname")
submission.title = static_info.get("title")
submission.telno = int(static_info.get("telno"))
submission.email = static_info.get("mail")
submission.app_name = static_info.get("apparatsname")
submission.subject = static_info.get("subject")
sem_year = static_info.get("semester").split()[1]
sem_term = static_info.get("semester").split()[0]
submission.semester = Semester(semester=sem_term, year=int(sem_year))
submission.books = books_info
# Extract book information
# book_info = []
# for book in books:
# book_details = {detail.tag: detail.text for detail in book}
# book_info.append(book_details)
return submission
def eml_parser(path: str) -> XMLMailSubmission:
with open(path, "r", encoding="utf-8") as file:
xml_content = file.read().split("\n\n", 1)[1] # Skip headers
print("EML content loaded, parsing XML...")
print(xml_content)
return parse_xml_submission(xml_content)
def eml_to_semap(xml_mail: XMLMailSubmission) -> SemapDocument:
submission = eml_parser(xml_mail)
semap_doc = SemapDocument(
# prof=Prof(name=submission.name, lastname=submission.lastname, email=submission.email),
apparat=Apparat(name=submission.app_name, subject=submission.subject),
semester=submission.semester,
books=submission.books,
)
return semap_doc