68 lines
2.6 KiB
Python
68 lines
2.6 KiB
Python
import xml.etree.ElementTree as ET
|
|
|
|
from src.core.models import Apparat, BookData, SemapDocument, XMLMailSubmission
|
|
from src.core.semester import Semester
|
|
|
|
|
|
def parse_xml_submission(xml_string: str) -> XMLMailSubmission:
    """Parse the XML payload of a mail submission into an XMLMailSubmission.

    The document is read as follows:

    * the children of the ``static`` element supply the submitter and
      apparatus fields (``name``, ``lastname``, ``title``, ``telno``,
      ``mail``, ``apparatsname``, ``subject``, ``semester``);
    * every child of the ``books`` element becomes one ``BookData`` built
      from its ``authorname``, ``year``, ``title`` and ``signature``
      children.

    A book's ``year`` text may carry an edition after a slash
    (``"<year>/<edition>"``); the part before the first slash is stored as
    the year and the part after it as the edition. A missing or empty
    ``year`` yields ``year=None`` and ``edition=None``.

    Args:
        xml_string: The XML document as text.

    Returns:
        The populated submission. ``telno`` is only assigned when the
        document provides one; otherwise the attribute keeps whatever value
        ``XMLMailSubmission()`` gave it.

    Raises:
        xml.etree.ElementTree.ParseError: If ``xml_string`` is not
            well-formed XML.
        ValueError: If ``semester`` is missing or not of the form
            ``"<term> <year>"``, or if ``telno`` / the semester year is not
            an integer.
    """
    # NOTE(review): the stdlib XML parsers are not hardened against
    # maliciously constructed documents. If submissions can come from
    # untrusted senders, consider a hardened parser - confirm with the owner.
    root = ET.fromstring(xml_string)

    # ``find`` returns None when the element is absent. Compare against None
    # explicitly: an Element without children is falsy, so a plain truth test
    # would confuse "empty" with "missing".
    static_node = root.find("static")
    static_info = (
        {child.tag: child.text for child in static_node}
        if static_node is not None
        else {}
    )

    books_node = root.find("books")
    book_nodes = books_node if books_node is not None else ()

    books_info = []
    for book_node in book_nodes:
        details = {detail.tag: detail.text for detail in book_node}

        raw_year = details.get("year")
        if raw_year is None:
            # <year/> or no <year> at all: nothing to split.
            year, edition = None, None
        else:
            year_parts = raw_year.split("/")
            year = year_parts[0]
            edition = year_parts[1] if len(year_parts) > 1 else None

        books_info.append(
            BookData(
                author=details.get("authorname"),
                year=year,
                edition=edition,
                title=details.get("title"),
                signature=details.get("signature"),
            )
        )

    # The semester is the only field that needs structural validation, so
    # check it before touching the submission object.
    raw_semester = static_info.get("semester")
    semester_parts = raw_semester.split() if raw_semester else []
    if len(semester_parts) < 2:
        raise ValueError(
            f"Expected semester as '<term> <year>', got {raw_semester!r}"
        )
    sem_term, sem_year = semester_parts[0], semester_parts[1]

    submission = XMLMailSubmission()
    submission.name = static_info.get("name")
    submission.lastname = static_info.get("lastname")
    submission.title = static_info.get("title")
    raw_telno = static_info.get("telno")
    if raw_telno is not None:
        # int() still raises ValueError for non-numeric phone numbers.
        submission.telno = int(raw_telno)
    submission.email = static_info.get("mail")
    submission.app_name = static_info.get("apparatsname")
    submission.subject = static_info.get("subject")
    submission.semester = Semester(semester=sem_term, year=int(sem_year))
    submission.books = books_info
    return submission
|
|
|
|
|
|
def eml_parser(path: str) -> XMLMailSubmission:
    """Read a mail file and parse the XML that follows its header section.

    Everything up to the first blank line is treated as mail headers and
    discarded; the remainder of the file is handed to
    ``parse_xml_submission``.

    Args:
        path: Location of the mail file, read as UTF-8 text.

    Returns:
        The submission parsed from the body of the file.

    Raises:
        OSError: If the file cannot be opened or read.
        ValueError: If the file contains no blank line separating headers
            from the body.
    """
    with open(path, "r", encoding="utf-8") as file:
        content = file.read()

    # partition() reports whether the separator was found, so a file without
    # a header/body split gets a clear error instead of an IndexError.
    _headers, separator, xml_content = content.partition("\n\n")
    if not separator:
        raise ValueError(
            f"No blank line separating headers from body in {path!r}"
        )

    # NOTE(review): this echoes the complete submission to stdout.
    print("EML content loaded, parsing XML...")
    print(xml_content)
    return parse_xml_submission(xml_content)
|
|
|
|
|
|
def eml_to_semap(xml_mail: str) -> SemapDocument:
    """Build a SemapDocument from a mail file.

    Args:
        xml_mail: Path of the mail file. The value is passed unchanged to
            ``eml_parser``, which opens it as a file, so it must be a path
            and not an already parsed submission.

    Returns:
        A document carrying the apparatus (name and subject), the semester
        and the book list taken from the parsed submission.
    """
    submission = eml_parser(xml_mail)
    # NOTE: the submitter's details (name, lastname, email) are available on
    # the submission but are not forwarded to the document.
    return SemapDocument(
        apparat=Apparat(name=submission.app_name, subject=submission.subject),
        semester=submission.semester,
        books=submission.books,
    )
|