minor and major reworks: rename swb to SRU, add a test for pdf parsing
major: rework mail to send mail as plaintext instead of html, preventing the bleed-in of html text
This commit is contained in:
67
src/logic/xmlparser.py
Normal file
67
src/logic/xmlparser.py
Normal file
@@ -0,0 +1,67 @@
|
||||
import xml.etree.ElementTree as ET
|
||||
|
||||
from src.logic.dataclass import Apparat, BookData, SemapDocument, XMLMailSubmission
|
||||
from src.logic.semester import Semester
|
||||
|
||||
|
||||
def parse_xml_submission(xml_string: str) -> XMLMailSubmission:
    """
    Parse an XML string representing a mail submission and return an XMLMailSubmission object.

    The root element is searched for two direct children:

    * ``<static>``: one child element per metadata field (``name``,
      ``lastname``, ``title``, ``telno``, ``mail``, ``apparatsname``,
      ``subject``, ``semester``).
    * ``<books>``: one child element per book, each carrying ``authorname``,
      ``year``, ``title`` and ``signature`` children.

    Text fields that are absent from the document are stored as ``None``.
    A ``year`` of the form ``"<year>/<edition>"`` is split into the book's
    year and edition; otherwise the edition is ``None``.

    Args:
        xml_string: The XML document as text.

    Returns:
        The populated XMLMailSubmission.

    Raises:
        xml.etree.ElementTree.ParseError: If ``xml_string`` is not
            well-formed XML.
        ValueError: If ``semester`` is missing or does not consist of at
            least ``"<term> <year>"``, or if ``telno`` or the semester year
            is not an integer.
    """
    submission = XMLMailSubmission()
    root = ET.fromstring(xml_string)

    # Compare against None explicitly: an Element without children is falsy,
    # so a plain truthiness test would misreport an empty element as missing.
    static_data = root.find("static")
    static_info = (
        {} if static_data is None else {child.tag: child.text for child in static_data}
    )

    books = root.find("books")
    books_info = []
    for book_element in () if books is None else books:
        details = {detail.tag: detail.text for detail in book_element}

        # "2020/3" carries year and edition in one field; a missing or empty
        # year must not crash the whole submission.
        raw_year = details.get("year")
        if raw_year and "/" in raw_year:
            year_parts = raw_year.split("/")
            year, edition = year_parts[0], year_parts[1]
        else:
            year, edition = raw_year, None

        books_info.append(
            BookData(
                author=details.get("authorname"),
                year=year,
                edition=edition,
                title=details.get("title"),
                signature=details.get("signature"),
            )
        )

    # Extract static data
    submission.name = static_info.get("name")
    submission.lastname = static_info.get("lastname")
    submission.title = static_info.get("title")
    # Like the other optional fields, a missing phone number becomes None
    # instead of failing inside int().
    telno_text = static_info.get("telno")
    submission.telno = int(telno_text) if telno_text else None
    submission.email = static_info.get("mail")
    submission.app_name = static_info.get("apparatsname")
    submission.subject = static_info.get("subject")

    # The semester is "<term> <year>"; report a malformed value explicitly
    # rather than surfacing an AttributeError/IndexError from split().
    semester_text = static_info.get("semester")
    semester_parts = semester_text.split() if semester_text else []
    if len(semester_parts) < 2:
        raise ValueError(
            f"Submission has no valid semester (expected '<term> <year>'): {semester_text!r}"
        )
    sem_term, sem_year = semester_parts[0], semester_parts[1]
    submission.semester = Semester(semester=sem_term, year=int(sem_year))

    submission.books = books_info
    return submission
|
||||
|
||||
|
||||
def eml_parser(path: str) -> XMLMailSubmission:
    """
    Load an EML file and parse its body as an XML mail submission.

    Everything up to the first blank line is treated as the mail headers and
    discarded; the remainder is handed verbatim to ``parse_xml_submission``.
    No MIME or transfer-encoding decoding is performed on the body.

    Args:
        path: Path of the EML file, read as UTF-8 text.

    Returns:
        The XMLMailSubmission parsed from the mail body.

    Raises:
        OSError: If the file cannot be opened or read.
        ValueError: If the file contains no blank line separating the
            headers from the body.
    """
    with open(path, "r", encoding="utf-8") as file:
        raw_message = file.read()

    # partition() reports a missing separator explicitly instead of failing
    # with an IndexError the way split(...)[1] would.
    _headers, separator, xml_content = raw_message.partition("\n\n")
    if not separator:
        raise ValueError(
            f"No blank line separating headers from body in EML file: {path!r}"
        )

    # NOTE(review): debug output kept as-is; it echoes the full mail body
    # (including the submitter's contact data) to stdout.
    print("EML content loaded, parsing XML...")
    print(xml_content)
    return parse_xml_submission(xml_content)
|
||||
|
||||
|
||||
def eml_to_semap(path: str) -> SemapDocument:
    """
    Convert the mail submission stored in an EML file into a SemapDocument.

    The file is parsed with ``eml_parser``; the resulting submission's
    apparat name and subject are wrapped in an ``Apparat``, and its semester
    and book list are passed through unchanged.

    Args:
        path: Path of the EML file to convert.

    Returns:
        A SemapDocument built from the parsed submission.
    """
    parsed = eml_parser(path)
    apparat = Apparat(name=parsed.app_name, subject=parsed.subject)
    # The submitter (name, lastname, email) is not carried over into the
    # document here; only apparat, semester and books are set.
    return SemapDocument(
        apparat=apparat,
        semester=parsed.semester,
        books=parsed.books,
    )
|
||||
Reference in New Issue
Block a user