From 06965db26a3dfe2434fc31e319db5c2226839c93 Mon Sep 17 00:00:00 2001
From: WorldTeacher <coding_contact@pm.me>
Date: Tue, 7 Oct 2025 14:15:10 +0200
Subject: [PATCH] minor and major reworks: rename swb to SRU, add a test for
 pdf parsing; major: rework mail to send mail as plaintext instead of html,
 preventing the bleed-in of html text

---
 src/backend/__init__.py            |   6 +-
 src/backend/catalogue.py           | 189 +++++++++++++++++-
 src/backend/database.py            |  33 +++-
 src/logic/{swb.py => SRU.py}       | 166 ++++++++++++++--
 src/logic/__init__.py              |  33 +++-
 src/logic/c_sort.py                |   2 +-
 src/logic/csvparser.py             |   3 +-
 src/logic/dataclass.py             | 190 +++++++++++++++++-
 src/logic/lehmannsapi.py           |  66 +++++--
 src/logic/pdfparser.py             |   2 +-
 src/logic/wordparser.py            | 299 ++++++++++++++++++-----------
 src/logic/xmlparser.py             |  67 +++++++
 src/logic/zotero.py                |  28 +--
 src/ui/dialogs/__init__.py         |  13 +-
 src/ui/dialogs/docuprint.py        |   3 +-
 src/ui/dialogs/mail.py             | 237 ++++++++++++++---------
 src/ui/dialogs/progress.py         |   2 +-
 src/ui/semesterapparat_ui.ui       |  26 ++-
 src/ui/semesterapparat_ui_ui.py    |  21 +-
 src/ui/userInterface.py            |  60 ++++--
 src/ui/widgets/admin_query.py      |   7 +-
 src/ui/widgets/elsa_main.py        |   4 +-
 src/ui/widgets/graph.py            |   2 +-
 src/ui/widgets/searchPage.py       |  16 +-
 src/ui/widgets/signature_update.py |   2 +-
 25 files changed, 1174 insertions(+), 303 deletions(-)
 rename src/logic/{swb.py => SRU.py} (71%)
 create mode 100644 src/logic/xmlparser.py

diff --git a/src/backend/__init__.py b/src/backend/__init__.py
index 9b79959..66a6838 100644
--- a/src/backend/__init__.py
+++ b/src/backend/__init__.py
@@ -1,6 +1,5 @@
 __all__ = [
     "AdminCommands",
-    "Semester",
     "AutoAdder",
     "AvailChecker",
     "BookGrabber",
@@ -9,16 +8,15 @@ __all__ = [
     "NewEditionCheckerThread",
     "recreateElsaFile",
     "recreateFile",
-    "Catalogue"
+    "Catalogue",
 ]
 
 from .admin_console import AdminCommands
+from .catalogue import Catalogue
 from .create_file import recreateElsaFile, recreateFile
 from .database import Database
 from .documentation_thread import DocumentationThread
-from .semester import Semester
 from .thread_bookgrabber import BookGrabber
 from .thread_neweditions import NewEditionCheckerThread
 from .threads_autoadder import AutoAdder
 from .threads_availchecker import AvailChecker
-from .catalogue import Catalogue
diff --git a/src/backend/catalogue.py b/src/backend/catalogue.py
index 439972d..fb4cff7 100644
--- a/src/backend/catalogue.py
+++ b/src/backend/catalogue.py
@@ -2,6 +2,7 @@ import sys
 from datetime import datetime
 
 import loguru
+import regex
 import requests
 from bs4 import BeautifulSoup
 
@@ -24,7 +25,7 @@ log.add(
 
 
 class Catalogue:
-    def __init__(self, timeout=5):
+    def __init__(self, timeout=15):
         self.timeout = timeout
         reachable = self.check_connection()
         if not reachable:
@@ -61,8 +62,8 @@ class Catalogue:
 
         links = self.get_book_links(searchterm)
         print(links)
-        for link in links:
-            result = self.search(link)
+        for elink in links:
+            result = self.search(elink)
             # in result search for class col-xs-12 rds-dl RDS_LOCATION
             # if found, return text of href
             soup = BeautifulSoup(result, "html.parser")
@@ -74,8 +75,45 @@ class Catalogue:
             ppn_el = soup.find(
                 "div", class_="col-xs-12 col-md-5 col-lg-4 rds-dl-head RDS_PPN"
             )
-            ppn = ppn_el.get_text(strip=True) if ppn_el else None
+            # in ppn_el, get text of div col-xs-12 col-md-7 col-lg-8 rds-dl-panel
+            ppn = (
+                ppn_el.find_next_sibling(
+                    "div", class_="col-xs-12 col-md-7 col-lg-8 rds-dl-panel"
+                ).get_text(strip=True)
+                if ppn_el
+                else None
+            )
 
+            # get edition text at div class col-xs-12 col-md-5 col-lg-4 rds-dl-head RDS_EDITION
+            edition_el = soup.find(
+                "div", class_="col-xs-12 col-md-5 col-lg-4 rds-dl-head RDS_EDITION"
+            )
+            edition = (
+                edition_el.find_next_sibling(
+                    "div", class_="col-xs-12 col-md-7 col-lg-8 rds-dl-panel"
+                ).get_text(strip=True)
+                if edition_el
+                else None
+            )
+
+            authors = soup.find_all(
+                "div", class_="col-xs-12 col-md-5 col-lg-4 rds-dl-head RDS_PERSON"
+            )
+            author = None
+            if authors:
+                # get the names of the a href links in the div col-xs-12 col-md-7 col-lg-8 rds-dl-panel
+                author_names = []
+                for author in authors:
+                    panel = author.find_next_sibling(
+                        "div", class_="col-xs-12 col-md-7 col-lg-8 rds-dl-panel"
+                    )
+                    if panel:
+                        links = panel.find_all("a")
+                        for link in links:
+                            author_names.append(link.text.strip())
+                author = (
+                    ";".join(author_names) if len(author_names) > 1 else author_names[0]
+                )
             signature = None
 
             panel = soup.select_one("div.panel-body")
@@ -121,4 +159,147 @@ class Catalogue:
                             title=title,
                             ppn=ppn,
                             signature=signature,
+                            library_location=loc.split("-")[-1],
+                            link=elink,
+                            author=author,
+                            edition=edition,
                         )
+                    else:
+                        return Book(
+                            title=title,
+                            ppn=ppn,
+                            signature=signature,
+                            library_location=loc.split("\n\n")[-1],
+                            link=elink,
+                            author=author,
+                            edition=edition,
+                        )
+
+    def get(self, ppn: str) -> Book | None:
+        # TODO: unfinished stub - meant to extract title, people, edition, year, language, pages, isbn for the PPN; for now it only fetches and parses the record page and implicitly returns None
+        link = f"https://rds.ibs-bw.de/phfreiburg/opac/RDSIndexrecord/{ppn}"
+        result = self.search(link)
+        soup = BeautifulSoup(result, "html.parser")
+
+    def get_ppn(self, searchterm: str) -> str | None:
+        """Return the PPN encoded as the last path segment of a result link.
+
+        Falls back to the last (unvalidated) segment; None without any links.
+        """
+        ppn = None
+        for link in self.get_book_links(searchterm):
+            ppn = link.split("/")[-1]
+            if ppn and regex.match(r"^\d{8,10}[X\d]?$", ppn):
+                return ppn
+        return ppn
+
+    def get_semesterapparat_number(self, searchterm: str) -> int | str:
+        """Return the Semesterapparat number of the first location found.
+
+        Only the first location block of the first hit that has one is used:
+        its "Semesterapparat-<n>" number as int, otherwise the trailing text
+        of that block (e.g. a "Handbibliothek-..." name). 0 if nothing found.
+        """
+        for link in self.get_book_links(searchterm):
+            soup = BeautifulSoup(self.search(link), "html.parser")
+            location_el = soup.find("div", class_="col-xs-12 rds-dl RDS_LOCATION")
+            if location_el is None:
+                continue
+            match = regex.search(r"Semesterapparat-(\d+)", location_el.text)
+            if match:
+                return int(match.group(1))
+            # any other location: hand back its name as shown in the catalogue
+            return location_el.text.strip().split("\n\n")[-1].strip()
+        return 0
+
+    def get_author(self, link: str) -> str | None:
+        """Return all persons of the record with id *link*, joined by "; ".
+
+        The catalogue is queried with ``kid:<link>``. When several hits list
+        persons, the names of the last such hit are returned; None if no hit
+        lists any person.
+        """
+        author = None
+        for hit in self.get_book_links(f"kid:{link}"):
+            soup = BeautifulSoup(self.search(hit), "html.parser")
+            heads = soup.find_all(
+                "div", class_="col-xs-12 col-md-5 col-lg-4 rds-dl-head RDS_PERSON"
+            )
+            if not heads:
+                continue
+            names = []
+            for head in heads:
+                panel = head.find_next_sibling(
+                    "div", class_="col-xs-12 col-md-7 col-lg-8 rds-dl-panel"
+                )
+                if panel:
+                    names.extend(a.text.strip() for a in panel.find_all("a"))
+            author = "; ".join(names)
+        return author
+
+    def get_signature(self, isbn: str):
+        """Return the shelf mark (signature) of the copy found for *isbn*.
+
+        Copies held in a Semesterapparat are preferred; otherwise the first
+        copy listed wins. Returns None when no hit lists any copy.
+        """
+        for link in self.get_book_links(f"{isbn}"):
+            soup = BeautifulSoup(self.search(link), "html.parser")
+            panel = soup.select_one("div.panel-body")
+            if not panel:
+                continue
+            # Collect the RDS_* blocks in order, using the 'space' divs as separators
+            groups = []
+            cur = {}
+            for node in panel.select(
+                "div.rds-dl.RDS_SIGNATURE, div.rds-dl.RDS_STATUS, div.rds-dl.RDS_LOCATION, div.col-xs-12.space"
+            ):
+                classes = node.get("class", [])
+                # Separator between entries
+                if "space" in classes:
+                    if cur:
+                        groups.append(cur)
+                        cur = {}
+                    continue
+
+                # Read the value from the corresponding panel cell
+                val_el = node.select_one(".rds-dl-panel")
+                val = (
+                    val_el.get_text(" ", strip=True)
+                    if val_el
+                    else node.get_text(" ", strip=True)
+                )
+
+                if "RDS_SIGNATURE" in classes:
+                    cur["signature"] = val
+                elif "RDS_STATUS" in classes:
+                    cur["status"] = val
+                elif "RDS_LOCATION" in classes:
+                    cur["location"] = val
+
+            if cur:  # append the last group if not followed by a space
+                groups.append(cur)
+            if not groups:
+                continue
+
+            # Prefer the copy shelved in a Semesterapparat, else take the first one
+            for g in groups:
+                if "semesterapparat" in g.get("location", "").lower():
+                    return g.get("signature")
+            return groups[0].get("signature")
+        print("No signature found")
+        return None
+
+    def in_library(self, ppn: str) -> bool:
+        """Return True if a ``kid:<ppn>`` search yields at least one link."""
+        if ppn is None:
+            return False
+        return len(self.get_book_links(f"kid:{ppn}")) > 0
+
+    def get_location(self, ppn: str) -> str | None:
+        """Return ``library_location`` of the book get_book finds for *ppn*."""
+        if ppn is None:
+            return None
+        book = self.get_book(f"{ppn}")
+        # get_book may come back empty-handed
+        return book.library_location if book is not None else None
diff --git a/src/backend/database.py b/src/backend/database.py
index ab47368..603e936 100644
--- a/src/backend/database.py
+++ b/src/backend/database.py
@@ -30,10 +30,9 @@ from src.backend.db import (
 from src.errors import AppPresentError, NoResultError
 from src.logic import ELSA, Apparat, ApparatData, BookData, Prof
 from src.logic.constants import SEMAP_MEDIA_ACCOUNTS
+from src.logic.semester import Semester
 from src.utils.blob import create_blob
 
-from .semester import Semester
-
 log = loguru.logger
 log.remove()
 log.add(sys.stdout, level="INFO")
@@ -1873,7 +1872,7 @@ class Database:
         Returns:
             list[tuple]: A list of tuples containing the new editions data
         """
-        query = "SELECT * FROM neweditions WHERE for_apparat=?"
+        query = "SELECT * FROM neweditions WHERE for_apparat=? AND ordered=0"
         results = self.query_db(query, (apparat_id,))
         res = []
         for result in results:
@@ -1887,9 +1886,25 @@ class Database:
         query = "UPDATE neweditions SET ordered=1 WHERE id=?"
         self.query_db(query, (newBook_id,))
 
+    def getBooksWithNewEditions(self, app_id) -> List[tuple[BookData, BookData]]:
+        """Return (old edition, new edition) pairs recorded for apparat *app_id*.
+
+        The old BookData comes from ``media`` (joined via ``old_edition_id``),
+        the new one from the ``new_bookdata`` column of ``neweditions``.
+        """
+        query = "SELECT m.bookdata, new_bookdata FROM media m JOIN neweditions n ON m.id = n.old_edition_id WHERE n.for_apparat = ?"
+        results = self.query_db(query, (app_id,))
+        return [
+            (BookData().from_string(old), BookData().from_string(new))
+            for old, new in results
+        ]
+
     def getNewEditionId(self, newBook: BookData):
-        query = "SELECT id FROM neweditions WHERE new_bookdata=?"
-        params = (newBook.to_dict,)
+        query = "SELECT id FROM neweditions WHERE new_bookdata LIKE ?"
+        args = (
+            newBook.isbn[0] if newBook.isbn and len(newBook.isbn) > 0 else newBook.ppn
+        )
+        params = (f"%{args}%",)
         data = self.query_db(query, params, one=True)
         if data:
             return data[0]
@@ -1897,6 +1912,14 @@ class Database:
             return None
 
     def insertNewEdition(self, newBook: BookData, oldBookId: int, for_apparat: int):
+        # check if new edition already in table, check based on newBook.ppn
+        check_query = "SELECT id FROM neweditions WHERE new_bookdata LIKE ?"
+        check_params = (f"%{newBook.ppn}%",)
+        data = self.query_db(check_query, check_params, one=True)
+        if data:
+            log.info("New edition already in table, skipping insert")
+            return
+
         query = "INSERT INTO neweditions (new_bookdata, old_edition_id, for_apparat) VALUES (?,?,?)"
         params = (newBook.to_dict, oldBookId, for_apparat)
 
diff --git a/src/logic/swb.py b/src/logic/SRU.py
similarity index 71%
rename from src/logic/swb.py
rename to src/logic/SRU.py
index ad88ff4..0141217 100644
--- a/src/logic/swb.py
+++ b/src/logic/SRU.py
@@ -2,6 +2,7 @@ import sys
 import xml.etree.ElementTree as ET
 from dataclasses import dataclass, field
 from datetime import datetime
+from enum import Enum
 from typing import Dict, Iterable, List, Optional, Tuple
 
 import loguru
@@ -97,7 +98,7 @@ def _text(elem: Optional[ET.Element]) -> str:
 def _req_text(parent: ET.Element, path: str) -> str:
     el = parent.find(path, NS)
     if el is None or el.text is None:
-        raise ValueError(f"Required element not found or empty: {path}")
+        return None
     return el.text
 
 
@@ -188,7 +189,7 @@ def parse_search_retrieve_response(xml_str: str) -> SearchRetrieveResponse:
 
     # Root is zs:searchRetrieveResponse
     version = _req_text(root, "zs:version")
-    numberOfRecords = int(_req_text(root, "zs:numberOfRecords"))
+    numberOfRecords = int(_req_text(root, "zs:numberOfRecords") or "0")
 
     records_parent = root.find("zs:records", NS)
     records: List[Record] = []
@@ -408,8 +409,12 @@ def book_from_marc(rec: MarcRecord) -> BookData:
         rec, "264", "c"
     )
     isbn = subfield_values(rec, "020", "a")
-
+    mediatype = first_subfield_value(rec, "338", "a")
     lang = subfield_values(rec, "041", "a")
+    authors = subfield_values(rec, "700", "a")
+    author = None
+    if authors:
+        author = "; ".join(authors)
 
     return BookData(
         ppn=ppn,
@@ -422,32 +427,162 @@ def book_from_marc(rec: MarcRecord) -> BookData:
         isbn=isbn,
         language=lang,
         link="",
+        author=author,
+        media_type=mediatype,
     )
 
 
-class SWB:
-    def __init__(self):
-        self.url = "https://sru.k10plus.de/opac-de-627!rec=1?version=1.1&operation=searchRetrieve&query={}&maximumRecords=10&recordSchema=marcxml"
-        self.bib_id = 20735
+class SWBData(Enum):  # constants that SWB() hands to Api.__init__
+    URL = "https://sru.k10plus.de/opac-de-627!rec=1?version=1.1&operation=searchRetrieve&query={}&maximumRecords=100&recordSchema=marcxml"  # "{}" receives the query via str.format
+    ARGSCHEMA = "pica."  # passed on as Api's ``prefix``
+    NAME = "SWB"  # passed on as Api's ``site``
+
+
+class DNBData(Enum):  # DNB counterpart of the SWB constants
+    URL = "https://services.dnb.de/sru/dnb?version=1.1&operation=searchRetrieve&query={}&maximumRecords=100&recordSchema=MARC21-xml"  # one "{}" placeholder in the ``query=`` parameter
+    ARGSCHEMA = ""
+    NAME = "DNB"  # Api.get compares ``site`` against this literal
+
+
+class SRUSite(Enum):  # each member's value is the constants Enum of that site
+    SWB = SWBData
+    DNB = DNBData
+
+
+RVK_ALLOWED = r"[A-Z0-9.\-\/]"  # conservative char set typically seen in RVK notations
+
+
+def find_newer_edition(
+    swb_result: BookData, dnb_result: List[BookData]
+) -> Optional[List[BookData]]:
+    """
+    Return the newest edition of ``swb_result`` found in ``dnb_result``.
+
+    New edition if:
+      - year > swb.year OR
+      - edition_number > swb.edition_number
+
+    Additional guards & preferences:
+      - If both have signatures and they differ, skip (not the same work).
+      - For duplicates (same ppn): keep the one that has a signature, and
+        prefer a signature that matches swb_result.signature.
+      - If multiple remain: keep the single 'latest' by (year desc,
+        edition_number desc, best-signature-match desc, has-signature desc).
+
+    Years are compared as numbers whether stored as int or digit string; a
+    year without digits counts as unknown.
+
+    Returns a one-element list holding the best candidate, or None.
+    """
+
+    def norm_sig(s: Optional[str]) -> str:
+        if not s:
+            return ""
+        # normalize: lowercase, collapse whitespace, keep alnum + a few separators
+        s = s.lower()
+        s = re.sub(r"\s+", " ", s).strip()
+        # remove obvious noise; adjust if your signature format differs
+        s = re.sub(r"[^a-z0-9\-_/\. ]+", "", s)
+        return s
+
+    def year_of(b: BookData) -> Optional[int]:
+        # str/int mixes must not be compared directly (TypeError, "999" > "2020")
+        digits = "".join(ch for ch in str(b.year or "") if ch.isdigit())
+        return int(digits) if digits else None
+
+    def has_sig(b: BookData) -> bool:
+        return bool(getattr(b, "signature", None))
+
+    def sig_matches_swb(b: BookData) -> bool:
+        if not has_sig(b) or not has_sig(swb_result):
+            return False
+        return norm_sig(b.signature) == norm_sig(swb_result.signature)
+
+    def pref_score(b: BookData) -> tuple[int, int]:
+        # (signature matches swb, has signature)
+        return (1 if sig_matches_swb(b) else 0, 1 if has_sig(b) else 0)
+
+    def strictly_newer(b: BookData) -> bool:
+        year, base_year = year_of(b), year_of(swb_result)
+        by_year = year is not None and base_year is not None and year > base_year
+        by_edition = (
+            b.edition_number is not None
+            and swb_result.edition_number is not None
+            and b.edition_number > swb_result.edition_number
+        )
+        return by_year or by_edition
+
+    swb_sig_norm = norm_sig(getattr(swb_result, "signature", None))
+
+    # 1) Filter to same-work AND newer
+    candidates: List[BookData] = []
+    for b in dnb_result:
+        # Skip if both signatures exist and don't match (different work)
+        b_sig = getattr(b, "signature", None)
+        if b_sig and swb_result.signature:
+            if norm_sig(b_sig) != swb_sig_norm:
+                continue  # not the same work
+
+        # Keep only if newer by rules
+        if strictly_newer(b):
+            candidates.append(b)
+
+    if not candidates:
+        return None
+
+    # 2) Dedupe by PPN, preferring signature (and matching signature if possible)
+    by_ppn: dict[Optional[str], BookData] = {}
+    for b in candidates:
+        key = getattr(b, "ppn", None)
+        prev = by_ppn.get(key)
+        if prev is None or pref_score(b) > pref_score(prev):
+            by_ppn[key] = b
+
+    # 3) If multiple remain, keep only the latest one.
+    # Order: year desc, edition_number desc, signature-match desc, has-signature desc
+    def sort_key(b: BookData):
+        year = year_of(b)
+        ed = b.edition_number if b.edition_number is not None else -1
+        # -1 sorts unknown years/editions behind every real value
+        return (year if year is not None else -1, ed, *pref_score(b))
+
+    # candidates is non-empty here, so by_ppn holds at least one entry
+    best = max(by_ppn.values(), key=sort_key)
+    return [best] if best else None
+
+
+class Api:
+    def __init__(self, site: str, url: str, prefix: str):
+        self.site = site
+        self.url = url
+        self.prefix = prefix
+        pass
 
     def get(self, query_args: Iterable[str]) -> List[Record]:
         # if any query_arg ends with =, remove it
-        query_args = [arg for arg in query_args if not arg.endswith("=")]
+        if self.site == "DNB":
+            args = [arg for arg in query_args if not arg.startswith("pica.")]
+            if args == []:
+                raise ValueError("DNB queries must include at least one search term")
+            query_args = args
+        # query_args = [f"{self.prefix}{arg}" for arg in query_args]
         query = "+and+".join(query_args)
         query = query.replace(" ", "%20").replace("&", "%26")
-
+        # query_args = [arg for arg in query_args if not arg.endswith("=")]
+        # query = "+and+".join(query_args)
+        # query = query.replace(" ", "%20").replace("&", "%26")
+        # insert the query into the url url is
         url = self.url.format(query)
 
         log.debug(url)
         headers = {
-            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3",
+            "User-Agent": f"{self.site} SRU Client, <alexander.kirchner@ph-freiburg.de>",
             "Accept": "application/xml",
             "Accept-Charset": "latin1,utf-8;q=0.7,*;q=0.3",
         }
         response = requests.get(url, headers=headers)
         if response.status_code != 200:
             raise Exception(f"Error fetching data from SWB: {response.status_code}")
-        # #print(response.text)
         data = response.content
 
         # extract top-level response
@@ -456,6 +591,7 @@ class SWB:
 
     def getBooks(self, query_args: Iterable[str]) -> List[BookData]:
         records: List[Record] = self.get(query_args)
+        print(f"{self.site} found {len(records)} records")
         books: List[BookData] = []
         # extract title from query_args if present
         title = None
@@ -476,3 +612,11 @@ class SWB:
 
     def getLinkForBook(self, book: BookData) -> str:
         results = self.getBooks()
+
+
+class SWB(Api):
+    """SRU client preconfigured with the SWBData constants."""
+
+    def __init__(self):
+        cfg = SWBData  # Api.__init__ stores site, url and prefix on the instance
+        super().__init__(cfg.NAME.value, cfg.URL.value, cfg.ARGSCHEMA.value)
diff --git a/src/logic/__init__.py b/src/logic/__init__.py
index dc21d7a..4d5690e 100644
--- a/src/logic/__init__.py
+++ b/src/logic/__init__.py
@@ -1,6 +1,35 @@
-from .dataclass import ApparatData, BookData, Prof, Apparat, ELSA
+__all__ = [
+    "custom_sort",
+    "sort_semesters_list",
+    "APP_NRS",
+    "PROF_TITLES",
+    "SEMAP_MEDIA_ACCOUNTS",
+    "csv_to_list",
+    "ELSA",
+    "Apparat",
+    "ApparatData",
+    "BookData",
+    "Prof",
+    "Semester",
+    "SemapDocument",
+    "elsa_word_to_csv",
+    "pdf_to_semap",
+    "word_docx_to_csv",
+    "word_to_semap",
+    "ZoteroController",
+    "eml_to_semap",
+]
 from .c_sort import custom_sort, sort_semesters_list
 from .constants import APP_NRS, PROF_TITLES, SEMAP_MEDIA_ACCOUNTS
 from .csvparser import csv_to_list
-from .wordparser import elsa_word_to_csv, word_docx_to_csv, word_to_semap, SemapDocument
+from .dataclass import ELSA, Apparat, ApparatData, BookData, Prof
+from .semester import Semester
+from .wordparser import (
+    SemapDocument,
+    elsa_word_to_csv,
+    pdf_to_semap,
+    word_docx_to_csv,
+    word_to_semap,
+)
+from .xmlparser import eml_to_semap
 from .zotero import ZoteroController
diff --git a/src/logic/c_sort.py b/src/logic/c_sort.py
index ef0e4c7..ae0285d 100644
--- a/src/logic/c_sort.py
+++ b/src/logic/c_sort.py
@@ -83,4 +83,4 @@ if __name__ == "__main__":
         "SoSe 25",
     ]
 
-    print(sort_semesters_list(unsorted))
+    # print(sort_semesters_list(unsorted))
diff --git a/src/logic/csvparser.py b/src/logic/csvparser.py
index e41f2e7..750d8e8 100644
--- a/src/logic/csvparser.py
+++ b/src/logic/csvparser.py
@@ -1,4 +1,5 @@
 import csv
+
 from charset_normalizer import detect
 
 
@@ -19,4 +20,4 @@ def csv_to_list(path: str) -> list[str]:
 if __name__ == "__main__":
     text = csv_to_list("C:/Users/aky547/Desktop/semap/71.csv")
     # remove linebreaks
-    # print(text)
+    # #print(text)
diff --git a/src/logic/dataclass.py b/src/logic/dataclass.py
index 0f90d54..01a95c2 100644
--- a/src/logic/dataclass.py
+++ b/src/logic/dataclass.py
@@ -3,6 +3,11 @@ from dataclasses import dataclass, field
 from enum import Enum
 from typing import Any, Optional, Union
 
+import regex
+
+from src.logic.openai import name_tester, run_shortener, semester_converter
+from src.logic.semester import Semester
+
 
 @dataclass
 class Prof:
@@ -67,21 +72,63 @@ class BookData:
     language: Union[str, list[str], None] = field(default_factory=list)
     publisher: str | None = None
     place: str | None = None
-    year: str | None = None
+    year: int | None = None
     pages: str | None = None
-    library_location: int | None = None
+    library_location: str | None = None
     in_apparat: bool | None = False
     adis_idn: str | None = None
+    old_book: Any | None = None
+    media_type: str | None = None  #
+    in_library: bool | None = None  # whether the book is in the library or not
+
+    def __post_init__(self):
+        # normalise loosely typed constructor input (location, language, year)
+        loc = self.library_location
+        self.library_location = str(loc) if loc else None
+        if isinstance(self.language, list) and self.language:
+            self.language = ",".join(l.strip() for l in self.language if l.strip())
+        # only the digits survive; note the result is a str despite the int hint
+        self.year = regex.sub(r"[^\d]", "", str(self.year)) if self.year else None
+        self.in_library = bool(self.signature)
 
     def from_dict(self, data: dict) -> "BookData":
         for key, value in data.items():
             setattr(self, key, value)
         return self
 
+    def merge(self, other: "BookData") -> "BookData":
+        """Fill gaps in this record from *other* (lists are unioned); return self."""
+        for key, value in other.__dict__.items():
+            if value is None:
+                continue
+            mine = getattr(self, key)
+            if isinstance(value, list):
+                # union: keep existing entries, append the ones that are new
+                if mine is None:
+                    mine = []
+                elif not isinstance(mine, list):
+                    mine = [mine]
+                for v in value:
+                    if v not in mine:
+                        mine.append(v)
+                setattr(self, key, mine)
+            elif mine is None or mine == "":
+                setattr(self, key, value)
+        # in language, keep only entries of at most 4 characters
+        if isinstance(self.language, list):
+            self.language = [lang for lang in self.language if len(lang) <= 4]
+        return self
+
     @property
     def to_dict(self) -> str:
         """Convert the dataclass to a dictionary."""
-        return json.dumps(self.__dict__, ensure_ascii=False)
+        data_dict = {
+            key: value for key, value in self.__dict__.items() if value is not None
+        }
+        # remove old_book from data_dict
+        if "old_book" in data_dict:
+            del data_dict["old_book"]
+        return json.dumps(data_dict, ensure_ascii=False)
 
     def from_dataclass(self, dataclass: Optional[Any]) -> None:
         if dataclass is None:
@@ -89,8 +136,15 @@ class BookData:
         for key, value in dataclass.__dict__.items():
             setattr(self, key, value)
 
+    def get_book_type(self) -> str:
+        """Return "eBook" if ``pages`` mentions "Online", else "Druckausgabe"."""
+        if self.pages and "Online" in self.pages:
+            return "eBook"
+        return "Druckausgabe"
+
     def from_string(self, data: str) -> "BookData":
         ndata = json.loads(data)
+
         return BookData(**ndata)
 
     def from_LehmannsSearchResult(self, result: Any) -> "BookData":
@@ -111,6 +165,15 @@ class BookData:
         # self.pages = str(result.pages) if result.pages else None
         return self
 
+    @property
+    def edition_number(self) -> Optional[int]:
+        """Return the first number found in ``edition``; 0 if there is none."""
+        if self.edition is None:
+            return 0
+        # first run of digits, e.g. "3. Aufl." -> 3
+        match = regex.search(r"(\d+)", self.edition)
+        return int(match.group(1)) if match else 0
+
 
 @dataclass
 class MailData:
@@ -222,3 +285,124 @@ class ELSA:
 class ApparatData:
     prof: Prof = field(default_factory=Prof)
     apparat: Apparat = field(default_factory=Apparat)
+
+
+@dataclass  # every field is optional and defaults to None
+class XMLMailSubmission:
+    name: Optional[str] = None
+    lastname: Optional[str] = None
+    title: Optional[str] = None
+    telno: Optional[int] = None
+    email: Optional[str] = None
+    app_name: Optional[str] = None
+    subject: Optional[str] = None
+    semester: Optional[Semester] = None
+    books: Optional[list[BookData]] = None  # None (not an empty list) until set
+
+
+@dataclass
+class Book:
+    """Book entry populated from a dict keyed by German form labels.
+
+    All fields are free text; a field that was never set stays None.
+    """
+
+    author: str = None
+    year: str = None
+    edition: str = None
+    title: str = None
+    location: str = None
+    publisher: str = None
+    signature: str = None
+    internal_notes: str = None
+
+    @property
+    def has_signature(self) -> bool:
+        return self.signature is not None and self.signature != ""
+
+    @property
+    def is_empty(self) -> bool:
+        """True when no field carries a value (None and "" both count as empty)."""
+        values = (self.author, self.year, self.edition, self.title)
+        values += (self.location, self.publisher, self.signature, self.internal_notes)
+        return not any(values)
+
+    def from_dict(self, data: dict[str, Any]):
+        """Fill the fields from *data*, splitting the combined columns.
+
+        "Jahr/Auflage" is split at "/" into year and edition, "Ort und Verlag"
+        at "," into location and publisher; unknown keys are ignored.
+        """
+        for key, value in data.items():
+            # strip() already removes EN SPACE (U+2002) padding, leaving ""
+            value = value.strip()
+            if key == "Autorenname(n):Nachname, Vorname":
+                self.author = value
+            elif key == "Jahr/Auflage":
+                self.year = value.split("/")[0] if "/" in value else value
+                self.edition = value.split("/")[1] if "/" in value else ""
+            elif key == "Titel":
+                self.title = value
+            elif key == "Ort und Verlag":
+                self.location = value.split(",")[0] if "," in value else value
+                self.publisher = value.split(",")[1] if "," in value else ""
+            elif key == "Standnummer":
+                self.signature = value.strip()
+            elif key == "Interne Vermerke":
+                self.internal_notes = value
+
+
+@dataclass
+class SemapDocument:  # nameSetter / renameSemester are properties read for their side effects
+    subject: str = None
+    phoneNumber: int = None
+    mail: str = None
+    title: str = None
+    title_suggestions: list[str] = None
+    semester: Union[str, Semester] = None
+    books: list[Book] = None
+    eternal: bool = False
+    personName: str = None
+    personTitle: str = None
+    title_length = 0  # no annotation: plain class attribute, not a dataclass field
+    title_max_length = 0  # no annotation: plain class attribute, not a dataclass field
+
+    def __post_init__(self):
+        self.title_suggestions = []  # always reset, whatever was passed to __init__
+
+    @property
+    def nameSetter(self):  # mutates personName/personTitle/title_* and returns None
+        data = name_tester(self.personTitle)
+        name = f"{data['last_name']}, {data['first_name']}"
+        if data["title"] is not None:
+            title = data["title"]
+            self.personTitle = title
+        self.personName = name
+        self.title_length = len(self.title) + 3 + len(self.personName.split(",")[0])
+        if self.title_length > 40:  # too long: ask for shortened titles of at most 38 - name_len
+            name_len = len(self.personName.split(",")[0])
+            self.title_max_length = 38 - name_len
+            suggestions = run_shortener(self.title, self.title_max_length)
+            for suggestion in suggestions:
+                self.title_suggestions.append(suggestion["shortened_string"])
+        else:
+            self.title_suggestions = []
+        pass
+
+    @property
+    def renameSemester(self) -> None:  # replaces self.semester (a str) with a Semester object
+        if self.semester:
+            if ", Dauer" in self.semester:  # the ", Dauer" suffix sets eternal = True
+                self.semester = self.semester.split(",")[0]
+                self.eternal = True
+                self.semester = Semester().from_string(self.semester)
+            else:
+                self.semester = Semester().from_string(
+                    semester_converter(self.semester)
+                )
+
+    @property
+    def signatures(self) -> list[str]:  # signatures of the books that have one; [] when books is None
+        if self.books is not None:
+            return [book.signature for book in self.books if book.has_signature]
+        return []
diff --git a/src/logic/lehmannsapi.py b/src/logic/lehmannsapi.py
index 6e5a4b2..4be1495 100644
--- a/src/logic/lehmannsapi.py
+++ b/src/logic/lehmannsapi.py
@@ -1,13 +1,15 @@
 from __future__ import annotations
 
 import re
-from dataclasses import dataclass, asdict, field
-from typing import Optional, List, Iterable
-from urllib.parse import urljoin, quote_plus
+from dataclasses import asdict, dataclass, field
+from typing import Iterable, List, Optional
+from urllib.parse import quote_plus, urljoin
 
 import httpx
 from bs4 import BeautifulSoup
 
+from src.logic.dataclass import BookData
+
 BASE = "https://www.lehmanns.de"
 SEARCH_URL = "https://www.lehmanns.de/search/quick?mediatype_id=&q="
 
@@ -33,9 +35,11 @@ class LehmannsSearchResult:
     image: Optional[str] = None
 
     # From detail page:
-    pages: Optional[str] = None              # "<N> Seiten"
-    buyable: bool = True                     # set in enrich_pages (detail page)
-    unavailable_hint: Optional[str] = None   # e.g. "Titel ist leider vergriffen; keine Neuauflage"
+    pages: Optional[str] = None  # "<N> Seiten"
+    buyable: bool = True  # set in enrich_pages (detail page)
+    unavailable_hint: Optional[str] = (
+        None  # e.g. "Titel ist leider vergriffen; keine Neuauflage"
+    )
 
     def to_dict(self) -> dict:
         return asdict(self)
@@ -73,31 +77,45 @@ class LehmannsClient:
         # spaces -> '+'
         return SEARCH_URL + quote_plus(title)
 
-    def search_by_title(self, title: str, limit: Optional[int] = None, strict: bool = False) -> List[LehmannsSearchResult]:
+    def search_by_title(
+        self,
+        title: str,
+        limit: Optional[int] = None,
+        strict: bool = False,
+        only_latest: bool = True,
+    ) -> List[BookData]:
         """
-        Parse the listing page only (no availability check here).
-        Use enrich_pages(...) afterwards to fetch detail pages, add 'pages',
-        and drop unbuyable items.
+        Fetch the search listing for ``title``, run enrich_pages(...) on the hits and
+        return them converted to BookData. With ``strict`` only exact title matches are
+        returned (immediately); otherwise ``limit`` and then ``only_latest`` are applied.
         """
-        url = self.build_search_url(title)
+        url = self.build_search_url(title=title)
         html = self._get(url)
         if not html:
             return []
         results = self._parse_results(html)
         self.enrich_pages(results)
+
+        results = [BookData().from_LehmannsSearchResult(r) for r in results]
         if strict:
             # filter results to only those with exact title match (case-insensitive)
             title_lower = title.lower()
             results = [r for r in results if r.title and r.title.lower() == title_lower]
-            results = [r for r in results if r.buyable]
+            # NOTE(review): the former `buyable` filter is no longer applied in this branch — confirm intended
             return results
         if limit is not None:
-            results = results[:max(0, limit)]
+            results = results[: max(0, limit)]
+        if only_latest and len(results) > 1:
+            # keep only the latest edition (highest edition number); on ties max()
+            # returns the earliest entry, so no full sort of the list is needed
+            results = [max(results, key=lambda r: r.edition_number or 0)]
         return results
 
     # ------------------- Detail enrichment & filtering -------------------
 
-    def enrich_pages(self, results: Iterable[LehmannsSearchResult], drop_unbuyable: bool = True) -> List[LehmannsSearchResult]:
+    def enrich_pages(
+        self, results: Iterable[LehmannsSearchResult], drop_unbuyable: bool = True
+    ) -> List[LehmannsSearchResult]:
         """
         Fetch each result.url, extract:
           - pages: from <span class="book-meta meta-seiten" itemprop="numberOfPages">...</span>
@@ -135,11 +153,15 @@ class LehmannsClient:
                 # Availability via li.availability-3
                 avail_li = soup.select_one("li.availability-3")
                 if avail_li:
-                    avail_text = " ".join(avail_li.get_text(" ", strip=True).split()).lower()
+                    avail_text = " ".join(
+                        avail_li.get_text(" ", strip=True).split()
+                    ).lower()
                     if "titel ist leider vergriffen" in avail_text:
                         r.buyable = False
                         if "keine neuauflage" in avail_text:
-                            r.unavailable_hint = "Titel ist leider vergriffen; keine Neuauflage"
+                            r.unavailable_hint = (
+                                "Titel ist leider vergriffen; keine Neuauflage"
+                            )
                         else:
                             r.unavailable_hint = "Titel ist leider vergriffen"
 
@@ -161,7 +183,9 @@ class LehmannsClient:
         try:
             r = self.client.get(url)
             r.encoding = "utf-8"
-            if r.status_code == 200 and "text/html" in (r.headers.get("content-type") or ""):
+            if r.status_code == 200 and "text/html" in (
+                r.headers.get("content-type") or ""
+            ):
                 return r.text
         except httpx.HTTPError:
             pass
@@ -176,12 +200,18 @@ class LehmannsClient:
             if not a:
                 continue
             url = urljoin(BASE, a["href"].strip())
-            base_title = (block.select_one(".title [itemprop='name']") or a).get_text(strip=True)
+            base_title = (block.select_one(".title [itemprop='name']") or a).get_text(
+                strip=True
+            )
 
             # Alternative headline => extend title
             alt_tag = block.select_one(".description[itemprop='alternativeHeadline']")
             alternative_headline = alt_tag.get_text(strip=True) if alt_tag else None
-            title = f"{base_title} : {alternative_headline}" if alternative_headline else base_title
+            title = (
+                f"{base_title} : {alternative_headline}"
+                if alternative_headline
+                else base_title
+            )
             description = alternative_headline
 
             # Authors from .author
@@ -227,7 +257,9 @@ class LehmannsClient:
 
             # Publisher
             publisher = None
-            pub = block.select_one(".publisherprop [itemprop='name']") or block.select_one(".publisher [itemprop='name']")
+            pub = block.select_one(
+                ".publisherprop [itemprop='name']"
+            ) or block.select_one(".publisher [itemprop='name']")
             if pub:
                 publisher = pub.get_text(strip=True)
 
diff --git a/src/logic/pdfparser.py b/src/logic/pdfparser.py
index 6be0d3f..07c9409 100644
--- a/src/logic/pdfparser.py
+++ b/src/logic/pdfparser.py
@@ -21,4 +21,4 @@ if __name__ == "__main__":
     text = pdf_to_csv("54_pdf.pdf")
     # remove linebreaks
     text = text.replace("\n", "")
-    print(text)
+    # print(text)
diff --git a/src/logic/wordparser.py b/src/logic/wordparser.py
index 4cba286..d6b137c 100644
--- a/src/logic/wordparser.py
+++ b/src/logic/wordparser.py
@@ -1,16 +1,15 @@
 import sys
 import zipfile
-from dataclasses import dataclass
-from typing import Any, Union
+from typing import Any
 
+import fitz  # PyMuPDF
 import loguru
 import pandas as pd
 from bs4 import BeautifulSoup
 from docx import Document
 
 from src import LOG_DIR
-from src.backend.semester import Semester
-from src.logic.openai import name_tester, run_shortener, semester_converter
+from src.logic.dataclass import Book, SemapDocument
 
 log = loguru.logger
 log.remove()
@@ -18,116 +17,6 @@ log.add(sys.stdout, level="INFO")
 log.add(f"{LOG_DIR}/application.log", rotation="1 MB", retention="10 days")
 
 
-letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
-
-
-@dataclass
-class Book:
-    author: str = None
-    year: str = None
-    edition: str = None
-    title: str = None
-    location: str = None
-    publisher: str = None
-    signature: str = None
-    internal_notes: str = None
-
-    @property
-    def has_signature(self) -> bool:
-        return self.signature is not None and self.signature != ""
-
-    @property
-    def is_empty(self) -> bool:
-        return all(
-            [
-                self.author == "",
-                self.year == "",
-                self.edition == "",
-                self.title == "",
-                self.location == "",
-                self.publisher == "",
-                self.signature == "",
-                self.internal_notes == "",
-            ]
-        )
-
-    def from_dict(self, data: dict[str, Any]):
-        for key, value in data.items():
-            value = value.strip()
-            if value == "\u2002\u2002\u2002\u2002\u2002":
-                value = ""
-
-            if key == "Autorenname(n):Nachname, Vorname":
-                self.author = value
-            elif key == "Jahr/Auflage":
-                self.year = value.split("/")[0] if "/" in value else value
-                self.edition = value.split("/")[1] if "/" in value else ""
-            elif key == "Titel":
-                self.title = value
-            elif key == "Ort und Verlag":
-                self.location = value.split(",")[0] if "," in value else value
-                self.publisher = value.split(",")[1] if "," in value else ""
-            elif key == "Standnummer":
-                self.signature = value.strip()
-            elif key == "Interne Vermerke":
-                self.internal_notes = value
-
-
-@dataclass
-class SemapDocument:
-    subject: str = None
-    phoneNumber: int = None
-    mail: str = None
-    title: str = None
-    title_suggestions: list[str] = None
-    semester: Union[str, Semester] = None
-    books: list[Book] = None
-    eternal: bool = False
-    personName: str = None
-    personTitle: str = None
-    title_length = 0
-    title_max_length = 0
-
-    def __post_init__(self):
-        self.title_suggestions = []
-
-    @property
-    def nameSetter(self):
-        data = name_tester(self.personTitle)
-        name = f"{data['last_name']}, {data['first_name']}"
-        if data["title"] is not None:
-            title = data["title"]
-            self.personTitle = title
-        self.personName = name
-        self.title_length = len(self.title) + 3 + len(self.personName.split(",")[0])
-        if self.title_length > 40:
-            log.warning("Title is too long")
-            name_len = len(self.personName.split(",")[0])
-            self.title_max_length = 38 - name_len
-            suggestions = run_shortener(self.title, self.title_max_length)
-            for suggestion in suggestions:
-                self.title_suggestions.append(suggestion["shortened_string"])
-        else:
-            self.title_suggestions = []
-        pass
-
-    @property
-    def renameSemester(self) -> None:
-        if ", Dauer" in self.semester:
-            self.semester = self.semester.split(",")[0]
-            self.eternal = True
-            self.semester = Semester().from_string(self.semester)
-        else:
-            log.warning("Semester {} is not valid", self.semester)
-            self.semester = Semester().from_string(semester_converter(self.semester))
-
-    @property
-    def signatures(self) -> list[str]:
-        if self.books is not None:
-            return [book.signature for book in self.books if book.has_signature]
-        return []
-
-
 def word_docx_to_csv(path: str) -> list[pd.DataFrame]:
     doc = Document(path)
     tables = doc.tables
@@ -272,7 +161,7 @@ def word_to_semap(word_path: str, ai: bool = True) -> SemapDocument:
     apparatdata = df[0]
     apparatdata = apparatdata.to_dict()
     keys = list(apparatdata.keys())
-    print(apparatdata, keys)
+    # print(apparatdata, keys)
 
     appdata = {keys[i]: keys[i + 1] for i in range(0, len(keys) - 1, 2)}
     semap.phoneNumber = appdata["Telefon:"]
@@ -309,6 +198,182 @@ def word_to_semap(word_path: str, ai: bool = True) -> SemapDocument:
     return semap
 
 
+def pdf_to_semap(pdf_path: str, ai: bool = True) -> SemapDocument:
+    """
+    Parse a Semesterapparat form PDF with PyMuPDF and return a SemapDocument.
+    - Header fields come from page 1, table rows from every page; values that wrap
+      over several lines are re-joined. Rows without a signature are skipped.
+    - With ai=True, renameSemester and nameSetter post-process semester and name.
+    """
+    semap = SemapDocument()
+
+    # ---------- helpers ----------
+    def _join_tokens(tokens: list[str]) -> str:
+        """Join tokens, preserving hyphen/URL joins across line wraps."""
+        parts = []
+        for tok in tokens:
+            # a piece ending in '-', '/' or ':' is glued to its successor without a space
+            if parts and parts[-1].endswith(("-", "/", ":")):
+                parts[-1] = parts[-1] + tok
+            else:
+                parts.append(tok)
+        return " ".join(parts).strip()
+
+    def _extract_row_values_multiline(
+        page, labels: list[str], y_window: float = 24
+    ) -> dict[str, str]:
+        """For a row of inline labels (e.g., Name/Fach/Telefon/Mail), grab text to the right of each label."""
+        rects = []
+        for lab in labels:
+            hits = page.search_for(lab)
+            if hits:
+                rects.append((lab, hits[0]))
+        if not rects:
+            return {}
+
+        rects.sort(key=lambda t: t[1].x0)
+        words = page.get_text("words")
+        out = {}
+        for i, (lab, r) in enumerate(rects):
+            x0 = r.x1 + 1
+            x1 = rects[i + 1][1].x0 - 1 if i + 1 < len(rects) else page.rect.width - 5
+            y0 = r.y0 - 3
+            y1 = r.y0 + y_window
+            toks = [w for w in words if x0 <= w[0] <= x1 and y0 <= w[1] <= y1]
+            toks.sort(key=lambda w: (w[1], w[0]))  # line, then x
+            out[lab] = _join_tokens([w[4] for w in toks])
+        return out
+
+    def _compute_columns_from_headers(page0):
+        """Find column headers (once) and derive column centers + header baseline."""
+        headers = [
+            ("Autorenname(n):", "Autorenname(n):Nachname, Vorname"),
+            ("Jahr/Auflage", "Jahr/Auflage"),
+            ("Titel", "Titel"),
+            ("Ort und Verlag", "Ort und Verlag"),
+            ("Standnummer", "Standnummer"),
+            ("Interne Vermerke", "Interne Vermerke"),
+        ]
+        found = []
+        for label, canon in headers:
+            rects = [
+                r for r in page0.search_for(label) if r.y0 > 200
+            ]  # skip top-of-form duplicates
+            if rects:
+                found.append((canon, rects[0]))
+        found.sort(key=lambda t: t[1].x0)
+        cols = [(canon, r.x0, r.x1, (r.x0 + r.x1) / 2.0) for canon, r in found]
+        header_y = min(r.y0 for _, r in found) if found else 0
+        return cols, header_y
+
+    def _extract_table_rows_from_page(
+        page, cols, header_y, y_top_margin=5, y_bottom_margin=40, y_tol=26.0
+    ):
+        """
+        Group words into logical rows (tolerant to wrapped lines), then map each word
+        to the nearest column by x-center and join tokens per column.
+        """
+        if not cols:
+            return []  # no column header was found: min() over no columns would raise ValueError
+        words = [
+            w
+            for w in page.get_text("words")
+            if w[1] > header_y + y_top_margin
+            and w[3] < page.rect.height - y_bottom_margin
+        ]
+
+        # group into row bands by y (tolerance big enough to capture wrapped lines, but below next row gap)
+        rows = []
+        for w in sorted(words, key=lambda w: w[1]):
+            y = w[1]
+            for row in rows:
+                if abs(row["y_mean"] - y) <= y_tol:
+                    row["ys"].append(y)
+                    row["y_mean"] = sum(row["ys"]) / len(row["ys"])
+                    row["words"].append(w)
+                    break
+            else:
+                rows.append({"y_mean": y, "ys": [y], "words": [w]})
+
+        # map to columns + join
+        joined_rows = []
+        for row in rows:
+            rowdict = {canon: "" for canon, *_ in cols}
+            words_by_col = {canon: [] for canon, *_ in cols}
+            for w in sorted(row["words"], key=lambda w: (w[1], w[0])):
+                xmid = (w[0] + w[2]) / 2.0
+                canon = min(cols, key=lambda c: abs(xmid - c[3]))[0]
+                words_by_col[canon].append(w[4])
+            for canon, toks in words_by_col.items():
+                rowdict[canon] = _join_tokens(toks)
+            if any(v for v in rowdict.values()):
+                joined_rows.append(rowdict)
+        return joined_rows
+
+    # ---------- read the PDF; the file is closed again once the raw data is extracted ----------
+    with fitz.open(pdf_path) as doc:
+        p0 = doc[0]
+        row1 = _extract_row_values_multiline(
+            p0,
+            ["Ihr Name und Titel:", "Ihr Fach:", "Telefon:", "Mailadresse:"],
+            y_window=22,
+        )
+        row2 = _extract_row_values_multiline(
+            p0, ["Veranstaltung:", "Semester:"], y_window=20
+        )
+        cols, header_y = _compute_columns_from_headers(p0)
+        # NOTE(review): the page-1 header_y cut-off is applied to every page — confirm
+        # that continuation pages never carry table rows above that height.
+        all_rows: list[dict[str, Any]] = []
+        for pn in range(len(doc)):
+            all_rows.extend(_extract_table_rows_from_page(doc[pn], cols, header_y))
+
+    # ---------- top-of-form fields ----------
+    name_title = row1.get("Ihr Name und Titel:", "") or ""
+    semap.subject = row1.get("Ihr Fach:", None)
+    semap.phoneNumber = row1.get("Telefon:", None)  # keep as-is (string like "682-308")
+    semap.mail = row1.get("Mailadresse:", None)
+    semap.personName = ",".join(name_title.split(",")[:-1]) if name_title else None
+    semap.personTitle = (
+        ",".join(name_title.split(",")[-1:]).strip() if name_title else None
+    )
+
+    semap.title = row2.get("Veranstaltung:", None)
+    semap.semester = row2.get("Semester:", None)
+
+    # drop the sub-header line "Nachname, Vorname" etc.
+    filtered = []
+    for r in all_rows:
+        if r.get("Autorenname(n):Nachname, Vorname", "").strip() in (
+            "",
+            "Nachname, Vorname",
+        ):
+            # skip if it's just the sub-header line
+            if all(not r[c] for c in r if c != "Autorenname(n):Nachname, Vorname"):
+                continue
+        filtered.append(r)
+
+    # build Book objects; empty rows and rows without a signature are dropped
+    booklist: list[Book] = []
+    for row in filtered:
+        b = Book()
+        b.from_dict(row)
+        if b.is_empty:
+            continue
+        if not b.has_signature:
+            continue
+        booklist.append(b)
+
+    semap.books = booklist
+
+    # optional post-processing; both properties work through their side effects
+    if ai:
+        _ = semap.renameSemester
+        _ = semap.nameSetter
+
+    return semap
+
+
 if __name__ == "__main__":
-    else_df = word_to_semap("C:/Users/aky547/Desktop/semap/db/temp/tmpzsz_hgdr.docx")
-    print(else_df)
+    else_df = pdf_to_semap("C:/Users/aky547/Dokumente/testsemap.pdf")
+    # print(else_df)
diff --git a/src/logic/xmlparser.py b/src/logic/xmlparser.py
new file mode 100644
index 0000000..e16471f
--- /dev/null
+++ b/src/logic/xmlparser.py
@@ -0,0 +1,67 @@
+import xml.etree.ElementTree as ET
+
+from src.logic.dataclass import Apparat, BookData, SemapDocument, XMLMailSubmission
+from src.logic.semester import Semester
+
+
+def parse_xml_submission(xml_string: str) -> XMLMailSubmission:
+    """
+    Parse an XML string representing a mail submission and return an XMLMailSubmission object.
+
+    Static fields come from the children of <static>, books from the children of
+    <books>. A year such as "2020/3" is split into year "2020" and edition "3".
+    A missing <books> section, <year> or <telno> is tolerated (empty list / None);
+    a missing <static> section or an unparsable <semester> still raises.
+    """
+    # NOTE(review): ElementTree offers no protection against malicious XML and this
+    # input arrives by mail; consider a hardened parser (e.g. defusedxml).
+    root = ET.fromstring(xml_string)
+    static_info = {child.tag: child.text for child in root.find("static")}
+    books = root.find("books")
+    books_info = []
+    # explicit None test: an Element without children evaluates as false
+    for book_node in (books if books is not None else ()):
+        book_details = {detail.tag: detail.text for detail in book_node}
+        raw_year = book_details.get("year")
+        year_parts = raw_year.split("/") if raw_year else [raw_year]
+        books_info.append(
+            BookData(
+                author=book_details.get("authorname"),
+                year=year_parts[0],
+                edition=year_parts[1] if len(year_parts) > 1 else None,
+                title=book_details.get("title"),
+                signature=book_details.get("signature"),
+            )
+        )
+    telno = static_info.get("telno")
+    sem_parts = static_info.get("semester").split()  # "<term> <year>"
+    submission = XMLMailSubmission()
+    submission.name = static_info.get("name")
+    submission.lastname = static_info.get("lastname")
+    submission.title = static_info.get("title")
+    submission.telno = int(telno) if telno else None
+    submission.email = static_info.get("mail")
+    submission.app_name = static_info.get("apparatsname")
+    submission.subject = static_info.get("subject")
+    submission.semester = Semester(semester=sem_parts[0], year=int(sem_parts[1]))
+    submission.books = books_info
+    return submission
+
+
+def eml_parser(path: str) -> XMLMailSubmission:
+    """Read the .eml file at ``path`` and parse the XML that follows its header block."""
+    with open(path, "r", encoding="utf-8") as file:
+        raw = file.read()
+    _headers, blank, body = raw.partition("\n\n")  # headers end at the first blank line
+    return parse_xml_submission(body if blank else raw)  # no header block: all of it is XML
+
+
+def eml_to_semap(path: str) -> SemapDocument:  # parses the .eml at `path` via eml_parser, then wraps the result
+    submission = eml_parser(path)
+    semap_doc = SemapDocument(
+        # NOTE(review): the SemapDocument fields visible in this patch include no `apparat` — verify this keyword is accepted
+        apparat=Apparat(name=submission.app_name, subject=submission.subject),
+        semester=submission.semester,
+        books=submission.books,
+    )
+    return semap_doc
diff --git a/src/logic/zotero.py b/src/logic/zotero.py
index 14860e0..6c2de8b 100644
--- a/src/logic/zotero.py
+++ b/src/logic/zotero.py
@@ -1,7 +1,9 @@
-from pyzotero import zotero
 from dataclasses import dataclass
-from src.logic.webrequest import WebRequest, BibTextTransformer
+
+from pyzotero import zotero
+
 from src import settings
+from src.logic.webrequest import BibTextTransformer, WebRequest
 
 
 @dataclass
@@ -187,7 +189,7 @@ class ZoteroController:
         book = bib.return_data()
         return book
 
-    # # print(zot.item_template("bookSection"))
+    # # #print(zot.item_template("bookSection"))
     def createBook(self, isbn):
         book = self.__get_data(isbn)
 
@@ -210,7 +212,7 @@ class ZoteroController:
     def createItem(self, item):
         resp = self.zot.create_items([item])
         if "successful" in resp.keys():
-            # print(resp["successful"]["0"]["key"])
+            # #print(resp["successful"]["0"]["key"])
             return resp["successful"]["0"]["key"]
         else:
             return None
@@ -220,7 +222,7 @@ class ZoteroController:
         for item in items:
             if item["key"] == key:
                 self.zot.delete_item(item)
-                # print(item)
+                # #print(item)
                 break
 
     def createHGSection(self, book: Book, data: dict):
@@ -241,7 +243,7 @@ class ZoteroController:
         ]
         chapter.creators += authors
 
-        # print(chapter.to_dict())
+        # #print(chapter.to_dict())
         return self.createItem(chapter.to_dict())
         pass
 
@@ -257,7 +259,7 @@ class ZoteroController:
         # chapter.creators
 
     def createJournalArticle(self, journal, article):
-        # print(type(article))
+        # #print(type(article))
         journalarticle = JournalArticle()
         journalarticle.assign(journal)
         journalarticle.itemType = "journalArticle"
@@ -273,7 +275,7 @@ class ZoteroController:
         journalarticle.issue = article["issue"]
         journalarticle.url = article["isbn"]
 
-        # print(journalarticle.to_dict())
+        # #print(journalarticle.to_dict())
 
         return self.createItem(journalarticle.to_dict())
 
@@ -319,16 +321,16 @@ if __name__ == "__main__":
     # if isinstance(publishers, str):
     #     publishers = [publishers]
     # for publisher in publishers:
-    #     # print(publisher)
+    #     # #print(publisher)
     #     creator = Creator().from_string(publisher)
     #     creator.creatorType = "editor"
     #     authors.append(creator.__dict__)
 
     # chapter.creators = authors
     # chapter.publisher = book.publisher
-    # # print(chapter.to_dict())
+    # # #print(chapter.to_dict())
     # createBookSection(chapter.to_dict())
     # get_citation("9ZXH8DDE")
-    # # # print()
-    # # print(get_books())
-    # # print(zot.item_creator_types("bookSection"))
+    # # # #print()
+    # # #print(get_books())
+    # # #print(zot.item_creator_types("bookSection"))
diff --git a/src/ui/dialogs/__init__.py b/src/ui/dialogs/__init__.py
index 1940b77..f08323f 100644
--- a/src/ui/dialogs/__init__.py
+++ b/src/ui/dialogs/__init__.py
@@ -12,20 +12,21 @@ __all__ = [
     "ElsaAddEntry",
     "ApparatExtendDialog",
     "DocumentPrintDialog",
+    "NewEditionDialog",
     "Settings",
 ]
+from .about import About
+from .app_ext import ApparatExtendDialog
 from .bookdata import BookDataUI
+from .docuprint import DocumentPrintDialog
+from .elsa_add_entry import ElsaAddEntry
+from .elsa_gen_confirm import ElsaGenConfirm
 from .login import LoginDialog
 from .mail import Mail_Dialog
 from .mailTemplate import MailTemplateDialog
 from .medienadder import MedienAdder
+from .newEdition import NewEditionDialog
 from .parsed_titles import ParsedTitles
 from .popup_confirm import ConfirmDialog as popus_confirm
 from .reminder import ReminderDialog
-from .about import About
-from .elsa_gen_confirm import ElsaGenConfirm
-from .elsa_add_entry import ElsaAddEntry
-from .app_ext import ApparatExtendDialog
-from .docuprint import DocumentPrintDialog
-
 from .settings import Settings
diff --git a/src/ui/dialogs/docuprint.py b/src/ui/dialogs/docuprint.py
index 98b399d..3ffaacd 100644
--- a/src/ui/dialogs/docuprint.py
+++ b/src/ui/dialogs/docuprint.py
@@ -2,7 +2,8 @@ from natsort import natsorted
 from PySide6 import QtWidgets
 
 from src import Icon
-from src.backend import Database, Semester
+from src.backend import Database
+from src.logic import Semester
 from src.utils.richtext import SemapSchilder, SemesterDocument
 
 from .dialog_sources.documentprint_ui import Ui_Dialog
diff --git a/src/ui/dialogs/mail.py b/src/ui/dialogs/mail.py
index caaf826..51a7194 100644
--- a/src/ui/dialogs/mail.py
+++ b/src/ui/dialogs/mail.py
@@ -1,4 +1,6 @@
 import os
+import re
+import smtplib
 import sys
 
 import loguru
@@ -7,7 +9,7 @@ from PySide6 import QtWidgets
 from src import LOG_DIR, Icon
 from src import settings as config
 
-from .dialog_sources.Ui_mail_preview import Ui_eMailPreview as MailPreviewDialog
+from .dialog_sources.mail_preview_ui import Ui_eMailPreview as MailPreviewDialog
 from .mailTemplate import MailTemplateDialog
 
 log = loguru.logger
@@ -15,37 +17,61 @@ log.remove()
 log.add(sys.stdout, level="INFO")
 log.add(f"{LOG_DIR}/application.log", rotation="1 MB", retention="10 days")
 
+CSS_RESET = "<style>html,body{margin:0;padding:0}p{margin:0}</style>"
 
-empty_signature = """<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0//EN" "http://www.w3.org/TR/REC-html40/strict.dtd">
+empty_signature = """"""
 
-    <html><head><meta name="qrichtext" content="1" /><meta charset="utf-8" /><style
-    type="text/css">
 
-    p, li { white-space: pre-wrap; }
+def _escape_braces_in_style(html: str) -> str:
+    """
+    Make CSS safe for str.format(...): inside every <style>...</style> block
+    each curly brace is doubled, so formatting treats it as a literal brace
+    and emits a single one again. Markup outside those blocks is not touched.
+    """
 
-    hr { height: 1px; border-width: 0; }
+    def _double(match):
+        opening, rules, closing = match.groups()
+        rules = rules.replace("{", "{{").replace("}", "}}")
+        return opening + rules + closing
 
-    li.unchecked::marker { content: "\2610"; }
+    # DOTALL lets a block span several lines; the lazy .*? stops at the first </style>
+    style_block = re.compile(
+        r"(<style[^>]*>)(.*?)(</style>)",
+        flags=re.IGNORECASE | re.DOTALL,
+    )
+    return style_block.sub(_double, html)
 
-    li.checked::marker { content: "\2612"; }
 
-    </style></head><body style=" font-family:''Segoe UI''; font-size:9pt; font-weight:400;
-    font-style:normal;">
+def _split_eml_headers_body(eml_text: str) -> tuple[str, str]:
+    """
+    Split raw .eml text into (headers, body) at the first blank line; a line
+    holding only spaces/tabs counts as blank. Fallbacks are described inline.
+    """
 
-    <p style="-qt-paragraph-type:empty; margin-top:0px; margin-bottom:0px; margin-left:0px;
-    margin-right:0px; -qt-block-indent:0; text-indent:0px;"><br /></p></body></html>
-"""
+    blank_line = re.search(r"\r?\n[ \t]*\r?\n", eml_text)
+    if blank_line is not None:
+        return eml_text[: blank_line.start()], eml_text[blank_line.end() :]
+    # no blank line at all: cut directly behind the Content-Transfer-Encoding header
+    cte_line = re.search(
+        r"(?:^|\r?\n)Content-Transfer-Encoding:.*?(?:\r?\n)",
+        eml_text,
+        flags=re.I | re.S,
+    )
+    if cte_line is None:
+        return "", eml_text  # last resort: everything counts as body
+    return eml_text[: cte_line.end()], eml_text[cte_line.end() :]
 
 
 class Mail_Dialog(QtWidgets.QDialog, MailPreviewDialog):
     def __init__(
         self,
-        app_id,
-        app_name,
-        app_subject,
-        prof_name,
-        prof_mail,
+        app_id=None,
+        app_name=None,
+        app_subject=None,
+        prof_name=None,
+        prof_mail=None,
         accepted_books=None,
+        ordered_books=None,
         parent=None,
         default_mail="Information zum Semesterapparat",
     ):
@@ -58,6 +84,7 @@ class Mail_Dialog(QtWidgets.QDialog, MailPreviewDialog):
         self.subject = app_subject
         self.profname = prof_name
         self.books = accepted_books if accepted_books is not None else []
+        self.ordered_books = ordered_books if ordered_books is not None else []
         self.mail_data = ""
         self.signature = self.determine_signature()
         self.prof_mail = prof_mail
@@ -65,52 +92,29 @@ class Mail_Dialog(QtWidgets.QDialog, MailPreviewDialog):
         self.prof_name.setText(prof_name)
         self.mail_name.setText(self.prof_mail)
         self.load_mail_templates()
-        # if none of the radio buttons is checked, disable the accept button of the dialog
         self.setWindowIcon(Icon("mail").icon)
         self.btn_okay.setEnabled(False)
         Icon("edit_note", self.newTemplate)
         self.newTemplate.clicked.connect(self.open_new_template)
 
         if default_mail is not None:
-            # get the nearest match to the default mail
             for i in range(self.comboBox.count()):
                 if default_mail in self.comboBox.itemText(i):
                     default_mail = self.comboBox.itemText(i)
                     break
         self.comboBox.setCurrentText(default_mail)
+        self.comboBox.currentIndexChanged.connect(self.set_mail)
 
+        # re-render when user changes greeting via radio buttons
         self.gender_female.clicked.connect(self.set_mail)
         self.gender_male.clicked.connect(self.set_mail)
         self.gender_non.clicked.connect(self.set_mail)
+
+        # reflect initial state (OK disabled until a greeting is chosen)
+        self._update_ok_button()
         self.btn_okay.clicked.connect(self.createAndSendMail)
 
-    def open_new_template(self):
-        log.info("Opening new template dialog")
-        # TODO: implement new mail template dialog
-        dialog = MailTemplateDialog()
-        dialog.updateSignal.connect(self.load_mail_templates)
-        dialog.exec()
-
-        pass
-
-    def determine_signature(self):
-        if config.mail.signature is empty_signature or config.mail.signature == "":
-            return """Mit freundlichen Grüßen
-Ihr Semesterapparatsteam
-Mail: semesterapparate@ph-freiburg.de
-Tel.: 0761/682-778 | 07617682-545"""
-        else:
-            return config.mail.signature
-
-    def load_mail_templates(self):
-        # print("loading mail templates")
-        log.info("Loading mail templates")
-        mail_templates = os.listdir("mail_vorlagen")
-        log.info(f"Mail templates: {mail_templates}")
-        self.comboBox.clear()
-        for template in mail_templates:
-            self.comboBox.addItem(template)
-
+    # add these helpers inside Mail_Dialog
     def get_greeting(self):
         prof = self.profname.split(" ")[0]
         if self.gender_male.isChecked():
@@ -124,45 +128,104 @@ Tel.: 0761/682-778 | 07617682-545"""
             name = f"{self.profname.split(' ')[1]} {self.profname.split(' ')[0]}"
             return f"Guten Tag {name},"
 
+    def _update_ok_button(self):
+        checked = (
+            self.gender_male.isChecked()
+            or self.gender_female.isChecked()
+            or self.gender_non.isChecked()
+        )
+        self.btn_okay.setEnabled(checked)
+
+    def _on_gender_toggled(self, checked: bool):
+        # Only refresh when a button becomes checked
+        if checked:
+            self.set_mail()
+
+    def open_new_template(self):
+        log.info("Opening new template dialog")
+        dialog = MailTemplateDialog()
+        dialog.updateSignal.connect(self.load_mail_templates)
+        dialog.exec()
+
+    def determine_signature(self):
+        # use equality, not identity
+        if (
+            config.mail.signature == empty_signature
+            or config.mail.signature.strip() == ""
+        ):
+            return """Mit freundlichen Grüßen
+Ihr Semesterapparatsteam
+Mail: semesterapparate@ph-freiburg.de
+Tel.: 0761/682-778 | 0761/682-545"""
+        else:
+            return config.mail.signature
+
+    def load_mail_templates(self):
+        log.info("Loading mail templates")
+        mail_templates = [
+            f for f in os.listdir("mail_vorlagen") if f.lower().endswith(".eml")
+        ]
+        log.info(f"Mail templates: {mail_templates}")
+        self.comboBox.clear()
+        for template in mail_templates:
+            self.comboBox.addItem(template)
+
     def set_mail(self):
         log.info("Setting mail")
+        self._update_ok_button()  # keep OK enabled state in sync
+
         email_template = self.comboBox.currentText()
-        if email_template == "":
+        if not email_template:
             log.debug("No mail template selected")
             return
+
         with open(f"mail_vorlagen/{email_template}", "r", encoding="utf-8") as f:
-            mail_template = f.read()
+            eml_text = f.read()
+
+        # header shown in the dialog: template filename without ".eml"
         email_header = email_template.split(".eml")[0]
         if "{AppNr}" in email_template:
-            email_header = email_template.split(".eml")[0]
-        email_header = email_header.format(AppNr=self.appid, AppName=self.appname)
+            email_header = email_header.format(AppNr=self.appid, AppName=self.appname)
         self.mail_header.setText(email_header)
-        self.mail_data = mail_template.split("<html>")[0]
-        mail_html = mail_template.split("<html>")[1]
-        mail_html = "<html>" + mail_html
-        Appname = self.appname
-        mail_html = mail_html.format(
-            Profname=self.profname.split(" ")[0],
-            Appname=Appname,
-            AppNr=self.appid,
-            AppSubject=self.subject,
-            greeting=self.get_greeting(),
-            signature=self.signature,
-            newEditions="<br>".join(
-                [
-                    f"{book.title} von {book.author} (ISBN: {book.isbn}, Auflage: {book.edition}, In Bibliothek: {'ja' if getattr(book, 'library_location', 1) == 1 else 'nein'})"
-                    for book in self.books
-                ]
-            )
-            if self.books
-            else "keine neuen Auflagen gefunden",
-        )
 
-        self.mail_body.setHtml(mail_html)
+        headers, body_html = _split_eml_headers_body(eml_text)
+        body_html = _escape_braces_in_style(body_html)
+
+        # compute greeting from the current toggle selection
+        greeting = self.get_greeting()
+
+        try:
+            body_html = body_html.format(
+                Profname=self.profname.split(" ")[
+                    0
+                ],  # last name if your template uses {Profname}
+                Appname=self.appname,
+                AppNr=self.appid,
+                AppSubject=self.subject,
+                greeting=greeting,
+                signature=self.signature,
+                newEditions="\n".join(
+                    [
+                        f"- {book.title} (ISBN: {','.join(book.isbn)}, Auflage: {book.edition if book.edition else 'nicht bekannt'}, In Bibliothek: {'ja' if getattr(book, 'signature', None) is not None and 'Handbibliothek' not in str(book.library_location) else 'nein'}, Typ: {book.get_book_type()}) Aktuelle Auflage: {book.old_book.edition if book.old_book and book.old_book.edition else 'nicht bekannt'}"
+                        for book in (self.books or [])
+                    ]
+                )
+                if self.books
+                else "keine neuen Auflagen gefunden",
+                newEditionsOrdered="\n".join(
+                    [
+                        f" - {book.title}, ISBN: {','.join(book.isbn)}, Bibliotheksstandort : {book.library_location if book.library_location else 'N/A'}, Link: {book.link}"
+                        for book in (self.ordered_books or [])
+                    ]
+                ),
+            )
+        except Exception as e:
+            log.error(f"Template formatting failed: {e}")
+
+        self.mail_body.setPlainText(body_html)
 
     def createAndSendMail(self):
         log.info("Sending mail")
-        import smtplib
         from email.mime.multipart import MIMEMultipart
         from email.mime.text import MIMEText
 
@@ -176,31 +239,29 @@ Tel.: 0761/682-778 | 07617682-545"""
         message["From"] = sender_email
         message["To"] = self.prof_mail
         message["Subject"] = self.mail_header.text()
-        # include a Fcc to the senders sent folder
-        message["cc"] = "semesterapparate@ph-freiburg.de"
+        message["Cc"] = "semesterapparate@ph-freiburg.de"
+
+        mail_body = self.mail_body.toPlainText()
+        #         strange_string = """p, li { white-space: pre-wrap; }
+        # hr { height: 1px; border-width: 0; }
+        # li.unchecked::marker { content: "\2610"; }
+        # li.checked::marker { content: "\2612"; }
+        # """
+        #         mail_body.replace(strange_string, "")
+        message.attach(MIMEText(mail_body, "plain", "utf-8"))
 
-        mail_body = self.mail_body.toHtml()
-        message.attach(MIMEText(mail_body, "html"))
         mail = message.as_string()
 
         with smtplib.SMTP_SSL(smtp_server, port) as server:
-            server.connect(smtp_server, port)
-            # server.connect(smtp_server, port)
-            # server.auth(mechanism="PLAIN")
+            server.connect(smtp_server, port)  # not needed for SMTP_SSL
             if config.mail.use_user_name is True:
-                # print(config["mail"]["user_name"])
-
                 server.login(config.mail.user_name, password)
             else:
                 server.login(sender_email, password)
             server.sendmail(sender_email, tolist, mail)
-
-            # print("Mail sent")
-            # end active process
             server.quit()
+            pass
         log.info("Mail sent, closing connection to server and dialog")
-        # close the dialog
-
         self.accept()
 
 
@@ -225,8 +286,6 @@ def launch_gui(
 
 
 if __name__ == "__main__":
-    import sys
-
     app = QtWidgets.QApplication(sys.argv)
     Dialog = QtWidgets.QDialog()
     ui = Mail_Dialog()
diff --git a/src/ui/dialogs/progress.py b/src/ui/dialogs/progress.py
index 470a94b..3d83f26 100644
--- a/src/ui/dialogs/progress.py
+++ b/src/ui/dialogs/progress.py
@@ -7,7 +7,7 @@ from qtqdm import Qtqdm, QtqdmProgressBar
 
 from src.logic import BookData
 from src.logic.lehmannsapi import LehmannsClient
-from src.logic.swb import SWB
+from src.logic.SRU import SWB
 
 
 class CheckThread(QtCore.QThread):
diff --git a/src/ui/semesterapparat_ui.ui b/src/ui/semesterapparat_ui.ui
index 053d3d1..76f210e 100644
--- a/src/ui/semesterapparat_ui.ui
+++ b/src/ui/semesterapparat_ui.ui
@@ -250,7 +250,7 @@
               <rect>
                <x>0</x>
                <y>180</y>
-               <width>1261</width>
+               <width>1412</width>
                <height>511</height>
               </rect>
              </property>
@@ -275,11 +275,30 @@
                 </item>
                 <item>
                  <widget class="QCheckBox" name="chkbx_show_del_media">
+                  <property name="enabled">
+                   <bool>false</bool>
+                  </property>
+                  <property name="toolTip">
+                   <string>WIP - Broken</string>
+                  </property>
                   <property name="text">
                    <string>gel. Medien anzeigen</string>
                   </property>
                  </widget>
                 </item>
+                <item>
+                 <widget class="QCheckBox" name="chkbx_show_only_wit_neweditions">
+                  <property name="enabled">
+                   <bool>false</bool>
+                  </property>
+                  <property name="toolTip">
+                   <string>WIP - Broken</string>
+                  </property>
+                  <property name="text">
+                   <string>Nur Titel mit Neuauflagen anzeigen</string>
+                  </property>
+                 </widget>
+                </item>
                 <item>
                  <spacer name="horizontalSpacer_3">
                   <property name="orientation">
@@ -298,8 +317,11 @@
                 </item>
                 <item>
                  <widget class="QPushButton" name="btn_reserve">
+                  <property name="toolTip">
+                   <string>Dieser Knopf prüft alle Werke, die mit einem roten X vermerkt sind. Sollten diese inzwischen im Apparat sein, wird dies aktualisiert</string>
+                  </property>
                   <property name="text">
-                   <string>im Apparat?</string>
+                   <string>Medien mit ❌ im Apparat?</string>
                   </property>
                  </widget>
                 </item>
diff --git a/src/ui/semesterapparat_ui_ui.py b/src/ui/semesterapparat_ui_ui.py
index 844c5b9..e62f60c 100644
--- a/src/ui/semesterapparat_ui_ui.py
+++ b/src/ui/semesterapparat_ui_ui.py
@@ -157,7 +157,7 @@ class Ui_MainWindow(object):
         self.gridLayoutWidget_2 = QWidget(self.createApparat)
         self.gridLayoutWidget_2.setObjectName(u"gridLayoutWidget_2")
         self.gridLayoutWidget_2.setEnabled(True)
-        self.gridLayoutWidget_2.setGeometry(QRect(0, 180, 1261, 511))
+        self.gridLayoutWidget_2.setGeometry(QRect(0, 180, 1412, 511))
         self.gridLayout_2 = QGridLayout(self.gridLayoutWidget_2)
         self.gridLayout_2.setObjectName(u"gridLayout_2")
         self.gridLayout_2.setContentsMargins(0, 0, 0, 0)
@@ -169,9 +169,16 @@ class Ui_MainWindow(object):
 
         self.chkbx_show_del_media = QCheckBox(self.gridLayoutWidget_2)
         self.chkbx_show_del_media.setObjectName(u"chkbx_show_del_media")
+        self.chkbx_show_del_media.setEnabled(False)
 
         self.horizontalLayout_5.addWidget(self.chkbx_show_del_media)
 
+        self.chkbx_show_only_wit_neweditions = QCheckBox(self.gridLayoutWidget_2)
+        self.chkbx_show_only_wit_neweditions.setObjectName(u"chkbx_show_only_wit_neweditions")
+        self.chkbx_show_only_wit_neweditions.setEnabled(False)
+
+        self.horizontalLayout_5.addWidget(self.chkbx_show_only_wit_neweditions)
+
         self.horizontalSpacer_3 = QSpacerItem(40, 20, QSizePolicy.Policy.Fixed, QSizePolicy.Policy.Minimum)
 
         self.horizontalLayout_5.addItem(self.horizontalSpacer_3)
@@ -880,8 +887,18 @@ class Ui_MainWindow(object):
         ___qtablewidgetitem4.setText(QCoreApplication.translate("MainWindow", u"Dauerapparat", None));
         ___qtablewidgetitem5 = self.tableWidget_apparate.horizontalHeaderItem(5)
         ___qtablewidgetitem5.setText(QCoreApplication.translate("MainWindow", u"KontoNr", None));
+#if QT_CONFIG(tooltip)
+        self.chkbx_show_del_media.setToolTip(QCoreApplication.translate("MainWindow", u"WIP - Broken", None))
+#endif // QT_CONFIG(tooltip)
         self.chkbx_show_del_media.setText(QCoreApplication.translate("MainWindow", u"gel. Medien anzeigen", None))
-        self.btn_reserve.setText(QCoreApplication.translate("MainWindow", u"im Apparat?", None))
+#if QT_CONFIG(tooltip)
+        self.chkbx_show_only_wit_neweditions.setToolTip(QCoreApplication.translate("MainWindow", u"WIP - Broken", None))
+#endif // QT_CONFIG(tooltip)
+        self.chkbx_show_only_wit_neweditions.setText(QCoreApplication.translate("MainWindow", u"Nur Titel mit Neuauflagen anzeigen", None))
+#if QT_CONFIG(tooltip)
+        self.btn_reserve.setToolTip(QCoreApplication.translate("MainWindow", u"Dieser Knopf pr\u00fcft alle Werke, die mit einem roten X vermerkt sind. Sollten diese inzwischen im Apparat sein, wird dies aktualisiert", None))
+#endif // QT_CONFIG(tooltip)
+        self.btn_reserve.setText(QCoreApplication.translate("MainWindow", u"Medien mit \u274c im Apparat?", None))
         self.label_info.setText(QCoreApplication.translate("MainWindow", u"Medien werden hinzugef\u00fcgt", None))
         self.progress_label.setText(QCoreApplication.translate("MainWindow", u"Medium x/y", None))
         self.label_20.setText(QCoreApplication.translate("MainWindow", u"Medien werden gepr\u00fcft", None))
diff --git a/src/ui/userInterface.py b/src/ui/userInterface.py
index 47488c9..5c4ef19 100644
--- a/src/ui/userInterface.py
+++ b/src/ui/userInterface.py
@@ -26,7 +26,6 @@ from src.backend import (
 )
 from src.backend.create_file import recreateFile
 from src.backend.delete_temp_contents import delete_temp_contents as tempdelete
-from src.backend.semester import Semester
 from src.logic import (
     APP_NRS,
     Apparat,
@@ -34,7 +33,9 @@ from src.logic import (
     BookData,
     Prof,
     SemapDocument,
+    Semester,
     csv_to_list,
+    eml_to_semap,
     pdf_to_semap,
     word_to_semap,
 )
@@ -207,6 +208,7 @@ class Ui(QtWidgets.QMainWindow, Ui_Semesterapparat):
         self.progressBar.setMinimum(0)
         self.avail_status.hide()
         self.chkbx_show_del_media.hide()
+        self.chkbx_show_only_wit_neweditions.hide()
         self.automation_add_selected_books.hide()
         # self.btn_del_select_apparats.setEnabled(False)
 
@@ -896,7 +898,7 @@ class Ui(QtWidgets.QMainWindow, Ui_Semesterapparat):
         )
 
         # thread = QThread()
-        appnumber = self.active_apparat
+        appnumber = self.drpdwn_app_nr.currentText()
         # #log.debug(links)
         self.availChecker = AvailChecker(links, appnumber, books=books)
         # availcheck.moveToThread(thread)
@@ -939,16 +941,14 @@ class Ui(QtWidgets.QMainWindow, Ui_Semesterapparat):
         self.validate_semester()
 
     def update_app_media_list(self):
-        deleted = 0 if not self.chkbx_show_del_media.isChecked() else 1
         app_id = self.db.getId(self.app_name.text())
         prof_id = self.db.getProfId(self.profdata)
-        books: list[dict[int, BookData, int]] = self.db.getBooks(
-            app_id, prof_id, deleted
-        )
+        books: list[dict[int, BookData, int]] = self.db.getBooks(app_id, prof_id, 0)
 
         # # #log.debug(books)
         # take the dataclass from the tuple
         # booklist:list[BookData]=[book[0] for book in books]
+        self.tableWidget_apparat_media.clearContents()
         self.tableWidget_apparat_media.setRowCount(0)
         for book in books:
             book["id"]
@@ -1198,6 +1198,8 @@ class Ui(QtWidgets.QMainWindow, Ui_Semesterapparat):
                         self.db.addBookToDatabase(
                             bookdata=book, app_id=app_id, prof_id=prof_id
                         )
+                if file_type == "eml":
+                    data = eml_to_semap(file)
                 self.update_app_media_list()
                 # #log.debug(len(signatures))
 
@@ -1590,8 +1592,8 @@ class Ui(QtWidgets.QMainWindow, Ui_Semesterapparat):
         mail_data = {
             "prof_name": "Erwerbung",
             "prof_mail": "erw@ph-freiburg.de",
-            "app_id": app_nr,
-            "app_name": self.db.getApparatName(app_id, prof_id),
+            "app_nr": app_nr,
+            "app_name": self.db.getApparatName(app_nr, prof_id),
         }
         orderDialog = NewEditionDialog(app_id, mail_data)
         orderDialog.exec()
@@ -1672,12 +1674,13 @@ WHERE m.id = ?""",
         newEditionChecker.exec()
 
         accepted_books = newEditionChecker.accepted_books
-        print(accepted_books)
         if accepted_books == []:
             return
         for book in accepted_books:
             oldBookId = self.db.getBookIdByPPN(book.old_book.ppn)
-            
+            apparats_id = self.db.getId(
+                self.db.getApparatNameByAppNr(book.old_book.library_location)
+            )
             self.db.insertNewEdition(book, oldBookId, apparats_id)
             pass
 
@@ -1763,11 +1766,17 @@ WHERE m.id = ?""",
         apparat_add_action = QtGui.QAction("Zum Apparat hinzufügen")
         apparat_move_action = QtGui.QAction("In Apparat verschieben")
         apparat_copy_action = QtGui.QAction("In Apparat kopieren")
+        replace_old_editions = QtGui.QAction("Neuauflagen ersetzen")
 
         apparatmenu = menu.addMenu("Apparate")
         generalmenu = menu.addMenu("Allgemeines")
         apparatmenu.addActions(  # type: ignore
-            [apparat_add_action, apparat_copy_action, apparat_move_action]
+            [
+                apparat_add_action,
+                apparat_copy_action,
+                apparat_move_action,
+                replace_old_editions,
+            ]
         )
         generalmenu.addActions([edit_action, delete_action, update_data_action])  # type: ignore
         # disable apparat_add_action
@@ -1778,8 +1787,37 @@ WHERE m.id = ?""",
         apparat_copy_action.triggered.connect(self.copy_to_apparat)  # type: ignore
         apparat_move_action.triggered.connect(self.move_to_apparat)  # type: ignore
         update_data_action.triggered.connect(self.update_data)  # type: ignore
+        replace_old_editions.triggered.connect(self.replace_old_edition)  # type: ignore
         menu.exec(self.tableWidget_apparat_media.mapToGlobal(position))  # type: ignore
 
+    def replace_old_edition(self):
+        # open dialog
+        dialog = QtWidgets.QDialog()
+        dialog.setWindowTitle("Neuauflagen:")
+        layout = QtWidgets.QVBoxLayout()
+        label = QtWidgets.QLabel("Folgende Medien haben Neuauflagen:")
+        layout.addWidget(label)
+        table = QtWidgets.QTableWidget()
+        table.setColumnCount(4)
+        table.setHorizontalHeaderLabels(["Titel", "Auflage", "Signatur", "Neues Werk"])
+        table.horizontalHeader().setStretchLastSection(True)
+        new_editions = self.db.getBooksWithNewEditions(
+            self.active_apparat,
+        )
+        for book in new_editions:
+            table.insertRow(0)
+            table.setItem(0, 0, QtWidgets.QTableWidgetItem(book[0].title))
+            table.setItem(0, 1, QtWidgets.QTableWidgetItem(str(book[0].edition)))
+            table.setItem(0, 2, QtWidgets.QTableWidgetItem(book[0].signature))
+            new_ed_data = (
+                f"{book[1].title} (Auflage {book[1].edition}, {book[1].signature})"
+            )
+            table.setItem(0, 3, QtWidgets.QTableWidgetItem(new_ed_data))
+
+        layout.addWidget(table)
+        dialog.setLayout(layout)
+        dialog.exec()
+
     def update_data(self):
         signatures = [
             self.tableWidget_apparat_media.item(row, 1).text()
diff --git a/src/ui/widgets/admin_query.py b/src/ui/widgets/admin_query.py
index 5c933f4..c4d6fe1 100644
--- a/src/ui/widgets/admin_query.py
+++ b/src/ui/widgets/admin_query.py
@@ -1,9 +1,10 @@
-from .widget_sources.admin_query_ui import Ui_Form
+from PySide6 import QtCore, QtWidgets
 
-from PySide6 import QtWidgets, QtCore
 from src import Icon
 from src.backend import Database
 
+from .widget_sources.admin_query_ui import Ui_Form
+
 
 class AdminQueryWidget(QtWidgets.QWidget, Ui_Form):
     def __init__(self, parent=None):
@@ -22,7 +23,7 @@ class AdminQueryWidget(QtWidgets.QWidget, Ui_Form):
             return
 
         data = self.db.query_db(request_text)
-        print(data)
+        # print(data)
         table_names = (
             request_text.lower().split("select")[1].split("from")[0].split(",")
         )
diff --git a/src/ui/widgets/elsa_main.py b/src/ui/widgets/elsa_main.py
index c56e7ec..5bb5fe0 100644
--- a/src/ui/widgets/elsa_main.py
+++ b/src/ui/widgets/elsa_main.py
@@ -7,8 +7,8 @@ from PySide6.QtCore import QDate
 from PySide6.QtGui import QRegularExpressionValidator
 
 from src import LOG_DIR, Icon
-from src.backend import Database, Semester, recreateElsaFile
-from src.logic import Prof, elsa_word_to_csv
+from src.backend import Database, recreateElsaFile
+from src.logic import Prof, Semester, elsa_word_to_csv
 from src.ui.dialogs import ElsaAddEntry, popus_confirm
 from src.ui.widgets.filepicker import FilePicker
 from src.ui.widgets.graph import DataQtGraph
diff --git a/src/ui/widgets/graph.py b/src/ui/widgets/graph.py
index 4be10ba..2960ecd 100644
--- a/src/ui/widgets/graph.py
+++ b/src/ui/widgets/graph.py
@@ -8,7 +8,7 @@ from PySide6.QtCharts import QCategoryAxis, QChart, QChartView, QLineSeries, QVa
 from PySide6.QtGui import QColor, QPainter, QPen
 
 from src import LOG_DIR
-from src.backend.semester import Semester
+from src.logic.semester import Semester
 
 log = loguru.logger
 log.remove()
diff --git a/src/ui/widgets/searchPage.py b/src/ui/widgets/searchPage.py
index 63992a0..2c0ad86 100644
--- a/src/ui/widgets/searchPage.py
+++ b/src/ui/widgets/searchPage.py
@@ -1,4 +1,5 @@
 import sys
+from typing import List
 
 import loguru
 from natsort import natsorted
@@ -6,8 +7,9 @@ from PySide6 import QtCore, QtGui, QtWidgets
 from PySide6.QtCore import Signal
 
 from src import LOG_DIR
-from src.backend import Database, Semester
-from src.logic import BookData, Prof, custom_sort, sort_semesters_list
+from src.backend import Database
+from src.logic import BookData, Prof, Semester, custom_sort, sort_semesters_list
+from src.logic.dataclass import Apparat
 from src.ui.dialogs import ApparatExtendDialog, Mail_Dialog, ReminderDialog
 from src.ui.widgets import DataQtGraph, StatusWidget
 from src.ui.widgets.signature_update import UpdaterThread
@@ -343,8 +345,7 @@ class SearchStatisticPage(QtWidgets.QDialog, Ui_Dialog):
         apparats = natsorted(appnrs)
         apparats = [str(apparat) for apparat in apparats]
         self.box_appnrs.addItems(apparats)
-        persons = self.db.getProfs()
-        persons = sorted(persons, key=lambda x: x.lastname)
+        persons: List[Prof] = sorted(self.db.getProfs(), key=lambda x: x.lastname)
         self.box_person.addItems(
             [f"{person.lastname}, {person.firstname}" for person in persons]
         )
@@ -398,7 +399,12 @@ class SearchStatisticPage(QtWidgets.QDialog, Ui_Dialog):
         selected_apparat_rows = []
         for i in range(self.tableWidget.rowCount()):
             if self.tableWidget.cellWidget(i, 0).isChecked():
-                selected_apparats.append(self.tableWidget.item(i, 2).text())
+                selected_apparats.append(
+                    Apparat(
+                        appnr=self.tableWidget.item(i, 2).text(),
+                        name=self.tableWidget.item(i, 1).text(),
+                    )
+                )
                 selected_apparat_rows.append(i)
         # delete all selected apparats
         # # ##print(selected_apparats)
diff --git a/src/ui/widgets/signature_update.py b/src/ui/widgets/signature_update.py
index 135eeae..63a88db 100644
--- a/src/ui/widgets/signature_update.py
+++ b/src/ui/widgets/signature_update.py
@@ -8,7 +8,7 @@ from PySide6.QtMultimedia import QAudioOutput, QMediaPlayer
 from src import LOG_DIR
 from src.backend.catalogue import Catalogue
 from src.backend.database import Database
-from src.logic.swb import SWB
+from src.logic.SRU import SWB
 
 from .widget_sources.admin_update_signatures_ui import Ui_Dialog