minor and major reworks: rename swb to SRU, add a test for pdf parsing

major: rework mail sending to use plain text instead of HTML, preventing HTML markup from bleeding into the message text
This commit is contained in:
2025-10-07 14:15:10 +02:00
parent 0df7fd9fe6
commit 06965db26a
25 changed files with 1174 additions and 303 deletions

View File

@@ -1,6 +1,5 @@
__all__ = [ __all__ = [
"AdminCommands", "AdminCommands",
"Semester",
"AutoAdder", "AutoAdder",
"AvailChecker", "AvailChecker",
"BookGrabber", "BookGrabber",
@@ -9,16 +8,15 @@ __all__ = [
"NewEditionCheckerThread", "NewEditionCheckerThread",
"recreateElsaFile", "recreateElsaFile",
"recreateFile", "recreateFile",
"Catalogue" "Catalogue",
] ]
from .admin_console import AdminCommands from .admin_console import AdminCommands
from .catalogue import Catalogue
from .create_file import recreateElsaFile, recreateFile from .create_file import recreateElsaFile, recreateFile
from .database import Database from .database import Database
from .documentation_thread import DocumentationThread from .documentation_thread import DocumentationThread
from .semester import Semester
from .thread_bookgrabber import BookGrabber from .thread_bookgrabber import BookGrabber
from .thread_neweditions import NewEditionCheckerThread from .thread_neweditions import NewEditionCheckerThread
from .threads_autoadder import AutoAdder from .threads_autoadder import AutoAdder
from .threads_availchecker import AvailChecker from .threads_availchecker import AvailChecker
from .catalogue import Catalogue

View File

@@ -2,6 +2,7 @@ import sys
from datetime import datetime from datetime import datetime
import loguru import loguru
import regex
import requests import requests
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
@@ -24,7 +25,7 @@ log.add(
class Catalogue: class Catalogue:
def __init__(self, timeout=5): def __init__(self, timeout=15):
self.timeout = timeout self.timeout = timeout
reachable = self.check_connection() reachable = self.check_connection()
if not reachable: if not reachable:
@@ -61,8 +62,8 @@ class Catalogue:
links = self.get_book_links(searchterm) links = self.get_book_links(searchterm)
print(links) print(links)
for link in links: for elink in links:
result = self.search(link) result = self.search(elink)
# in result search for class col-xs-12 rds-dl RDS_LOCATION # in result search for class col-xs-12 rds-dl RDS_LOCATION
# if found, return text of href # if found, return text of href
soup = BeautifulSoup(result, "html.parser") soup = BeautifulSoup(result, "html.parser")
@@ -74,8 +75,45 @@ class Catalogue:
ppn_el = soup.find( ppn_el = soup.find(
"div", class_="col-xs-12 col-md-5 col-lg-4 rds-dl-head RDS_PPN" "div", class_="col-xs-12 col-md-5 col-lg-4 rds-dl-head RDS_PPN"
) )
ppn = ppn_el.get_text(strip=True) if ppn_el else None # in ppn_el, get text of div col-xs-12 col-md-7 col-lg-8 rds-dl-panel
ppn = (
ppn_el.find_next_sibling(
"div", class_="col-xs-12 col-md-7 col-lg-8 rds-dl-panel"
).get_text(strip=True)
if ppn_el
else None
)
# get edition text at div class col-xs-12 col-md-5 col-lg-4 rds-dl-head RDS_EDITION
edition_el = soup.find(
"div", class_="col-xs-12 col-md-5 col-lg-4 rds-dl-head RDS_EDITION"
)
edition = (
edition_el.find_next_sibling(
"div", class_="col-xs-12 col-md-7 col-lg-8 rds-dl-panel"
).get_text(strip=True)
if edition_el
else None
)
authors = soup.find_all(
"div", class_="col-xs-12 col-md-5 col-lg-4 rds-dl-head RDS_PERSON"
)
author = None
if authors:
# get the names of the a href links in the div col-xs-12 col-md-7 col-lg-8 rds-dl-panel
author_names = []
for author in authors:
panel = author.find_next_sibling(
"div", class_="col-xs-12 col-md-7 col-lg-8 rds-dl-panel"
)
if panel:
links = panel.find_all("a")
for link in links:
author_names.append(link.text.strip())
author = (
";".join(author_names) if len(author_names) > 1 else author_names[0]
)
signature = None signature = None
panel = soup.select_one("div.panel-body") panel = soup.select_one("div.panel-body")
@@ -121,4 +159,147 @@ class Catalogue:
title=title, title=title,
ppn=ppn, ppn=ppn,
signature=signature, signature=signature,
library_location=loc.split("-")[-1],
link=elink,
author=author,
edition=edition,
) )
else:
return Book(
title=title,
ppn=ppn,
signature=signature,
library_location=loc.split("\n\n")[-1],
link=elink,
author=author,
edition=edition,
)
def get(self, ppn: str) -> Book | None:
    """Download and parse the catalogue record page for *ppn*.

    The page is fetched via ``self.search`` and parsed with BeautifulSoup,
    but no field is extracted from it yet, so the method currently always
    returns ``None``.
    """
    # based on PPN, get title, people, edition, year, language, pages, isbn,
    record_url = f"https://rds.ibs-bw.de/phfreiburg/opac/RDSIndexrecord/{ppn}"
    page_html = self.search(record_url)
    _parsed = BeautifulSoup(page_html, "html.parser")  # not used yet
    return None
def get_ppn(self, searchterm: str) -> str | None:
links = self.get_book_links(searchterm)
ppn = None
for link in links:
result = self.search(link)
soup = BeautifulSoup(result, "html.parser")
print(link)
ppn = link.split("/")[-1]
if ppn and regex.match(r"^\d{8,10}[X\d]?$", ppn):
return ppn
return ppn
def get_semesterapparat_number(self, searchterm: str) -> int:
    """Return the Semesterapparat number found for *searchterm*.

    Each search hit is fetched and its first ``RDS_LOCATION`` block is
    inspected. If the text contains ``Semesterapparat-<n>``, ``n`` is
    returned as an int.

    NOTE: despite the ``int`` annotation, any other location block (for
    example a ``Handbibliothek-`` entry) makes the method return the last
    blank-line-separated part of that block's text as a string.

    Returns:
        The apparatus number, a location label string, or ``0`` when no
        hit exposes a location block.
    """
    for hit in self.get_book_links(searchterm):
        page = BeautifulSoup(self.search(hit), "html.parser")
        # in result search for class col-xs-12 rds-dl RDS_LOCATION
        for location_el in page.find_all(
            "div", class_="col-xs-12 rds-dl RDS_LOCATION"
        ):
            if "Semesterapparat-" in location_el.text:
                found = regex.search(r"Semesterapparat-(\d+)", location_el.text)
                if found:
                    return int(found.group(1))
            # "Handbibliothek-" entries and every other location yield the
            # same value: the trailing segment of the location text.
            return location_el.text.strip().split("\n\n")[-1].strip()
    return 0
def get_author(self, link: str) -> str:
links = self.get_book_links(f"kid:{link}")
author = None
for link in links:
# print(link)
result = self.search(link)
soup = BeautifulSoup(result, "html.parser")
# get all authors, return them as a string seperated by ;
authors = soup.find_all(
"div", class_="col-xs-12 col-md-5 col-lg-4 rds-dl-head RDS_PERSON"
)
if authors:
# get the names of the a href links in the div col-xs-12 col-md-7 col-lg-8 rds-dl-panel
author_names = []
for author in authors:
panel = author.find_next_sibling(
"div", class_="col-xs-12 col-md-7 col-lg-8 rds-dl-panel"
)
if panel:
links = panel.find_all("a")
for link in links:
author_names.append(link.text.strip())
author = "; ".join(author_names)
return author
def get_signature(self, isbn: str):
    """Return the shelf signature of the copy found for *isbn*.

    For each search hit, the holdings panel (``div.panel-body``) is split
    into entries; the ``div.col-xs-12.space`` rows act as separators. Each
    entry may carry a signature, a status and a location.

    The entry whose location mentions "Semesterapparat" is preferred. If no
    entry does, the first entry's signature is used. Previously both
    branches returned the first entry, so the preference had no effect.

    Args:
        isbn: Search term passed to ``self.get_book_links``.

    Returns:
        The signature string, or ``None`` if no hit has a holdings entry
        (in which case "No signature found" is printed).
    """
    signature = None
    field_for_class = (
        ("RDS_SIGNATURE", "signature"),
        ("RDS_STATUS", "status"),
        ("RDS_LOCATION", "location"),
    )
    for record_url in self.get_book_links(f"{isbn}"):
        soup = BeautifulSoup(self.search(record_url), "html.parser")
        panel = soup.select_one("div.panel-body")
        if not panel:
            continue

        # Collect the RDS_* blocks in order, using the 'space' divs as separators
        groups = []
        cur = {}
        for node in panel.select(
            "div.rds-dl.RDS_SIGNATURE, div.rds-dl.RDS_STATUS, div.rds-dl.RDS_LOCATION, div.col-xs-12.space"
        ):
            classes = node.get("class", [])
            if "space" in classes:
                if cur:
                    groups.append(cur)
                    cur = {}
                continue
            # Read the value from the panel cell, falling back to the node itself
            val_el = node.select_one(".rds-dl-panel")
            source = val_el if val_el else node
            val = source.get_text(" ", strip=True)
            for css_class, field_name in field_for_class:
                if css_class in classes:
                    cur[field_name] = val
                    break
        if cur:  # last entry is not necessarily followed by a separator
            groups.append(cur)

        if not groups:
            continue
        chosen = next(
            (g for g in groups if "semesterapparat" in g.get("location", "").lower()),
            groups[0],
        )
        return chosen.get("signature")

    print("No signature found")
    return signature
def in_library(self, ppn: str) -> bool:
    """Tell whether a ``kid:<ppn>`` search yields at least one link.

    A ``None`` PPN is answered with ``False`` without querying anything.
    """
    return ppn is not None and len(self.get_book_links(f"kid:{ppn}")) > 0
def get_location(self, ppn: str) -> str | None:
if ppn is None:
return None
link = self.get_book(f"{ppn}")
if link is None:
return None
return link.library_location

View File

@@ -30,10 +30,9 @@ from src.backend.db import (
from src.errors import AppPresentError, NoResultError from src.errors import AppPresentError, NoResultError
from src.logic import ELSA, Apparat, ApparatData, BookData, Prof from src.logic import ELSA, Apparat, ApparatData, BookData, Prof
from src.logic.constants import SEMAP_MEDIA_ACCOUNTS from src.logic.constants import SEMAP_MEDIA_ACCOUNTS
from src.logic.semester import Semester
from src.utils.blob import create_blob from src.utils.blob import create_blob
from .semester import Semester
log = loguru.logger log = loguru.logger
log.remove() log.remove()
log.add(sys.stdout, level="INFO") log.add(sys.stdout, level="INFO")
@@ -1873,7 +1872,7 @@ class Database:
Returns: Returns:
list[tuple]: A list of tuples containing the new editions data list[tuple]: A list of tuples containing the new editions data
""" """
query = "SELECT * FROM neweditions WHERE for_apparat=?" query = "SELECT * FROM neweditions WHERE for_apparat=? AND ordered=0"
results = self.query_db(query, (apparat_id,)) results = self.query_db(query, (apparat_id,))
res = [] res = []
for result in results: for result in results:
@@ -1887,9 +1886,25 @@ class Database:
query = "UPDATE neweditions SET ordered=1 WHERE id=?" query = "UPDATE neweditions SET ordered=1 WHERE id=?"
self.query_db(query, (newBook_id,)) self.query_db(query, (newBook_id,))
def getBooksWithNewEditions(self, app_id) -> "list[tuple[BookData, BookData]]":
    """Return (old edition, new edition) pairs recorded for one apparatus.

    Joins ``media`` with ``neweditions`` on ``old_edition_id`` and
    deserialises both stored book records.

    Args:
        app_id: Value matched against ``neweditions.for_apparat``.

    Returns:
        A list of ``(old_book, new_book)`` tuples, empty when nothing
        matches. The return annotation previously claimed a flat list of
        ``BookData``.
    """
    query = "SELECT m.bookdata, new_bookdata FROM media m JOIN neweditions n ON m.id = n.old_edition_id WHERE n.for_apparat = ?"
    rows = self.query_db(query, (app_id,))
    # ``rows or []`` guards against a query helper that yields None for "no rows".
    return [
        (BookData().from_string(row[0]), BookData().from_string(row[1]))
        for row in rows or []
    ]
def getNewEditionId(self, newBook: BookData): def getNewEditionId(self, newBook: BookData):
query = "SELECT id FROM neweditions WHERE new_bookdata=?" query = "SELECT id FROM neweditions WHERE new_bookdata LIKE ?"
params = (newBook.to_dict,) args = (
newBook.isbn[0] if newBook.isbn and len(newBook.isbn) > 0 else newBook.ppn
)
params = (f"%{args}%",)
data = self.query_db(query, params, one=True) data = self.query_db(query, params, one=True)
if data: if data:
return data[0] return data[0]
@@ -1897,6 +1912,14 @@ class Database:
return None return None
def insertNewEdition(self, newBook: BookData, oldBookId: int, for_apparat: int): def insertNewEdition(self, newBook: BookData, oldBookId: int, for_apparat: int):
# check if new edition already in table, check based on newBook.ppn
check_query = "SELECT id FROM neweditions WHERE new_bookdata LIKE ?"
check_params = (f"%{newBook.ppn}%",)
data = self.query_db(check_query, check_params, one=True)
if data:
log.info("New edition already in table, skipping insert")
return
query = "INSERT INTO neweditions (new_bookdata, old_edition_id, for_apparat) VALUES (?,?,?)" query = "INSERT INTO neweditions (new_bookdata, old_edition_id, for_apparat) VALUES (?,?,?)"
params = (newBook.to_dict, oldBookId, for_apparat) params = (newBook.to_dict, oldBookId, for_apparat)

View File

@@ -2,6 +2,7 @@ import sys
import xml.etree.ElementTree as ET import xml.etree.ElementTree as ET
from dataclasses import dataclass, field from dataclasses import dataclass, field
from datetime import datetime from datetime import datetime
from enum import Enum
from typing import Dict, Iterable, List, Optional, Tuple from typing import Dict, Iterable, List, Optional, Tuple
import loguru import loguru
@@ -97,7 +98,7 @@ def _text(elem: Optional[ET.Element]) -> str:
def _req_text(parent: ET.Element, path: str) -> str: def _req_text(parent: ET.Element, path: str) -> str:
el = parent.find(path, NS) el = parent.find(path, NS)
if el is None or el.text is None: if el is None or el.text is None:
raise ValueError(f"Required element not found or empty: {path}") return None
return el.text return el.text
@@ -188,7 +189,7 @@ def parse_search_retrieve_response(xml_str: str) -> SearchRetrieveResponse:
# Root is zs:searchRetrieveResponse # Root is zs:searchRetrieveResponse
version = _req_text(root, "zs:version") version = _req_text(root, "zs:version")
numberOfRecords = int(_req_text(root, "zs:numberOfRecords")) numberOfRecords = int(_req_text(root, "zs:numberOfRecords") or "0")
records_parent = root.find("zs:records", NS) records_parent = root.find("zs:records", NS)
records: List[Record] = [] records: List[Record] = []
@@ -408,8 +409,12 @@ def book_from_marc(rec: MarcRecord) -> BookData:
rec, "264", "c" rec, "264", "c"
) )
isbn = subfield_values(rec, "020", "a") isbn = subfield_values(rec, "020", "a")
mediatype = first_subfield_value(rec, "338", "a")
lang = subfield_values(rec, "041", "a") lang = subfield_values(rec, "041", "a")
authors = subfield_values(rec, "700", "a")
author = None
if authors:
author = "; ".join(authors)
return BookData( return BookData(
ppn=ppn, ppn=ppn,
@@ -422,32 +427,162 @@ def book_from_marc(rec: MarcRecord) -> BookData:
isbn=isbn, isbn=isbn,
language=lang, language=lang,
link="", link="",
author=author,
media_type=mediatype,
) )
class SWB: class SWBData(Enum):
def __init__(self): URL = "https://sru.k10plus.de/opac-de-627!rec=1?version=1.1&operation=searchRetrieve&query={}&maximumRecords=100&recordSchema=marcxml"
self.url = "https://sru.k10plus.de/opac-de-627!rec=1?version=1.1&operation=searchRetrieve&query={}&maximumRecords=10&recordSchema=marcxml" ARGSCHEMA = "pica."
self.bib_id = 20735 NAME = "SWB"
class DNBData(Enum):
    """Connection constants for the DNB SRU endpoint (services.dnb.de)."""

    # URL template; the ``{}`` placeholder receives the encoded query.
    URL = "https://services.dnb.de/sru/dnb?version=1.1&operation=searchRetrieve&query={}&maximumRecords=100&recordSchema=MARC21-xml"
    # Prefix for query arguments; empty for this site.
    ARGSCHEMA = ""
    # Short site identifier.
    NAME = "DNB"
class SRUSite(Enum):
    """Known SRU sites; each member's value is that site's settings enum."""

    SWB = SWBData
    DNB = DNBData
RVK_ALLOWED = r"[A-Z0-9.\-\/]" # conservative char set typically seen in RVK notations
def find_newer_edition(
    swb_result: "BookData", dnb_result: "List[BookData]"
) -> "Optional[List[BookData]]":
    """Pick the single newest edition of ``swb_result`` among ``dnb_result``.

    A candidate counts as newer when its year or its edition number is
    strictly greater than that of ``swb_result``. Years are compared
    numerically: only the digits of the stored value are used, so string
    years such as ``"999"`` and ``"2020"`` order correctly and a missing
    year never takes part in a comparison.

    Guards and preferences:
      - If both books have a signature and the normalised signatures
        differ, the candidate is treated as a different work and skipped.
      - Candidates sharing a PPN are collapsed to one, preferring a
        signature that matches ``swb_result``, then any signature.
      - Of the remaining books the latest wins, ordered by year, edition
        number, signature match, signature presence.

    Args:
        swb_result: The reference book.
        dnb_result: Books to compare against the reference.

    Returns:
        A one-element list with the best newer edition, or ``None`` if no
        candidate is newer.
    """
    # Local import: NOTE(review) ``re`` is not among the imports visible
    # for this module, so it is imported where it is used.
    import re

    def norm_sig(s):
        """Lowercase, collapse whitespace and strip unusual characters."""
        if not s:
            return ""
        s = re.sub(r"\s+", " ", s.lower()).strip()
        # remove obvious noise; adjust if your signature format differs
        return re.sub(r"[^a-z0-9\-_/\. ]+", "", s)

    def as_int(value):
        """Turn a year/edition value into an int, or None if it has no digits."""
        if value is None:
            return None
        if isinstance(value, int):
            return value
        digits = re.sub(r"\D", "", str(value))
        return int(digits) if digits else None

    def has_sig(b) -> bool:
        return bool(getattr(b, "signature", None))

    def sig_matches_swb(b) -> bool:
        if not has_sig(b) or not has_sig(swb_result):
            return False
        return norm_sig(b.signature) == norm_sig(swb_result.signature)

    ref_year = as_int(getattr(swb_result, "year", None))
    ref_edition = as_int(getattr(swb_result, "edition_number", None))
    ref_sig = getattr(swb_result, "signature", None)
    ref_sig_norm = norm_sig(ref_sig)

    def strictly_newer(b) -> bool:
        year = as_int(getattr(b, "year", None))
        edition = as_int(getattr(b, "edition_number", None))
        by_year = year is not None and ref_year is not None and year > ref_year
        by_edition = (
            edition is not None and ref_edition is not None and edition > ref_edition
        )
        return by_year or by_edition

    def ppn_pref_score(b):
        # (signature matches swb, has signature)
        return (1 if sig_matches_swb(b) else 0, 1 if has_sig(b) else 0)

    def sort_key(b):
        year = as_int(getattr(b, "year", None))
        edition = as_int(getattr(b, "edition_number", None))
        return (
            year if year is not None else -1,
            edition if edition is not None else -1,
        ) + ppn_pref_score(b)

    # 1) Keep only books of the same work that are newer.
    candidates = []
    for b in dnb_result:
        b_sig = getattr(b, "signature", None)
        if b_sig and ref_sig and norm_sig(b_sig) != ref_sig_norm:
            continue  # both signatures exist and differ: not the same work
        if strictly_newer(b):
            candidates.append(b)
    if not candidates:
        return None

    # 2) Dedupe by PPN, keeping the entry with the better signature score.
    by_ppn = {}
    for b in candidates:
        key = getattr(b, "ppn", None)
        prev = by_ppn.get(key)
        if prev is None or ppn_pref_score(b) > ppn_pref_score(prev):
            by_ppn[key] = b

    # 3) Of what remains, only the latest one is reported.
    return [max(by_ppn.values(), key=sort_key)]
class Api:
def __init__(self, site: str, url: str, prefix: str):
self.site = site
self.url = url
self.prefix = prefix
pass
def get(self, query_args: Iterable[str]) -> List[Record]: def get(self, query_args: Iterable[str]) -> List[Record]:
# if any query_arg ends with =, remove it # if any query_arg ends with =, remove it
query_args = [arg for arg in query_args if not arg.endswith("=")] if self.site == "DNB":
args = [arg for arg in query_args if not arg.startswith("pica.")]
if args == []:
raise ValueError("DNB queries must include at least one search term")
query_args = args
# query_args = [f"{self.prefix}{arg}" for arg in query_args]
query = "+and+".join(query_args) query = "+and+".join(query_args)
query = query.replace(" ", "%20").replace("&", "%26") query = query.replace(" ", "%20").replace("&", "%26")
# query_args = [arg for arg in query_args if not arg.endswith("=")]
# query = "+and+".join(query_args)
# query = query.replace(" ", "%20").replace("&", "%26")
# insert the query into the url url is
url = self.url.format(query) url = self.url.format(query)
log.debug(url) log.debug(url)
headers = { headers = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3", "User-Agent": f"{self.site} SRU Client, <alexander.kirchner@ph-freiburg.de>",
"Accept": "application/xml", "Accept": "application/xml",
"Accept-Charset": "latin1,utf-8;q=0.7,*;q=0.3", "Accept-Charset": "latin1,utf-8;q=0.7,*;q=0.3",
} }
response = requests.get(url, headers=headers) response = requests.get(url, headers=headers)
if response.status_code != 200: if response.status_code != 200:
raise Exception(f"Error fetching data from SWB: {response.status_code}") raise Exception(f"Error fetching data from SWB: {response.status_code}")
# #print(response.text)
data = response.content data = response.content
# extract top-level response # extract top-level response
@@ -456,6 +591,7 @@ class SWB:
def getBooks(self, query_args: Iterable[str]) -> List[BookData]: def getBooks(self, query_args: Iterable[str]) -> List[BookData]:
records: List[Record] = self.get(query_args) records: List[Record] = self.get(query_args)
print(f"{self.site} found {len(records)} records")
books: List[BookData] = [] books: List[BookData] = []
# extract title from query_args if present # extract title from query_args if present
title = None title = None
@@ -476,3 +612,11 @@ class SWB:
def getLinkForBook(self, book: BookData) -> str: def getLinkForBook(self, book: BookData) -> str:
results = self.getBooks() results = self.getBooks()
class SWB(Api):
    """``Api`` client preconfigured with the ``SWBData`` constants."""

    def __init__(self):
        # The base initialiser stores site, url and prefix on the instance.
        super().__init__(
            SWBData.NAME.value,
            SWBData.URL.value,
            SWBData.ARGSCHEMA.value,
        )

View File

@@ -1,6 +1,35 @@
from .dataclass import ApparatData, BookData, Prof, Apparat, ELSA __all__ = [
"custom_sort",
"sort_semesters_list",
"APP_NRS",
"PROF_TITLES",
"SEMAP_MEDIA_ACCOUNTS",
"csv_to_list",
"ELSA",
"Apparat",
"ApparatData",
"BookData",
"Prof",
"Semester",
"SemapDocument",
"elsa_word_to_csv",
"pdf_to_semap",
"word_docx_to_csv",
"word_to_semap",
"ZoteroController",
"eml_to_semap",
]
from .c_sort import custom_sort, sort_semesters_list from .c_sort import custom_sort, sort_semesters_list
from .constants import APP_NRS, PROF_TITLES, SEMAP_MEDIA_ACCOUNTS from .constants import APP_NRS, PROF_TITLES, SEMAP_MEDIA_ACCOUNTS
from .csvparser import csv_to_list from .csvparser import csv_to_list
from .wordparser import elsa_word_to_csv, word_docx_to_csv, word_to_semap, SemapDocument from .dataclass import ELSA, Apparat, ApparatData, BookData, Prof
from .semester import Semester
from .wordparser import (
SemapDocument,
elsa_word_to_csv,
pdf_to_semap,
word_docx_to_csv,
word_to_semap,
)
from .xmlparser import eml_to_semap
from .zotero import ZoteroController from .zotero import ZoteroController

View File

@@ -83,4 +83,4 @@ if __name__ == "__main__":
"SoSe 25", "SoSe 25",
] ]
print(sort_semesters_list(unsorted)) # print(sort_semesters_list(unsorted))

View File

@@ -1,4 +1,5 @@
import csv import csv
from charset_normalizer import detect from charset_normalizer import detect
@@ -19,4 +20,4 @@ def csv_to_list(path: str) -> list[str]:
if __name__ == "__main__": if __name__ == "__main__":
text = csv_to_list("C:/Users/aky547/Desktop/semap/71.csv") text = csv_to_list("C:/Users/aky547/Desktop/semap/71.csv")
# remove linebreaks # remove linebreaks
# print(text) # #print(text)

View File

@@ -3,6 +3,11 @@ from dataclasses import dataclass, field
from enum import Enum from enum import Enum
from typing import Any, Optional, Union from typing import Any, Optional, Union
import regex
from src.logic.openai import name_tester, run_shortener, semester_converter
from src.logic.semester import Semester
@dataclass @dataclass
class Prof: class Prof:
@@ -67,21 +72,63 @@ class BookData:
language: Union[str, list[str], None] = field(default_factory=list) language: Union[str, list[str], None] = field(default_factory=list)
publisher: str | None = None publisher: str | None = None
place: str | None = None place: str | None = None
year: str | None = None year: int | None = None
pages: str | None = None pages: str | None = None
library_location: int | None = None library_location: str | None = None
in_apparat: bool | None = False in_apparat: bool | None = False
adis_idn: str | None = None adis_idn: str | None = None
old_book: Any | None = None
media_type: str | None = None #
in_library: bool | None = None # whether the book is in the library or not
def __post_init__(self):
self.library_location = (
str(self.library_location) if self.library_location else None
)
if isinstance(self.language, list) and self.language:
self.language = [lang.strip() for lang in self.language if lang.strip()]
self.language = ",".join(self.language)
self.year = regex.sub(r"[^\d]", "", str(self.year)) if self.year else None
self.in_library = True if self.signature else False
def from_dict(self, data: dict) -> "BookData": def from_dict(self, data: dict) -> "BookData":
for key, value in data.items(): for key, value in data.items():
setattr(self, key, value) setattr(self, key, value)
return self return self
def merge(self, other: "BookData") -> "BookData":
    """Fill gaps in this record from *other* and return ``self``.

    List attributes of *other* are unioned into the matching attribute
    here, wrapping a scalar in a list and treating ``None`` as empty.
    Any other non-``None`` value is copied only when the attribute here
    is ``None`` or an empty string. Finally, if ``language`` is a list,
    entries longer than four characters are dropped.
    """
    for name, incoming in other.__dict__.items():
        if isinstance(incoming, list):
            existing = getattr(self, name)
            if existing is None:
                combined = []
            elif isinstance(existing, list):
                combined = existing  # extended in place
            else:
                combined = [existing]
            # add only values that are not present yet
            for item in incoming:
                if item not in combined:
                    combined.append(item)
            setattr(self, name, combined)
            continue
        if incoming is None:
            continue
        present = getattr(self, name)
        if present is None or present == "":
            setattr(self, name, incoming)

    # in language, keep only entries of at most 4 characters
    if isinstance(self.language, list):
        self.language = [code for code in self.language if len(code) <= 4]
    return self
@property @property
def to_dict(self) -> str: def to_dict(self) -> str:
"""Convert the dataclass to a dictionary.""" """Convert the dataclass to a dictionary."""
return json.dumps(self.__dict__, ensure_ascii=False) data_dict = {
key: value for key, value in self.__dict__.items() if value is not None
}
# remove old_book from data_dict
if "old_book" in data_dict:
del data_dict["old_book"]
return json.dumps(data_dict, ensure_ascii=False)
def from_dataclass(self, dataclass: Optional[Any]) -> None: def from_dataclass(self, dataclass: Optional[Any]) -> None:
if dataclass is None: if dataclass is None:
@@ -89,8 +136,15 @@ class BookData:
for key, value in dataclass.__dict__.items(): for key, value in dataclass.__dict__.items():
setattr(self, key, value) setattr(self, key, value)
def get_book_type(self) -> str:
    """Classify the book as ``"eBook"`` or ``"Druckausgabe"``.

    A book is an eBook when its ``pages`` text contains ``"Online"``.
    A missing or non-string ``pages`` value is treated as a print edition
    instead of raising ``TypeError``.
    """
    pages = self.pages
    if isinstance(pages, str) and "Online" in pages:
        return "eBook"
    return "Druckausgabe"
def from_string(self, data: str) -> "BookData": def from_string(self, data: str) -> "BookData":
ndata = json.loads(data) ndata = json.loads(data)
return BookData(**ndata) return BookData(**ndata)
def from_LehmannsSearchResult(self, result: Any) -> "BookData": def from_LehmannsSearchResult(self, result: Any) -> "BookData":
@@ -111,6 +165,15 @@ class BookData:
# self.pages = str(result.pages) if result.pages else None # self.pages = str(result.pages) if result.pages else None
return self return self
@property
def edition_number(self) -> Optional[int]:
    """First run of digits in ``edition`` as an int.

    Yields ``0`` when ``edition`` is ``None`` or contains no digit.
    """
    found = (
        regex.search(r"(\d+)", self.edition) if self.edition is not None else None
    )
    return int(found.group(1)) if found else 0
@dataclass @dataclass
class MailData: class MailData:
@@ -222,3 +285,124 @@ class ELSA:
class ApparatData: class ApparatData:
prof: Prof = field(default_factory=Prof) prof: Prof = field(default_factory=Prof)
apparat: Apparat = field(default_factory=Apparat) apparat: Apparat = field(default_factory=Apparat)
@dataclass
class XMLMailSubmission:
    """Container for the fields of one mail submission; every field is optional."""

    # submitter
    name: str | None = None
    lastname: str | None = None
    title: str | None = None
    telno: int | None = None
    email: str | None = None
    # apparatus details
    app_name: str | None = None
    subject: str | None = None
    semester: Semester | None = None
    books: list[BookData] | None = None
@dataclass
class Book:
author: str = None
year: str = None
edition: str = None
title: str = None
location: str = None
publisher: str = None
signature: str = None
internal_notes: str = None
@property
def has_signature(self) -> bool:
return self.signature is not None and self.signature != ""
@property
def is_empty(self) -> bool:
return all(
[
self.author == "",
self.year == "",
self.edition == "",
self.title == "",
self.location == "",
self.publisher == "",
self.signature == "",
self.internal_notes == "",
]
)
def from_dict(self, data: dict[str, Any]):
for key, value in data.items():
value = value.strip()
if value == "\u2002\u2002\u2002\u2002\u2002":
value = ""
if key == "Autorenname(n):Nachname, Vorname":
self.author = value
elif key == "Jahr/Auflage":
self.year = value.split("/")[0] if "/" in value else value
self.edition = value.split("/")[1] if "/" in value else ""
elif key == "Titel":
self.title = value
elif key == "Ort und Verlag":
self.location = value.split(",")[0] if "," in value else value
self.publisher = value.split(",")[1] if "," in value else ""
elif key == "Standnummer":
self.signature = value.strip()
elif key == "Interne Vermerke":
self.internal_notes = value
@dataclass
class SemapDocument:
    """Parsed content of a submitted apparatus document."""

    subject: str | None = None
    phoneNumber: int | None = None
    mail: str | None = None
    title: str | None = None
    title_suggestions: list[str] | None = None
    semester: Union[str, Semester, None] = None
    books: list[Book] | None = None
    eternal: bool = False
    personName: str | None = None
    personTitle: str | None = None
    # Not dataclass fields (no annotation): plain class-level defaults that
    # ``nameSetter`` overwrites per instance.
    title_length = 0
    title_max_length = 0

    def __post_init__(self):
        # Only default the list; a value passed to the constructor used to
        # be discarded here.
        if self.title_suggestions is None:
            self.title_suggestions = []

    @property
    def nameSetter(self):
        """Split ``personTitle`` into name and title and check the title length.

        NOTE: although declared as a property, accessing it mutates the
        instance. ``name_tester`` is called on ``personTitle``; its result
        provides ``personName`` ("last, first") and, when present, the
        academic title. If the apparatus title plus the surname exceeds 40
        characters, shortened titles are requested from ``run_shortener``
        and appended to ``title_suggestions``; otherwise the suggestions
        are cleared.
        """
        data = name_tester(self.personTitle)
        name = f"{data['last_name']}, {data['first_name']}"
        # NOTE(review): personTitle is only replaced when a title was found;
        # confirm that keeping the previous value otherwise is intended.
        if data["title"] is not None:
            self.personTitle = data["title"]
        self.personName = name
        surname_len = len(self.personName.split(",")[0])
        self.title_length = len(self.title) + 3 + surname_len
        if self.title_length > 40:
            self.title_max_length = 38 - surname_len
            suggestions = run_shortener(self.title, self.title_max_length)
            for suggestion in suggestions:
                self.title_suggestions.append(suggestion["shortened_string"])
        else:
            self.title_suggestions = []

    @property
    def renameSemester(self) -> None:
        """Convert the textual ``semester`` into a ``Semester`` object.

        NOTE: although declared as a property, accessing it mutates the
        instance. A value containing ", Dauer" marks the apparatus as
        ``eternal`` and only the part before the comma is parsed. Any
        other text goes through ``semester_converter`` first. Nothing
        happens when ``semester`` is empty or is no longer a string, so
        repeated access is safe.
        """
        if not self.semester or not isinstance(self.semester, str):
            return
        if ", Dauer" in self.semester:
            self.semester = self.semester.split(",")[0]
            self.eternal = True
            self.semester = Semester().from_string(self.semester)
        else:
            self.semester = Semester().from_string(
                semester_converter(self.semester)
            )

    @property
    def signatures(self) -> list[str]:
        """Signatures of all books that have one; empty if ``books`` is unset."""
        if self.books is not None:
            return [book.signature for book in self.books if book.has_signature]
        return []

View File

@@ -1,13 +1,15 @@
from __future__ import annotations from __future__ import annotations
import re import re
from dataclasses import dataclass, asdict, field from dataclasses import asdict, dataclass, field
from typing import Optional, List, Iterable from typing import Iterable, List, Optional
from urllib.parse import urljoin, quote_plus from urllib.parse import quote_plus, urljoin
import httpx import httpx
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from src.logic.dataclass import BookData
BASE = "https://www.lehmanns.de" BASE = "https://www.lehmanns.de"
SEARCH_URL = "https://www.lehmanns.de/search/quick?mediatype_id=&q=" SEARCH_URL = "https://www.lehmanns.de/search/quick?mediatype_id=&q="
@@ -33,9 +35,11 @@ class LehmannsSearchResult:
image: Optional[str] = None image: Optional[str] = None
# From detail page: # From detail page:
pages: Optional[str] = None # "<N> Seiten" pages: Optional[str] = None # "<N> Seiten"
buyable: bool = True # set in enrich_pages (detail page) buyable: bool = True # set in enrich_pages (detail page)
unavailable_hint: Optional[str] = None # e.g. "Titel ist leider vergriffen; keine Neuauflage" unavailable_hint: Optional[str] = (
None # e.g. "Titel ist leider vergriffen; keine Neuauflage"
)
def to_dict(self) -> dict: def to_dict(self) -> dict:
return asdict(self) return asdict(self)
@@ -73,31 +77,45 @@ class LehmannsClient:
# spaces -> '+' # spaces -> '+'
return SEARCH_URL + quote_plus(title) return SEARCH_URL + quote_plus(title)
def search_by_title(self, title: str, limit: Optional[int] = None, strict: bool = False) -> List[LehmannsSearchResult]: def search_by_title(
self,
title: str,
limit: Optional[int] = None,
strict: bool = False,
only_latest: bool = True,
) -> List[BookData]:
""" """
Parse the listing page only (no availability check here). Parse the listing page only (no availability check here).
Use enrich_pages(...) afterwards to fetch detail pages, add 'pages', Use enrich_pages(...) afterwards to fetch detail pages, add 'pages',
and drop unbuyable items. and drop unbuyable items.
""" """
url = self.build_search_url(title) url = self.build_search_url(title=title)
html = self._get(url) html = self._get(url)
if not html: if not html:
return [] return []
results = self._parse_results(html) results = self._parse_results(html)
self.enrich_pages(results) self.enrich_pages(results)
results = [BookData().from_LehmannsSearchResult(r) for r in results]
if strict: if strict:
# filter results to only those with exact title match (case-insensitive) # filter results to only those with exact title match (case-insensitive)
title_lower = title.lower() title_lower = title.lower()
results = [r for r in results if r.title and r.title.lower() == title_lower] results = [r for r in results if r.title and r.title.lower() == title_lower]
results = [r for r in results if r.buyable] # results = [r for r in results if r.buyable]
return results return results
if limit is not None: if limit is not None:
results = results[:max(0, limit)] results = results[: max(0, limit)]
if only_latest and len(results) > 1:
# keep only the latest edition (highest edition number)
results.sort(key=lambda r: (r.edition_number or 0), reverse=True)
results = [results[0]]
return results return results
# ------------------- Detail enrichment & filtering ------------------- # ------------------- Detail enrichment & filtering -------------------
def enrich_pages(self, results: Iterable[LehmannsSearchResult], drop_unbuyable: bool = True) -> List[LehmannsSearchResult]: def enrich_pages(
self, results: Iterable[LehmannsSearchResult], drop_unbuyable: bool = True
) -> List[LehmannsSearchResult]:
""" """
Fetch each result.url, extract: Fetch each result.url, extract:
- pages: from <span class="book-meta meta-seiten" itemprop="numberOfPages">...</span> - pages: from <span class="book-meta meta-seiten" itemprop="numberOfPages">...</span>
@@ -135,11 +153,15 @@ class LehmannsClient:
# Availability via li.availability-3 # Availability via li.availability-3
avail_li = soup.select_one("li.availability-3") avail_li = soup.select_one("li.availability-3")
if avail_li: if avail_li:
avail_text = " ".join(avail_li.get_text(" ", strip=True).split()).lower() avail_text = " ".join(
avail_li.get_text(" ", strip=True).split()
).lower()
if "titel ist leider vergriffen" in avail_text: if "titel ist leider vergriffen" in avail_text:
r.buyable = False r.buyable = False
if "keine neuauflage" in avail_text: if "keine neuauflage" in avail_text:
r.unavailable_hint = "Titel ist leider vergriffen; keine Neuauflage" r.unavailable_hint = (
"Titel ist leider vergriffen; keine Neuauflage"
)
else: else:
r.unavailable_hint = "Titel ist leider vergriffen" r.unavailable_hint = "Titel ist leider vergriffen"
@@ -161,7 +183,9 @@ class LehmannsClient:
try: try:
r = self.client.get(url) r = self.client.get(url)
r.encoding = "utf-8" r.encoding = "utf-8"
if r.status_code == 200 and "text/html" in (r.headers.get("content-type") or ""): if r.status_code == 200 and "text/html" in (
r.headers.get("content-type") or ""
):
return r.text return r.text
except httpx.HTTPError: except httpx.HTTPError:
pass pass
@@ -176,12 +200,18 @@ class LehmannsClient:
if not a: if not a:
continue continue
url = urljoin(BASE, a["href"].strip()) url = urljoin(BASE, a["href"].strip())
base_title = (block.select_one(".title [itemprop='name']") or a).get_text(strip=True) base_title = (block.select_one(".title [itemprop='name']") or a).get_text(
strip=True
)
# Alternative headline => extend title # Alternative headline => extend title
alt_tag = block.select_one(".description[itemprop='alternativeHeadline']") alt_tag = block.select_one(".description[itemprop='alternativeHeadline']")
alternative_headline = alt_tag.get_text(strip=True) if alt_tag else None alternative_headline = alt_tag.get_text(strip=True) if alt_tag else None
title = f"{base_title} : {alternative_headline}" if alternative_headline else base_title title = (
f"{base_title} : {alternative_headline}"
if alternative_headline
else base_title
)
description = alternative_headline description = alternative_headline
# Authors from .author # Authors from .author
@@ -227,7 +257,9 @@ class LehmannsClient:
# Publisher # Publisher
publisher = None publisher = None
pub = block.select_one(".publisherprop [itemprop='name']") or block.select_one(".publisher [itemprop='name']") pub = block.select_one(
".publisherprop [itemprop='name']"
) or block.select_one(".publisher [itemprop='name']")
if pub: if pub:
publisher = pub.get_text(strip=True) publisher = pub.get_text(strip=True)

View File

@@ -21,4 +21,4 @@ if __name__ == "__main__":
text = pdf_to_csv("54_pdf.pdf") text = pdf_to_csv("54_pdf.pdf")
# remove linebreaks # remove linebreaks
text = text.replace("\n", "") text = text.replace("\n", "")
print(text) # print(text)

View File

@@ -1,16 +1,15 @@
import sys import sys
import zipfile import zipfile
from dataclasses import dataclass from typing import Any
from typing import Any, Union
import fitz # PyMuPDF
import loguru import loguru
import pandas as pd import pandas as pd
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from docx import Document from docx import Document
from src import LOG_DIR from src import LOG_DIR
from src.backend.semester import Semester from src.logic.dataclass import Book, SemapDocument
from src.logic.openai import name_tester, run_shortener, semester_converter
log = loguru.logger log = loguru.logger
log.remove() log.remove()
@@ -18,116 +17,6 @@ log.add(sys.stdout, level="INFO")
log.add(f"{LOG_DIR}/application.log", rotation="1 MB", retention="10 days") log.add(f"{LOG_DIR}/application.log", rotation="1 MB", retention="10 days")
letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
@dataclass
class Book:
    """One literature row of a Semesterapparat request form.

    Every field holds the text taken from the matching form column; ``None``
    means the field was never populated.
    """

    author: str = None
    year: str = None
    edition: str = None
    title: str = None
    location: str = None
    publisher: str = None
    signature: str = None
    internal_notes: str = None

    @property
    def has_signature(self) -> bool:
        """Return True when a non-empty signature (column "Standnummer") is set."""
        return bool(self.signature)

    @property
    def is_empty(self) -> bool:
        """Return True when no field carries any text.

        Fields that were never populated (``None``) count as empty as well,
        so a freshly constructed ``Book()`` is reported as empty.
        """
        return not any(
            (
                self.author,
                self.year,
                self.edition,
                self.title,
                self.location,
                self.publisher,
                self.signature,
                self.internal_notes,
            )
        )

    def from_dict(self, data: dict[str, Any]):
        """Populate the book from a mapping of form column headers to cell text.

        Args:
            data: Mapping keyed by the column headers of the request form
                ("Autorenname(n):Nachname, Vorname", "Jahr/Auflage", "Titel",
                "Ort und Verlag", "Standnummer", "Interne Vermerke").
                Unknown keys are ignored; ``None`` values are treated as
                empty cells.

        Returns:
            The instance itself, so the call can be chained.
        """
        for key, value in data.items():
            # str.strip() also removes the en-space (U+2002) padding that
            # untouched form cells contain, so no separate placeholder
            # check is required.
            text = "" if value is None else str(value).strip()
            if key == "Autorenname(n):Nachname, Vorname":
                self.author = text
            elif key == "Jahr/Auflage":
                # "2020/3" -> year "2020", edition "3"; without a slash the
                # whole cell is the year and the edition stays empty.
                year, _, edition = text.partition("/")
                self.year = year.strip()
                self.edition = edition.strip()
            elif key == "Titel":
                self.title = text
            elif key == "Ort und Verlag":
                # Split on the first comma only, so publisher names that
                # contain commas themselves are kept intact.
                location, _, publisher = text.partition(",")
                self.location = location.strip()
                self.publisher = publisher.strip()
            elif key == "Standnummer":
                self.signature = text
            elif key == "Interne Vermerke":
                self.internal_notes = text
        return self
@dataclass
class SemapDocument:
    """Parsed content of a Semesterapparat request document.

    Holds the header data of the form (requester, subject, course title,
    semester) and the list of requested books.
    """

    subject: str = None
    phoneNumber: int = None
    mail: str = None
    title: str = None
    title_suggestions: list[str] = None
    semester: Union[str, Semester] = None
    books: list[Book] = None
    eternal: bool = False
    personName: str = None
    personTitle: str = None
    # Not annotated, hence plain class attributes rather than dataclass fields.
    title_length = 0
    title_max_length = 0

    def __post_init__(self):
        # Every instance starts with its own, empty suggestion list.
        self.title_suggestions = []

    @property
    def nameSetter(self):
        """Split ``personTitle`` into name and title and check the title length.

        Accessing this property has side effects: it calls ``name_tester`` on
        ``personTitle``, stores "<last_name>, <first_name>" in ``personName``
        and, when ``name_tester`` reports a title, overwrites ``personTitle``
        with it. If the combined length exceeds 40 characters, shortened
        titles are requested from ``run_shortener`` and collected in
        ``title_suggestions``; otherwise the suggestions are cleared.
        """
        data = name_tester(self.personTitle)
        name = f"{data['last_name']}, {data['first_name']}"
        if data["title"] is not None:
            self.personTitle = data["title"]
        self.personName = name
        last_name = self.personName.split(",")[0]
        # A missing course title counts as length 0 instead of raising.
        self.title_length = len(self.title or "") + 3 + len(last_name)
        if self.title_length > 40:
            log.warning("Title is too long")
            self.title_max_length = 38 - len(last_name)
            suggestions = run_shortener(self.title, self.title_max_length)
            for suggestion in suggestions:
                self.title_suggestions.append(suggestion["shortened_string"])
        else:
            self.title_suggestions = []

    @property
    def renameSemester(self) -> None:
        """Convert the textual ``semester`` into a ``Semester`` object.

        Accessing this property has side effects: a value containing
        ", Dauer" marks the document as ``eternal`` and only the part before
        the first comma is parsed; any other text is first passed through
        ``semester_converter``. A missing or already converted semester is
        left untouched.
        """
        if self.semester is None:
            log.warning("Semester is missing; skipping semester conversion")
            return
        if not isinstance(self.semester, str):
            # Already converted (or not textual): nothing to parse.
            return
        if ", Dauer" in self.semester:
            self.semester = self.semester.split(",")[0]
            self.eternal = True
            self.semester = Semester().from_string(self.semester)
        else:
            log.warning("Semester {} is not valid", self.semester)
            self.semester = Semester().from_string(semester_converter(self.semester))

    @property
    def signatures(self) -> list[str]:
        """Return the signatures of all books that have one (empty if no books)."""
        if self.books is not None:
            return [book.signature for book in self.books if book.has_signature]
        return []
def word_docx_to_csv(path: str) -> list[pd.DataFrame]: def word_docx_to_csv(path: str) -> list[pd.DataFrame]:
doc = Document(path) doc = Document(path)
tables = doc.tables tables = doc.tables
@@ -272,7 +161,7 @@ def word_to_semap(word_path: str, ai: bool = True) -> SemapDocument:
apparatdata = df[0] apparatdata = df[0]
apparatdata = apparatdata.to_dict() apparatdata = apparatdata.to_dict()
keys = list(apparatdata.keys()) keys = list(apparatdata.keys())
print(apparatdata, keys) # print(apparatdata, keys)
appdata = {keys[i]: keys[i + 1] for i in range(0, len(keys) - 1, 2)} appdata = {keys[i]: keys[i + 1] for i in range(0, len(keys) - 1, 2)}
semap.phoneNumber = appdata["Telefon:"] semap.phoneNumber = appdata["Telefon:"]
@@ -309,6 +198,182 @@ def word_to_semap(word_path: str, ai: bool = True) -> SemapDocument:
return semap return semap
def pdf_to_semap(pdf_path: str, ai: bool = True) -> SemapDocument:
    """
    Parse a Semesterapparat request PDF and return a SemapDocument.

    - No external programs, only PyMuPDF.
    - Robust to multi-line field values (e.g., hyphenated emails) and multi-line table cells.
    - Works across multiple pages; headers only need to exist on the first page.

    Args:
        pdf_path: Path of the PDF file to parse.
        ai: When true, ``semap.renameSemester`` and ``semap.nameSetter`` are
            evaluated after parsing to post-process semester and name.

    Returns:
        The populated SemapDocument. ``books`` only contains rows that are
        not empty and carry a signature.
    """

    # ---------- helpers ----------
    def _join_tokens(tokens: list[str]) -> str:
        """Join tokens, preserving hyphen/URL joins across line wraps."""
        parts: list[str] = []
        for tok in tokens:
            if parts and parts[-1].endswith(("-", "/", ":")):
                parts[-1] = parts[-1] + tok  # no space after '-', '/' or ':'
            else:
                parts.append(tok)
        return " ".join(parts).strip()

    def _extract_row_values_multiline(
        page, labels: list[str], y_window: float = 24
    ) -> dict[str, str]:
        """For a row of inline labels (e.g., Name/Fach/Telefon/Mail), grab text to the right of each label."""
        rects = []
        for lab in labels:
            hits = page.search_for(lab)
            if hits:
                rects.append((lab, hits[0]))  # only the first occurrence counts
        if not rects:
            return {}
        rects.sort(key=lambda t: t[1].x0)
        # word tuples are indexed as (x0, y0, x1, y1, text, ...) below
        words = page.get_text("words")
        out = {}
        for i, (lab, r) in enumerate(rects):
            # the value area spans from the end of this label to the start of
            # the next label (or almost to the right page edge for the last)
            x0 = r.x1 + 1
            x1 = rects[i + 1][1].x0 - 1 if i + 1 < len(rects) else page.rect.width - 5
            y0 = r.y0 - 3
            y1 = r.y0 + y_window
            toks = [w for w in words if x0 <= w[0] <= x1 and y0 <= w[1] <= y1]
            toks.sort(key=lambda w: (w[1], w[0]))  # line, then x
            out[lab] = _join_tokens([w[4] for w in toks])
        return out

    def _compute_columns_from_headers(page0):
        """Find column headers (once) and derive column centers + header baseline."""
        headers = [
            ("Autorenname(n):", "Autorenname(n):Nachname, Vorname"),
            ("Jahr/Auflage", "Jahr/Auflage"),
            ("Titel", "Titel"),
            ("Ort und Verlag", "Ort und Verlag"),
            ("Standnummer", "Standnummer"),
            ("Interne Vermerke", "Interne Vermerke"),
        ]
        found = []
        for label, canon in headers:
            rects = [
                r for r in page0.search_for(label) if r.y0 > 200
            ]  # skip top-of-form duplicates
            if rects:
                found.append((canon, rects[0]))
        found.sort(key=lambda t: t[1].x0)
        cols = [(canon, r.x0, r.x1, (r.x0 + r.x1) / 2.0) for canon, r in found]
        header_y = min(r.y0 for _, r in found) if found else 0
        return cols, header_y

    def _extract_table_rows_from_page(
        page, cols, header_y, y_top_margin=5, y_bottom_margin=40, y_tol=26.0
    ):
        """
        Group words into logical rows (tolerant to wrapped lines), then map each word
        to the nearest column by x-center and join tokens per column.
        """
        if not cols:
            # without any known column there is nothing to map words onto
            return []
        words = [
            w
            for w in page.get_text("words")
            if w[1] > header_y + y_top_margin
            and w[3] < page.rect.height - y_bottom_margin
        ]

        # group into row bands by y (tolerance big enough to capture wrapped lines, but below next row gap)
        rows = []
        for w in sorted(words, key=lambda w: w[1]):
            y = w[1]
            for row in rows:
                if abs(row["y_mean"] - y) <= y_tol:
                    row["ys"].append(y)
                    row["y_mean"] = sum(row["ys"]) / len(row["ys"])
                    row["words"].append(w)
                    break
            else:
                # no existing band is close enough: start a new row
                rows.append({"y_mean": y, "ys": [y], "words": [w]})

        # map to columns + join
        joined_rows = []
        for row in rows:
            rowdict = {canon: "" for canon, *_ in cols}
            words_by_col = {canon: [] for canon, *_ in cols}
            for w in sorted(row["words"], key=lambda w: (w[1], w[0])):
                xmid = (w[0] + w[2]) / 2.0
                canon = min(cols, key=lambda c: abs(xmid - c[3]))[0]
                words_by_col[canon].append(w[4])
            for canon, toks in words_by_col.items():
                rowdict[canon] = _join_tokens(toks)
            if any(v for v in rowdict.values()):
                joined_rows.append(rowdict)
        return joined_rows

    author_key = "Autorenname(n):Nachname, Vorname"

    doc = fitz.open(pdf_path)
    try:
        semap = SemapDocument()

        # ---------- top-of-form fields ----------
        p0 = doc[0]
        row1 = _extract_row_values_multiline(
            p0,
            ["Ihr Name und Titel:", "Ihr Fach:", "Telefon:", "Mailadresse:"],
            y_window=22,
        )
        row2 = _extract_row_values_multiline(
            p0, ["Veranstaltung:", "Semester:"], y_window=20
        )

        name_title = row1.get("Ihr Name und Titel:", "") or ""
        semap.subject = row1.get("Ihr Fach:", None)
        semap.phoneNumber = row1.get("Telefon:", None)  # keep as-is (string like "682-308")
        semap.mail = row1.get("Mailadresse:", None)
        # everything before the last comma is the name, the last part the title
        semap.personName = ",".join(name_title.split(",")[:-1]) if name_title else None
        semap.personTitle = (
            ",".join(name_title.split(",")[-1:]).strip() if name_title else None
        )
        semap.title = row2.get("Veranstaltung:", None)
        semap.semester = row2.get("Semester:", None)

        # ---------- table extraction (all pages) ----------
        cols, header_y = _compute_columns_from_headers(p0)
        if not cols:
            log.warning("No table headers found in {}; no books extracted", pdf_path)
        # NOTE(review): header_y comes from the first page and is applied to
        # every page, so words above that height on later pages are ignored.
        all_rows: list[dict[str, Any]] = []
        for pn in range(len(doc)):
            all_rows.extend(_extract_table_rows_from_page(doc[pn], cols, header_y))

        # drop the sub-header line "Nachname, Vorname" etc.
        filtered = []
        for r in all_rows:
            if r.get(author_key, "").strip() in ("", "Nachname, Vorname"):
                # skip if it's just the sub-header line
                if all(not r[c] for c in r if c != author_key):
                    continue
            filtered.append(r)

        # build Book objects (same filters as the word parser)
        booklist: list[Book] = []
        for row in filtered:
            b = Book()
            b.from_dict(row)
            if b.is_empty:
                continue
            if not b.has_signature:
                continue
            booklist.append(b)
        semap.books = booklist
    finally:
        # release the file handle even when parsing fails
        doc.close()

    # keep parity with the word parser's post-processing
    if ai:
        _ = semap.renameSemester
        _ = semap.nameSetter
    return semap
if __name__ == "__main__": if __name__ == "__main__":
else_df = word_to_semap("C:/Users/aky547/Desktop/semap/db/temp/tmpzsz_hgdr.docx") else_df = pdf_to_semap("C:/Users/aky547/Dokumente/testsemap.pdf")
print(else_df) # print(else_df)

67
src/logic/xmlparser.py Normal file
View File

@@ -0,0 +1,67 @@
import xml.etree.ElementTree as ET
from src.logic.dataclass import Apparat, BookData, SemapDocument, XMLMailSubmission
from src.logic.semester import Semester
def parse_xml_submission(xml_string: str) -> XMLMailSubmission:
    """
    Parse an XML string representing a mail submission and return an XMLMailSubmission object.

    The document is read from two children of the root element: ``<static>``
    (name, lastname, title, telno, mail, apparatsname, subject, semester) and
    ``<books>`` (one child per book with authorname, year, title, signature).
    A book's ``year`` may be given as "year/edition".

    Args:
        xml_string: The XML document as text.

    Returns:
        The populated submission.

    Raises:
        xml.etree.ElementTree.ParseError: If ``xml_string`` is not well-formed.
        ValueError: If the semester is missing or not of the form
            "<term> <year>", or if ``telno``/the semester year is not numeric.
    """
    submission = XMLMailSubmission()
    root = ET.fromstring(xml_string)

    # find() returns None for a missing element; treat that as "no children"
    static_data = root.find("static")
    static_info = (
        {child.tag: child.text for child in static_data}
        if static_data is not None
        else {}
    )

    books = root.find("books")
    books_info = []
    for book_element in books if books is not None else ():
        book_details = {detail.tag: detail.text for detail in book_element}
        # "2020/3" -> year "2020", edition "3"; an absent or empty <year>
        # yields None for both instead of raising.
        year_parts = (book_details.get("year") or "").split("/")
        year = year_parts[0].strip() or None
        edition = (year_parts[1].strip() or None) if len(year_parts) > 1 else None
        books_info.append(
            BookData(
                author=book_details.get("authorname"),
                year=year,
                edition=edition,
                title=book_details.get("title"),
                signature=book_details.get("signature"),
            )
        )

    # Extract static data
    submission.name = static_info.get("name")
    submission.lastname = static_info.get("lastname")
    submission.title = static_info.get("title")
    telno = static_info.get("telno")
    # a missing phone number stays None; a non-numeric one still raises ValueError
    submission.telno = int(telno) if telno and telno.strip() else None
    submission.email = static_info.get("mail")
    submission.app_name = static_info.get("apparatsname")
    submission.subject = static_info.get("subject")

    semester_parts = (static_info.get("semester") or "").split()
    if len(semester_parts) < 2:
        raise ValueError(
            f"Semester must look like '<term> <year>', got {static_info.get('semester')!r}"
        )
    sem_term, sem_year = semester_parts[0], semester_parts[1]
    submission.semester = Semester(semester=sem_term, year=int(sem_year))
    submission.books = books_info
    return submission
def eml_parser(path: str) -> XMLMailSubmission:
    """Read an .eml file and parse the XML that follows its header section.

    The file is decoded as UTF-8. Everything after the first blank line is
    taken as the XML body; a file without a blank line raises IndexError.
    """
    with open(path, "r", encoding="utf-8") as eml_file:
        raw_message = eml_file.read()
    header_and_body = raw_message.split("\n\n", 1)
    xml_content = header_and_body[1]  # Skip headers
    print("EML content loaded, parsing XML...")
    print(xml_content)
    return parse_xml_submission(xml_content)
def eml_to_semap(path: str) -> SemapDocument:
    """Build a SemapDocument from the XML submission stored in an .eml file.

    Only the apparat (name and subject), the semester and the books are
    transferred; the professor data of the submission is not passed on.
    """
    parsed = eml_parser(path)
    apparat = Apparat(name=parsed.app_name, subject=parsed.subject)
    return SemapDocument(
        apparat=apparat,
        semester=parsed.semester,
        books=parsed.books,
    )

View File

@@ -1,7 +1,9 @@
from pyzotero import zotero
from dataclasses import dataclass from dataclasses import dataclass
from src.logic.webrequest import WebRequest, BibTextTransformer
from pyzotero import zotero
from src import settings from src import settings
from src.logic.webrequest import BibTextTransformer, WebRequest
@dataclass @dataclass
@@ -187,7 +189,7 @@ class ZoteroController:
book = bib.return_data() book = bib.return_data()
return book return book
# # print(zot.item_template("bookSection")) # # #print(zot.item_template("bookSection"))
def createBook(self, isbn): def createBook(self, isbn):
book = self.__get_data(isbn) book = self.__get_data(isbn)
@@ -210,7 +212,7 @@ class ZoteroController:
def createItem(self, item): def createItem(self, item):
resp = self.zot.create_items([item]) resp = self.zot.create_items([item])
if "successful" in resp.keys(): if "successful" in resp.keys():
# print(resp["successful"]["0"]["key"]) # #print(resp["successful"]["0"]["key"])
return resp["successful"]["0"]["key"] return resp["successful"]["0"]["key"]
else: else:
return None return None
@@ -220,7 +222,7 @@ class ZoteroController:
for item in items: for item in items:
if item["key"] == key: if item["key"] == key:
self.zot.delete_item(item) self.zot.delete_item(item)
# print(item) # #print(item)
break break
def createHGSection(self, book: Book, data: dict): def createHGSection(self, book: Book, data: dict):
@@ -241,7 +243,7 @@ class ZoteroController:
] ]
chapter.creators += authors chapter.creators += authors
# print(chapter.to_dict()) # #print(chapter.to_dict())
return self.createItem(chapter.to_dict()) return self.createItem(chapter.to_dict())
pass pass
@@ -257,7 +259,7 @@ class ZoteroController:
# chapter.creators # chapter.creators
def createJournalArticle(self, journal, article): def createJournalArticle(self, journal, article):
# print(type(article)) # #print(type(article))
journalarticle = JournalArticle() journalarticle = JournalArticle()
journalarticle.assign(journal) journalarticle.assign(journal)
journalarticle.itemType = "journalArticle" journalarticle.itemType = "journalArticle"
@@ -273,7 +275,7 @@ class ZoteroController:
journalarticle.issue = article["issue"] journalarticle.issue = article["issue"]
journalarticle.url = article["isbn"] journalarticle.url = article["isbn"]
# print(journalarticle.to_dict()) # #print(journalarticle.to_dict())
return self.createItem(journalarticle.to_dict()) return self.createItem(journalarticle.to_dict())
@@ -319,16 +321,16 @@ if __name__ == "__main__":
# if isinstance(publishers, str): # if isinstance(publishers, str):
# publishers = [publishers] # publishers = [publishers]
# for publisher in publishers: # for publisher in publishers:
# # print(publisher) # # #print(publisher)
# creator = Creator().from_string(publisher) # creator = Creator().from_string(publisher)
# creator.creatorType = "editor" # creator.creatorType = "editor"
# authors.append(creator.__dict__) # authors.append(creator.__dict__)
# chapter.creators = authors # chapter.creators = authors
# chapter.publisher = book.publisher # chapter.publisher = book.publisher
# # print(chapter.to_dict()) # # #print(chapter.to_dict())
# createBookSection(chapter.to_dict()) # createBookSection(chapter.to_dict())
# get_citation("9ZXH8DDE") # get_citation("9ZXH8DDE")
# # # print() # # # #print()
# # print(get_books()) # # #print(get_books())
# # print(zot.item_creator_types("bookSection")) # # #print(zot.item_creator_types("bookSection"))

View File

@@ -12,20 +12,21 @@ __all__ = [
"ElsaAddEntry", "ElsaAddEntry",
"ApparatExtendDialog", "ApparatExtendDialog",
"DocumentPrintDialog", "DocumentPrintDialog",
"NewEditionDialog",
"Settings", "Settings",
] ]
from .about import About
from .app_ext import ApparatExtendDialog
from .bookdata import BookDataUI from .bookdata import BookDataUI
from .docuprint import DocumentPrintDialog
from .elsa_add_entry import ElsaAddEntry
from .elsa_gen_confirm import ElsaGenConfirm
from .login import LoginDialog from .login import LoginDialog
from .mail import Mail_Dialog from .mail import Mail_Dialog
from .mailTemplate import MailTemplateDialog from .mailTemplate import MailTemplateDialog
from .medienadder import MedienAdder from .medienadder import MedienAdder
from .newEdition import NewEditionDialog
from .parsed_titles import ParsedTitles from .parsed_titles import ParsedTitles
from .popup_confirm import ConfirmDialog as popus_confirm from .popup_confirm import ConfirmDialog as popus_confirm
from .reminder import ReminderDialog from .reminder import ReminderDialog
from .about import About
from .elsa_gen_confirm import ElsaGenConfirm
from .elsa_add_entry import ElsaAddEntry
from .app_ext import ApparatExtendDialog
from .docuprint import DocumentPrintDialog
from .settings import Settings from .settings import Settings

View File

@@ -2,7 +2,8 @@ from natsort import natsorted
from PySide6 import QtWidgets from PySide6 import QtWidgets
from src import Icon from src import Icon
from src.backend import Database, Semester from src.backend import Database
from src.logic import Semester
from src.utils.richtext import SemapSchilder, SemesterDocument from src.utils.richtext import SemapSchilder, SemesterDocument
from .dialog_sources.documentprint_ui import Ui_Dialog from .dialog_sources.documentprint_ui import Ui_Dialog

View File

@@ -1,4 +1,6 @@
import os import os
import re
import smtplib
import sys import sys
import loguru import loguru
@@ -7,7 +9,7 @@ from PySide6 import QtWidgets
from src import LOG_DIR, Icon from src import LOG_DIR, Icon
from src import settings as config from src import settings as config
from .dialog_sources.Ui_mail_preview import Ui_eMailPreview as MailPreviewDialog from .dialog_sources.mail_preview_ui import Ui_eMailPreview as MailPreviewDialog
from .mailTemplate import MailTemplateDialog from .mailTemplate import MailTemplateDialog
log = loguru.logger log = loguru.logger
@@ -15,37 +17,61 @@ log.remove()
log.add(sys.stdout, level="INFO") log.add(sys.stdout, level="INFO")
log.add(f"{LOG_DIR}/application.log", rotation="1 MB", retention="10 days") log.add(f"{LOG_DIR}/application.log", rotation="1 MB", retention="10 days")
CSS_RESET = "<style>html,body{margin:0;padding:0}p{margin:0}</style>"
empty_signature = """<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0//EN" "http://www.w3.org/TR/REC-html40/strict.dtd"> empty_signature = """"""
<html><head><meta name="qrichtext" content="1" /><meta charset="utf-8" /><style
type="text/css">
p, li { white-space: pre-wrap; } def _escape_braces_in_style(html: str) -> str:
"""
Double curly braces ONLY inside <style>...</style> blocks so that
str.format(...) won't treat CSS as placeholders. The doubled braces
will automatically render back to single braces after formatting.
"""
hr { height: 1px; border-width: 0; } def repl(m):
start, css, end = m.group(1), m.group(2), m.group(3)
css_escaped = css.replace("{", "{{").replace("}", "}}")
return f"{start}{css_escaped}{end}"
li.unchecked::marker { content: "\2610"; } return re.sub(
r"(<style[^>]*>)(.*?)(</style>)",
repl,
html,
flags=re.IGNORECASE | re.DOTALL,
)
li.checked::marker { content: "\2612"; }
</style></head><body style=" font-family:''Segoe UI''; font-size:9pt; font-weight:400; def _split_eml_headers_body(eml_text: str) -> tuple[str, str]:
font-style:normal;"> """
Return (headers, body_html). Robustly split on first blank line.
Accepts lines that contain only spaces/tabs as the separator.
"""
<p style="-qt-paragraph-type:empty; margin-top:0px; margin-bottom:0px; margin-left:0px; parts = re.split(r"\r?\n[ \t]*\r?\n", eml_text, maxsplit=1)
margin-right:0px; -qt-block-indent:0; text-indent:0px;"><br /></p></body></html> if len(parts) == 2:
""" return parts[0], parts[1]
# Fallback: try to split right after the Content-Transfer-Encoding line
m = re.search(
r"(?:^|\r?\n)Content-Transfer-Encoding:.*?(?:\r?\n)",
eml_text,
flags=re.I | re.S,
)
if m:
return eml_text[: m.end()], eml_text[m.end() :]
return "", eml_text # last resort: treat entire content as body
class Mail_Dialog(QtWidgets.QDialog, MailPreviewDialog): class Mail_Dialog(QtWidgets.QDialog, MailPreviewDialog):
def __init__( def __init__(
self, self,
app_id, app_id=None,
app_name, app_name=None,
app_subject, app_subject=None,
prof_name, prof_name=None,
prof_mail, prof_mail=None,
accepted_books=None, accepted_books=None,
ordered_books=None,
parent=None, parent=None,
default_mail="Information zum Semesterapparat", default_mail="Information zum Semesterapparat",
): ):
@@ -58,6 +84,7 @@ class Mail_Dialog(QtWidgets.QDialog, MailPreviewDialog):
self.subject = app_subject self.subject = app_subject
self.profname = prof_name self.profname = prof_name
self.books = accepted_books if accepted_books is not None else [] self.books = accepted_books if accepted_books is not None else []
self.ordered_books = ordered_books if ordered_books is not None else []
self.mail_data = "" self.mail_data = ""
self.signature = self.determine_signature() self.signature = self.determine_signature()
self.prof_mail = prof_mail self.prof_mail = prof_mail
@@ -65,52 +92,29 @@ class Mail_Dialog(QtWidgets.QDialog, MailPreviewDialog):
self.prof_name.setText(prof_name) self.prof_name.setText(prof_name)
self.mail_name.setText(self.prof_mail) self.mail_name.setText(self.prof_mail)
self.load_mail_templates() self.load_mail_templates()
# if none of the radio buttons is checked, disable the accept button of the dialog
self.setWindowIcon(Icon("mail").icon) self.setWindowIcon(Icon("mail").icon)
self.btn_okay.setEnabled(False) self.btn_okay.setEnabled(False)
Icon("edit_note", self.newTemplate) Icon("edit_note", self.newTemplate)
self.newTemplate.clicked.connect(self.open_new_template) self.newTemplate.clicked.connect(self.open_new_template)
if default_mail is not None: if default_mail is not None:
# get the nearest match to the default mail
for i in range(self.comboBox.count()): for i in range(self.comboBox.count()):
if default_mail in self.comboBox.itemText(i): if default_mail in self.comboBox.itemText(i):
default_mail = self.comboBox.itemText(i) default_mail = self.comboBox.itemText(i)
break break
self.comboBox.setCurrentText(default_mail) self.comboBox.setCurrentText(default_mail)
self.comboBox.currentIndexChanged.connect(self.set_mail)
# re-render when user changes greeting via radio buttons
self.gender_female.clicked.connect(self.set_mail) self.gender_female.clicked.connect(self.set_mail)
self.gender_male.clicked.connect(self.set_mail) self.gender_male.clicked.connect(self.set_mail)
self.gender_non.clicked.connect(self.set_mail) self.gender_non.clicked.connect(self.set_mail)
# reflect initial state (OK disabled until a greeting is chosen)
self._update_ok_button()
self.btn_okay.clicked.connect(self.createAndSendMail) self.btn_okay.clicked.connect(self.createAndSendMail)
def open_new_template(self): # add these helpers inside Mail_Dialog
log.info("Opening new template dialog")
# TODO: implement new mail template dialog
dialog = MailTemplateDialog()
dialog.updateSignal.connect(self.load_mail_templates)
dialog.exec()
pass
def determine_signature(self):
if config.mail.signature is empty_signature or config.mail.signature == "":
return """Mit freundlichen Grüßen
Ihr Semesterapparatsteam
Mail: semesterapparate@ph-freiburg.de
Tel.: 0761/682-778 | 07617682-545"""
else:
return config.mail.signature
def load_mail_templates(self):
# print("loading mail templates")
log.info("Loading mail templates")
mail_templates = os.listdir("mail_vorlagen")
log.info(f"Mail templates: {mail_templates}")
self.comboBox.clear()
for template in mail_templates:
self.comboBox.addItem(template)
def get_greeting(self): def get_greeting(self):
prof = self.profname.split(" ")[0] prof = self.profname.split(" ")[0]
if self.gender_male.isChecked(): if self.gender_male.isChecked():
@@ -124,45 +128,104 @@ Tel.: 0761/682-778 | 07617682-545"""
name = f"{self.profname.split(' ')[1]} {self.profname.split(' ')[0]}" name = f"{self.profname.split(' ')[1]} {self.profname.split(' ')[0]}"
return f"Guten Tag {name}," return f"Guten Tag {name},"
def _update_ok_button(self):
    """Enable the OK button only while one of the greeting radio buttons is checked."""
    greeting_buttons = (self.gender_male, self.gender_female, self.gender_non)
    any_checked = any(button.isChecked() for button in greeting_buttons)
    self.btn_okay.setEnabled(any_checked)
def _on_gender_toggled(self, checked: bool):
    """Re-render the mail when a greeting radio button becomes checked."""
    if not checked:
        # a button that was just unchecked does not trigger a refresh
        return
    self.set_mail()
def open_new_template(self):
    """Open the template editor dialog and reload the template list on its update signal."""
    log.info("Opening new template dialog")
    template_dialog = MailTemplateDialog()
    # refresh the combo box whenever the dialog emits updateSignal
    template_dialog.updateSignal.connect(self.load_mail_templates)
    template_dialog.exec()
def determine_signature(self):
    """Return the configured mail signature, or the default team signature.

    The default is used when the configured signature is missing (``None``),
    equal to ``empty_signature`` or consists of whitespace only.
    """
    signature = config.mail.signature
    # use equality, not identity; a missing signature counts as empty too
    if (
        signature is None
        or signature == empty_signature
        or signature.strip() == ""
    ):
        return """Mit freundlichen Grüßen
Ihr Semesterapparatsteam
Mail: semesterapparate@ph-freiburg.de
Tel.: 0761/682-778 | 0761/682-545"""
    return signature
def load_mail_templates(self):
    """Fill the template combo box with the .eml files found in "mail_vorlagen".

    The extension check is case-insensitive; any previous combo box entries
    are removed first.
    """
    log.info("Loading mail templates")
    mail_templates = []
    for entry in os.listdir("mail_vorlagen"):
        if entry.lower().endswith(".eml"):
            mail_templates.append(entry)
    log.info(f"Mail templates: {mail_templates}")
    self.comboBox.clear()
    for template_name in mail_templates:
        self.comboBox.addItem(template_name)
def set_mail(self): def set_mail(self):
log.info("Setting mail") log.info("Setting mail")
self._update_ok_button() # keep OK enabled state in sync
email_template = self.comboBox.currentText() email_template = self.comboBox.currentText()
if email_template == "": if not email_template:
log.debug("No mail template selected") log.debug("No mail template selected")
return return
with open(f"mail_vorlagen/{email_template}", "r", encoding="utf-8") as f: with open(f"mail_vorlagen/{email_template}", "r", encoding="utf-8") as f:
mail_template = f.read() eml_text = f.read()
# header label for UI (unchanged)
email_header = email_template.split(".eml")[0] email_header = email_template.split(".eml")[0]
if "{AppNr}" in email_template: if "{AppNr}" in email_template:
email_header = email_template.split(".eml")[0] email_header = email_header.format(AppNr=self.appid, AppName=self.appname)
email_header = email_header.format(AppNr=self.appid, AppName=self.appname)
self.mail_header.setText(email_header) self.mail_header.setText(email_header)
self.mail_data = mail_template.split("<html>")[0]
mail_html = mail_template.split("<html>")[1]
mail_html = "<html>" + mail_html
Appname = self.appname
mail_html = mail_html.format(
Profname=self.profname.split(" ")[0],
Appname=Appname,
AppNr=self.appid,
AppSubject=self.subject,
greeting=self.get_greeting(),
signature=self.signature,
newEditions="<br>".join(
[
f"{book.title} von {book.author} (ISBN: {book.isbn}, Auflage: {book.edition}, In Bibliothek: {'ja' if getattr(book, 'library_location', 1) == 1 else 'nein'})"
for book in self.books
]
)
if self.books
else "keine neuen Auflagen gefunden",
)
self.mail_body.setHtml(mail_html) headers, body_html = _split_eml_headers_body(eml_text)
body_html = _escape_braces_in_style(body_html)
# compute greeting from the current toggle selection
greeting = self.get_greeting()
try:
body_html = body_html.format(
Profname=self.profname.split(" ")[
0
], # last name if your template uses {Profname}
Appname=self.appname,
AppNr=self.appid,
AppSubject=self.subject,
greeting=greeting,
signature=self.signature,
newEditions="\n".join(
[
f"- {book.title} (ISBN: {','.join(book.isbn)}, Auflage: {book.edition if book.edition else 'nicht bekannt'}, In Bibliothek: {'ja' if getattr(book, 'signature', None) is not None and 'Handbibliothek' not in str(book.library_location) else 'nein'}, Typ: {book.get_book_type()}) Aktuelle Auflage: {book.old_book.edition if book.old_book and book.old_book.edition else 'nicht bekannt'}"
for book in (self.books or [])
]
)
if self.books
else "keine neuen Auflagen gefunden",
newEditionsOrdered="\n".join(
[
f" - {book.title}, ISBN: {','.join(book.isbn)}, Bibliotheksstandort : {book.library_location if book.library_location else 'N/A'}, Link: {book.link}"
for book in (self.ordered_books or [])
]
),
)
except Exception as e:
log.error(f"Template formatting failed: {e}")
self.mail_body.setPlainText(body_html)
def createAndSendMail(self): def createAndSendMail(self):
log.info("Sending mail") log.info("Sending mail")
import smtplib
from email.mime.multipart import MIMEMultipart from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText from email.mime.text import MIMEText
@@ -176,31 +239,29 @@ Tel.: 0761/682-778 | 07617682-545"""
message["From"] = sender_email message["From"] = sender_email
message["To"] = self.prof_mail message["To"] = self.prof_mail
message["Subject"] = self.mail_header.text() message["Subject"] = self.mail_header.text()
# include a Fcc to the senders sent folder message["Cc"] = "semesterapparate@ph-freiburg.de"
message["cc"] = "semesterapparate@ph-freiburg.de"
mail_body = self.mail_body.toPlainText()
# strange_string = """p, li { white-space: pre-wrap; }
# hr { height: 1px; border-width: 0; }
# li.unchecked::marker { content: "\2610"; }
# li.checked::marker { content: "\2612"; }
# """
# mail_body.replace(strange_string, "")
message.attach(MIMEText(mail_body, "Plain", "utf-8"))
mail_body = self.mail_body.toHtml()
message.attach(MIMEText(mail_body, "html"))
mail = message.as_string() mail = message.as_string()
with smtplib.SMTP_SSL(smtp_server, port) as server: with smtplib.SMTP_SSL(smtp_server, port) as server:
server.connect(smtp_server, port) server.connect(smtp_server, port) # not needed for SMTP_SSL
# server.connect(smtp_server, port)
# server.auth(mechanism="PLAIN")
if config.mail.use_user_name is True: if config.mail.use_user_name is True:
# print(config["mail"]["user_name"])
server.login(config.mail.user_name, password) server.login(config.mail.user_name, password)
else: else:
server.login(sender_email, password) server.login(sender_email, password)
server.sendmail(sender_email, tolist, mail) server.sendmail(sender_email, tolist, mail)
# print("Mail sent")
# end active process
server.quit() server.quit()
pass
log.info("Mail sent, closing connection to server and dialog") log.info("Mail sent, closing connection to server and dialog")
# close the dialog
self.accept() self.accept()
@@ -225,8 +286,6 @@ def launch_gui(
if __name__ == "__main__": if __name__ == "__main__":
import sys
app = QtWidgets.QApplication(sys.argv) app = QtWidgets.QApplication(sys.argv)
Dialog = QtWidgets.QDialog() Dialog = QtWidgets.QDialog()
ui = Mail_Dialog() ui = Mail_Dialog()

View File

@@ -7,7 +7,7 @@ from qtqdm import Qtqdm, QtqdmProgressBar
from src.logic import BookData from src.logic import BookData
from src.logic.lehmannsapi import LehmannsClient from src.logic.lehmannsapi import LehmannsClient
from src.logic.swb import SWB from src.logic.SRU import SWB
class CheckThread(QtCore.QThread): class CheckThread(QtCore.QThread):

View File

@@ -250,7 +250,7 @@
<rect> <rect>
<x>0</x> <x>0</x>
<y>180</y> <y>180</y>
<width>1261</width> <width>1412</width>
<height>511</height> <height>511</height>
</rect> </rect>
</property> </property>
@@ -275,11 +275,30 @@
</item> </item>
<item> <item>
<widget class="QCheckBox" name="chkbx_show_del_media"> <widget class="QCheckBox" name="chkbx_show_del_media">
<property name="enabled">
<bool>false</bool>
</property>
<property name="toolTip">
<string>WIP - Broken</string>
</property>
<property name="text"> <property name="text">
<string>gel. Medien anzeigen</string> <string>gel. Medien anzeigen</string>
</property> </property>
</widget> </widget>
</item> </item>
<item>
<widget class="QCheckBox" name="chkbx_show_only_wit_neweditions">
<property name="enabled">
<bool>false</bool>
</property>
<property name="toolTip">
<string>WIP - Broken</string>
</property>
<property name="text">
<string>Nur Titel mit Neuauflagen anzeigen</string>
</property>
</widget>
</item>
<item> <item>
<spacer name="horizontalSpacer_3"> <spacer name="horizontalSpacer_3">
<property name="orientation"> <property name="orientation">
@@ -298,8 +317,11 @@
</item> </item>
<item> <item>
<widget class="QPushButton" name="btn_reserve"> <widget class="QPushButton" name="btn_reserve">
<property name="toolTip">
<string>Dieser Knopf prüft alle Werke, die mit einem roten X vermerkt sind. Sollten diese inzwischen im Apparat sein, wird dies aktualisiert</string>
</property>
<property name="text"> <property name="text">
<string>im Apparat?</string> <string>Medien mit ❌ im Apparat?</string>
</property> </property>
</widget> </widget>
</item> </item>

View File

@@ -157,7 +157,7 @@ class Ui_MainWindow(object):
self.gridLayoutWidget_2 = QWidget(self.createApparat) self.gridLayoutWidget_2 = QWidget(self.createApparat)
self.gridLayoutWidget_2.setObjectName(u"gridLayoutWidget_2") self.gridLayoutWidget_2.setObjectName(u"gridLayoutWidget_2")
self.gridLayoutWidget_2.setEnabled(True) self.gridLayoutWidget_2.setEnabled(True)
self.gridLayoutWidget_2.setGeometry(QRect(0, 180, 1261, 511)) self.gridLayoutWidget_2.setGeometry(QRect(0, 180, 1412, 511))
self.gridLayout_2 = QGridLayout(self.gridLayoutWidget_2) self.gridLayout_2 = QGridLayout(self.gridLayoutWidget_2)
self.gridLayout_2.setObjectName(u"gridLayout_2") self.gridLayout_2.setObjectName(u"gridLayout_2")
self.gridLayout_2.setContentsMargins(0, 0, 0, 0) self.gridLayout_2.setContentsMargins(0, 0, 0, 0)
@@ -169,9 +169,16 @@ class Ui_MainWindow(object):
self.chkbx_show_del_media = QCheckBox(self.gridLayoutWidget_2) self.chkbx_show_del_media = QCheckBox(self.gridLayoutWidget_2)
self.chkbx_show_del_media.setObjectName(u"chkbx_show_del_media") self.chkbx_show_del_media.setObjectName(u"chkbx_show_del_media")
self.chkbx_show_del_media.setEnabled(False)
self.horizontalLayout_5.addWidget(self.chkbx_show_del_media) self.horizontalLayout_5.addWidget(self.chkbx_show_del_media)
self.chkbx_show_only_wit_neweditions = QCheckBox(self.gridLayoutWidget_2)
self.chkbx_show_only_wit_neweditions.setObjectName(u"chkbx_show_only_wit_neweditions")
self.chkbx_show_only_wit_neweditions.setEnabled(False)
self.horizontalLayout_5.addWidget(self.chkbx_show_only_wit_neweditions)
self.horizontalSpacer_3 = QSpacerItem(40, 20, QSizePolicy.Policy.Fixed, QSizePolicy.Policy.Minimum) self.horizontalSpacer_3 = QSpacerItem(40, 20, QSizePolicy.Policy.Fixed, QSizePolicy.Policy.Minimum)
self.horizontalLayout_5.addItem(self.horizontalSpacer_3) self.horizontalLayout_5.addItem(self.horizontalSpacer_3)
@@ -880,8 +887,18 @@ class Ui_MainWindow(object):
___qtablewidgetitem4.setText(QCoreApplication.translate("MainWindow", u"Dauerapparat", None)); ___qtablewidgetitem4.setText(QCoreApplication.translate("MainWindow", u"Dauerapparat", None));
___qtablewidgetitem5 = self.tableWidget_apparate.horizontalHeaderItem(5) ___qtablewidgetitem5 = self.tableWidget_apparate.horizontalHeaderItem(5)
___qtablewidgetitem5.setText(QCoreApplication.translate("MainWindow", u"KontoNr", None)); ___qtablewidgetitem5.setText(QCoreApplication.translate("MainWindow", u"KontoNr", None));
#if QT_CONFIG(tooltip)
self.chkbx_show_del_media.setToolTip(QCoreApplication.translate("MainWindow", u"WIP - Broken", None))
#endif // QT_CONFIG(tooltip)
self.chkbx_show_del_media.setText(QCoreApplication.translate("MainWindow", u"gel. Medien anzeigen", None)) self.chkbx_show_del_media.setText(QCoreApplication.translate("MainWindow", u"gel. Medien anzeigen", None))
self.btn_reserve.setText(QCoreApplication.translate("MainWindow", u"im Apparat?", None)) #if QT_CONFIG(tooltip)
self.chkbx_show_only_wit_neweditions.setToolTip(QCoreApplication.translate("MainWindow", u"WIP - Broken", None))
#endif // QT_CONFIG(tooltip)
self.chkbx_show_only_wit_neweditions.setText(QCoreApplication.translate("MainWindow", u"Nur Titel mit Neuauflagen anzeigen", None))
#if QT_CONFIG(tooltip)
self.btn_reserve.setToolTip(QCoreApplication.translate("MainWindow", u"Dieser Knopf pr\u00fcft alle Werke, die mit einem roten X vermerkt sind. Sollten diese inzwischen im Apparat sein, wird dies aktualisiert", None))
#endif // QT_CONFIG(tooltip)
self.btn_reserve.setText(QCoreApplication.translate("MainWindow", u"Medien mit \u274c im Apparat?", None))
self.label_info.setText(QCoreApplication.translate("MainWindow", u"Medien werden hinzugef\u00fcgt", None)) self.label_info.setText(QCoreApplication.translate("MainWindow", u"Medien werden hinzugef\u00fcgt", None))
self.progress_label.setText(QCoreApplication.translate("MainWindow", u"Medium x/y", None)) self.progress_label.setText(QCoreApplication.translate("MainWindow", u"Medium x/y", None))
self.label_20.setText(QCoreApplication.translate("MainWindow", u"Medien werden gepr\u00fcft", None)) self.label_20.setText(QCoreApplication.translate("MainWindow", u"Medien werden gepr\u00fcft", None))

View File

@@ -26,7 +26,6 @@ from src.backend import (
) )
from src.backend.create_file import recreateFile from src.backend.create_file import recreateFile
from src.backend.delete_temp_contents import delete_temp_contents as tempdelete from src.backend.delete_temp_contents import delete_temp_contents as tempdelete
from src.backend.semester import Semester
from src.logic import ( from src.logic import (
APP_NRS, APP_NRS,
Apparat, Apparat,
@@ -34,7 +33,9 @@ from src.logic import (
BookData, BookData,
Prof, Prof,
SemapDocument, SemapDocument,
Semester,
csv_to_list, csv_to_list,
eml_to_semap,
pdf_to_semap, pdf_to_semap,
word_to_semap, word_to_semap,
) )
@@ -207,6 +208,7 @@ class Ui(QtWidgets.QMainWindow, Ui_Semesterapparat):
self.progressBar.setMinimum(0) self.progressBar.setMinimum(0)
self.avail_status.hide() self.avail_status.hide()
self.chkbx_show_del_media.hide() self.chkbx_show_del_media.hide()
self.chkbx_show_only_wit_neweditions.hide()
self.automation_add_selected_books.hide() self.automation_add_selected_books.hide()
# self.btn_del_select_apparats.setEnabled(False) # self.btn_del_select_apparats.setEnabled(False)
@@ -896,7 +898,7 @@ class Ui(QtWidgets.QMainWindow, Ui_Semesterapparat):
) )
# thread = QThread() # thread = QThread()
appnumber = self.active_apparat appnumber = self.drpdwn_app_nr.currentText()
# #log.debug(links) # #log.debug(links)
self.availChecker = AvailChecker(links, appnumber, books=books) self.availChecker = AvailChecker(links, appnumber, books=books)
# availcheck.moveToThread(thread) # availcheck.moveToThread(thread)
@@ -939,16 +941,14 @@ class Ui(QtWidgets.QMainWindow, Ui_Semesterapparat):
self.validate_semester() self.validate_semester()
def update_app_media_list(self): def update_app_media_list(self):
deleted = 0 if not self.chkbx_show_del_media.isChecked() else 1
app_id = self.db.getId(self.app_name.text()) app_id = self.db.getId(self.app_name.text())
prof_id = self.db.getProfId(self.profdata) prof_id = self.db.getProfId(self.profdata)
books: list[dict[int, BookData, int]] = self.db.getBooks( books: list[dict[int, BookData, int]] = self.db.getBooks(app_id, prof_id, 0)
app_id, prof_id, deleted
)
# # #log.debug(books) # # #log.debug(books)
# take the dataclass from the tuple # take the dataclass from the tuple
# booklist:list[BookData]=[book[0] for book in books] # booklist:list[BookData]=[book[0] for book in books]
self.tableWidget_apparat_media.clearContents()
self.tableWidget_apparat_media.setRowCount(0) self.tableWidget_apparat_media.setRowCount(0)
for book in books: for book in books:
book["id"] book["id"]
@@ -1198,6 +1198,8 @@ class Ui(QtWidgets.QMainWindow, Ui_Semesterapparat):
self.db.addBookToDatabase( self.db.addBookToDatabase(
bookdata=book, app_id=app_id, prof_id=prof_id bookdata=book, app_id=app_id, prof_id=prof_id
) )
if file_type == "eml":
data = eml_to_semap(file)
self.update_app_media_list() self.update_app_media_list()
# #log.debug(len(signatures)) # #log.debug(len(signatures))
@@ -1590,8 +1592,8 @@ class Ui(QtWidgets.QMainWindow, Ui_Semesterapparat):
mail_data = { mail_data = {
"prof_name": "Erwerbung", "prof_name": "Erwerbung",
"prof_mail": "erw@ph-freiburg.de", "prof_mail": "erw@ph-freiburg.de",
"app_id": app_nr, "app_nr": app_nr,
"app_name": self.db.getApparatName(app_id, prof_id), "app_name": self.db.getApparatName(app_nr, prof_id),
} }
orderDialog = NewEditionDialog(app_id, mail_data) orderDialog = NewEditionDialog(app_id, mail_data)
orderDialog.exec() orderDialog.exec()
@@ -1672,12 +1674,13 @@ WHERE m.id = ?""",
newEditionChecker.exec() newEditionChecker.exec()
accepted_books = newEditionChecker.accepted_books accepted_books = newEditionChecker.accepted_books
print(accepted_books)
if accepted_books == []: if accepted_books == []:
return return
for book in accepted_books: for book in accepted_books:
oldBookId = self.db.getBookIdByPPN(book.old_book.ppn) oldBookId = self.db.getBookIdByPPN(book.old_book.ppn)
apparats_id = self.db.getId(
self.db.getApparatNameByAppNr(book.old_book.library_location)
)
self.db.insertNewEdition(book, oldBookId, apparats_id) self.db.insertNewEdition(book, oldBookId, apparats_id)
pass pass
@@ -1763,11 +1766,17 @@ WHERE m.id = ?""",
apparat_add_action = QtGui.QAction("Zum Apparat hinzufügen") apparat_add_action = QtGui.QAction("Zum Apparat hinzufügen")
apparat_move_action = QtGui.QAction("In Apparat verschieben") apparat_move_action = QtGui.QAction("In Apparat verschieben")
apparat_copy_action = QtGui.QAction("In Apparat kopieren") apparat_copy_action = QtGui.QAction("In Apparat kopieren")
replace_old_editions = QtGui.QAction("Neuauflagen ersetzen")
apparatmenu = menu.addMenu("Apparate") apparatmenu = menu.addMenu("Apparate")
generalmenu = menu.addMenu("Allgemeines") generalmenu = menu.addMenu("Allgemeines")
apparatmenu.addActions( # type: ignore apparatmenu.addActions( # type: ignore
[apparat_add_action, apparat_copy_action, apparat_move_action] [
apparat_add_action,
apparat_copy_action,
apparat_move_action,
replace_old_editions,
]
) )
generalmenu.addActions([edit_action, delete_action, update_data_action]) # type: ignore generalmenu.addActions([edit_action, delete_action, update_data_action]) # type: ignore
# disable apparat_add_action # disable apparat_add_action
@@ -1778,8 +1787,37 @@ WHERE m.id = ?""",
apparat_copy_action.triggered.connect(self.copy_to_apparat) # type: ignore apparat_copy_action.triggered.connect(self.copy_to_apparat) # type: ignore
apparat_move_action.triggered.connect(self.move_to_apparat) # type: ignore apparat_move_action.triggered.connect(self.move_to_apparat) # type: ignore
update_data_action.triggered.connect(self.update_data) # type: ignore update_data_action.triggered.connect(self.update_data) # type: ignore
replace_old_editions.triggered.connect(self.replace_old_edition) # type: ignore
menu.exec(self.tableWidget_apparat_media.mapToGlobal(position)) # type: ignore menu.exec(self.tableWidget_apparat_media.mapToGlobal(position)) # type: ignore
def replace_old_edition(self):
    """Open a dialog listing the media that have a newer edition.

    The rows come from ``self.db.getBooksWithNewEditions`` called with
    ``self.active_apparat``. Each returned entry is indexed positionally:
    ``entry[0]`` supplies title, edition and signature for the first three
    columns, and ``entry[1]`` is summarised in the "Neues Werk" column
    (presumably the existing medium and its newer edition, judging by the
    column headers -- confirm against the database helper).

    The method only displays the data and returns once the dialog's
    ``exec()`` call returns.
    """
    make_cell = QtWidgets.QTableWidgetItem

    window = QtWidgets.QDialog()
    window.setWindowTitle("Neuauflagen:")
    vbox = QtWidgets.QVBoxLayout()
    heading = QtWidgets.QLabel("Folgende Medien haben Neuauflagen:")
    vbox.addWidget(heading)

    overview = QtWidgets.QTableWidget()
    overview.setColumnCount(4)
    overview.setHorizontalHeaderLabels(
        ["Titel", "Auflage", "Signatur", "Neues Werk"]
    )
    overview.horizontalHeader().setStretchLastSection(True)

    for entry in self.db.getBooksWithNewEditions(self.active_apparat):
        # Every entry is inserted as the new first row, so the finished
        # table lists the entries in the reverse of the order returned.
        overview.insertRow(0)
        current = entry[0]
        overview.setItem(0, 0, make_cell(current.title))
        overview.setItem(0, 1, make_cell(str(current.edition)))
        overview.setItem(0, 2, make_cell(current.signature))
        successor = entry[1]
        summary = (
            f"{successor.title} (Auflage {successor.edition}, {successor.signature})"
        )
        overview.setItem(0, 3, make_cell(summary))

    vbox.addWidget(overview)
    window.setLayout(vbox)
    window.exec()
def update_data(self): def update_data(self):
signatures = [ signatures = [
self.tableWidget_apparat_media.item(row, 1).text() self.tableWidget_apparat_media.item(row, 1).text()

View File

@@ -1,9 +1,10 @@
from .widget_sources.admin_query_ui import Ui_Form from PySide6 import QtCore, QtWidgets
from PySide6 import QtWidgets, QtCore
from src import Icon from src import Icon
from src.backend import Database from src.backend import Database
from .widget_sources. import Ui_Form
class AdminQueryWidget(QtWidgets.QWidget, Ui_Form): class AdminQueryWidget(QtWidgets.QWidget, Ui_Form):
def __init__(self, parent=None): def __init__(self, parent=None):
@@ -22,7 +23,7 @@ class AdminQueryWidget(QtWidgets.QWidget, Ui_Form):
return return
data = self.db.query_db(request_text) data = self.db.query_db(request_text)
print(data) # print(data)
table_names = ( table_names = (
request_text.lower().split("select")[1].split("from")[0].split(",") request_text.lower().split("select")[1].split("from")[0].split(",")
) )

View File

@@ -7,8 +7,8 @@ from PySide6.QtCore import QDate
from PySide6.QtGui import QRegularExpressionValidator from PySide6.QtGui import QRegularExpressionValidator
from src import LOG_DIR, Icon from src import LOG_DIR, Icon
from src.backend import Database, Semester, recreateElsaFile from src.backend import Database, recreateElsaFile
from src.logic import Prof, elsa_word_to_csv from src.logic import Prof, Semester, elsa_word_to_csv
from src.ui.dialogs import ElsaAddEntry, popus_confirm from src.ui.dialogs import ElsaAddEntry, popus_confirm
from src.ui.widgets.filepicker import FilePicker from src.ui.widgets.filepicker import FilePicker
from src.ui.widgets.graph import DataQtGraph from src.ui.widgets.graph import DataQtGraph

View File

@@ -8,7 +8,7 @@ from PySide6.QtCharts import QCategoryAxis, QChart, QChartView, QLineSeries, QVa
from PySide6.QtGui import QColor, QPainter, QPen from PySide6.QtGui import QColor, QPainter, QPen
from src import LOG_DIR from src import LOG_DIR
from src.backend.semester import Semester from src.logic.semester import Semester
log = loguru.logger log = loguru.logger
log.remove() log.remove()

View File

@@ -1,4 +1,5 @@
import sys import sys
from typing import List
import loguru import loguru
from natsort import natsorted from natsort import natsorted
@@ -6,8 +7,9 @@ from PySide6 import QtCore, QtGui, QtWidgets
from PySide6.QtCore import Signal from PySide6.QtCore import Signal
from src import LOG_DIR from src import LOG_DIR
from src.backend import Database, Semester from src.backend import Database
from src.logic import BookData, Prof, custom_sort, sort_semesters_list from src.logic import BookData, Prof, Semester, custom_sort, sort_semesters_list
from src.logic.dataclass import Apparat
from src.ui.dialogs import ApparatExtendDialog, Mail_Dialog, ReminderDialog from src.ui.dialogs import ApparatExtendDialog, Mail_Dialog, ReminderDialog
from src.ui.widgets import DataQtGraph, StatusWidget from src.ui.widgets import DataQtGraph, StatusWidget
from src.ui.widgets.signature_update import UpdaterThread from src.ui.widgets.signature_update import UpdaterThread
@@ -343,8 +345,7 @@ class SearchStatisticPage(QtWidgets.QDialog, Ui_Dialog):
apparats = natsorted(appnrs) apparats = natsorted(appnrs)
apparats = [str(apparat) for apparat in apparats] apparats = [str(apparat) for apparat in apparats]
self.box_appnrs.addItems(apparats) self.box_appnrs.addItems(apparats)
persons = self.db.getProfs() persons: List[Prof] = sorted(self.db.getProfs(), key=lambda x: x.lastname)
persons = sorted(persons, key=lambda x: x.lastname)
self.box_person.addItems( self.box_person.addItems(
[f"{person.lastname}, {person.firstname}" for person in persons] [f"{person.lastname}, {person.firstname}" for person in persons]
) )
@@ -398,7 +399,12 @@ class SearchStatisticPage(QtWidgets.QDialog, Ui_Dialog):
selected_apparat_rows = [] selected_apparat_rows = []
for i in range(self.tableWidget.rowCount()): for i in range(self.tableWidget.rowCount()):
if self.tableWidget.cellWidget(i, 0).isChecked(): if self.tableWidget.cellWidget(i, 0).isChecked():
selected_apparats.append(self.tableWidget.item(i, 2).text()) selected_apparats.append(
Apparat(
appnr=self.tableWidget.item(i, 2).text(),
name=self.tableWidget.item(i, 1).text(),
)
)
selected_apparat_rows.append(i) selected_apparat_rows.append(i)
# delete all selected apparats # delete all selected apparats
# # ##print(selected_apparats) # # ##print(selected_apparats)

View File

@@ -8,7 +8,7 @@ from PySide6.QtMultimedia import QAudioOutput, QMediaPlayer
from src import LOG_DIR from src import LOG_DIR
from src.backend.catalogue import Catalogue from src.backend.catalogue import Catalogue
from src.backend.database import Database from src.backend.database import Database
from src.logic.swb import SWB from src.logic.SRU import SWB
from .widget_sources.admin_update_signatures_ui import Ui_Dialog from .widget_sources.admin_update_signatures_ui import Ui_Dialog