chore: restructured project, updated readme
src/admin/__init__.py (new file, 5 lines)
@@ -0,0 +1,5 @@
"""Administrative functions and commands."""

from .commands import AdminCommands

__all__ = ["AdminCommands"]
src/admin/commands.py (new file, 104 lines)
@@ -0,0 +1,104 @@
import hashlib
import random

from src.database import Database
from src.shared.logging import log
from src import LOG_DIR


# Admin operations: change passwords for apparats and users, list users,
# create and delete users, etc. One method per command.
class AdminCommands:
    """Basic admin commands for the admin console: create, delete, and list users, and change user passwords."""

    def __init__(self, db_path=None):
        """Initialize AdminCommands with a database connection."""
        if db_path is None:
            self.db = Database()
        else:
            self.db = Database(db_path=db_path)
        log.info("AdminCommands initialized with database connection.")
        log.debug("location: {}", self.db.db_path)

    def create_password(self, password: str) -> tuple[str, str]:
        """Create a hashed password and a salt for the password.

        Args:
            password (str): the base password to be hashed.

        Returns:
            tuple[str, str]: a tuple containing the hashed password and the salt used to hash the password.
        """
        salt = self.create_salt()
        hashed_password = self.hash_password(password)
        return (hashed_password, salt)

    def create_salt(self) -> str:
        """Generate a random 16-character salt for the password.

        Returns:
            str: the randomized salt
        """
        return "".join(
            random.choices(
                "abcdefghijklmnopqrstuvwxyzQWERTZUIOPLKJHGFDSAYXCVBNM0123456789", k=16
            )
        )

    def create_admin(self):
        """Create the admin user in the database. Used only once, when the database is created."""
        salt = self.create_salt()
        hashed_password = self.hash_password("admin")
        self.db.createUser("admin", salt + hashed_password, "admin", salt)

    def create_user(self, username: str, password: str, role: str = "user") -> bool:
        """Create a new user in the database.

        Args:
            username (str): the username of the user to be created.
            password (str): the password of the user to be created.
            role (str, optional): the role of the user to be created. Defaults to "user".
        """
        hashed_password, salt = self.create_password(password)
        status = self.db.createUser(
            user=username, password=salt + hashed_password, role=role, salt=salt
        )
        return status

    def hash_password(self, password: str) -> str:
        """Hash a password using SHA-256.

        Args:
            password (str): the password to be hashed.

        Returns:
            str: the hashed password.
        """
        hashed = hashlib.sha256(password.encode("utf-8")).hexdigest()
        return hashed

    def list_users(self) -> list[tuple]:
        """List all available users in the database.

        Returns:
            list[tuple]: a list of all users, containing all stored data for each user in a tuple.
        """
        return self.db.getUsers()

    def delete_user(self, username: str):
        """Delete a selected user from the database.

        Args:
            username (str): the username of the user to be deleted.
        """
        self.db.deleteUser(username)

    def change_password(self, username, password):
        """Change the password for a user.

        Args:
            username (str): username of the user to change the password for.
            password (str): the new, non-hashed password to change to.
        """
        hashed_password = self.hash_password(password)
        self.db.changePassword(username, hashed_password)
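Reviewer note: as written, `hash_password` digests only the password, so the salt returned by `create_password` is stored next to the hash (`salt + hash`) but never mixed into the digest, and `change_password` stores an unsalted hash without the `salt +` prefix that `create_user` uses. A minimal sketch of a salted scheme, assuming nothing beyond the standard library (the helper name is hypothetical, not part of this commit):

    import hashlib

    def hash_password_salted(password: str, salt: str) -> str:
        # Hypothetical helper: PBKDF2-HMAC-SHA256 mixes the salt into the
        # digest and adds a work factor, unlike a bare sha256(password).
        return hashlib.pbkdf2_hmac(
            "sha256", password.encode("utf-8"), salt.encode("utf-8"), 100_000
        ).hex()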
src/background/__init__.py (new file, 16 lines)
@@ -0,0 +1,16 @@
"""Background tasks and threading operations."""

from .autoadder import AutoAdder
from .availability_checker import AvailChecker
from .book_grabber import BookGrabber, BookGrabberTest
from .new_editions import NewEditionCheckerThread
from .documentation_server import DocumentationThread

__all__ = [
    "AutoAdder",
    "AvailChecker",
    "BookGrabber",
    "BookGrabberTest",
    "NewEditionCheckerThread",
    "DocumentationThread",
]
src/background/autoadder.py (new file, 59 lines)
@@ -0,0 +1,59 @@
import sys
import time

import loguru

# from icecream import ic
from PySide6.QtCore import QThread
from PySide6.QtCore import Signal as Signal

from src import LOG_DIR
from src.database import Database

log = loguru.logger
log.remove()
log.add(sys.stdout, level="INFO")
log.add(f"{LOG_DIR}/application.log", rotation="1 MB", retention="10 days")


# from src.transformers import RDS_AVAIL_DATA


class AutoAdder(QThread):
    updateSignal = Signal(int)

    setTextSignal = Signal(int)
    progress = Signal(int)

    def __init__(self, data=None, app_id=None, prof_id=None, parent=None):
        super().__init__(parent)
        self.data = data
        self.app_id = app_id
        self.prof_id = prof_id

        # #print("Launched AutoAdder")
        # #print(self.data, self.app_id, self.prof_id)

    def run(self):
        self.db = Database()
        # show the dialog, start the thread to gather data and dynamically update progressbar and listwidget
        log.info("Starting worker thread")
        item = 0
        for entry in self.data:
            try:
                self.updateSignal.emit(item)
                self.setTextSignal.emit(entry)
                item += 1
                self.progress.emit(item)
                time.sleep(1)

            except Exception as e:
                # #print(e)
                log.exception(
                    f"The query failed with message {e} for signature {entry}"
                )
                continue
        if item == len(self.data):
            log.info("Worker thread finished")
            # terminate thread
            self.finished.emit()
src/background/availability_checker.py (new file, 83 lines)
@@ -0,0 +1,83 @@
# from icecream import ic
from PySide6.QtCore import QThread
from PySide6.QtCore import Signal as Signal

from src.database import Database
from src.services.webadis import get_book_medianr
from src.services.webrequest import BibTextTransformer, TransformerType, WebRequest
from src.shared.logging import log


class AvailChecker(QThread):
    updateSignal = Signal(str, int)
    updateProgress = Signal(int, int)

    def __init__(
        self,
        links: list[str] | None = None,
        appnumber: int | None = None,
        parent=None,
        books: list[dict] | None = None,
    ):
        if links is None:
            links = []
        super().__init__(parent)
        log.info("Starting worker thread")
        log.info(
            "Checking availability for "
            + str(links)
            + " with appnumber "
            + str(appnumber)
            + "..."
        )
        self.links = links
        self.appnumber = appnumber
        self.books = books or []
        log.info(
            f"Started worker with appnumber: {self.appnumber} and links: {self.links} and {len(self.books)} books..."
        )
        # Pre-create reusable request and transformer to avoid per-item overhead
        self._request = WebRequest().set_apparat(self.appnumber)
        self._rds_transformer = BibTextTransformer(TransformerType.RDS)

    def run(self):
        self.db = Database()
        count = 0
        for link in self.links:
            state = 0  # reset per link so one hit does not mark later links available
            log.info("Processing entry: " + str(link))
            data = self._request.get_ppn(link).get_data()
            rds = self._rds_transformer.get_data(data).return_data("rds_availability")

            book_id = None
            if not rds or not rds.items:
                log.warning(f"No RDS data found for link {link}")
                continue
            for item in rds.items:
                sign = item.superlocation
                loc = item.location
                # # #print(item.location)
                if str(self.appnumber) in sign or str(self.appnumber) in loc:
                    state = 1
                    break
            for book in self.books:
                if book["bookdata"].signature == link:
                    book_id = book["id"]
                    break
            log.info(f"State of {link}: " + str(state))
            # #print("Updating availability of " + str(book_id) + " to " + str(state))
            # use get_book_medianr to update the medianr of the book in the database;
            # guard on book_id so an unmatched link cannot reference an undefined `book`
            if book_id is not None:
                auth = self.db.getWebADISAuth
                medianr = get_book_medianr(rds.items[0].callnumber, self.appnumber, auth)
                book_data = book["bookdata"]
                book_data.medianr = medianr
                self.db.updateBookdata(book["id"], book_data)
                self.db.setAvailability(book_id, state)
            count += 1
            self.updateProgress.emit(count, len(self.links))
            self.updateSignal.emit(item.callnumber, state)

        log.info("Worker thread finished")
        # terminate thread

        self.quit()
src/background/book_grabber.py (new file, 199 lines)
@@ -0,0 +1,199 @@
from PySide6.QtCore import QThread, Signal

from src.database import Database
from src.services.webrequest import BibTextTransformer, WebRequest
from src.shared.logging import log

# Logger configured centrally in main; this module just uses `log`


class BookGrabber(QThread):
    updateSignal = Signal(int, int)
    done = Signal()

    def __init__(self):
        super(BookGrabber, self).__init__(parent=None)
        self.is_Running = True
        log.info("Starting worker thread")
        self.data = []
        self.app_id = None
        self.prof_id = None
        self.mode = None
        self.book_id = None
        self.use_any = False
        self.use_exact = False
        self.app_nr = None
        self.tstate = (self.app_id, self.prof_id, self.mode, self.data)
        self.request = WebRequest()
        self.db = Database()

    def add_values(
        self, app_id: int, prof_id: int, mode: str, data, any_book=False, exact=False
    ):
        self.app_id = app_id
        self.prof_id = prof_id
        self.mode = mode
        self.data: list[str] = data
        self.use_any = any_book
        self.use_exact = exact
        log.info(f"Working on {len(self.data)} entries")
        self.tstate = (self.app_nr, self.prof_id, self.mode, self.data)
        log.debug("State: " + str(self.tstate))
        app_nr = self.db.query_db(
            "SELECT appnr FROM semesterapparat WHERE id = ?", (self.app_id,)
        )[0][0]
        self.request.set_apparat(app_nr)
        # log.debug(self.tstate)

    def run(self):
        item = 0
        iterdata = self.data
        # log.debug(iterdata)

        for entry in iterdata:
            # log.debug(entry)
            log.info("Processing entry: {}", entry)

            webdata = self.request.get_ppn(entry)
            if self.use_any:
                webdata = webdata.use_any_book
            webdata = webdata.get_data()

            if webdata == "error":
                continue

            bd = BibTextTransformer(self.mode)
            log.debug(webdata)
            if self.mode == "ARRAY":
                if self.use_exact:
                    bd = bd.use_signature(entry)
            bd = bd.get_data(webdata).return_data()
            log.debug(bd)
            if bd is None:
                # bd = BookData
                continue
            bd.signature = entry
            transformer = (
                BibTextTransformer("RDS").get_data(webdata).return_data("rds_data")
            )

            # confirm lock is acquired
            self.db.addBookToDatabase(bd, self.app_id, self.prof_id)
            # get latest book id
            self.book_id = self.db.getLastBookId()
            log.info("Added book to database")
            state = 0
            for result in transformer.RDS_DATA:
                # log.debug(result.RDS_LOCATION)
                if str(self.app_nr) in result.RDS_LOCATION:
                    state = 1
                    break

            log.info(f"State of {entry}: {state}")
            log.debug(
                "updating availability of " + str(self.book_id) + " to " + str(state)
            )
            try:
                self.db.setAvailability(self.book_id, state)
                log.debug("Added book to database")
            except Exception as e:
                log.error(f"Failed to update availability: {e}")
                log.debug("Failed to update availability: " + str(e))

            # time.sleep(5)
            item += 1
            self.updateSignal.emit(item, len(self.data))
        log.info("Worker thread finished")
        # self.done.emit()
        self.quit()

    def stop(self):
        self.is_Running = False


class BookGrabberTest(QThread):
    updateSignal = Signal(int, int)
    done = Signal()

    def __init__(self, appnr: int):
        super(BookGrabberTest, self).__init__(parent=None)
        self.is_Running = True
        log.info("Starting worker thread")
        self.data = None
        self.app_nr = None
        self.prof_id = None
        self.mode = None
        self.book_id = None
        self.use_any = False
        self.use_exact = False
        self.app_nr = appnr
        self.tstate = (self.app_nr, self.prof_id, self.mode, self.data)
        self.results = []

    def add_values(
        self, app_nr: int, prof_id: int, mode: str, data, any_book=False, exact=False
    ):
        self.app_nr = app_nr
        self.prof_id = prof_id
        self.mode = mode
        self.data = data
        self.use_any = any_book
        self.use_exact = exact
        log.info(f"Working on {len(self.data)} entries")
        self.tstate = (self.app_nr, self.prof_id, self.mode, self.data)
        log.debug("State: " + str(self.tstate))
        # log.debug(self.tstate)

    def run(self):
        item = 0
        iterdata = self.data
        # log.debug(iterdata)
        for entry in iterdata:
            # log.debug(entry)
            signature = str(entry)
            log.info("Processing entry: " + signature)

            webdata = WebRequest().set_apparat(self.app_nr).get_ppn(entry)
            if self.use_any:
                webdata = webdata.use_any_book
            webdata = webdata.get_data()

            if webdata == "error":
                continue

            bd = BibTextTransformer(self.mode)
            if self.mode == "ARRAY":
                if self.use_exact:
                    bd = bd.use_signature(entry)
            bd = bd.get_data(webdata).return_data()
            if bd is None:
                # bd = BookData
                continue
            bd.signature = entry
            transformer = (
                BibTextTransformer("RDS").get_data(webdata).return_data("rds_data")
            )

            # confirm lock is acquired
            # get latest book id
            log.info("Added book to database")
            state = 0
            for result in transformer.RDS_DATA:
                # log.debug(result.RDS_LOCATION)
                if str(self.app_nr) in result.RDS_LOCATION:
                    state = 1
                    break

            log.info(f"State of {signature}: {state}")
            # log.debug("updating availability of " + str(self.book_id) + " to " + str(state))
            self.results.append(bd)

            # time.sleep(5)
            item += 1
            self.updateSignal.emit(item, len(self.data))
        log.info("Worker thread finished")
        # self.done.emit()
        self.quit()

    def stop(self):
        self.is_Running = False
src/background/documentation_server.py (new file, 23 lines)
@@ -0,0 +1,23 @@
from PySide6.QtCore import QThread, Slot
from src.utils.documentation import website, QuietHandler
from wsgiref.simple_server import make_server


class DocumentationThread(QThread):
    def __init__(self):
        super().__init__()
        self._server = None  # store server so we can shut it down

    def run(self):
        # launch_documentation()
        self._server = make_server(
            "localhost", 8000, website(), handler_class=QuietHandler
        )
        while not self.isInterruptionRequested():
            self._server.handle_request()

    @Slot()  # slot you can connect to aboutToQuit
    def stop(self):
        self.requestInterruption()  # ask the loop above to exit
        if self._server:
            self._server.shutdown()  # unblock handle_request()
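The `@Slot()` comment above names the intended wiring; a hedged usage sketch (the `app` object is an assumption, not shown in this commit):

    # Assumed startup code elsewhere in the application:
    docs = DocumentationThread()
    docs.start()
    app.aboutToQuit.connect(docs.stop)  # app: the running QApplication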
src/background/new_editions.py (new file, 345 lines)
@@ -0,0 +1,345 @@
import os
import re
from concurrent.futures import ThreadPoolExecutor
from math import ceil
from queue import Empty, Queue
from time import monotonic  # <-- NEW
from typing import List, Optional

from PySide6.QtCore import QThread, Signal

# from src.services.webrequest import BibTextTransformer, WebRequest
from src.services.catalogue import Catalogue
from src.core.models import BookData
from src.services.sru import SWB
from src.shared.logging import log

# use all available cores - 2, but at least 1
THREAD_COUNT = max(os.cpu_count() - 2, 1)
THREAD_MIN_ITEMS = 5

# Logger configured centrally in main; use shared `log`

swb = SWB()
dnb = SWB()
cat = Catalogue()

RVK_ALLOWED = r"[A-Z0-9.\-\/]"  # conservative RVK character set


def find_newer_edition(
    swb_result: BookData, dnb_result: List[BookData]
) -> Optional[List[BookData]]:
    """
    New edition if:
      - year > swb.year OR
      - edition_number > swb.edition_number
    BUT: discard any candidate with year < swb.year (if both years are known).

    Same-work check:
      - Compare RVK roots of signatures (after stripping trailing '+N' and '(N)').
      - If both have signatures and RVKs differ -> skip.

    Preferences (in order):
      1) RVK matches SWB
      2) Print over Online-Ressource
      3) Has signature
      4) Newer: (year desc, edition_number desc)
    """

    def strip_copy_and_edition(s: str) -> str:
        s = re.sub(r"\(\s*\d+\s*\)", "", s)  # remove '(N)'
        s = re.sub(r"\s*\+\s*\d+\s*$", "", s)  # remove trailing '+N'
        return s

    def extract_rvk_root(sig: Optional[str]) -> str:
        if not sig:
            return ""
        t = strip_copy_and_edition(sig.upper())
        t = re.sub(r"\s+", " ", t).strip()
        m = re.match(rf"^([A-Z]{{1,3}}\s*{RVK_ALLOWED}*)", t)
        if not m:
            cleaned = re.sub(rf"[^{RVK_ALLOWED} ]+", "", t).strip()
            return cleaned.split(" ")[0] if cleaned else ""
        return re.sub(r"\s+", " ", m.group(1)).strip()

    def has_sig(b: BookData) -> bool:
        return bool(getattr(b, "signature", None))

    def is_online(b: BookData) -> bool:
        return (getattr(b, "media_type", None) or "").strip() == "Online-Ressource"

    def is_print(b: BookData) -> bool:
        return not is_online(b)

    def rvk_matches_swb(b: BookData) -> bool:
        if not has_sig(b) or not has_sig(swb_result):
            return False
        return extract_rvk_root(b.signature) == extract_rvk_root(swb_result.signature)

    def strictly_newer(b: BookData) -> bool:
        # Hard guard: if both years are known and candidate is older, discard
        if (
            b.year is not None
            and swb_result.year is not None
            and b.year < swb_result.year
        ):
            return False

        newer_by_year = (
            b.year is not None
            and swb_result.year is not None
            and b.year > swb_result.year
        )
        newer_by_edition = (
            b.edition_number is not None
            and swb_result.edition_number is not None
            and b.edition_number > swb_result.edition_number
        )
        # Thanks to the guard above, newer_by_edition can't pick something with a smaller year.
        return newer_by_year or newer_by_edition

    swb_has_sig = has_sig(swb_result)
    swb_rvk = extract_rvk_root(getattr(swb_result, "signature", None))

    # 1) Filter: same work (by RVK if both have sigs) AND strictly newer
    candidates: List[BookData] = []
    for b in dnb_result:
        if has_sig(b) and swb_has_sig:
            if extract_rvk_root(b.signature) != swb_rvk:
                continue  # different work
        if strictly_newer(b):
            candidates.append(b)

    if not candidates:
        return None

    # 2) Dedupe by PPN → prefer (rvk-match, is-print, has-signature)
    def pref_score(x: BookData) -> tuple[int, int, int]:
        return (
            1 if rvk_matches_swb(x) else 0,
            1 if is_print(x) else 0,
            1 if has_sig(x) else 0,
        )

    by_ppn: dict[Optional[str], BookData] = {}
    for b in candidates:
        key = getattr(b, "ppn", None)
        prev = by_ppn.get(key)
        if prev is None or pref_score(b) > pref_score(prev):
            by_ppn[key] = b

    deduped = list(by_ppn.values())
    if not deduped:
        return None

    # 3) Preserve all qualifying newer editions, but order by preference
    def sort_key(b: BookData):
        year = b.year if b.year is not None else -1
        ed = b.edition_number if b.edition_number is not None else -1
        return (
            1 if rvk_matches_swb(b) else 0,
            1 if is_print(b) else 0,
            1 if has_sig(b) else 0,
            year,
            ed,
        )

    deduped.sort(key=sort_key, reverse=True)
    return deduped
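To make the rules above concrete, a rough sketch of the intended behavior with hypothetical records (only the fields the function inspects are set; `BookData.__post_init__` normalizes `year` internally):

    current = BookData(ppn="111", title="Statistik", signature="QH 231 S797",
                       year=2018, edition="3. Aufl.")
    candidates = [
        BookData(ppn="222", title="Statistik", signature="QH 231 S797(4)",
                 year=2022, edition="4. Aufl."),   # same RVK root, newer -> kept
        BookData(ppn="333", title="Statistik", signature="ST 601 X999",
                 year=2023),                        # different RVK root -> skipped
    ]
    newer = find_newer_edition(current, candidates)  # -> [the 2022 edition]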


class NewEditionCheckerThread(QThread):
    updateSignal = Signal(int, int)  # (processed, total)
    updateProgress = Signal(int, int)  # (processed, total)
    total_entries_signal = Signal(int)
    resultsSignal = Signal(list)  # list[tuple[BookData, list[BookData]]]

    # NEW: metrics signals
    rateSignal = Signal(float)  # items per second ("it/s")
    etaSignal = Signal(int)  # seconds remaining (-1 when unknown)

    def __init__(self, entries: Optional[list["BookData"]] = None, parent=None):
        super().__init__(parent)
        self.entries: list["BookData"] = entries if entries is not None else []
        self.results: list[tuple["BookData", list["BookData"]]] = []

    def reset(self):
        self.entries = []
        self.results = []

    # ---------- internal helpers ----------

    @staticmethod
    def _split_evenly(items: list, parts: int) -> list[list]:
        """Split items as evenly as possible into `parts` chunks (no empty tails)."""
        if parts <= 1 or len(items) <= 1:
            return [items]
        n = len(items)
        base = n // parts
        extra = n % parts
        chunks = []
        i = 0
        for k in range(parts):
            size = base + (1 if k < extra else 0)
            if size == 0:
                continue
            chunks.append(items[i : i + size])
            i += size
        return chunks
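For example, a quick sanity check of the chunking above:

    chunks = NewEditionCheckerThread._split_evenly(list(range(10)), 4)
    assert [len(c) for c in chunks] == [3, 3, 2, 2]  # sizes differ by at most 1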

    @staticmethod
    def _clean_title(raw: str) -> str:
        title = raw.rstrip(" .:,;!?")
        title = re.sub(r"\s*\(.*\)", "", title)
        return title.strip()

    @classmethod
    def _process_book(
        cls, book: "BookData"
    ) -> tuple["BookData", list["BookData"]] | None:
        """Process one book; returns (original, [found editions]) or None on failure."""
        if not book.title:
            return None
        response: list["BookData"] = []
        query = [
            f"pica.tit={book.title}",
            f"pica.vlg={book.publisher}",
        ]

        swb_result = swb.getBooks(["pica.bib=20735", f"pica.ppn={book.ppn}"])[0]
        dnb_results = swb.getBooks(query)
        new_editions = find_newer_edition(swb_result, dnb_results)

        if new_editions is not None:
            for new_edition in new_editions:
                new_edition.library_location = cat.get_location(new_edition.ppn)
                try:
                    isbn = (
                        str(new_edition.isbn[0])
                        if isinstance(new_edition.isbn, list)
                        else str(new_edition.isbn)
                    )
                    new_edition.link = (
                        f"https://www.lehmanns.de/search/quick?mediatype_id=2&q={isbn}"
                    )
                except (IndexError, TypeError):
                    isbn = None
                new_edition.in_library = cat.in_library(new_edition.ppn)
            response = new_editions

        # client = SWB()
        # response: list["BookData"] = []
        # # First, search by title only
        # results = client.getBooks([f"pica.title={title}", f"pica.vlg={book.publisher}"])

        # lehmanns = LehmannsClient()
        # results = lehmanns.search_by_title(title)
        # for result in results:
        #     if "(eBook)" in result.title:
        #         result.title = result.title.replace("(eBook)", "").strip()
        #     swb_results = client.getBooks(
        #         [
        #             f"pica.tit={result.title}",
        #             f"pica.vlg={result.publisher.split(',')[0]}",
        #         ]
        #     )
        #     for swb in swb_results:
        #         if swb.isbn == result.isbn:
        #             result.ppn = swb.ppn
        #             result.signature = swb.signature
        #             response.append(result)
        #         if (result.edition_number < swb.edition_number) and (
        #             swb.year > result.year
        #         ):
        #             response.append(result)
        if response == []:
            return None
        # Remove duplicates based on ppn
        return (book, response)

    @classmethod
    def _worker(cls, items: list["BookData"], q: Queue) -> None:
        """Worker for one chunk; pushes ('result', ...), ('progress', 1), and ('done', None)."""
        try:
            for book in items:
                try:
                    result = cls._process_book(book)
                except Exception:
                    result = None
                if result is not None:
                    q.put(("result", result))
                q.put(("progress", 1))
        finally:
            q.put(("done", None))

    # ---------- thread entry point ----------

    def run(self):
        total = len(self.entries)
        self.total_entries_signal.emit(total)

        # start timer for metrics
        t0 = monotonic()

        if total == 0:
            log.debug("No entries to process.")
            # emit metrics (zero work)
            self.rateSignal.emit(0.0)
            self.etaSignal.emit(0)
            self.resultsSignal.emit([])
            return

        # Cap workers at THREAD_COUNT; keep at least THREAD_MIN_ITEMS items per worker
        num_workers = min(THREAD_COUNT, max(1, ceil(total / THREAD_MIN_ITEMS)))
        chunks = self._split_evenly(self.entries, num_workers)
        sizes = [len(ch) for ch in chunks]

        q: Queue = Queue()
        processed = 0
        finished_workers = 0

        with ThreadPoolExecutor(max_workers=len(chunks)) as ex:
            futures = [ex.submit(self._worker, ch, q) for ch in chunks]

            log.info(
                f"Launched {len(futures)} worker thread(s) for {total} entries: {sizes} entries per thread."
            )
            for idx, sz in enumerate(sizes, 1):
                log.debug(f"Thread {idx}: {sz} entries")

            # Aggregate progress/results
            while finished_workers < len(chunks):
                try:
                    kind, payload = q.get(timeout=0.1)
                except Empty:
                    continue

                if kind == "progress":
                    processed += int(payload)
                    self.updateSignal.emit(processed, total)
                    self.updateProgress.emit(processed, total)

                    # ---- NEW: compute & emit metrics ----
                    elapsed = max(1e-9, monotonic() - t0)
                    rate = processed / elapsed  # items per second
                    remaining = max(0, total - processed)
                    eta_sec = int(round(remaining / rate)) if rate > 0 else -1

                    self.rateSignal.emit(rate)
                    # clamp negative just in case
                    self.etaSignal.emit(max(0, eta_sec) if eta_sec >= 0 else -1)
                    # -------------------------------------

                elif kind == "result":
                    self.results.append(payload)
                elif kind == "done":
                    finished_workers += 1

        # Final metrics on completion
        elapsed_total = max(1e-9, monotonic() - t0)
        final_rate = total / elapsed_total
        self.rateSignal.emit(final_rate)
        self.etaSignal.emit(0)

        self.resultsSignal.emit(self.results)
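The metrics emitted above are plain rate/remaining arithmetic. As a worked example: after 30 of 120 items in 12 s, rate = 30 / 12 = 2.5 it/s and ETA = (120 - 30) / 2.5 = 36 s.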
src/core/__init__.py (new file, 30 lines)
@@ -0,0 +1,30 @@
"""Core domain models and business constants."""

from .models import (
    Apparat,
    ApparatData,
    Book,
    BookData,
    ELSA,
    MailData,
    Prof,
    SemapDocument,
    Subjects,
    XMLMailSubmission,
)
from .constants import *
from .semester import Semester

__all__ = [
    "Apparat",
    "ApparatData",
    "Book",
    "BookData",
    "ELSA",
    "MailData",
    "Prof",
    "SemapDocument",
    "Subjects",
    "XMLMailSubmission",
    "Semester",
]
src/core/constants.py (new file, 213 lines)
@@ -0,0 +1,213 @@
APP_NRS = list(range(1, 181))

PROF_TITLES = [
    "Dr. mult.",
    "Dr. paed.",
    "Dr. rer. pol.",
    "Dr. sc. techn.",
    "Drs.",
    "Dr. agr.",
    "Dr. habil.",
    "Dr. oec.",
    "Dr. med.",
    "Dr. e. h.",
    "Dr. oec. publ.",
    "Dr. -Ing.",
    "Dr. theol.",
    "Dr. med. vet.",
    "Dr. ing.",
    "Dr. rer. nat.",
    "Dr. des.",
    "Dr. sc. mus.",
    "Dr. h. c.",
    "Dr. pharm.",
    "Dr. med. dent.",
    "Dr. phil. nat.",
    "Dr. phil.",
    "Dr. iur.",
    "Dr.",
    "Kein Titel",
]

SEMAP_MEDIA_ACCOUNTS = {
    1: "1008000055",
    2: "1008000188",
    3: "1008000211",
    4: "1008000344",
    5: "1008000477",
    6: "1008000500",
    7: "1008000633",
    8: "1008000766",
    9: "1008000899",
    10: "1008000922",
    11: "1008001044",
    12: "1008001177",
    13: "1008001200",
    14: "1008001333",
    15: "1008001466",
    16: "1008001599",
    17: "1008001622",
    18: "1008001755",
    19: "1008001888",
    20: "1008001911",
    21: "1008002033",
    22: "1008002166",
    23: "1008002299",
    24: "1008002322",
    25: "1008002455",
    26: "1008002588",
    27: "1008002611",
    28: "1008002744",
    29: "1008002877",
    30: "1008002900",
    31: "1008003022",
    32: "1008003155",
    33: "1008003288",
    34: "1008003311",
    35: "1008003444",
    36: "1008003577",
    37: "1008003600",
    38: "1008003733",
    39: "1008003866",
    40: "1008003999",
    41: "1008004011",
    42: "1008004144",
    43: "1008004277",
    44: "1008004300",
    45: "1008004433",
    46: "1008004566",
    47: "1008004699",
    48: "1008004722",
    49: "1008004855",
    50: "1008004988",
    51: "1008005000",
    52: "1008005133",
    53: "1008005266",
    54: "1008005399",
    55: "1008005422",
    56: "1008005555",
    57: "1008005688",
    58: "1008005711",
    59: "1008005844",
    60: "1008005977",
    61: "1008006099",
    62: "1008006122",
    63: "1008006255",
    64: "1008006388",
    65: "1008006411",
    66: "1008006544",
    67: "1008006677",
    68: "1008006700",
    69: "1008006833",
    70: "1008006966",
    71: "1008007088",
    72: "1008007111",
    73: "1008007244",
    74: "1008007377",
    75: "1008007400",
    76: "1008007533",
    77: "1008007666",
    78: "1008007799",
    79: "1008007822",
    80: "1008007955",
    81: "1008008077",
    82: "1008008100",
    83: "1008008233",
    84: "1008008366",
    85: "1008008499",
    86: "1008008522",
    87: "1008008655",
    88: "1008008788",
    89: "1008008811",
    90: "1008008944",
    91: "1008009066",
    92: "1008009199",
    93: "1008009222",
    94: "1008009355",
    95: "1008009488",
    96: "1008009511",
    97: "1008009644",
    98: "1008009777",
    99: "1008009800",
    100: "1008009933",
    101: "1008010022",
    102: "1008010155",
    103: "1008010288",
    104: "1008010311",
    105: "1008010444",
    106: "1008010577",
    107: "1008010600",
    108: "1008010733",
    109: "1008010866",
    110: "1008010999",
    111: "1008011011",
    112: "1008011144",
    113: "1008011277",
    114: "1008011300",
    115: "1008011433",
    116: "1008011566",
    117: "1008011699",
    118: "1008011722",
    119: "1008011855",
    120: "1008011988",
    121: "1008012000",
    122: "1008012133",
    123: "1008012266",
    124: "1008012399",
    125: "1008012422",
    126: "1008012555",
    127: "1008012688",
    128: "1008012711",
    129: "1008012844",
    130: "1008012977",
    131: "1008013099",
    132: "1008013122",
    133: "1008013255",
    134: "1008013388",
    135: "1008013411",
    136: "1008013544",
    137: "1008013677",
    138: "1008013700",
    139: "1008013833",
    140: "1008013966",
    141: "1008014088",
    142: "1008014111",
    143: "1008014244",
    144: "1008014377",
    145: "1008014400",
    146: "1008014533",
    147: "1008014666",
    148: "1008014799",
    149: "1008014822",
    150: "1008014955",
    151: "1008015077",
    152: "1008015100",
    153: "1008015233",
    154: "1008015366",
    155: "1008015499",
    156: "1008015522",
    157: "1008015655",
    158: "1008015788",
    159: "1008015811",
    160: "1008015944",
    161: "1008016066",
    162: "1008016199",
    163: "1008016222",
    164: "1008016355",
    165: "1008016488",
    166: "1008016511",
    167: "1008016644",
    168: "1008016777",
    169: "1008016800",
    170: "1008016933",
    171: "1008017055",
    172: "1008017188",
    173: "1008017211",
    174: "1008017344",
    175: "1008017477",
    176: "1008017500",
    177: "1008017633",
    178: "1008017766",
    179: "1008017899",
    180: "1008017922",
}
src/core/models.py (new file, 410 lines)
@@ -0,0 +1,410 @@
import json
from dataclasses import dataclass, field
from enum import Enum
from typing import Any, Optional, Union

import regex

from src.logic.openai import name_tester, run_shortener, semester_converter
from src.logic.semester import Semester


@dataclass
class Prof:
    id: Optional[int] = None
    _title: Optional[str] = None
    firstname: Optional[str] = None
    lastname: Optional[str] = None
    fullname: Optional[str] = None
    mail: Optional[str] = None
    telnr: Optional[str] = None

    # set the data based on a dict
    def from_dict(self, data: dict[str, Union[str, int]]):
        for key, value in data.items():
            if hasattr(self, key):
                setattr(self, key, value)
        return self

    @property
    def title(self) -> str:
        if self._title is None or self._title == "None":
            return ""
        return self._title

    @title.setter
    def title(self, value: str):
        self._title = value

    # set the data from a tuple
    def from_tuple(self, data: tuple[Union[str, int], ...]) -> "Prof":
        setattr(self, "id", data[0])
        setattr(self, "_title", data[1])
        setattr(self, "firstname", data[2])
        setattr(self, "lastname", data[3])
        setattr(self, "fullname", data[4])
        setattr(self, "mail", data[5])
        setattr(self, "telnr", data[6])
        return self

    def name(self, comma: bool = False) -> Optional[str]:
        if self.firstname is None and self.lastname is None:
            if "," in self.fullname:
                self.firstname = self.fullname.split(",")[1].strip()
                self.lastname = self.fullname.split(",")[0].strip()
            else:
                return self.fullname

        if comma:
            return f"{self.lastname}, {self.firstname}"
        return f"{self.lastname} {self.firstname}"


@dataclass
class BookData:
    ppn: str | None = None
    title: str | None = None
    signature: str | None = None
    edition: str | None = None
    link: str | None = None
    isbn: Union[str, list[str], None] = field(default_factory=list)
    author: str | None = None
    language: Union[str, list[str], None] = field(default_factory=list)
    publisher: str | None = None
    place: str | None = None
    year: int | None = None
    pages: str | None = None
    library_location: str | None = None
    in_apparat: bool | None = False
    adis_idn: str | None = None
    old_book: Any | None = None
    media_type: str | None = None
    in_library: bool | None = None  # whether the book is in the library or not
    medianr: int | None = None  # Media number in the library system

    def __post_init__(self):
        self.library_location = (
            str(self.library_location) if self.library_location else None
        )
        if isinstance(self.language, list) and self.language:
            self.language = [lang.strip() for lang in self.language if lang.strip()]
            self.language = ",".join(self.language)
        self.year = regex.sub(r"[^\d]", "", str(self.year)) if self.year else None
        self.in_library = True if self.signature else False

    def from_dict(self, data: dict) -> "BookData":
        for key, value in data.items():
            setattr(self, key, value)
        return self

    def merge(self, other: "BookData") -> "BookData":
        for key, value in other.__dict__.items():
            # merge lists: if the attribute is a list, extend it
            if isinstance(value, list):
                current_value = getattr(self, key)
                if current_value is None:
                    current_value = []
                elif not isinstance(current_value, list):
                    current_value = [current_value]
                # extend the list with the new values, but only if they are not already in the list
                for v in value:
                    if v not in current_value:
                        current_value.append(v)
                setattr(self, key, current_value)
            if value is not None and (
                getattr(self, key) is None or getattr(self, key) == ""
            ):
                setattr(self, key, value)
        # in language, keep only short language codes (length <= 4)
        if isinstance(self.language, list):
            self.language = [lang for lang in self.language if len(lang) <= 4]
        return self

    @property
    def to_dict(self) -> str:
        """Serialize the dataclass to a JSON string (None-valued fields and old_book are dropped)."""
        data_dict = {
            key: value for key, value in self.__dict__.items() if value is not None
        }
        # remove old_book from data_dict
        if "old_book" in data_dict:
            del data_dict["old_book"]
        return json.dumps(data_dict, ensure_ascii=False)

    def from_dataclass(self, dataclass: Optional[Any]) -> None:
        if dataclass is None:
            return
        for key, value in dataclass.__dict__.items():
            setattr(self, key, value)

    def get_book_type(self) -> str:
        if "Online" in self.pages:
            return "eBook"
        else:
            return "Druckausgabe"

    def from_string(self, data: str) -> "BookData":
        ndata = json.loads(data)

        return BookData(**ndata)

    def from_LehmannsSearchResult(self, result: Any) -> "BookData":
        self.title = result.title
        self.author = "; ".join(result.authors) if result.authors else None
        self.edition = str(result.edition) if result.edition else None
        self.link = result.url
        self.isbn = (
            result.isbn13
            if isinstance(result.isbn13, list)
            else [result.isbn13]
            if result.isbn13
            else []
        )
        self.pages = str(result.pages) if result.pages else None
        self.publisher = result.publisher
        self.year = str(result.year) if result.year else None
        # self.pages = str(result.pages) if result.pages else None
        return self

    @property
    def edition_number(self) -> Optional[int]:
        if self.edition is None:
            return 0
        match = regex.search(r"(\d+)", self.edition)
        if match:
            return int(match.group(1))
        return 0


@dataclass
class MailData:
    subject: Optional[str] = None
    body: Optional[str] = None
    mailto: Optional[str] = None
    prof: Optional[str] = None


class Subjects(Enum):
    BIOLOGY = (1, "Biologie")
    CHEMISTRY = (2, "Chemie")
    GERMAN = (3, "Deutsch")
    ENGLISH = (4, "Englisch")
    PEDAGOGY = (5, "Erziehungswissenschaft")
    FRENCH = (6, "Französisch")
    GEOGRAPHY = (7, "Geographie")
    HISTORY = (8, "Geschichte")
    HEALTH_EDUCATION = (9, "Gesundheitspädagogik")
    HTW = (10, "Haushalt / Textil")
    ART = (11, "Kunst")
    MATH_IT = (12, "Mathematik / Informatik")
    MEDIAPEDAGOGY = (13, "Medien in der Bildung")
    MUSIC = (14, "Musik")
    PHILOSOPHY = (15, "Philosophie")
    PHYSICS = (16, "Physik")
    POLITICS = (17, "Politikwissenschaft")
    PRORECTORATE = (18, "Prorektorat Lehre und Studium")
    PSYCHOLOGY = (19, "Psychologie")
    SOCIOLOGY = (20, "Soziologie")
    SPORT = (21, "Sport")
    TECHNIC = (22, "Technik")
    THEOLOGY = (23, "Theologie")
    ECONOMICS = (24, "Wirtschaftslehre")

    @property
    def id(self) -> int:
        return self.value[0]

    @property
    def name(self) -> str:
        return self.value[1]

    @classmethod
    def get_index(cls, name: str) -> Optional[int]:
        for i in cls:
            if i.name == name:
                return i.id - 1
        return None


@dataclass
class Apparat:
    id: int | None = None
    name: str | None = None
    prof_id: int | None = None
    subject: str | None = None
    appnr: int | None = None
    created_semester: str | None = None
    extended_at: str | None = None
    eternal: bool = False
    extend_until: str | None = None
    deleted: int | None = None
    deleted_date: str | None = None
    apparat_id_adis: str | None = None
    prof_id_adis: str | None = None
    konto: int | None = None

    def from_tuple(self, data: tuple[Any, ...]) -> "Apparat":
        self.id = data[0]
        self.name = data[1]
        self.prof_id = data[2]
        self.subject = data[3]
        self.appnr = data[4]
        self.created_semester = data[5]
        self.extended_at = data[6]
        self.eternal = data[7]
        self.extend_until = data[8]
        self.deleted = data[9]
        self.deleted_date = data[10]
        self.apparat_id_adis = data[11]
        self.prof_id_adis = data[12]
        self.konto = data[13]
        return self

    @property
    def get_semester(self) -> Optional[str]:
        if self.extend_until is not None:
            return self.extend_until
        else:
            return self.created_semester


@dataclass
class ELSA:
    id: int | None = None
    date: str | None = None
    semester: str | None = None
    prof_id: int | None = None

    def from_tuple(self, data: tuple[Any, ...]) -> "ELSA":
        self.id = data[0]
        self.date = data[1]
        self.semester = data[2]
        self.prof_id = data[3]
        return self


@dataclass
class ApparatData:
    prof: Prof = field(default_factory=Prof)
    apparat: Apparat = field(default_factory=Apparat)


@dataclass
class XMLMailSubmission:
    name: Optional[str] = None
    lastname: Optional[str] = None
    title: Optional[str] = None
    telno: Optional[int] = None
    email: Optional[str] = None
    app_name: Optional[str] = None
    subject: Optional[str] = None
    semester: Optional[Semester] = None
    books: Optional[list[BookData]] = None


@dataclass
class Book:
    author: str = None
    year: str = None
    edition: str = None
    title: str = None
    location: str = None
    publisher: str = None
    signature: str = None
    internal_notes: str = None

    @property
    def has_signature(self) -> bool:
        return self.signature is not None and self.signature != ""

    @property
    def is_empty(self) -> bool:
        return all(
            [
                self.author == "",
                self.year == "",
                self.edition == "",
                self.title == "",
                self.location == "",
                self.publisher == "",
                self.signature == "",
                self.internal_notes == "",
            ]
        )

    def from_dict(self, data: dict[str, Any]):
        for key, value in data.items():
            value = value.strip()
            if value == "\u2002\u2002\u2002\u2002\u2002":
                value = ""

            if key == "Autorenname(n):Nachname, Vorname":
                self.author = value
            elif key == "Jahr/Auflage":
                self.year = value.split("/")[0] if "/" in value else value
                self.edition = value.split("/")[1] if "/" in value else ""
            elif key == "Titel":
                self.title = value
            elif key == "Ort und Verlag":
                self.location = value.split(",")[0] if "," in value else value
                self.publisher = value.split(",")[1] if "," in value else ""
            elif key == "Standnummer":
                self.signature = value.strip()
            elif key == "Interne Vermerke":
                self.internal_notes = value


@dataclass
class SemapDocument:
    subject: str = None
    phoneNumber: int = None
    mail: str = None
    title: str = None
    title_suggestions: list[str] = None
    semester: Union[str, Semester] = None
    books: list[Book] = None
    eternal: bool = False
    personName: str = None
    personTitle: str = None
    title_length = 0
    title_max_length = 0

    def __post_init__(self):
        self.title_suggestions = []

    @property
    def nameSetter(self):
        data = name_tester(self.personTitle)
        name = f"{data['last_name']}, {data['first_name']}"
        if data["title"] is not None:
            title = data["title"]
            self.personTitle = title
        self.personName = name
        self.title_length = len(self.title) + 3 + len(self.personName.split(",")[0])
        if self.title_length > 40:
            name_len = len(self.personName.split(",")[0])
            self.title_max_length = 38 - name_len
            suggestions = run_shortener(self.title, self.title_max_length)
            for suggestion in suggestions:
                self.title_suggestions.append(suggestion["shortened_string"])
        else:
            self.title_suggestions = []

    @property
    def renameSemester(self) -> None:
        if self.semester:
            if ", Dauer" in self.semester:
                self.semester = self.semester.split(",")[0]
                self.eternal = True
                self.semester = Semester().from_string(self.semester)
            else:
                self.semester = Semester().from_string(
                    semester_converter(self.semester)
                )

    @property
    def signatures(self) -> list[str]:
        if self.books is not None:
            return [book.signature for book in self.books if book.has_signature]
        return []
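A short sketch of how `BookData.merge` behaves (hypothetical values; list fields are unioned, empty scalar fields are filled, populated ones are left alone):

    a = BookData(title="Analysis I", isbn=["978-3-16-148410-0"])
    b = BookData(isbn=["978-0-12-345678-9"], publisher="Springer")
    a.merge(b)
    # a.isbn      -> both ISBNs
    # a.publisher -> "Springer" (was None)
    # a.title     -> "Analysis I" (unchanged; b.title is None)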
src/core/semester.py (new file, 248 lines)
@@ -0,0 +1,248 @@
"""Semester helper class

A small utility around the *German* academic calendar that distinguishes
between *Wintersemester* (WiSe) and *Sommersemester* (SoSe).

Key points
----------
* A **`Semester`** is identified by a *term* ("SoSe" or "WiSe") and the last two
  digits of the calendar year in which the term *starts*.
* Formatting **never** pads the year with a leading zero – so ``6`` stays ``6``.
* ``offset(n)`` and the static ``generate_missing`` reliably walk the timeline
  one semester at a time with correct year transitions:

      SoSe 6 → **WiSe 6/7** → SoSe 7 → WiSe 7/8 → …
"""

from __future__ import annotations

import datetime
import re

from src.shared.logging import log


class Semester:
    """Represents a German university semester (WiSe or SoSe)."""

    # ------------------------------------------------------------------
    # Class-level defaults – will be *copied* to each instance and then
    # potentially overwritten in ``__init__``.
    # ------------------------------------------------------------------
    _year: int | None = int(str(datetime.datetime.now().year)[2:])  # e.g. 2024 → 24
    _semester: str | None = None  # "WiSe" or "SoSe" – set later
    _month: int | None = datetime.datetime.now().month
    value: str | None = None  # Human-readable label, e.g. "WiSe 23/24"

    # ------------------------------------------------------------------
    # Construction helpers
    # ------------------------------------------------------------------
    def __init__(
        self,
        year: int | None = None,
        semester: str | None = None,
        month: int | None = None,
    ) -> None:
        if year is not None:
            self._year = int(year)
        if semester is not None:
            if semester not in ("WiSe", "SoSe"):
                raise ValueError("semester must be 'WiSe' or 'SoSe'")
            self._semester = semester
        if month is not None:
            self._month = month

        self.__post_init__()

    def __post_init__(self) -> None:  # noqa: D401 – keep original name
        if self._year is None:
            self._year = int(str(datetime.datetime.now().year)[2:])
        if self._month is None:
            self._month = datetime.datetime.now().month
        if self._semester is None:
            self._generate_semester_from_month()
        self._compute_value()

    # ------------------------------------------------------------------
    # Dunder helpers
    # ------------------------------------------------------------------
    def __str__(self) -> str:  # noqa: D401 – keep original name
        return self.value or "<invalid Semester>"

    def __repr__(self) -> str:  # Helpful for debugging lists
        return f"Semester({self._year!r}, {self._semester!r})"

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------
    def _generate_semester_from_month(self) -> None:
        """Infer *WiSe* / *SoSe* from the month attribute."""
        self._semester = "WiSe" if (self._month <= 3 or self._month > 9) else "SoSe"

    def _compute_value(self) -> None:
        """Human-readable semester label – e.g. ``WiSe 23/24`` or ``SoSe 24``."""
        year = self._year
        if self._semester == "WiSe":
            next_year = (year + 1) % 100  # wrap 99 → 0
            self.value = f"WiSe {year}/{next_year}"
        else:  # SoSe
            self.value = f"SoSe {year}"

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------
    def offset(self, value: int) -> "Semester":
        """Return a new :class:`Semester` *value* steps away.

        The algorithm maps every semester to a monotonically increasing
        *linear index* so that simple addition suffices:

            ``index = year * 2 + (0 if SoSe else 1)``.
        """
        if not isinstance(value, int):
            raise TypeError("value must be an int (number of semesters to jump)")
        if value == 0:
            return Semester(self._year, self._semester)

        current_idx = self._year * 2 + (0 if self._semester == "SoSe" else 1)
        target_idx = current_idx + value
        if target_idx < 0:
            raise ValueError("offset would result in a negative year – not supported")

        new_year, semester_bit = divmod(target_idx, 2)
        new_semester = "SoSe" if semester_bit == 0 else "WiSe"
        return Semester(new_year, new_semester)
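A quick walk along the linear index (``SoSe 6`` has index 12, so +1/+2/+3 land on the labels below):

    s = Semester(6, "SoSe")
    print(s.offset(1))  # WiSe 6/7
    print(s.offset(2))  # SoSe 7
    print(s.offset(3))  # WiSe 7/8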
|
||||
# ------------------------------------------------------------------
|
||||
# Comparison helpers
|
||||
# ------------------------------------------------------------------
|
||||
def isPastSemester(self, current: "Semester") -> bool:
|
||||
log.debug(f"Comparing {self} < {current}")
|
||||
        if self.year < current.year:
            return True
        if self.year == current.year:
            return (
                self.semester == "WiSe" and current.semester == "SoSe"
            )  # WiSe before next SoSe
        return False

    def isFutureSemester(self, current: "Semester") -> bool:
        if self.year > current.year:
            return True
        if self.year == current.year:
            return (
                self.semester == "SoSe" and current.semester == "WiSe"
            )  # SoSe after WiSe of same year
        return False

    def isMatch(self, other: "Semester") -> bool:
        return self.year == other.year and self.semester == other.semester

    # ------------------------------------------------------------------
    # Convenience properties
    # ------------------------------------------------------------------
    @property
    def next(self) -> "Semester":
        return self.offset(1)

    @property
    def previous(self) -> "Semester":
        return self.offset(-1)

    @property
    def year(self) -> int:
        return self._year

    @property
    def semester(self) -> str:
        return self._semester

    # ------------------------------------------------------------------
    # Static helpers
    # ------------------------------------------------------------------
    @staticmethod
    def generate_missing(start: "Semester", end: "Semester") -> list[str]:
        """Return all consecutive semesters from *start* to *end* (inclusive)."""
        if not isinstance(start, Semester) or not isinstance(end, Semester):
            raise TypeError("start and end must be Semester instances")
        if start.isFutureSemester(end) and not start.isMatch(end):
            raise ValueError("'start' must not be after 'end'")

        chain: list[str] = [start.value]  # values, matching the declared return type
        current = start
        while not current.isMatch(end):
            current = current.next
            chain.append(current.value)
            if len(chain) > 1000:  # sanity guard
                raise RuntimeError("generate_missing exceeded sane iteration limit")
        return chain

    # ------------------------------------------------------------------
    # Parsing helper
    # ------------------------------------------------------------------
    @classmethod
    def from_string(cls, s: str) -> "Semester":
        """Parse a human-readable semester label and return a :class:`Semester`.

        Accepted formats (case-insensitive)::

            "SoSe <YY>"        → SoSe of year YY
            "WiSe <YY>/<YY+1>" → Winter term starting in YY
            "WiSe <YY>"        → Shorthand for the above (next year implied)

        ``YY`` may contain a leading zero ("06" → 6).
        """
        if not isinstance(s, str):
            raise TypeError("s must be a string")

        pattern = r"\s*(WiSe|SoSe)\s+(\d{1,2})(?:\s*/\s*(\d{1,2}))?\s*"
        m = re.fullmatch(pattern, s, flags=re.IGNORECASE)
        if not m:
            raise ValueError(
                "invalid semester string format – expected 'SoSe YY' or 'WiSe YY/YY' (spacing flexible)"
            )

        term_raw, y1_str, y2_str = m.groups()
        # str.capitalize() would yield "Wise"/"Sose", so normalize explicitly
        term = "SoSe" if term_raw.lower() == "sose" else "WiSe"
        year = int(y1_str.lstrip("0") or "0")  # "06" → 6, "0" stays 0

        if term == "SoSe":
            if y2_str is not None:
                raise ValueError(
                    "SoSe string should not contain '/' followed by a second year"
                )
            return cls(year, "SoSe")

        # term == "WiSe"
        if y2_str is not None:
            next_year = int(y2_str.lstrip("0") or "0")
            expected_next = (year + 1) % 100
            if next_year != expected_next:
                raise ValueError("WiSe second year must equal first year + 1 (mod 100)")
        # Accept both explicit "WiSe 6/7" and shorthand "WiSe 6"
        return cls(year, "WiSe")


# ------------------------- quick self-test -------------------------
if __name__ == "__main__":
    # Chain generation demo ------------------------------------------------
    s_start = Semester(6, "SoSe")  # SoSe 6
    s_end = Semester(25, "WiSe")  # WiSe 25/26
    chain = Semester.generate_missing(s_start, s_end)
    # print("generate_missing:", [str(s) for s in chain])

    # Parsing demo ---------------------------------------------------------
    examples = [
        "SoSe 6",
        "WiSe 6/7",
        "WiSe 6",
        "SoSe 23",
        "WiSe 23/24",
        "WiSe 24",
        "WiSe 99/00",
        "SoSe 00",
        "WiSe 100/101",  # three-digit years are rejected by the pattern above
    ]
    for ex in examples:
        try:
            parsed = Semester.from_string(ex)
            print(f"'{ex}' → {parsed} ({parsed.year=}, {parsed.semester=})")
        except ValueError as err:
            print(f"'{ex}' → {err}")
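
    # A minimal round-trip sketch (assumes Semester.offset is an exact
    # inverse across the WiSe/SoSe boundary, as next/previous imply):
    assert Semester(23, "SoSe").next.previous.isMatch(Semester(23, "SoSe"))
    assert Semester.from_string("WiSe 23/24").isMatch(Semester(23, "WiSe"))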
5
src/database/__init__.py
Normal file
@@ -0,0 +1,5 @@
"""Database layer for data persistence."""

from .connection import Database

__all__ = ["Database"]
2008
src/database/connection.py
Normal file
File diff suppressed because it is too large
132
src/database/migrations/V001__create_base_tables.sql
Normal file
@@ -0,0 +1,132 @@
BEGIN TRANSACTION;

CREATE TABLE IF NOT EXISTS semesterapparat (
    id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
    name TEXT,
    prof_id INTEGER,
    fach TEXT,
    appnr INTEGER,
    erstellsemester TEXT,
    verlängert_am TEXT,
    dauer BOOLEAN,
    verlängerung_bis TEXT,
    deletion_status INTEGER,
    deleted_date TEXT,
    apparat_id_adis INTEGER,
    prof_id_adis INTEGER,
    konto INTEGER,
    FOREIGN KEY (prof_id) REFERENCES prof (id)
);

CREATE TABLE IF NOT EXISTS media (
    id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
    bookdata TEXT,
    app_id INTEGER,
    prof_id INTEGER,
    deleted INTEGER DEFAULT (0),
    available BOOLEAN,
    reservation BOOLEAN,
    FOREIGN KEY (prof_id) REFERENCES prof (id),
    FOREIGN KEY (app_id) REFERENCES semesterapparat (id)
);

CREATE TABLE IF NOT EXISTS files (
    id INTEGER PRIMARY KEY,
    filename TEXT,
    fileblob BLOB,
    app_id INTEGER,
    filetyp TEXT,
    prof_id INTEGER REFERENCES prof (id),
    FOREIGN KEY (app_id) REFERENCES semesterapparat (id)
);

CREATE TABLE IF NOT EXISTS messages (
    id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
    created_at date NOT NULL DEFAULT CURRENT_TIMESTAMP,
    message TEXT NOT NULL,
    remind_at date NOT NULL DEFAULT CURRENT_TIMESTAMP,
    user_id INTEGER NOT NULL,
    appnr INTEGER,
    FOREIGN KEY (user_id) REFERENCES user (id)
);

CREATE TABLE IF NOT EXISTS prof (
    id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
    titel TEXT,
    fname TEXT,
    lname TEXT,
    fullname TEXT NOT NULL UNIQUE,
    mail TEXT,
    telnr TEXT
);

CREATE TABLE IF NOT EXISTS user (
    id integer NOT NULL PRIMARY KEY AUTOINCREMENT,
    created_at datetime NOT NULL DEFAULT CURRENT_TIMESTAMP,
    username TEXT NOT NULL UNIQUE,
    password TEXT NOT NULL,
    salt TEXT NOT NULL,
    role TEXT NOT NULL,
    email TEXT UNIQUE,
    name TEXT
);

CREATE TABLE IF NOT EXISTS subjects (
    id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
    name TEXT NOT NULL UNIQUE
);

CREATE TABLE IF NOT EXISTS elsa (
    id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
    date TEXT NOT NULL,
    semester TEXT NOT NULL,
    prof_id INTEGER NOT NULL
);

CREATE TABLE IF NOT EXISTS elsa_files (
    id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
    filename TEXT NOT NULL,
    fileblob BLOB NOT NULL,
    elsa_id INTEGER NOT NULL,
    filetyp TEXT NOT NULL,
    FOREIGN KEY (elsa_id) REFERENCES elsa (id)
);

CREATE TABLE IF NOT EXISTS elsa_media (
    id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
    work_author TEXT,
    section_author TEXT,
    year TEXT,
    edition TEXT,
    work_title TEXT,
    chapter_title TEXT,
    location TEXT,
    publisher TEXT,
    signature TEXT,
    issue TEXT,
    pages TEXT,
    isbn TEXT,
    type TEXT,
    elsa_id INTEGER NOT NULL,
    FOREIGN KEY (elsa_id) REFERENCES elsa (id)
);

CREATE TABLE IF NOT EXISTS neweditions (
    id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
    new_bookdata TEXT,
    old_edition_id INTEGER,
    for_apparat INTEGER,
    ordered BOOLEAN DEFAULT (0),
    FOREIGN KEY (old_edition_id) REFERENCES media (id),
    FOREIGN KEY (for_apparat) REFERENCES semesterapparat (id)
);

-- Helpful indices to speed up frequent lookups and joins
CREATE INDEX IF NOT EXISTS idx_media_app_prof ON media(app_id, prof_id);
CREATE INDEX IF NOT EXISTS idx_media_deleted ON media(deleted);
CREATE INDEX IF NOT EXISTS idx_media_available ON media(available);
CREATE INDEX IF NOT EXISTS idx_messages_remind_at ON messages(remind_at);
CREATE INDEX IF NOT EXISTS idx_semesterapparat_prof ON semesterapparat(prof_id);
CREATE INDEX IF NOT EXISTS idx_semesterapparat_appnr ON semesterapparat(appnr);

COMMIT;
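A minimal sketch of how these numbered migrations could be applied in order (the directory layout matches this diff; the runner itself is an assumption, not part of the commit):

import pathlib
import sqlite3

def apply_migrations(db_path: str, migrations_dir: str = "src/database/migrations") -> None:
    conn = sqlite3.connect(db_path)
    try:
        # V001__..., V002__... sort lexicographically, matching their intended order
        for script in sorted(pathlib.Path(migrations_dir).glob("V*.sql")):
            conn.executescript(script.read_text(encoding="utf-8"))
    finally:
        conn.close()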
10
src/database/migrations/V002__create_table_webadis_login.sql
Normal file
@@ -0,0 +1,10 @@
BEGIN TRANSACTION;

CREATE TABLE IF NOT EXISTS webadis_login (
    id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
    username TEXT NOT NULL,
    password TEXT NOT NULL
);

COMMIT;
@@ -0,0 +1,6 @@
BEGIN TRANSACTION;

ALTER TABLE webadis_login
ADD COLUMN effective_range TEXT;

COMMIT;
112
src/database/schemas.py
Normal file
@@ -0,0 +1,112 @@
CREATE_TABLE_APPARAT = """CREATE TABLE semesterapparat (
    id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
    name TEXT,
    prof_id INTEGER,
    fach TEXT,
    appnr INTEGER,
    erstellsemester TEXT,
    verlängert_am TEXT,
    dauer BOOLEAN,
    verlängerung_bis TEXT,
    deletion_status INTEGER,
    deleted_date TEXT,
    apparat_id_adis INTEGER,
    prof_id_adis INTEGER,
    konto INTEGER,
    FOREIGN KEY (prof_id) REFERENCES prof (id)
)"""
CREATE_TABLE_MEDIA = """CREATE TABLE media (
    id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
    bookdata TEXT,
    app_id INTEGER,
    prof_id INTEGER,
    deleted INTEGER DEFAULT (0),
    available BOOLEAN,
    reservation BOOLEAN,
    FOREIGN KEY (prof_id) REFERENCES prof (id),
    FOREIGN KEY (app_id) REFERENCES semesterapparat (id)
)"""

CREATE_TABLE_FILES = """CREATE TABLE files (
    id INTEGER PRIMARY KEY,
    filename TEXT,
    fileblob BLOB,
    app_id INTEGER,
    filetyp TEXT,
    prof_id INTEGER REFERENCES prof (id),
    FOREIGN KEY (app_id) REFERENCES semesterapparat (id)
)"""
CREATE_TABLE_MESSAGES = """CREATE TABLE messages (
    id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
    created_at date NOT NULL DEFAULT CURRENT_TIMESTAMP,
    message TEXT NOT NULL,
    remind_at date NOT NULL DEFAULT CURRENT_TIMESTAMP,
    user_id INTEGER NOT NULL,
    appnr INTEGER,
    FOREIGN KEY (user_id) REFERENCES user (id)
)"""
CREATE_TABLE_PROF = """CREATE TABLE prof (
    id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
    titel TEXT,
    fname TEXT,
    lname TEXT,
    fullname TEXT NOT NULL UNIQUE,
    mail TEXT,
    telnr TEXT
)"""
CREATE_TABLE_USER = """CREATE TABLE user (
    id integer NOT NULL PRIMARY KEY AUTOINCREMENT,
    created_at datetime NOT NULL DEFAULT CURRENT_TIMESTAMP,
    username TEXT NOT NULL UNIQUE,
    password TEXT NOT NULL,
    salt TEXT NOT NULL,
    role TEXT NOT NULL,
    email TEXT UNIQUE,
    name TEXT
)"""
CREATE_TABLE_SUBJECTS = """CREATE TABLE subjects (
    id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
    name TEXT NOT NULL UNIQUE
)"""

CREATE_ELSA_TABLE = """CREATE TABLE elsa (
    id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
    date TEXT NOT NULL,
    semester TEXT NOT NULL,
    prof_id INTEGER NOT NULL
)"""
CREATE_ELSA_FILES_TABLE = """CREATE TABLE elsa_files (
    id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
    filename TEXT NOT NULL,
    fileblob BLOB NOT NULL,
    elsa_id INTEGER NOT NULL,
    filetyp TEXT NOT NULL,
    FOREIGN KEY (elsa_id) REFERENCES elsa (id)
)"""
CREATE_ELSA_MEDIA_TABLE = """CREATE TABLE elsa_media (
    id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
    work_author TEXT,
    section_author TEXT,
    year TEXT,
    edition TEXT,
    work_title TEXT,
    chapter_title TEXT,
    location TEXT,
    publisher TEXT,
    signature TEXT,
    issue TEXT,
    pages TEXT,
    isbn TEXT,
    type TEXT,
    elsa_id INTEGER NOT NULL,
    FOREIGN KEY (elsa_id) REFERENCES elsa (id)
)"""
CREATE_TABLE_NEWEDITIONS = """CREATE TABLE neweditions (
    id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
    new_bookdata TEXT,
    old_edition_id INTEGER,
    for_apparat INTEGER,
    ordered BOOLEAN DEFAULT (0),
    FOREIGN KEY (old_edition_id) REFERENCES media (id),
    FOREIGN KEY (for_apparat) REFERENCES semesterapparat (id)
)"""
2
src/documents/__init__.py
Normal file
@@ -0,0 +1,2 @@


371
src/documents/generators.py
Normal file
@@ -0,0 +1,371 @@
import os
from datetime import datetime
from os.path import basename

from docx import Document
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
from docx.oxml import OxmlElement
from docx.oxml.ns import qn
from docx.shared import Cm, Pt, RGBColor

from src import settings
from src.shared.logging import log

logger = log

font = "Cascadia Mono"


def print_document(file: str) -> None:
    # send document to printer as attachment of email
    import smtplib
    from email.mime.application import MIMEApplication
    from email.mime.multipart import MIMEMultipart
    from email.mime.text import MIMEText

    smtp = settings.mail.smtp_server
    port = settings.mail.port
    sender_email = settings.mail.sender
    password = settings.mail.password
    receiver = settings.mail.printer_mail
    message = MIMEMultipart()
    message["From"] = sender_email
    message["To"] = receiver
    message["cc"] = settings.mail.sender
    message["Subject"] = "."
    mail_body = "."
    message.attach(MIMEText(mail_body, "html"))
    with open(file, "rb") as fil:
        part = MIMEApplication(fil.read(), Name=basename(file))
    # After the file is closed
    part["Content-Disposition"] = 'attachment; filename="%s"' % basename(file)
    message.attach(part)
    mail = message.as_string()
    with smtplib.SMTP_SSL(smtp, port) as server:
        # SMTP_SSL already connects on construction; the context manager quits
        server.login(settings.mail.user_name, password)
        server.sendmail(sender_email, receiver, mail)
    log.success("Mail sent")


class SemesterError(Exception):
    """Custom exception for semester-related errors."""

    def __init__(self, message: str):
        super().__init__(message)
        log.error(message)

    def __str__(self):
        return f"SemesterError: {self.args[0]}"


class SemesterDocument:
    def __init__(
        self,
        apparats: list[tuple[int, str]],
        semester: str,
        filename: str,
        full: bool = False,
    ):
        assert isinstance(apparats, list), SemesterError(
            "Apparats must be a list of tuples"
        )
        assert all(isinstance(apparat, tuple) for apparat in apparats), SemesterError(
            "Apparats must be a list of tuples"
        )
        assert all(isinstance(apparat[0], int) for apparat in apparats), SemesterError(
            "Apparat numbers must be integers"
        )
        assert all(isinstance(apparat[1], str) for apparat in apparats), SemesterError(
            "Apparat names must be strings"
        )
        assert isinstance(semester, str), SemesterError("Semester must be a string")
        assert "." not in filename and isinstance(filename, str), SemesterError(
            "Filename must be a string and not contain an extension"
        )
        self.doc = Document()
        self.apparats = apparats
        self.semester = semester
        self.table_font_normal = font
        self.table_font_bold = font
        self.header_font = font
        self.header_font_size = Pt(26)
        self.sub_header_font_size = Pt(18)
        self.table_font_size = Pt(10)
        self.color_red = RGBColor(255, 0, 0)
        self.color_blue = RGBColor(0, 0, 255)
        self.filename = filename
        if full:
            log.info("Full document generation")
            self.cleanup  # property access triggers the file cleanup
            log.info("Cleanup done")
            self.make_document()
            log.info("Document created")
            self.create_pdf()
            log.info("PDF created")
            print_document(self.filename + ".pdf")
            log.info("Document printed")

    def set_table_border(self, table):
        """
        Adds a full border to the table.

        :param table: Table object to which the border will be applied.
        """
        tbl = table._element
        tbl_pr = tbl.xpath("w:tblPr")[0]
        tbl_borders = OxmlElement("w:tblBorders")

        # Define border styles
        for border_name in ["top", "left", "bottom", "right", "insideH", "insideV"]:
            border = OxmlElement(f"w:{border_name}")
            border.set(qn("w:val"), "single")
            border.set(qn("w:sz"), "4")  # Thickness of the border
            border.set(qn("w:space"), "0")
            border.set(qn("w:color"), "000000")  # Black color
            tbl_borders.append(border)

        tbl_pr.append(tbl_borders)

    def create_sorted_table(self) -> None:
        # Sort the apparats list by the string in the tuple (index 1)
        self.apparats.sort(key=lambda x: x[1])
        # Create a table with rows equal to the length of the apparats list
        table = self.doc.add_table(rows=len(self.apparats), cols=2)
        table.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER

        # Set column widths by directly modifying the cell properties
        widths = [Cm(1.19), Cm(10)]
        for col_idx, width in enumerate(widths):
            for cell in table.columns[col_idx].cells:
                cell_width_element = cell._element.xpath(".//w:tcPr")[0]
                tcW = OxmlElement("w:tcW")
                tcW.set(qn("w:w"), str(int(width.cm * 567)))  # Convert cm to twips
                tcW.set(qn("w:type"), "dxa")
                cell_width_element.append(tcW)

        # Adjust row heights
        for row in table.rows:
            trPr = row._tr.get_or_add_trPr()  # Get or add the <w:trPr> element
            trHeight = OxmlElement("w:trHeight")
            trHeight.set(
                qn("w:val"), str(int(Pt(15).pt * 20))
            )  # Convert points to twips
            trHeight.set(qn("w:hRule"), "exact")  # Use "exact" for fixed height
            trPr.append(trHeight)

        # Fill the table with sorted data
        for row_idx, (number, name) in enumerate(self.apparats):
            row = table.rows[row_idx]

            # Set font for the first column (number)
            cell_number_paragraph = row.cells[0].paragraphs[0]
            cell_number_run = cell_number_paragraph.add_run(str(number))
            cell_number_run.font.name = self.table_font_bold
            cell_number_run.font.size = self.table_font_size
            cell_number_run.font.bold = True
            cell_number_run.font.color.rgb = self.color_red
            cell_number_paragraph.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER

            # Set font for the second column (name)
            cell_name_paragraph = row.cells[1].paragraphs[0]
            words = name.split()
            if words:
                # Add the first word in bold
                bold_run = cell_name_paragraph.add_run(words[0])
                bold_run.font.bold = True
                bold_run.font.name = self.table_font_bold
                bold_run.font.size = self.table_font_size

                # Add the rest of the words normally
                if len(words) > 1:
                    normal_run = cell_name_paragraph.add_run(" " + " ".join(words[1:]))
                    normal_run.font.name = self.table_font_normal
                    normal_run.font.size = self.table_font_size
            cell_name_paragraph.alignment = WD_PARAGRAPH_ALIGNMENT.LEFT

        self.set_table_border(table)

    def make_document(self):
        # Create a new Document
        section = self.doc.sections[0]
        section.top_margin = Cm(2.54)  # Default 1 inch (can adjust as needed)
        section.bottom_margin = Cm(1.5)  # Set bottom margin to 1.5 cm
        section.left_margin = Cm(2.54)  # Default 1 inch
        section.right_margin = Cm(2.54)  # Default 1 inch

        # Add the current date
        current_date = datetime.now().strftime("%Y-%m-%d")
        date_paragraph = self.doc.add_paragraph(current_date)
        date_paragraph.alignment = WD_PARAGRAPH_ALIGNMENT.LEFT

        # Add a header
        semester = f"Semesterapparate {self.semester}"
        header = self.doc.add_paragraph(semester)
        header.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
        header_run = header.runs[0]
        header_run.font.name = self.header_font
        header_run.font.size = self.header_font_size
        header_run.font.bold = True
        header_run.font.color.rgb = self.color_blue

        sub_header = self.doc.add_paragraph("(Alphabetisch)")
        sub_header.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
        sub_header_run = sub_header.runs[0]
        sub_header_run.font.name = self.header_font
        sub_header_run.font.size = self.sub_header_font_size
        sub_header_run.font.color.rgb = self.color_red

        self.doc.add_paragraph("")

        self.create_sorted_table()

    def save_document(self, name: str) -> None:
        # Save the document
        self.doc.save(name)

    def create_pdf(self) -> None:
        # Convert the saved document to PDF via Word COM automation (Windows only)
        import comtypes.client

        word = comtypes.client.CreateObject("Word.Application")  # type: ignore
        self.save_document(self.filename + ".docx")
        docpath = os.path.abspath(self.filename + ".docx")
        doc = word.Documents.Open(docpath)
        curdir = os.getcwd()
        doc.SaveAs(f"{curdir}/{self.filename}.pdf", FileFormat=17)
        doc.Close()
        word.Quit()
        log.debug("PDF saved")

    @property
    def cleanup(self) -> None:
        if os.path.exists(f"{self.filename}.docx"):
            os.remove(f"{self.filename}.docx")
        if os.path.exists(f"{self.filename}.pdf"):
            os.remove(f"{self.filename}.pdf")

    @property
    def send(self) -> None:
        print_document(self.filename + ".pdf")
        log.debug("Document sent to printer")


class SemapSchilder:
    def __init__(self, entries: list[str]):
        self.entries = entries
        self.filename = "Schilder"
        self.font_size = Pt(23)
        self.font_name = font
        self.doc = Document()
        self.define_doc_properties()
        self.add_entries()
        self.cleanup()
        self.create_pdf()

    def define_doc_properties(self):
        # set the doc to have a top margin of 1cm, left and right are 0.5cm, bottom is 0cm
        section = self.doc.sections[0]
        section.top_margin = Cm(1)
        section.bottom_margin = Cm(0)
        section.left_margin = Cm(0.5)
        section.right_margin = Cm(0.5)

        # set the font to size 23 bold, color black
        for paragraph in self.doc.paragraphs:
            for run in paragraph.runs:
                run.font.name = self.font_name
                run.font.size = self.font_size
                run.font.bold = True
                run.font.color.rgb = RGBColor(0, 0, 0)
            paragraph.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER

    def add_entries(self):
        for entry in self.entries:
            paragraph = self.doc.add_paragraph(entry)
            paragraph.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
            paragraph.paragraph_format.line_spacing = Pt(23)  # Set fixed line spacing
            paragraph.paragraph_format.space_before = Pt(2)  # Minimal spacing before
            paragraph.paragraph_format.space_after = Pt(2)  # Minimal spacing after

            run = paragraph.runs[0]
            run.font.name = self.font_name
            run.font.size = self.font_size
            run.font.bold = True
            run.font.color.rgb = RGBColor(0, 0, 0)

            # Add a line to be used as a guideline for cutting
            line = self.doc.add_paragraph()
            line.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
            line.paragraph_format.line_spacing = Pt(23)  # Match line spacing
            line.paragraph_format.space_before = Pt(2)  # Minimal spacing before
            line.paragraph_format.space_after = Pt(2)  # Minimal spacing after
            line.add_run("--------------------------")

    def save_document(self):
        # Save the document
        self.doc.save(f"{self.filename}.docx")
        log.debug(f"Document saved as {self.filename}.docx")

    def create_pdf(self) -> None:
        # Convert the saved document to PDF via Word COM automation (Windows only)
        import comtypes.client

        word = comtypes.client.CreateObject("Word.Application")  # type: ignore
        self.save_document()
        docpath = os.path.abspath(f"{self.filename}.docx")
        doc = word.Documents.Open(docpath)
        curdir = os.getcwd()
        doc.SaveAs(f"{curdir}/{self.filename}.pdf", FileFormat=17)
        doc.Close()
        word.Quit()
        log.debug("PDF saved")

    def cleanup(self) -> None:
        if os.path.exists(f"{self.filename}.docx"):
            os.remove(f"{self.filename}.docx")
        if os.path.exists(f"{self.filename}.pdf"):
            os.remove(f"{self.filename}.pdf")

    @property
    def send(self) -> None:
        print_document(self.filename + ".pdf")
        log.debug("Document sent to printer")


if __name__ == "__main__":
    entries = [
        "Lüsebrink (Theorie und Praxis der Leichtathletik)",
        "Kulovics (ISP-Betreuung)",
        "Köhler (Ausgewählte Aspekte der materiellen Kultur Textil)",
        "Grau (Young Adult Literature)",
        "Schiebel (Bewegung II:Ausgewählte Problemfelder)",
        "Schiebel (Ernährungswiss. Perspektive)",
        "Park (Kommunikation und Kooperation)",
        "Schiebel (Schwimmen)",
        "Huppertz (Philosophieren mit Kindern)",
        "Heyl (Heyl)",
        "Reuter (Verschiedene Veranstaltungen)",
        "Reinhold (Arithmetik und mathematisches Denken)",
        "Wirtz (Forschungsmethoden)",
        "Schleider (Essstörungen)",
        "Schleider (Klinische Psychologie)",
        "Schleider (Doktorandenkolloquium)",
        "Schleider (Störungen Sozialverhaltens/Delinquenz)",
        "Burth (EU Forschung im Int. Vergleich/EU Gegenstand biling. Didaktik)",
        "Reinhardt (Einführung Politikdidaktik)",
        "Schleider (Psychologische Interventionsmethoden)",
        "Schleider (ADHS)",
        "Schleider (Beratung und Teamarbeit)",
        "Schleider (LRS)",
        "Schleider (Gesundheitspsychologie)",
        "Schleider (Elterntraining)",
        "Wulff (Hochschulzertifikat DaZ)",
        "Dinkelaker ( )",
        "Droll (Einführung in die Sprachwissenschaft)",
        "Karoß (Gymnastik - Sich Bewegen mit und ohne Handgeräte)",
        "Sahrai (Kindheit und Gesellschaft)",
    ]
    doc = SemapSchilder(entries)
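
    # A minimal SemesterDocument sketch under the same assumptions
    # (python-docx installed; Word/comtypes only needed for create_pdf).
    # The apparat numbers and names below are invented:
    # sem_doc = SemesterDocument(
    #     apparats=[(1, "Grau (Young Adult Literature)")],
    #     semester="WiSe 25/26",
    #     filename="Aushang",
    # )
    # sem_doc.make_document()
    # sem_doc.save_document("Aushang.docx")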
13
src/parsers/__init__.py
Normal file
@@ -0,0 +1,13 @@
from .csv_parser import csv_to_list
from .pdf_parser import pdf_to_csv
from .word_parser import word_to_semap
from .xml_parser import eml_parser, eml_to_semap

__all__ = [
    "csv_to_list",
    "pdf_to_csv",
    "word_to_semap",
    "eml_parser",
    "eml_to_semap",
]
23
src/parsers/csv_parser.py
Normal file
@@ -0,0 +1,23 @@
import csv

from charset_normalizer import detect


def csv_to_list(path: str) -> list[str]:
    """
    Extracts the data from a csv file and returns it as a list of strings.
    """
    with open(path, "rb") as raw:
        encoding = detect(raw.read())["encoding"]
    with open(path, newline="", encoding=encoding) as csvfile:
        # if decoder fails to map, assign ""
        reader = csv.reader(csvfile, delimiter=";", quotechar="|")
        ret = []
        for row in reader:
            ret.append(row[0].replace('"', ""))
    return ret


if __name__ == "__main__":
    text = csv_to_list("C:/Users/aky547/Desktop/semap/71.csv")
    # print(text)
23
src/parsers/pdf_parser.py
Normal file
@@ -0,0 +1,23 @@
from pdfquery import PDFQuery


def pdf_to_csv(path: str) -> str:
    """
    Extracts the text from a pdf file and returns it as a string.
    """
    file = PDFQuery(path)
    file.load()
    # get the text from the pdf file
    text_elems = file.extract([("with_formatter", "text"), ("all_text", "*")])
    extracted_text = text_elems["all_text"]

    return extracted_text


if __name__ == "__main__":
    text = pdf_to_csv("54_pdf.pdf")
    # remove linebreaks
    text = text.replace("\n", "")
    # print(text)
8
src/parsers/transformers/__init__.py
Normal file
@@ -0,0 +1,8 @@
from .transformers import (
    RDS_AVAIL_DATA,
    ARRAYData,
    BibTeXData,
    COinSData,
    RDSData,
    RISData,
)
122
src/parsers/transformers/schemas.py
Normal file
@@ -0,0 +1,122 @@
from __future__ import annotations

import json
from dataclasses import dataclass
from dataclasses import field as dataclass_field
from typing import Any, List, Optional


@dataclass
class Item:
    superlocation: str | None = dataclass_field(default_factory=str)
    status: str | None = dataclass_field(default_factory=str)
    availability: str | None = dataclass_field(default_factory=str)
    notes: str | None = dataclass_field(default_factory=str)
    limitation: str | None = dataclass_field(default_factory=str)
    duedate: str | None = dataclass_field(default_factory=str)
    id: str | None = dataclass_field(default_factory=str)
    item_id: str | None = dataclass_field(default_factory=str)
    ilslink: str | None = dataclass_field(default_factory=str)
    number: int | None = dataclass_field(default_factory=int)
    barcode: str | None = dataclass_field(default_factory=str)
    reserve: str | None = dataclass_field(default_factory=str)
    callnumber: str | None = dataclass_field(default_factory=str)
    department: str | None = dataclass_field(default_factory=str)
    locationhref: str | None = dataclass_field(default_factory=str)
    location: str | None = dataclass_field(default_factory=str)

    def from_dict(self, data: dict):
        """Import data from dict"""
        data = data["items"]
        for entry in data:
            for key, value in entry.items():
                setattr(self, key, value)
        return self


@dataclass
class RDS_AVAIL_DATA:
    """Class to store RDS availability data"""

    library_sigil: str = dataclass_field(default_factory=str)
    items: List[Item] = dataclass_field(default_factory=list)

    def import_from_dict(self, data: str):
        """Import data from dict"""
        edata = json.loads(data)
        # library sigil is first key
        self.library_sigil = str(list(edata.keys())[0])
        # get data from first key
        edata = edata[self.library_sigil]
        for location in edata:
            item = Item(superlocation=location).from_dict(edata[location])
            self.items.append(item)
        return self


@dataclass
class RDS_DATA:
    """Class to store RDS data"""

    RDS_SIGNATURE: str = dataclass_field(default_factory=str)
    RDS_STATUS: str = dataclass_field(default_factory=str)
    RDS_LOCATION: str = dataclass_field(default_factory=str)
    RDS_URL: Any = dataclass_field(default_factory=str)
    RDS_HINT: Any = dataclass_field(default_factory=str)
    RDS_COMMENT: Any = dataclass_field(default_factory=str)
    RDS_HOLDING: Any = dataclass_field(default_factory=str)
    RDS_HOLDING_LEAK: Any = dataclass_field(default_factory=str)
    RDS_INTERN: Any = dataclass_field(default_factory=str)
    RDS_PROVENIENCE: Any = dataclass_field(default_factory=str)
    RDS_LOCAL_NOTATION: str = dataclass_field(default_factory=str)
    RDS_LEA: Any = dataclass_field(default_factory=str)

    def import_from_dict(self, data: dict) -> RDS_DATA:
        """Import data from dict"""
        for key, value in data.items():
            setattr(self, key, value)
        return self


@dataclass
class RDS_GENERIC_DATA:
    LibrarySigil: str = dataclass_field(default_factory=str)
    RDS_DATA: List[RDS_DATA] = dataclass_field(default_factory=list)

    def import_from_dict(self, data: str) -> RDS_GENERIC_DATA:
        """Import data from dict"""
        edata = json.loads(data)
        # library sigil is first key
        self.LibrarySigil = str(list(edata.keys())[0])
        # get data from first key
        edata = edata[self.LibrarySigil]
        for entry in edata:
            rds_data = RDS_DATA()  # Create a new RDS_DATA instance
            # Populate the RDS_DATA instance from the entry
            # This assumes that the entry is a dictionary that matches the structure of the RDS_DATA class
            rds_data.import_from_dict(entry)
            self.RDS_DATA.append(rds_data)  # Add the RDS_DATA instance to the list
        return self


@dataclass
class LoksatzData:
    type: Optional[str] = None
    adis_idn: Optional[str] = None
    t_idn: Optional[str] = None
    ktrl_nr: Optional[str] = None
    adis_isil: Optional[str] = None
    adis_sigel: Optional[str] = None
    bib_sigel: Optional[str] = None
    standort: Optional[str] = None
    signatur: Optional[str] = None
    ausleihcode: Optional[str] = None
    sig_katalog: Optional[str] = None
    erwerb_datum: Optional[str] = None
    medientypcode: Optional[str] = None
    bestellart: Optional[str] = None
    faecherstatistik: Optional[str] = None
    exemplar_stat: Optional[str] = None
    so_standort: Optional[str] = None
522
src/parsers/transformers/transformers.py
Normal file
@@ -0,0 +1,522 @@
from __future__ import annotations

import json
import re
import sys
from dataclasses import dataclass
from dataclasses import field as dataclass_field
from typing import Any, List

import loguru

from src import LOG_DIR
from src.logic.dataclass import BookData

log = loguru.logger
log.remove()
log.add(sys.stdout, level="INFO")
log.add(f"{LOG_DIR}/application.log", rotation="1 MB", retention="10 days")


### Dataclass models
@dataclass
class Item:
    superlocation: str | None = dataclass_field(default_factory=str)
    status: str | None = dataclass_field(default_factory=str)
    availability: str | None = dataclass_field(default_factory=str)
    notes: str | None = dataclass_field(default_factory=str)
    limitation: str | None = dataclass_field(default_factory=str)
    duedate: str | None = dataclass_field(default_factory=str)
    id: str | None = dataclass_field(default_factory=str)
    item_id: str | None = dataclass_field(default_factory=str)
    ilslink: str | None = dataclass_field(default_factory=str)
    number: int | None = dataclass_field(default_factory=int)
    barcode: str | None = dataclass_field(default_factory=str)
    reserve: str | None = dataclass_field(default_factory=str)
    callnumber: str | None = dataclass_field(default_factory=str)
    department: str | None = dataclass_field(default_factory=str)
    locationhref: str | None = dataclass_field(default_factory=str)
    location: str | None = dataclass_field(default_factory=str)
    ktrl_nr: str | None = dataclass_field(default_factory=str)

    def from_dict(self, data: dict):
        """Import data from dict"""
        data = data["items"]
        for entry in data:
            for key, value in entry.items():
                setattr(self, key, value)
        return self


@dataclass
class RDS_AVAIL_DATA:
    """Class to store RDS availability data"""

    library_sigil: str = dataclass_field(default_factory=str)
    items: List[Item] = dataclass_field(default_factory=list)

    def import_from_dict(self, data: str):
        """Import data from dict"""
        edata = json.loads(data)
        # library sigil is first key
        self.library_sigil = str(list(edata.keys())[0])
        # get data from first key
        edata = edata[self.library_sigil]
        for location in edata:
            item = Item(superlocation=location).from_dict(edata[location])
            self.items.append(item)
        return self


@dataclass
class RDS_DATA:
    """Class to store RDS data"""

    RDS_SIGNATURE: str = dataclass_field(default_factory=str)
    RDS_STATUS: str = dataclass_field(default_factory=str)
    RDS_LOCATION: str = dataclass_field(default_factory=str)
    RDS_URL: Any = dataclass_field(default_factory=str)
    RDS_HINT: Any = dataclass_field(default_factory=str)
    RDS_COMMENT: Any = dataclass_field(default_factory=str)
    RDS_HOLDING: Any = dataclass_field(default_factory=str)
    RDS_HOLDING_LEAK: Any = dataclass_field(default_factory=str)
    RDS_INTERN: Any = dataclass_field(default_factory=str)
    RDS_PROVENIENCE: Any = dataclass_field(default_factory=str)
    RDS_LOCAL_NOTATION: str = dataclass_field(default_factory=str)
    RDS_LEA: Any = dataclass_field(default_factory=str)

    def import_from_dict(self, data: dict) -> RDS_DATA:
        """Import data from dict"""
        for key, value in data.items():
            setattr(self, key, value)
        return self


@dataclass
class RDS_GENERIC_DATA:
    LibrarySigil: str = dataclass_field(default_factory=str)
    RDS_DATA: List[RDS_DATA] = dataclass_field(default_factory=list)

    def import_from_dict(self, data: str) -> RDS_GENERIC_DATA:
        """Import data from dict"""
        edata = json.loads(data)
        # library sigil is first key
        self.LibrarySigil = str(list(edata.keys())[0])
        # get data from first key
        edata = edata[self.LibrarySigil]
        for entry in edata:
            rds_data = RDS_DATA()  # Create a new RDS_DATA instance
            # Populate the RDS_DATA instance from the entry
            # This assumes that the entry is a dictionary that matches the structure of the RDS_DATA class
            rds_data.import_from_dict(entry)
            self.RDS_DATA.append(rds_data)  # Add the RDS_DATA instance to the list
        return self


class BaseStruct:
    def __init__(self, **kwargs):
        for key, value in kwargs.items():
            setattr(self, key, value)


class ARRAYData:
    def __init__(self, signature=None) -> None:
        self.signature = signature

    def transform(self, data: str) -> BookData:
        def _get_line(source: str, search: str) -> str:
            try:
                data = (
                    source.split(search)[1]
                    .split("\n")[0]
                    .strip()
                    .replace("=>", "")
                    .strip()
                )
                return data
            except Exception:
                log.exception(f"ARRAYData.transform failed, no string {search}")
                return ""

        def _get_list_entry(source: str, search: str, entry: str) -> str:
            try:
                source = source.replace("\t", "").replace("\r", "")
                source = source.split(search)[1].split(")")[0]
                return _get_line(source, entry).replace("=>", "").strip()
            except Exception:
                return ""

        def _get_isbn(source: str) -> list:
            try:
                isbn = source.split("[isbn]")[1].split(")")[0].strip()
                isbn = isbn.split("(")[1]
                isbns = isbn.split("=>")
                ret = []
                for raw in isbns:
                    isb = raw.split("\n")[0].strip()
                    if isb == "":
                        continue
                    if isb not in ret:
                        ret.append(isb)
                return ret
            except Exception:
                return []

        def _get_signature(data):
            try:
                sig_data = (
                    data.split("[loksatz]")[1]
                    .split("[0] => ")[1]
                    .split("\n")[0]
                    .strip()
                )
                signature_data = eval(sig_data)  # loksatz entries are dict literals
                return signature_data["signatur"]
            except Exception:
                return None

        def _get_author(data):
            try:
                array = data.split("[au_display_short]")[1].split(")\n")[0].strip()
            except Exception:
                return ""
            entries = array.split("\n")
            authors = []
            hg_present = False
            verf_present = False
            lines = []
            for entry in entries:
                if "=>" in entry:
                    line = entry.split("=>")[1].strip()
                    if "[HerausgeberIn]" in line:
                        hg_present = True
                    if "[VerfasserIn]" in line:
                        verf_present = True
                    lines.append(line)
            for line in lines:
                if hg_present and verf_present:
                    if "[HerausgeberIn]" in line:
                        authors.append(line.split("[")[0].strip())
                elif verf_present:
                    if "[VerfasserIn]" in line:
                        authors.append(line.split("[")[0].strip())
                else:
                    pass
            return ";".join(authors)

        def _get_title(data):
            titledata = None
            title = ""
            if "[ti_long]" in data:
                titledata = data.split("[ti_long]")[1].split(")\n")[0].strip()
                title = titledata.split("=>")[1].strip().split("/")[0].strip()
            if "[ti_long_f]" in data:
                titledata = data.split("[ti_long_f]")[1].split(")\n")[0].strip()
                title = titledata.split("=>")[1].strip().split("/")[0].strip()
            return title

        def _get_adis_idn(data, signature):
            loksatz_match = re.search(
                r"\[loksatz\] => Array\s*\((.*?)\)", data, re.DOTALL
            )
            if loksatz_match:
                loksatz_content = loksatz_match.group(1)

                # Extract dict-like objects within the loksatz section
                json_objects = re.findall(r"{.*?}", loksatz_content, re.DOTALL)
                for obj in json_objects:
                    data = eval(obj)
                    if data["signatur"] == signature:
                        return data["adis_idn"]

        def _get_in_apparat(data):
            loksatz_match = re.search(
                r"\[loksatz\] => Array\s*\((.*?)\)", data, re.DOTALL
            )
            if loksatz_match:
                loksatz_content = loksatz_match.group(1)

                # Extract dict-like objects within the loksatz section
                json_objects = re.findall(r"{.*?}", loksatz_content, re.DOTALL)
                # True if any holding is a Semesterapparat copy
                for obj in json_objects:
                    data = eval(obj)
                    if data["ausleihcode"] == "R" and data["standort"] == "40":
                        return True
            return False

        ppn = _get_line(data, "[kid]")
        title = _get_title(data).strip()
        author = _get_author(data)
        edition = _get_list_entry(data, "[ausgabe]", "[0]").replace(",", "")
        link = f"https://rds.ibs-bw.de/phfreiburg/link?kid={_get_line(data, '[kid]')}"
        isbn = _get_isbn(data)
        language = _get_list_entry(data, "[la_facet]", "[0]")
        publisher = _get_list_entry(data, "[pu]", "[0]")
        year = _get_list_entry(data, "[py_display]", "[0]")
        pages = _get_list_entry(data, "[umfang]", "[0]").split(":")[0].strip()
        signature = (
            self.signature if self.signature is not None else _get_signature(data)
        )

        place = _get_list_entry(data, "[pp]", "[0]")
        adis_idn = _get_adis_idn(data, signature=signature)
        in_apparat = _get_in_apparat(data)
        return BookData(
            ppn=ppn,
            title=title,
            author=author,
            edition=edition,
            link=link,
            isbn=isbn,
            language=language,
            publisher=publisher,
            year=year,
            pages=pages,
            signature=signature,
            place=place,
            adis_idn=adis_idn,
            in_apparat=in_apparat,
        )


class COinSData:
    def __init__(self) -> None:
        pass

    def transform(self, data: str) -> BookData:
        def _get_line(source: str, search: str) -> str:
            try:
                data = source.split(f"{search}=")[1]
                return data.split("rft")[0].strip() if "rft" in data else data
            except Exception:
                return ""

        return BookData(
            ppn=_get_line(data, "rft_id").split("=")[1],
            title=_get_line(data, "rft.btitle"),
            author=f"{_get_line(data, 'rft.aulast')}, {_get_line(data, 'rft.aufirst')}",
            edition=_get_line(data, "rft.edition"),
            link=_get_line(data, "rft_id"),
            isbn=_get_line(data, "rft.isbn"),
            publisher=_get_line(data, "rft.pub"),
            year=_get_line(data, "rft.date"),
            pages=_get_line(data, "rft.tpages").split(":")[0].strip(),
        )


class RISData:
    def __init__(self) -> None:
        pass

    def transform(self, data: str) -> BookData:
        def _get_line(source: str, search: str) -> str:
            try:
                data = source.split(f"{search} - ")[1]
                return data.split("\n")[0].strip() if "\n" in data else data
            except Exception:
                return ""

        return BookData(
            ppn=_get_line(data, "DP").split("=")[1],
            title=_get_line(data, "TI"),
            signature=_get_line(data, "CN"),
            edition=_get_line(data, "ET").replace(",", ""),
            link=_get_line(data, "DP"),
            isbn=_get_line(data, "SN").split(","),
            author=_get_line(data, "AU").split("[")[0].strip(),
            language=_get_line(data, "LA"),
            publisher=_get_line(data, "PB"),
            year=_get_line(data, "PY"),
            pages=_get_line(data, "SP"),
        )
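
# A minimal RIS sketch (the record below is invented; tags follow the
# "TAG - value" splitting used by RISData._get_line above):
# sample_ris = "TI - Example Title\nAU - Doe, John [VerfasserIn]\nPY - 2020\nSP - 300\nDP - https://rds.ibs-bw.de/link?kid=123\n"
# book = RISData().transform(sample_ris)  # -> BookData(title="Example Title", ...)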


class BibTeXData:
    def __init__(self):
        pass

    def transform(self, data: str) -> BookData:
        def _get_line(source: str, search: str) -> str:
            try:
                return (
                    data.split(search)[1]
                    .split("\n")[0]
                    .strip()
                    .split("=")[1]
                    .strip()
                    .replace("{", "")
                    .replace("}", "")
                    .replace(",", "")
                    .replace("[", "")
                    .replace("];", "")
                )
            except Exception:
                return ""

        return BookData(
            ppn=None,
            title=_get_line(data, "title"),
            signature=_get_line(data, "bestand"),
            edition=_get_line(data, "edition"),
            isbn=_get_line(data, "isbn"),
            author=";".join(_get_line(data, "author").split(" and ")),
            language=_get_line(data, "language"),
            publisher=_get_line(data, "publisher"),
            year=_get_line(data, "year"),
            pages=_get_line(data, "pages"),
        )


class RDSData:
    def __init__(self):
        self.retlist = []

    def transform(self, data: str):
        def __get_raw_data(data: str) -> list:
            # create base data to be turned into the dataclasses above
            data = data.split("RDS ----------------------------------")[1]
            edata = data.strip()
            edata = edata.split("\n", 9)[9]
            edata = edata.split("\n")[1:]
            entry_1 = edata[0]
            edata = edata[1:]
            entry_2 = "".join(edata)
            edata = []
            edata.append(entry_1)
            edata.append(entry_2)
            return edata

        ret_data = __get_raw_data(data)
        # assign data[1] to RDS_GENERIC_DATA, data[0] to RDS_AVAIL_DATA
        self.rds_data = RDS_GENERIC_DATA().import_from_dict(ret_data[1])
        self.rds_availability = RDS_AVAIL_DATA().import_from_dict(ret_data[0])
        self.retlist.append(self.rds_availability)
        self.retlist.append(self.rds_data)
        return self

    def return_data(self, option=None):
        if option == "rds_availability":
            return self.retlist[0]
        elif option == "rds_data":
            return self.retlist[1]
        else:
            return {"rds_availability": self.retlist[0], "rds_data": self.retlist[1]}


class DictToTable:
    def __init__(self):
        self.work_author = None
        self.section_author = None
        self.year = None
        self.edition = None
        self.work_title = None
        self.chapter_title = None
        self.location = None
        self.publisher = None
        self.signature = None
        self.type = None
        self.pages = None
        self.issue = None
        self.isbn = None

    def makeResult(self):
        data = {
            "work_author": self.work_author,
            "section_author": self.section_author,
            "year": self.year,
            "edition": self.edition,
            "work_title": self.work_title,
            "chapter_title": self.chapter_title,
            "location": self.location,
            "publisher": self.publisher,
            "signature": self.signature,
            "issue": self.issue,
            "pages": self.pages,
            "isbn": self.isbn,
            "type": self.type,
        }
        data = {k: v for k, v in data.items() if v is not None}
        return data

    def reset(self):
        for key in self.__dict__:
            setattr(self, key, None)

    def transform(self, data: dict):
        mode = data["mode"]
        self.reset()
        if mode == "book":
            return self.book_assign(data)
        elif mode == "hg":
            return self.hg_assign(data)
        elif mode == "zs":
            return self.zs_assign(data)
        else:
            return None

    def book_assign(self, data):
        self.type = "book"
        self.work_author = data["book_author"]
        self.signature = data["book_signature"]
        self.location = data["book_place"]
        self.year = data["book_year"]
        self.work_title = data["book_title"]
        self.edition = data["book_edition"]
        self.pages = data["book_pages"]
        self.publisher = data["book_publisher"]
        self.isbn = data["book_isbn"]
        return self.makeResult()

    def hg_assign(self, data):
        self.type = "hg"
        self.section_author = data["hg_author"]
        self.work_author = data["hg_editor"]
        self.year = data["hg_year"]
        self.work_title = data["hg_title"]
        self.publisher = data["hg_publisher"]
        self.location = data["hg_place"]
        self.edition = data["hg_edition"]
        self.chapter_title = data["hg_chaptertitle"]
        self.pages = data["hg_pages"]
        self.signature = data["hg_signature"]
        self.isbn = data["hg_isbn"]
        return self.makeResult()

    def zs_assign(self, data):
        self.type = "zs"
        self.section_author = data["zs_author"]
        self.chapter_title = data["zs_chapter_title"]
        self.location = data["zs_place"]
        self.issue = data["zs_issue"]
        self.pages = data["zs_pages"]
        self.publisher = data["zs_publisher"]
        self.isbn = data["zs_isbn"]

        self.year = data["zs_year"]
        self.signature = data["zs_signature"]
        self.work_title = data["zs_title"]
        return self.makeResult()


if __name__ == "__main__":
    with open("daiadata", "r") as f:
        data = f.read()

    ret = RDSData().transform(data)
    data = ret.return_data("rds_availability")
    # log.debug(data)
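
    # A minimal DictToTable sketch (keys follow book_assign above; the values
    # are invented for illustration):
    # row = DictToTable().transform({
    #     "mode": "book",
    #     "book_author": "Doe, John", "book_signature": "ABC 123",
    #     "book_place": "Freiburg", "book_year": "2020",
    #     "book_title": "Example", "book_edition": "2",
    #     "book_pages": "300", "book_publisher": "Example Verlag",
    #     "book_isbn": "978-3-16-148410-0",
    # })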
373
src/parsers/word_parser.py
Normal file
@@ -0,0 +1,373 @@
import zipfile
from typing import Any, Optional

import fitz  # PyMuPDF
import pandas as pd
from bs4 import BeautifulSoup
from docx import Document

from src.core.models import Book, SemapDocument
from src.shared.logging import log


def word_docx_to_csv(path: str) -> list[pd.DataFrame]:
    doc = Document(path)
    tables = doc.tables
    m_data = []
    for table in tables:
        data = []
        for row in table.rows:
            row_data: list[Any] = []
            for cell in row.cells:
                text = cell.text
                text = text.replace("\n", "")
                row_data.append(text)
            data.append(row_data)
        df = pd.DataFrame(data)
        df.columns = df.iloc[0]
        df = df.iloc[1:]

        m_data.append(df)

    return m_data


def get_fach(path: str) -> Optional[str]:
    document = zipfile.ZipFile(path)
    xml_data = document.read("word/document.xml")
    document.close()

    soup = BeautifulSoup(xml_data, "xml")
    # text we need is in <w:p w14:paraId="12456A32" ... > -> w:r -> w:t
    paragraphs = soup.find_all("w:p")
    for para in paragraphs:
        para_id = para.get("w14:paraId")
        if para_id == "12456A32":
            # get the data in the w:t
            for run in para.find_all("w:r"):
                data = run.find("w:t")
                if data and data.contents:
                    return data.contents[0]
    return None


def makeDict() -> dict[str, Optional[str]]:
    return {
        "work_author": None,
        "section_author": None,
        "year": None,
        "edition": None,
        "work_title": None,
        "chapter_title": None,
        "location": None,
        "publisher": None,
        "signature": None,
        "issue": None,
        "pages": None,
        "isbn": None,
        "type": None,
    }


def tuple_to_dict(tlist: tuple, doctype: str) -> list[dict[str, Optional[str]]]:
    ret: list[dict[str, Optional[str]]] = []
    for line in tlist:
        data = makeDict()
        if doctype == "Monografien":
            data["type"] = doctype
            data["work_author"] = line[0]
            data["year"] = line[1]
            data["edition"] = line[2]
            data["work_title"] = line[3]
            data["location"] = line[4]
            data["publisher"] = line[5]
            data["signature"] = line[6]
            data["pages"] = line[7]
        elif doctype == "Herausgeberwerke":
            data["type"] = doctype
            data["section_author"] = line[0]
            data["year"] = line[1]
            data["edition"] = line[2]
            data["chapter_title"] = line[3]
            data["work_author"] = line[4]
            data["work_title"] = line[5]
            data["location"] = line[6]
            data["publisher"] = line[7]
            data["signature"] = line[9]
            data["pages"] = line[8]
        elif doctype == "Zeitschriftenaufsätze":
            data["type"] = doctype
            data["section_author"] = line[0]
            data["year"] = line[1]
            data["issue"] = line[2]
            data["chapter_title"] = line[3]
            data["work_title"] = line[4]
            data["location"] = line[5]
            data["publisher"] = line[6]
            data["signature"] = line[8]
            data["pages"] = line[7]
        ret.append(data)
    return ret


def elsa_word_to_csv(path: str) -> tuple[list[dict[str, Optional[str]]], str]:
    doc = Document(path)
    # the document type is the last non-empty paragraph
    doctype = [para.text for para in doc.paragraphs if para.text != ""][-1]
    tuples = {
        "Monografien": ("", "", "", "", "", "", "", "", ""),
        "Herausgeberwerke": ("", "", "", "", "", "", "", "", "", "", ""),
        "Zeitschriftenaufsätze": ("", "", "", "", "", "", "", "", "", ""),
    }
    tables = doc.tables

    m_data: list[pd.DataFrame] = []
    for table in tables:
        data: list[list[str]] = []
        for row in table.rows:
            row_data: list[str] = []
            for cell in row.cells:
                text = cell.text
                text = text.replace("\n", "")
                text = text.replace("\u2002", "")
                row_data.append(text)
            data.append(row_data)
        df = pd.DataFrame(data)
        df.columns = df.iloc[0]
        df = df.iloc[1:]
        m_data.append(df)
    df = m_data[0]
    # split df to rows, dropping rows that are entirely empty
    data = [
        row for row in df.itertuples(index=False, name=None) if row != tuples[doctype]
    ]
    # log.debug(data)
    return tuple_to_dict(data, doctype), doctype


def word_to_semap(word_path: str, ai: bool = True) -> SemapDocument:
    log.info("Parsing Word Document {}", word_path)
    semap = SemapDocument()
    df = word_docx_to_csv(word_path)
    apparatdata = df[0]
    apparatdata = apparatdata.to_dict()
    keys = list(apparatdata.keys())

    appdata = {keys[i]: keys[i + 1] for i in range(0, len(keys) - 1, 2)}
    semap.phoneNumber = appdata["Telefon:"]
    semap.subject = appdata["Ihr Fach:"]
    semap.mail = appdata["Mailadresse:"]
    semap.personName = ",".join(appdata["Ihr Name und Titel:"].split(",")[:-1])
    semap.personTitle = ",".join(appdata["Ihr Name und Titel:"].split(",")[-1:]).strip()
    apparatdata = df[1]
    apparatdata = apparatdata.to_dict()
    keys = list(apparatdata.keys())
    appdata = {keys[i]: keys[i + 1] for i in range(0, len(keys), 2)}
    semap.title = appdata["Veranstaltung:"]
    semap.semester = appdata["Semester:"]
    if ai:
        semap.renameSemester
        semap.nameSetter

    books = df[2]
    booklist = []
    for i in range(len(books)):
        if books.iloc[i].isnull().all():
            continue
        data = books.iloc[i].to_dict()
        book = Book()
        book.from_dict(data)
        if book.is_empty:
            continue
        elif not book.has_signature:
            continue
        else:
            booklist.append(book)
    log.info("Found {} books", len(booklist))
    semap.books = booklist
    return semap

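# A minimal usage sketch (the .docx path is invented; ai=False skips the
# renameSemester/nameSetter post-processing used above):
# semap = word_to_semap("forms/semesterapparat.docx", ai=False)
# print(semap.personName, semap.semester, len(semap.books))
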
def pdf_to_semap(pdf_path: str, ai: bool = True) -> SemapDocument:
|
||||
"""
|
||||
Parse a Semesterapparat PDF like the sample you provided and return a SemapDocument.
|
||||
- No external programs, only PyMuPDF.
|
||||
- Robust to multi-line field values (e.g., hyphenated emails) and multi-line table cells.
|
||||
- Works across multiple pages; headers only need to exist on the first page.
|
||||
"""
|
||||
doc = fitz.open(pdf_path)
|
||||
semap = SemapDocument()
|
||||
|
||||
# ---------- helpers ----------
|
||||
def _join_tokens(tokens: list[str]) -> str:
|
||||
"""Join tokens, preserving hyphen/URL joins across line wraps."""
|
||||
parts = []
|
||||
for tok in tokens:
|
||||
if parts and (
|
||||
parts[-1].endswith("-")
|
||||
or parts[-1].endswith("/")
|
||||
or parts[-1].endswith(":")
|
||||
):
|
||||
parts[-1] = parts[-1] + tok # no space after '-', '/' or ':'
|
||||
else:
|
||||
parts.append(tok)
|
||||
return " ".join(parts).strip()

    def _extract_row_values_multiline(
        page, labels: list[str], y_window: float = 24
    ) -> dict[str, str]:
        """For a row of inline labels (e.g., Name/Fach/Telefon/Mail), grab text to the right of each label."""
        rects = []
        for lab in labels:
            hits = page.search_for(lab)
            if hits:
                rects.append((lab, hits[0]))
        if not rects:
            return {}

        rects.sort(key=lambda t: t[1].x0)
        words = page.get_text("words")
        out = {}
        for i, (lab, r) in enumerate(rects):
            x0 = r.x1 + 1
            x1 = rects[i + 1][1].x0 - 1 if i + 1 < len(rects) else page.rect.width - 5
            y0 = r.y0 - 3
            y1 = r.y0 + y_window
            toks = [w for w in words if x0 <= w[0] <= x1 and y0 <= w[1] <= y1]
            toks.sort(key=lambda w: (w[1], w[0]))  # line, then x
            out[lab] = _join_tokens([w[4] for w in toks])
        return out

    def _compute_columns_from_headers(page0):
        """Find column headers (once) and derive column centers + header baseline."""
        headers = [
            ("Autorenname(n):", "Autorenname(n):Nachname, Vorname"),
            ("Jahr/Auflage", "Jahr/Auflage"),
            ("Titel", "Titel"),
            ("Ort und Verlag", "Ort und Verlag"),
            ("Standnummer", "Standnummer"),
            ("Interne Vermerke", "Interne Vermerke"),
        ]
        found = []
        for label, canon in headers:
            rects = [
                r for r in page0.search_for(label) if r.y0 > 200
            ]  # skip top-of-form duplicates
            if rects:
                found.append((canon, rects[0]))
        found.sort(key=lambda t: t[1].x0)
        cols = [(canon, r.x0, r.x1, (r.x0 + r.x1) / 2.0) for canon, r in found]
        header_y = min(r.y0 for _, r in found) if found else 0
        return cols, header_y

    def _extract_table_rows_from_page(
        page, cols, header_y, y_top_margin=5, y_bottom_margin=40, y_tol=26.0
    ):
        """
        Group words into logical rows (tolerant to wrapped lines), then map each word
        to the nearest column by x-center and join tokens per column.
        """
        words = [
            w
            for w in page.get_text("words")
            if w[1] > header_y + y_top_margin
            and w[3] < page.rect.height - y_bottom_margin
        ]

        # group into row bands by y (tolerance big enough to capture wrapped lines, but below next row gap)
        rows = []
        for w in sorted(words, key=lambda w: w[1]):
            y = w[1]
            for row in rows:
                if abs(row["y_mean"] - y) <= y_tol:
                    row["ys"].append(y)
                    row["y_mean"] = sum(row["ys"]) / len(row["ys"])
                    row["words"].append(w)
                    break
            else:
                rows.append({"y_mean": y, "ys": [y], "words": [w]})

        # map to columns + join
        joined_rows = []
        for row in rows:
            rowdict = {canon: "" for canon, *_ in cols}
            words_by_col = {canon: [] for canon, *_ in cols}
            for w in sorted(row["words"], key=lambda w: (w[1], w[0])):
                xmid = (w[0] + w[2]) / 2.0
                canon = min(cols, key=lambda c: abs(xmid - c[3]))[0]
                words_by_col[canon].append(w[4])
            for canon, toks in words_by_col.items():
                rowdict[canon] = _join_tokens(toks)
            if any(v for v in rowdict.values()):
                joined_rows.append(rowdict)
        return joined_rows
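
    # Worked example (illustrative): with y_tol=26, words at y=100, 112 and 150
    # fall into two bands ({100, 112} and {150}), so a wrapped two-line cell
    # stays in one logical row while the next table row starts a new band.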

    # ---------- top-of-form fields ----------
    p0 = doc[0]
    row1 = _extract_row_values_multiline(
        p0,
        ["Ihr Name und Titel:", "Ihr Fach:", "Telefon:", "Mailadresse:"],
        y_window=22,
    )
    row2 = _extract_row_values_multiline(
        p0, ["Veranstaltung:", "Semester:"], y_window=20
    )

    name_title = row1.get("Ihr Name und Titel:", "") or ""
    semap.subject = row1.get("Ihr Fach:", None)
    semap.phoneNumber = row1.get("Telefon:", None)  # keep as-is (string like "682-308")
    semap.mail = row1.get("Mailadresse:", None)
    semap.personName = ",".join(name_title.split(",")[:-1]) if name_title else None
    semap.personTitle = (
        ",".join(name_title.split(",")[-1:]).strip() if name_title else None
    )

    semap.title = row2.get("Veranstaltung:", None)
    semap.semester = row2.get("Semester:", None)

    # ---------- table extraction (all pages) ----------
    cols, header_y = _compute_columns_from_headers(p0)
    all_rows: list[dict[str, Any]] = []
    for pn in range(len(doc)):
        all_rows.extend(_extract_table_rows_from_page(doc[pn], cols, header_y))

    # drop the sub-header line "Nachname, Vorname" etc.
    filtered = []
    for r in all_rows:
        if r.get("Autorenname(n):Nachname, Vorname", "").strip() in (
            "",
            "Nachname, Vorname",
        ):
            # skip if it's just the sub-header line
            if all(not r[c] for c in r if c != "Autorenname(n):Nachname, Vorname"):
                continue
        filtered.append(r)

    # build Book objects (same filters as the Word parser)
    booklist: list[Book] = []
    for row in filtered:
        b = Book()
        b.from_dict(row)
        if b.is_empty:
            continue
        if not b.has_signature:
            continue
        booklist.append(b)

    semap.books = booklist

    # keep parity with the Word parser's post-processing
    if ai:
        _ = semap.renameSemester
        _ = semap.nameSetter

    return semap


if __name__ == "__main__":
    else_df = pdf_to_semap("C:/Users/aky547/Dokumente/testsemap.pdf")
    # print(else_df)
67
src/parsers/xml_parser.py
Normal file
@@ -0,0 +1,67 @@
import xml.etree.ElementTree as ET

from src.core.models import Apparat, BookData, SemapDocument, XMLMailSubmission
from src.core.semester import Semester


def parse_xml_submission(xml_string: str) -> XMLMailSubmission:
    """
    Parse an XML string representing a mail submission and return an XMLMailSubmission object.
    """
    submission = XMLMailSubmission()
    root = ET.fromstring(xml_string)
    static_data = root.find("static")
    static_info = {child.tag: child.text for child in static_data}
    books = root.find("books")
    books_info = []
    for book_el in books:
        book_details = {detail.tag: detail.text for detail in book_el}
        # "year" may arrive as "year/edition", e.g. "2021/3"
        year_raw = book_details.get("year") or ""
        book = BookData(
            author=book_details.get("authorname"),
            year=year_raw.split("/")[0]
            if "/" in year_raw
            else book_details.get("year"),
            edition=year_raw.split("/")[1] if "/" in year_raw else None,
            title=book_details.get("title"),
            signature=book_details.get("signature"),
        )
        books_info.append(book)
    # Extract static data
    submission.name = static_info.get("name")
    submission.lastname = static_info.get("lastname")
    submission.title = static_info.get("title")
    telno = static_info.get("telno")
    submission.telno = int(telno) if telno else None
    submission.email = static_info.get("mail")
    submission.app_name = static_info.get("apparatsname")
    submission.subject = static_info.get("subject")
    sem_year = static_info.get("semester").split()[1]
    sem_term = static_info.get("semester").split()[0]
    submission.semester = Semester(semester=sem_term, year=int(sem_year))
    submission.books = books_info
    return submission
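

# Expected input shape (illustrative; tag names follow the lookups above,
# the root element name is an assumption):
#
#     <submission>
#       <static>
#         <name>…</name> <lastname>…</lastname> <title>…</title>
#         <telno>123</telno> <mail>…</mail> <apparatsname>…</apparatsname>
#         <subject>…</subject> <semester>WiSe 2024</semester>
#       </static>
#       <books>
#         <book>
#           <authorname>…</authorname> <year>2021/3</year>
#           <title>…</title> <signature>…</signature>
#         </book>
#       </books>
#     </submission>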


def eml_parser(path: str) -> XMLMailSubmission:
    with open(path, "r", encoding="utf-8") as file:
        xml_content = file.read().split("\n\n", 1)[1]  # Skip headers
    print("EML content loaded, parsing XML...")
    print(xml_content)
    return parse_xml_submission(xml_content)


def eml_to_semap(eml_path: str) -> SemapDocument:
    # eml_parser expects a file path, so this takes the path of the .eml file
    submission = eml_parser(eml_path)
    semap_doc = SemapDocument(
        # prof=Prof(name=submission.name, lastname=submission.lastname, email=submission.email),
        apparat=Apparat(name=submission.app_name, subject=submission.subject),
        semester=submission.semester,
        books=submission.books,
    )
    return semap_doc
16
src/services/__init__.py
Normal file
@@ -0,0 +1,16 @@
"""External service integrations and API clients."""

from .catalogue import Catalogue
from .sru import SWB
from .lehmanns import LehmannsClient
from .zotero import ZoteroController
from .webrequest import BibTextTransformer, WebRequest

__all__ = [
    "Catalogue",
    "SWB",
    "LehmannsClient",
    "ZoteroController",
    "BibTextTransformer",
    "WebRequest",
]
292
src/services/catalogue.py
Normal file
@@ -0,0 +1,292 @@
from typing import List

import regex
import requests
from bs4 import BeautifulSoup

from src.core.models import BookData as Book
from src.shared.logging import log

URL = "https://rds.ibs-bw.de/phfreiburg/opac/RDSIndex/Search?type0%5B%5D=allfields&lookfor0%5B%5D={}&join=AND&bool0%5B%5D=AND&type0%5B%5D=au&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=ti&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=ct&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=isn&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=ta&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=co&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=py&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=pp&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=pu&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=si&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=zr&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=cc&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND"
BASE = "https://rds.ibs-bw.de"


class Catalogue:
    def __init__(self, timeout=15):
        self.timeout = timeout
        reachable = self.check_connection()
        if not reachable:
            log.error("No internet connection available.")
            raise ConnectionError("No internet connection available.")

    def check_connection(self) -> bool:
        try:
            response = requests.get("https://www.google.com", timeout=self.timeout)
            if response.status_code == 200:
                return True
        except requests.exceptions.RequestException as e:
            log.error(f"Could not connect to google.com: {e}")
        return False

    def search_book(self, searchterm: str):
        response = requests.get(URL.format(searchterm), timeout=self.timeout)
        return response.text

    def search(self, link: str):
        response = requests.get(link, timeout=self.timeout)
        return response.text

    def get_book_links(self, searchterm: str) -> List[str]:
        response = self.search_book(searchterm)
        soup = BeautifulSoup(response, "html.parser")
        links = soup.find_all("a", class_="title getFull")
        res: List[str] = []
        for link in links:
            res.append(BASE + link["href"])  # type: ignore
        return res

    def get_book(self, searchterm: str) -> Book | None:
        log.info(f"Searching for term: {searchterm}")

        links = self.get_book_links(searchterm)
        print(links)
        for elink in links:
            result = self.search(elink)
            # in the result page, holdings live in divs with class
            # "col-xs-12 rds-dl RDS_LOCATION"
            soup = BeautifulSoup(result, "html.parser")

            # Optional (unchanged): title and ppn if you need them
            title_el = soup.find("div", class_="headline text")
            title = title_el.get_text(strip=True) if title_el else None

            ppn_el = soup.find(
                "div", class_="col-xs-12 col-md-5 col-lg-4 rds-dl-head RDS_PPN"
            )
            # in ppn_el, get text of div col-xs-12 col-md-7 col-lg-8 rds-dl-panel
            ppn = (
                ppn_el.find_next_sibling(
                    "div", class_="col-xs-12 col-md-7 col-lg-8 rds-dl-panel"
                ).get_text(strip=True)
                if ppn_el
                else None
            )

            # get edition text at div class col-xs-12 col-md-5 col-lg-4 rds-dl-head RDS_EDITION
            edition_el = soup.find(
                "div", class_="col-xs-12 col-md-5 col-lg-4 rds-dl-head RDS_EDITION"
            )
            edition = (
                edition_el.find_next_sibling(
                    "div", class_="col-xs-12 col-md-7 col-lg-8 rds-dl-panel"
                ).get_text(strip=True)
                if edition_el
                else None
            )

            authors = soup.find_all(
                "div", class_="col-xs-12 col-md-5 col-lg-4 rds-dl-head RDS_PERSON"
            )
            author = None
            if authors:
                # collect the names of the <a> links in the adjacent rds-dl-panel divs
                author_names = []
                for author_el in authors:
                    panel = author_el.find_next_sibling(
                        "div", class_="col-xs-12 col-md-7 col-lg-8 rds-dl-panel"
                    )
                    if panel:
                        anchors = panel.find_all("a")
                        for anchor in anchors:
                            author_names.append(anchor.text.strip())
                if author_names:
                    author = (
                        ";".join(author_names)
                        if len(author_names) > 1
                        else author_names[0]
                    )
            signature = None

            panel = soup.select_one("div.panel-body")
            if panel:
                # Collect the RDS_* blocks in order, using the 'space' divs as separators
                groups = []
                cur = {}
                for node in panel.select(
                    "div.rds-dl.RDS_SIGNATURE, div.rds-dl.RDS_STATUS, div.rds-dl.RDS_LOCATION, div.col-xs-12.space"
                ):
                    classes = node.get("class", [])
                    # Separator between entries
                    if "space" in classes:
                        if cur:
                            groups.append(cur)
                            cur = {}
                        continue

                    # Read the value from the corresponding panel cell
                    val_el = node.select_one(".rds-dl-panel")
                    val = (
                        val_el.get_text(" ", strip=True)
                        if val_el
                        else node.get_text(" ", strip=True)
                    )

                    if "RDS_SIGNATURE" in classes:
                        cur["signature"] = val
                    elif "RDS_STATUS" in classes:
                        cur["status"] = val
                    elif "RDS_LOCATION" in classes:
                        cur["location"] = val

                if cur:  # append the last group if not followed by a space
                    groups.append(cur)

                # Prefer the entry whose location mentions "Semesterapparat";
                # otherwise fall back to the first holding.
                for g in groups:
                    loc = g.get("location", "").lower()
                    if "semesterapparat" in loc:
                        signature = g.get("signature")
                        return Book(
                            title=title,
                            ppn=ppn,
                            signature=signature,
                            library_location=loc.split("-")[-1],
                            link=elink,
                            author=author,
                            edition=edition,
                        )
                if groups:
                    loc = groups[0].get("location", "").lower()
                    return Book(
                        title=title,
                        ppn=ppn,
                        signature=signature,
                        library_location=loc.split("\n\n")[-1],
                        link=elink,
                        author=author,
                        edition=edition,
                    )
        return None

    def get(self, ppn: str) -> Book | None:
        # based on PPN, get title, people, edition, year, language, pages, isbn,
        link = f"https://rds.ibs-bw.de/phfreiburg/opac/RDSIndexrecord/{ppn}"
        result = self.search(link)
        soup = BeautifulSoup(result, "html.parser")
        # field extraction from `soup` is not implemented yet
        return None

    def get_ppn(self, searchterm: str) -> str | None:
        links = self.get_book_links(searchterm)
        ppn = None
        for link in links:
            # the PPN is the last path segment of the record link
            ppn = link.split("/")[-1]
            if ppn and regex.match(r"^\d{8,10}[X\d]?$", ppn):
                return ppn
        return ppn

    def get_semesterapparat_number(self, searchterm: str) -> int | str:
        links = self.get_book_links(searchterm)
        for link in links:
            result = self.search(link)
            # in the result page, holdings live in divs with class
            # "col-xs-12 rds-dl RDS_LOCATION"
            soup = BeautifulSoup(result, "html.parser")

            locations = soup.find_all("div", class_="col-xs-12 rds-dl RDS_LOCATION")
            for location_el in locations:
                if "Semesterapparat-" in location_el.text:
                    match = regex.search(r"Semesterapparat-(\d+)", location_el.text)
                    if match:
                        return int(match.group(1))
                # fall back to the location label itself
                # (covers "Handbibliothek-..." and other holdings)
                return location_el.text.strip().split("\n\n")[-1].strip()
        return 0

    def get_author(self, link: str) -> str | None:
        links = self.get_book_links(f"kid:{link}")
        author = None
        for record_link in links:
            # print(record_link)
            result = self.search(record_link)
            soup = BeautifulSoup(result, "html.parser")
            # get all authors, return them as a string separated by ;
            authors = soup.find_all(
                "div", class_="col-xs-12 col-md-5 col-lg-4 rds-dl-head RDS_PERSON"
            )
            if authors:
                # collect the names of the <a> links in the adjacent rds-dl-panel divs
                author_names = []
                for author_el in authors:
                    panel = author_el.find_next_sibling(
                        "div", class_="col-xs-12 col-md-7 col-lg-8 rds-dl-panel"
                    )
                    if panel:
                        anchors = panel.find_all("a")
                        for anchor in anchors:
                            author_names.append(anchor.text.strip())
                author = "; ".join(author_names)
        return author

    def get_signature(self, isbn: str):
        links = self.get_book_links(f"{isbn}")
        signature = None
        for link in links:
            result = self.search(link)
            soup = BeautifulSoup(result, "html.parser")
            panel = soup.select_one("div.panel-body")
            if panel:
                # Collect the RDS_* blocks in order, using the 'space' divs as separators
                groups = []
                cur = {}
                for node in panel.select(
                    "div.rds-dl.RDS_SIGNATURE, div.rds-dl.RDS_STATUS, div.rds-dl.RDS_LOCATION, div.col-xs-12.space"
                ):
                    classes = node.get("class", [])
                    # Separator between entries
                    if "space" in classes:
                        if cur:
                            groups.append(cur)
                            cur = {}
                        continue

                    # Read the value from the corresponding panel cell
                    val_el = node.select_one(".rds-dl-panel")
                    val = (
                        val_el.get_text(" ", strip=True)
                        if val_el
                        else node.get_text(" ", strip=True)
                    )

                    if "RDS_SIGNATURE" in classes:
                        cur["signature"] = val
                    elif "RDS_STATUS" in classes:
                        cur["status"] = val
                    elif "RDS_LOCATION" in classes:
                        cur["location"] = val

                if cur:  # append the last group if not followed by a space
                    groups.append(cur)

                # Prefer the signature of the entry whose location mentions
                # "Semesterapparat"; otherwise remember the first one as a fallback.
                for g in groups:
                    loc = g.get("location", "").lower()
                    if "semesterapparat" in loc:
                        return g.get("signature")
                    if signature is None:
                        signature = g.get("signature")
        if signature is None:
            print("No signature found")
        return signature

    def in_library(self, ppn: str) -> bool:
        if ppn is None:
            return False
        links = self.get_book_links(f"kid:{ppn}")
        return len(links) > 0

    def get_location(self, ppn: str) -> str | None:
        if ppn is None:
            return None
        link = self.get_book(f"{ppn}")
        if link is None:
            return None
        return link.library_location
312
src/services/lehmanns.py
Normal file
@@ -0,0 +1,312 @@
from __future__ import annotations

import re
from dataclasses import asdict, dataclass, field
from typing import Iterable, List, Optional
from urllib.parse import quote_plus, urljoin

import httpx
from bs4 import BeautifulSoup

from src.core.models import BookData

BASE = "https://www.lehmanns.de"
SEARCH_URL = "https://www.lehmanns.de/search/quick?mediatype_id=&q="


@dataclass
class LehmannsSearchResult:
    title: str
    url: str

    # Core fields from the listing card
    year: Optional[int] = None
    edition: Optional[int] = None
    publisher: Optional[str] = None
    isbn13: Optional[str] = None

    # Extras from the listing card
    description: Optional[str] = None
    authors: list[str] = field(default_factory=list)
    media_type: Optional[str] = None
    book_format: Optional[str] = None
    price_eur: Optional[float] = None
    currency: str = "EUR"
    image: Optional[str] = None

    # From detail page:
    pages: Optional[str] = None  # "<N> Seiten"
    buyable: bool = True  # set in enrich_pages (detail page)
    unavailable_hint: Optional[str] = (
        None  # e.g. "Titel ist leider vergriffen; keine Neuauflage"
    )

    def to_dict(self) -> dict:
        return asdict(self)


class LehmannsClient:
    """Scrapes quick-search results, then enriches (and filters) via product pages."""

    def __init__(self, timeout: float = 20.0):
        self.client = httpx.Client(
            headers={
                "User-Agent": (
                    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
                    "(KHTML, like Gecko) Chrome/124.0 Safari/537.36"
                ),
                "Accept-Language": "de-DE,de;q=0.9,en;q=0.8",
                "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
            },
            timeout=timeout,
            follow_redirects=True,
        )

    def close(self):
        self.client.close()

    def __enter__(self):
        return self

    def __exit__(self, *exc):
        self.close()

    # ------------------- Search (listing) -------------------

    def build_search_url(self, title: str) -> str:
        # spaces -> '+'
        return SEARCH_URL + quote_plus(title)
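
    # e.g. (illustrative): build_search_url("Einführung in die Statistik")
    # -> "https://www.lehmanns.de/search/quick?mediatype_id=&q=Einf%C3%BChrung+in+die+Statistik"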

    def search_by_title(
        self,
        title: str,
        limit: Optional[int] = None,
        strict: bool = False,
        only_latest: bool = True,
    ) -> List[BookData]:
        """
        Parse the listing page only (no availability check here).
        Use enrich_pages(...) afterwards to fetch detail pages, add 'pages',
        and drop unbuyable items.
        """
        url = self.build_search_url(title=title)
        html = self._get(url)
        if not html:
            return []
        results = self._parse_results(html)
        # enrich_pages returns the filtered list; capture it so that
        # unbuyable items are actually dropped
        results = self.enrich_pages(results)

        results = [BookData().from_LehmannsSearchResult(r) for r in results]
        if strict:
            # filter results to only those with an exact title match (case-insensitive)
            title_lower = title.lower()
            results = [r for r in results if r.title and r.title.lower() == title_lower]
            # results = [r for r in results if r.buyable]
            return results
        if limit is not None:
            results = results[: max(0, limit)]
        if only_latest and len(results) > 1:
            # keep only the latest edition (highest edition number)
            results.sort(key=lambda r: (r.edition_number or 0), reverse=True)
            results = [results[0]]
        return results

    # ------------------- Detail enrichment & filtering -------------------

    def enrich_pages(
        self, results: Iterable[LehmannsSearchResult], drop_unbuyable: bool = True
    ) -> List[LehmannsSearchResult]:
        """
        Fetch each result.url, extract:
          - pages: from <span class="book-meta meta-seiten" itemprop="numberOfPages">...</span>
          - availability: from <li class="availability-3">...</li>
            * if it contains "Titel ist leider vergriffen", mark buyable=False
            * if it also contains "keine Neuauflage", set unavailable_hint accordingly
        If drop_unbuyable=True, exclude non-buyable results from the returned list.
        """
        enriched: List[LehmannsSearchResult] = []
        for r in results:
            try:
                html = self._get(r.url)
                if not html:
                    # Can't verify; keep as-is when not dropping, else skip
                    if not drop_unbuyable:
                        enriched.append(r)
                    continue

                soup = BeautifulSoup(html, "html.parser")  # type: ignore

                # Pages
                pages_node = soup.select_one(  # type: ignore
                    "span.book-meta.meta-seiten[itemprop='numberOfPages'], "
                    "span.book-meta.meta-seiten[itemprop='numberofpages'], "
                    ".meta-seiten [itemprop='numberOfPages'], "
                    ".meta-seiten[itemprop='numberOfPages'], "
                    ".book-meta.meta-seiten"
                )
                if pages_node:
                    text = pages_node.get_text(" ", strip=True)
                    m = re.search(r"\d+", text)
                    if m:
                        r.pages = f"{m.group(0)} Seiten"

                # Availability via li.availability-3
                avail_li = soup.select_one("li.availability-3")  # type: ignore
                if avail_li:
                    avail_text = " ".join(
                        avail_li.get_text(" ", strip=True).split()
                    ).lower()
                    if "titel ist leider vergriffen" in avail_text:
                        r.buyable = False
                        if "keine neuauflage" in avail_text:
                            r.unavailable_hint = (
                                "Titel ist leider vergriffen; keine Neuauflage"
                            )
                        else:
                            r.unavailable_hint = "Titel ist leider vergriffen"

                # Append or drop
                if (not drop_unbuyable) or r.buyable:
                    enriched.append(r)

            except Exception:
                # On any per-item error, keep the record if not dropping; else skip
                if not drop_unbuyable:
                    enriched.append(r)
                continue

        return enriched

    # ------------------- Internals -------------------

    def _get(self, url: str) -> Optional[str]:
        try:
            r = self.client.get(url)
            r.encoding = "utf-8"
            if r.status_code == 200 and "text/html" in (
                r.headers.get("content-type") or ""
            ):
                return r.text
        except httpx.HTTPError:
            pass
        return None

    def _parse_results(self, html: str) -> List[LehmannsSearchResult]:
        soup = BeautifulSoup(html, "html.parser")
        results: list[LehmannsSearchResult] = []

        for block in soup.select("div.info-block"):
            a = block.select_one(".title a[href]")
            if not a:
                continue
            url = urljoin(BASE, a["href"].strip())
            base_title = (block.select_one(".title [itemprop='name']") or a).get_text(  # type: ignore
                strip=True
            )

            # Alternative headline => extend title
            alt_tag = block.select_one(".description[itemprop='alternativeHeadline']")  # type: ignore
            alternative_headline = alt_tag.get_text(strip=True) if alt_tag else None
            title = (
                f"{base_title} : {alternative_headline}"
                if alternative_headline
                else base_title
            )
            description = alternative_headline

            # Authors from .author
            authors: list[str] = []
            author_div = block.select_one("div.author")  # type: ignore
            if author_div:
                t = author_div.get_text(" ", strip=True)
                t = re.sub(r"^\s*von\s+", "", t, flags=re.I)
                for part in re.split(r"\s*;\s*|\s*&\s*|\s+und\s+", t):
                    name = " ".join(part.split())
                    if name:
                        authors.append(name)

            # Media + format
            media_type = None
            book_format = None
            type_text = block.select_one(".type")  # type: ignore
            if type_text:
                t = type_text.get_text(" ", strip=True)
                m = re.search(r"\b(Buch|eBook|Hörbuch)\b", t)
                if m:
                    media_type = m.group(1)
                fm = re.search(r"\(([^)]+)\)", t)
                if fm:
                    book_format = fm.group(1).strip().upper()

            # Year
            year = None
            y = block.select_one("[itemprop='copyrightYear']")  # type: ignore
            if y:
                try:
                    year = int(y.get_text(strip=True))
                except ValueError:
                    pass

            # Edition
            edition = None
            ed = block.select_one("[itemprop='bookEdition']")  # type: ignore
            if ed:
                m = re.search(r"\d+", ed.get_text(strip=True))
                if m:
                    edition = int(m.group())

            # Publisher
            publisher = None
            pub = block.select_one(  # type: ignore
                ".publisherprop [itemprop='name']"
            ) or block.select_one(".publisher [itemprop='name']")  # type: ignore
            if pub:
                publisher = pub.get_text(strip=True)

            # ISBN-13
            isbn13 = None
            isbn_tag = block.select_one(".isbn [itemprop='isbn'], [itemprop='isbn']")  # type: ignore
            if isbn_tag:
                digits = re.sub(r"[^0-9Xx]", "", isbn_tag.get_text(strip=True))
                m = re.search(r"(97[89]\d{10})", digits)
                if m:
                    isbn13 = m.group(1)

            # Price (best effort)
            price_eur = None
            txt = block.get_text(" ", strip=True)
            mprice = re.search(r"(\d{1,3}(?:\.\d{3})*,\d{2})\s*€", txt)
            if not mprice and block.parent:
                sib = block.parent.get_text(" ", strip=True)
                mprice = re.search(r"(\d{1,3}(?:\.\d{3})*,\d{2})\s*€", sib)
            if mprice:
                num = mprice.group(1).replace(".", "").replace(",", ".")
                try:
                    price_eur = float(num)
                except ValueError:
                    pass

            # Image (best-effort)
            image = None
            left_img = block.find_previous("img")  # type: ignore
            if left_img and left_img.get("src"):
                image = urljoin(BASE, left_img["src"])

            results.append(
                LehmannsSearchResult(
                    title=title,
                    url=url,
                    description=description,
                    authors=authors,
                    media_type=media_type,
                    book_format=book_format,
                    year=year,
                    edition=edition,
                    publisher=publisher,
                    isbn13=isbn13,
                    price_eur=price_eur,
                    image=image,
                )
            )

        return results
58
src/services/openai.py
Normal file
@@ -0,0 +1,58 @@
import json
from typing import Any

from openai import OpenAI

from src import settings


def init_client() -> OpenAI:
    """Initialize the OpenAI client with the API key and model from settings."""
    global client, model, api_key
    if not settings.openAI.api_key:
        raise ValueError("OpenAI API key is not set in the configuration.")
    if not settings.openAI.model:
        raise ValueError("OpenAI model is not set in the configuration.")

    model = settings.openAI.model
    api_key = settings.openAI.api_key
    client = OpenAI(api_key=api_key)
    return client


def run_shortener(title: str, length: int) -> list[dict[str, Any]]:
    client = init_client()
    response = client.responses.create(  # type: ignore
        model=model,
        instructions="""you are a sentence shortener. The next message will contain the string to shorten and the length limit.
You need to shorten the string to be under the length limit, while keeping as much detail as possible. The result may NOT be longer than the length limit.
Based on that, please reply with only the shortened string. Give me 5 choices. If the length is too long, discard the string and try another one. Return the data as a python list containing the result as {"shortened_string": shortened_string, "length": lengthasInt}. Do not return the answer in a codeblock, use a pure string. Before answering, check the results and if ANY is longer than the needed_length, discard all and try again""",
        input=f'{{"string":"{title}", "needed_length":{length}}}',
    )
    answers = response.output_text
    # answers are a string in JSON format, so parse them into a list of dicts
    # (json.loads instead of eval, which would execute arbitrary code)
    return json.loads(answers)  # type: ignore


def name_tester(name: str) -> dict:
    client = init_client()
    response = client.responses.create(  # type: ignore
        model=model,
        instructions="""you are a name tester. You are given a name and have to split it into first name, last name, and, if present, the title. Return the name in a json format with the keys "title", "first_name", "last_name". If no title is present, set title to none. Do NOT return the answer in a codeblock, use a pure json string. Assume the names follow the usual German naming scheme""",
        input=f'{{"name":"{name}"}}',
    )
    answers = response.output_text

    return json.loads(answers)
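
# Illustrative call (actual output depends on the configured model):
#
#     name_tester("Prof. Dr. Max Mustermann")
#     # -> {"title": "Prof. Dr.", "first_name": "Max", "last_name": "Mustermann"}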


def semester_converter(semester: str) -> str:
    client = init_client()
    response = client.responses.create(  # type: ignore
        model=model,
        instructions="""you are a semester converter. You will be given a string. Convert this into a string like this: SoSe YY or WiSe YY/YY+1. Do not return the answer in a codeblock, use a pure string.""",
        input=semester,
    )
    answers = response.output_text

    return answers
631
src/services/sru.py
Normal file
@@ -0,0 +1,631 @@
import re
import xml.etree.ElementTree as ET
from dataclasses import dataclass, field
from enum import Enum
from typing import Dict, Iterable, List, Optional, Tuple, Union

import requests
from requests.adapters import HTTPAdapter

# centralized logging used via src.shared.logging
from src.core.models import BookData
from src.shared.logging import log

log  # ensure imported logger is referenced


# -----------------------
# Dataclasses
# -----------------------


# --- MARC XML structures ---
@dataclass
class ControlField:
    tag: str
    value: str


@dataclass
class SubField:
    code: str
    value: str


@dataclass
class DataField:
    tag: str
    ind1: str = " "
    ind2: str = " "
    subfields: List[SubField] = field(default_factory=list)


@dataclass
class MarcRecord:
    leader: str
    controlfields: List[ControlField] = field(default_factory=list)
    datafields: List[DataField] = field(default_factory=list)


# --- SRU record wrapper ---
@dataclass
class Record:
    recordSchema: str
    recordPacking: str
    recordData: MarcRecord
    recordPosition: int


@dataclass
class EchoedSearchRequest:
    version: str
    query: str
    maximumRecords: int
    recordPacking: str
    recordSchema: str


@dataclass
class SearchRetrieveResponse:
    version: str
    numberOfRecords: int
    records: List[Record] = field(default_factory=list)
    echoedSearchRetrieveRequest: Optional[EchoedSearchRequest] = None


# -----------------------
# Parser
# -----------------------

ZS = "http://www.loc.gov/zing/srw/"
MARC = "http://www.loc.gov/MARC21/slim"
NS = {"zs": ZS, "marc": MARC}


def _text(elem: Optional[ET.Element]) -> str:
    return (elem.text or "") if elem is not None else ""


def _req_text(parent: ET.Element, path: str) -> Optional[str]:
    el = parent.find(path, NS)
    if el is None or el.text is None:
        return None
    return el.text


def parse_marc_record(record_el: ET.Element) -> MarcRecord:
    """
    record_el is the <marc:record> element (default ns MARC in the sample data)
    """
    # leader
    leader_text = _req_text(record_el, "marc:leader") or ""

    # controlfields
    controlfields: List[ControlField] = []
    for cf in record_el.findall("marc:controlfield", NS):
        tag = cf.get("tag", "").strip()
        controlfields.append(ControlField(tag=tag, value=_text(cf)))

    # datafields
    datafields: List[DataField] = []
    for df in record_el.findall("marc:datafield", NS):
        tag = df.get("tag", "").strip()
        ind1 = df.get("ind1") or " "
        ind2 = df.get("ind2") or " "
        subfields: List[SubField] = []
        for sf in df.findall("marc:subfield", NS):
            code = sf.get("code", "")
            subfields.append(SubField(code=code, value=_text(sf)))
        datafields.append(DataField(tag=tag, ind1=ind1, ind2=ind2, subfields=subfields))

    return MarcRecord(
        leader=leader_text, controlfields=controlfields, datafields=datafields
    )


def parse_record(zs_record_el: ET.Element) -> Record:
    recordSchema = _req_text(zs_record_el, "zs:recordSchema") or ""
    recordPacking = _req_text(zs_record_el, "zs:recordPacking") or ""

    # recordData contains a MARC <record>, typically with a default MARC namespace
    recordData_el = zs_record_el.find("zs:recordData", NS)
    if recordData_el is None:
        raise ValueError("Missing zs:recordData")

    marc_record_el = recordData_el.find("marc:record", NS)
    if marc_record_el is None:
        # If the MARC record uses a default ns (xmlns="..."), ElementTree still needs
        # the ns-qualified name. We already searched with the prefix; this covers
        # both default and prefixed cases.
        raise ValueError("Missing MARC21 record inside zs:recordData")

    marc_record = parse_marc_record(marc_record_el)

    recordPosition = int(_req_text(zs_record_el, "zs:recordPosition") or "0")
    return Record(
        recordSchema=recordSchema,
        recordPacking=recordPacking,
        recordData=marc_record,
        recordPosition=recordPosition,
    )


def parse_echoed_request(root: ET.Element) -> Optional[EchoedSearchRequest]:
    el = root.find("zs:echoedSearchRetrieveRequest", NS)
    if el is None:
        return None

    # Be permissive with missing fields
    version = _text(el.find("zs:version", NS))
    query = _text(el.find("zs:query", NS))
    maximumRecords_text = _text(el.find("zs:maximumRecords", NS)) or "0"
    recordPacking = _text(el.find("zs:recordPacking", NS))
    recordSchema = _text(el.find("zs:recordSchema", NS))

    try:
        maximumRecords = int(maximumRecords_text)
    except ValueError:
        maximumRecords = 0

    return EchoedSearchRequest(
        version=version,
        query=query,
        maximumRecords=maximumRecords,
        recordPacking=recordPacking,
        recordSchema=recordSchema,
    )


def parse_search_retrieve_response(
    xml_str: Union[str, bytes],
) -> SearchRetrieveResponse:
    root = ET.fromstring(xml_str)

    # Root is zs:searchRetrieveResponse
    version = _req_text(root, "zs:version") or ""
    numberOfRecords = int(_req_text(root, "zs:numberOfRecords") or "0")

    records_parent = root.find("zs:records", NS)
    records: List[Record] = []
    if records_parent is not None:
        for r in records_parent.findall("zs:record", NS):
            records.append(parse_record(r))

    echoed = parse_echoed_request(root)

    return SearchRetrieveResponse(
        version=version,
        numberOfRecords=numberOfRecords,
        records=records,
        echoedSearchRetrieveRequest=echoed,
    )


# --- Query helpers over MarcRecord ---


def iter_datafields(
    rec: MarcRecord,
    tag: Optional[str] = None,
    ind1: Optional[str] = None,
    ind2: Optional[str] = None,
) -> Iterable[DataField]:
    """Yield datafields, optionally filtered by tag/indicators."""
    for df in rec.datafields:
        if tag is not None and df.tag != tag:
            continue
        if ind1 is not None and df.ind1 != ind1:
            continue
        if ind2 is not None and df.ind2 != ind2:
            continue
        yield df


def subfield_values(
    rec: MarcRecord,
    tag: str,
    code: str,
    *,
    ind1: Optional[str] = None,
    ind2: Optional[str] = None,
) -> List[str]:
    """All values for subfield `code` in every `tag` field (respecting indicators)."""
    out: List[str] = []
    for df in iter_datafields(rec, tag, ind1, ind2):
        out.extend(sf.value for sf in df.subfields if sf.code == code)
    return out


def first_subfield_value(
    rec: MarcRecord,
    tag: str,
    code: str,
    *,
    ind1: Optional[str] = None,
    ind2: Optional[str] = None,
    default: Optional[str] = None,
) -> Optional[str]:
    """First value for subfield `code` in `tag` (respecting indicators)."""
    for df in iter_datafields(rec, tag, ind1, ind2):
        for sf in df.subfields:
            if sf.code == code:
                return sf.value
    return default


def find_datafields_with_subfields(
    rec: MarcRecord,
    tag: str,
    *,
    where_all: Optional[Dict[str, str]] = None,
    where_any: Optional[Dict[str, str]] = None,
    casefold: bool = False,
    ind1: Optional[str] = None,
    ind2: Optional[str] = None,
) -> List[DataField]:
    """
    Return datafields of `tag` whose subfields match constraints:
      - where_all: every (code -> exact value) must be present
      - where_any: at least one (code -> exact value) present
    Set `casefold=True` for case-insensitive comparison.
    """
    where_all = where_all or {}
    where_any = where_any or {}
    matched: List[DataField] = []

    for df in iter_datafields(rec, tag, ind1, ind2):
        # Map code -> list of values (with optional casefold applied)
        vals: Dict[str, List[str]] = {}
        for sf in df.subfields:
            v = sf.value.casefold() if casefold else sf.value
            vals.setdefault(sf.code, []).append(v)

        ok = True
        for c, v in where_all.items():
            vv = v.casefold() if casefold else v
            if c not in vals or vv not in vals[c]:
                ok = False
                break

        if ok and where_any:
            any_ok = any(
                (c in vals) and ((v.casefold() if casefold else v) in vals[c])
                for c, v in where_any.items()
            )
            if not any_ok:
                ok = False

        if ok:
            matched.append(df)

    return matched


def controlfield_value(
    rec: MarcRecord, tag: str, default: Optional[str] = None
) -> Optional[str]:
    """Get the first controlfield value by tag (e.g., '001', '005')."""
    for cf in rec.controlfields:
        if cf.tag == tag:
            return cf.value
    return default


def datafields_value(
    data: List[DataField], code: str, default: Optional[str] = None
) -> Optional[str]:
    """Get the first value for a specific subfield code in a list of datafields."""
    for df in data:
        for sf in df.subfields:
            if sf.code == code:
                return sf.value
    return default


def datafield_value(
    df: DataField, code: str, default: Optional[str] = None
) -> Optional[str]:
    """Get the first value for a specific subfield code in a datafield."""
    for sf in df.subfields:
        if sf.code == code:
            return sf.value
    return default


def _smart_join_title(a: str, b: Optional[str]) -> str:
    """
    Join 245 $a and $b with MARC-style punctuation.
    If $b is present, join with ' : ' unless either side already supplies punctuation.
    """
    a = a.strip()
    if not b:
        return a
    b = b.strip()
    if a.endswith((":", ";", "/")) or b.startswith((":", ";", "/")):
        return f"{a} {b}"
    return f"{a} : {b}"


def subfield_values_from_fields(
    fields: Iterable[DataField],
    code: str,
) -> List[str]:
    """All subfield values with given `code` across a list of DataField."""
    return [sf.value for df in fields for sf in df.subfields if sf.code == code]


def first_subfield_value_from_fields(
    fields: Iterable[DataField],
    code: str,
    default: Optional[str] = None,
) -> Optional[str]:
    """First subfield value with given `code` across a list of DataField."""
    for df in fields:
        for sf in df.subfields:
            if sf.code == code:
                return sf.value
    return default


def subfield_value_pairs_from_fields(
    fields: Iterable[DataField],
    code: str,
) -> List[Tuple[DataField, str]]:
    """
    Return (DataField, value) pairs for all subfields with `code`.
    Useful if you need to know which field a value came from.
    """
    out: List[Tuple[DataField, str]] = []
    for df in fields:
        for sf in df.subfields:
            if sf.code == code:
                out.append((df, sf.value))
    return out


def book_from_marc(rec: MarcRecord) -> BookData:
    # PPN from controlfield 001
    ppn = controlfield_value(rec, "001")

    # Title = 245 $a + 245 $b (if present)
    t_a = first_subfield_value(rec, "245", "a")
    t_b = first_subfield_value(rec, "245", "b")
    title = _smart_join_title(t_a, t_b) if t_a else None

    # Signature = 924 where $9 == "Frei 129" → take that field's $g
    frei_fields = find_datafields_with_subfields(
        rec, "924", where_all={"9": "Frei 129"}
    )
    signature = first_subfield_value_from_fields(frei_fields, "g")

    # Year = 264 $c (prefer ind2="1" publication; fallback to any 264)
    year = first_subfield_value(rec, "264", "c", ind2="1") or first_subfield_value(
        rec, "264", "c"
    )
    isbn = subfield_values(rec, "020", "a")
    mediatype = first_subfield_value(rec, "338", "a")
    lang = subfield_values(rec, "041", "a")
    authors = subfield_values(rec, "700", "a")
    author = None
    if authors:
        author = "; ".join(authors)

    return BookData(
        ppn=ppn,
        title=title,
        signature=signature,
        edition=first_subfield_value(rec, "250", "a") or "",
        year=year,
        pages=first_subfield_value(rec, "300", "a") or "",
        publisher=first_subfield_value(rec, "264", "b") or "",
        isbn=isbn,
        language=lang,
        link="",
        author=author,
        media_type=mediatype,
    )
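
# Illustrative pipeline from raw SRU XML to BookData:
#
#     sr = parse_search_retrieve_response(xml_bytes)
#     books = [book_from_marc(r.recordData) for r in sr.records]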


class SWBData(Enum):
    URL = "https://sru.k10plus.de/opac-de-627!rec=1?version=1.1&operation=searchRetrieve&query={}&maximumRecords=100&recordSchema=marcxml"
    ARGSCHEMA = "pica."
    NAME = "SWB"


class DNBData(Enum):
    URL = "https://services.dnb.de/sru/dnb?version=1.1&operation=searchRetrieve&query={}&maximumRecords=100&recordSchema=MARC21-xml"
    ARGSCHEMA = ""
    NAME = "DNB"


class SRUSite(Enum):
    SWB = SWBData
    DNB = DNBData


RVK_ALLOWED = r"[A-Z0-9.\-\/]"  # conservative char set typically seen in RVK notations


def find_newer_edition(
    swb_result: BookData, dnb_result: List[BookData]
) -> Optional[List[BookData]]:
    """
    New edition if:
      - year > swb.year OR
      - edition_number > swb.edition_number

    Additional guards & preferences:
      - If both have signatures and they differ, skip (not the same work).
      - For duplicates (same ppn): keep the one that has a signature, and
        prefer a signature that matches swb_result.signature.
      - If multiple remain: keep the single 'latest' by (year desc,
        edition_number desc, best-signature-match desc, has-signature desc).
    """

    def norm_sig(s: Optional[str]) -> str:
        if not s:
            return ""
        # normalize: lowercase, collapse whitespace, keep alnum + a few separators
        s = s.lower()
        s = re.sub(r"\s+", " ", s).strip()
        # remove obvious noise; adjust if the signature format differs
        s = re.sub(r"[^a-z0-9\-_/\. ]+", "", s)
        return s

    def has_sig(b: BookData) -> bool:
        return bool(getattr(b, "signature", None))

    def sig_matches_swb(b: BookData) -> bool:
        if not has_sig(b) or not has_sig(swb_result):
            return False
        return norm_sig(b.signature) == norm_sig(swb_result.signature)

    def strictly_newer(b: BookData) -> bool:
        by_year = (
            b.year is not None
            and swb_result.year is not None
            and b.year > swb_result.year
        )
        by_edition = (
            b.edition_number is not None
            and swb_result.edition_number is not None
            and b.edition_number > swb_result.edition_number
        )
        return by_year or by_edition

    swb_sig_norm = norm_sig(getattr(swb_result, "signature", None))

    # 1) Filter to same-work AND newer
    candidates: List[BookData] = []
    for b in dnb_result:
        # Skip if both signatures exist and don't match (different work)
        b_sig = getattr(b, "signature", None)
        if b_sig and swb_result.signature:
            if norm_sig(b_sig) != swb_sig_norm:
                continue  # not the same work

        # Keep only if newer by the rules above
        if strictly_newer(b):
            candidates.append(b)

    if not candidates:
        return None

    # 2) Dedupe by PPN, preferring signature (and matching signature if possible)
    by_ppn: dict[Optional[str], BookData] = {}
    for b in candidates:
        key = getattr(b, "ppn", None)
        prev = by_ppn.get(key)
        if prev is None:
            by_ppn[key] = b
            continue

        # Compute preference score for both
        def ppn_pref_score(x: BookData) -> tuple[int, int]:
            # (signature matches swb, has signature)
            return (1 if sig_matches_swb(x) else 0, 1 if has_sig(x) else 0)

        if ppn_pref_score(b) > ppn_pref_score(prev):
            by_ppn[key] = b

    deduped = list(by_ppn.values())
    if not deduped:
        return None

    # 3) If multiple remain, keep only the latest one.
    #    Order: year desc, edition_number desc, signature-match desc, has-signature desc
    def sort_key(b: BookData):
        year = b.year if b.year is not None else -1
        ed = b.edition_number if b.edition_number is not None else -1
        sig_match = 1 if sig_matches_swb(b) else 0
        sig_present = 1 if has_sig(b) else 0
        return (year, ed, sig_match, sig_present)

    best = max(deduped, key=sort_key)
    return [best] if best else None
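
# Illustrative check for a newer edition of a SWB hit (variable names are assumptions):
#
#     newer = find_newer_edition(swb_books[0], dnb_books)
#     if newer:
#         log.info("Newer edition available: {}", newer[0].title)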


class Api:
    def __init__(self, site: str, url: str, prefix: str):
        self.site = site
        self.url = url
        self.prefix = prefix
        # Reuse TCP connections across requests for better performance
        self._session = requests.Session()
        # Slightly larger connection pool for concurrent calls
        adapter = HTTPAdapter(pool_connections=10, pool_maxsize=20)
        self._session.mount("http://", adapter)
        self._session.mount("https://", adapter)

    def close(self):
        try:
            self._session.close()
        except Exception:
            pass

    def __del__(self):
        # Best-effort cleanup
        self.close()

    def get(self, query_args: Iterable[str]) -> List[Record]:
        # DNB does not understand pica.* query arguments, so drop them
        if self.site == "DNB":
            args = [arg for arg in query_args if not arg.startswith("pica.")]
            if args == []:
                raise ValueError("DNB queries must include at least one search term")
            query_args = args
        query = "+and+".join(query_args)
        query = query.replace(" ", "%20").replace("&", "%26")
        # insert the query into the site's URL template
        url = self.url.format(query)

        log.debug(url)
        headers = {
            "User-Agent": f"{self.site} SRU Client, <alexander.kirchner@ph-freiburg.de>",
            "Accept": "application/xml",
            "Accept-Charset": "latin1,utf-8;q=0.7,*;q=0.3",
        }
        # Use the persistent session and set timeouts to avoid hanging
        resp = self._session.get(url, headers=headers, timeout=(3.05, 60))
        if resp.status_code != 200:
            raise Exception(
                f"Error fetching data from {self.site}: {resp.status_code}"
            )
        # Parse using raw bytes (original behavior) to preserve encoding edge cases
        sr = parse_search_retrieve_response(resp.content)
        return sr.records

    def getBooks(self, query_args: Iterable[str]) -> List[BookData]:
        records: List[Record] = self.get(query_args)
        # Avoid printing on hot paths; rely on the logger if needed
        log.debug(f"{self.site} found {len(records)} records for args={query_args}")
        books: List[BookData] = []
        # extract the title from query_args if present
        title = None
        for arg in query_args:
            if arg.startswith("pica.tit="):
                title = arg.split("=")[1]
                break
        for rec in records:
            book = book_from_marc(rec.recordData)
            books.append(book)
        if title:
            books = [
                b
                for b in books
                if b.title and b.title.lower().startswith(title.lower())
            ]
        return books

    def getLinkForBook(self, book: BookData) -> str:
        # Not implemented: depends on the catalog front-end; return an empty string for now
        return ""


class SWB(Api):
    def __init__(self):
        self.site = SWBData.NAME.value
        self.url = SWBData.URL.value
        self.prefix = SWBData.ARGSCHEMA.value
        super().__init__(self.site, self.url, self.prefix)
35
src/services/webadis.py
Normal file
@@ -0,0 +1,35 @@
from playwright.sync_api import sync_playwright


def get_book_medianr(
    signature: str, semesterapparat_nr: int, auth: tuple
) -> str | None:
    with sync_playwright() as playwright:
        browser = playwright.chromium.launch(headless=True)
        context = browser.new_context()
        page = context.new_page()
        page.goto(
            "https://bsz.ibs-bw.de:22998/aDISWeb/app?service=direct/0/Home/$DirectLink&sp=SDAP42"
        )
        page.get_by_role("textbox", name="Benutzer").fill(auth[0])
        page.get_by_role("textbox", name="Benutzer").press("Tab")
        page.get_by_role("textbox", name="Kennwort").fill(auth[1])
        page.get_by_role("textbox", name="Kennwort").press("Enter")
        page.get_by_role("button", name="Katalog").click()
        page.get_by_role("textbox", name="Signatur").click()
        page.get_by_role("textbox", name="Signatur").fill(signature)
        page.get_by_role("textbox", name="Signatur").press("Enter")
        book_list = page.locator("iframe").content_frame.get_by_role(
            "cell", name="Bibliothek der Pädagogischen"
        )
        # this always finds one result cell; split the resulting text into
        # entries, which start with "* "
        book_entries = book_list.inner_text().split("\n")
        books = []
        for entry in book_entries:
            if entry.startswith("* "):
                books.append(entry)
        medianr = None
        for book in books:
            if f"Semesterapparat: {semesterapparat_nr}" in book:
                medianr = book.split("* ")[1].split(":")[0]
                break

        # close the browser before returning so resources are released
        context.close()
        browser.close()
        return medianr
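
# Usage sketch (credentials and signature are placeholders):
#
#     medianr = get_book_medianr("ABC 123", 42, ("user", "password"))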
314
src/services/webrequest.py
Normal file
@@ -0,0 +1,314 @@
from enum import Enum
from typing import Any, Optional, Union

import requests
from bs4 import BeautifulSoup

# sleep_and_retry blocks until the rate-limit window allows another call
from ratelimit import limits, sleep_and_retry

from src.core.models import BookData
from src.shared.logging import log
from src.transformers import ARRAYData, BibTeXData, COinSData, RDSData, RISData
from src.transformers.transformers import RDS_AVAIL_DATA, RDS_GENERIC_DATA


API_URL = "https://rds.ibs-bw.de/phfreiburg/opac/RDSIndexrecord/{}/"
PPN_URL = "https://rds.ibs-bw.de/phfreiburg/opac/RDSIndex/Search?type0%5B%5D=allfields&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=au&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=ti&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=ct&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=isn&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=ta&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=co&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=py&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=pp&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=pu&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=si&lookfor0%5B%5D={}&join=AND&bool0%5B%5D=AND&type0%5B%5D=zr&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=cc&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND"
BASE = "https://rds.ibs-bw.de"

# RDS field names used in the parsed index records
TITLE = "RDS_TITLE"
SIGNATURE = "RDS_SIGNATURE"
EDITION = "RDS_EDITION"
ISBN = "RDS_ISBN"
AUTHOR = "RDS_PERSON"

HEADERS = {
    "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 \
        (KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36",
    "Accept-Language": "en-US, en;q=0.5",
}
RATE_LIMIT = 20  # calls
RATE_PERIOD = 30  # seconds

class TransformerType(Enum):
    ARRAY = "ARRAY"
    COinS = "COinS"
    BibTeX = "BibTeX"
    RIS = "RIS"
    RDS = "RDS"

class WebRequest:
    def __init__(self) -> None:
        """Request data from the web and format it depending on the mode."""
        self.apparat = None
        self.use_any = False  # accept any book that matches the search term
        self.signature = None
        self.ppn = None
        self.data = None
        self.timeout = 5
        log.info("Initialized WebRequest")

    @property
    def use_any_book(self):
        """Accept any book that matches the search term."""
        self.use_any = True
        log.info("Using any book")
        return self

    def set_apparat(self, apparat: int) -> "WebRequest":
        """Store the apparat number, zero-padded to two digits."""
        self.apparat = apparat
        if int(self.apparat) < 10:
            self.apparat = f"0{self.apparat}"
        log.info(f"Set apparat to {self.apparat}")
        return self

    def get_ppn(self, signature: str) -> "WebRequest":
        """Normalize a signature or DOI into a search term."""
        self.signature = signature
        if "+" in signature:
            signature = signature.replace("+", "%2B")  # URL-encode the plus sign
        if "doi.org" in signature:
            signature = signature.split("/")[-1]  # keep only the DOI suffix
        self.ppn = signature
        return self
    @sleep_and_retry
    @limits(calls=RATE_LIMIT, period=RATE_PERIOD)
    def search_book(self, searchterm: str) -> str:
        """Run a rate-limited catalogue search for the given term."""
        response = requests.get(PPN_URL.format(searchterm), timeout=self.timeout)
        return response.text

    @sleep_and_retry
    @limits(calls=RATE_LIMIT, period=RATE_PERIOD)
    def search_ppn(self, ppn: str) -> str:
        """Fetch the RDS index record for a PPN, rate-limited."""
        response = requests.get(API_URL.format(ppn), timeout=self.timeout)
        return response.text

    def get_book_links(self, searchterm: str) -> list[str]:
        """Collect the full-record links from a search result page."""
        response: str = self.search_book(searchterm)  # type:ignore
        soup = BeautifulSoup(response, "html.parser")
        links = soup.find_all("a", class_="title getFull")
        return [BASE + link["href"] for link in links]

    @sleep_and_retry
    @limits(calls=RATE_LIMIT, period=RATE_PERIOD)
    def search(self, link: str) -> Optional[str]:
        """Fetch a single record page, returning None on request errors."""
        try:
            response = requests.get(link, timeout=self.timeout)
            return response.text
        except requests.exceptions.RequestException as e:
            log.error(f"Request failed: {e}")
            return None
    def _pre_texts(self, soup: BeautifulSoup) -> list[str]:
        """Collect the stripped text of every <pre> tag on the page."""
        return [tag.text.strip() for tag in soup.find_all("pre")]

    def get_data(self) -> Optional[list[str]]:
        """Walk the search results and return the <pre> data of the matching copy."""
        links = self.get_book_links(self.ppn)
        log.debug(f"Links: {links}")
        return_data: list[str] = []
        for link in links:
            result: str = self.search(link)  # type:ignore
            soup = BeautifulSoup(result, "html.parser")
            locations = soup.find_all("div", class_="col-xs-12 rds-dl RDS_LOCATION")
            if not locations:
                continue
            for location in locations:
                if "1. OG Semesterapparat" in location.text:
                    log.success("Found Semesterapparat, adding entry")
                    return_data = self._pre_texts(soup)
                    if not return_data:
                        log.error("No <pre> tag found")
                    return return_data
                item_location = location.find(
                    "div", class_="col-xs-12 col-md-7 col-lg-8 rds-dl-panel"
                ).text.strip()
                log.debug(f"Item location: {item_location}")
                if self.use_any:
                    pre_texts = self._pre_texts(soup)
                    if not pre_texts:
                        log.error("No <pre> tag found")
                        raise ValueError("No <pre> tag found")
                    return_data.extend(pre_texts)
                    return return_data
                elif f"Semesterapparat-{self.apparat}" in item_location:
                    return_data = self._pre_texts(soup)
                    if not return_data:
                        log.error("No <pre> tag found")
                    return return_data
                else:
                    log.error(
                        f"Signature {self.signature} not found in {item_location}"
                    )
        return return_data
    def get_data_elsa(self) -> Optional[list[str]]:
        """Like get_data, but take the first record that has any location entry."""
        links = self.get_book_links(self.ppn)
        return_data: list[str] = []
        for link in links:
            result = self.search(link)
            soup = BeautifulSoup(result, "html.parser")
            locations = soup.find_all("div", class_="col-xs-12 rds-dl RDS_LOCATION")
            if locations:
                return_data = self._pre_texts(soup)
                if not return_data:
                    log.error("No <pre> tag found")
                return return_data
        return return_data


class BibTextTransformer:
    """Transforms data from the web into a bibliographic format.

    Valid modes are ARRAY, COinS, BibTeX, RIS and RDS.

    Raises:
        ValueError: Raised if mode is not in valid_modes.
    """

    valid_modes = [
        TransformerType.ARRAY,
        TransformerType.COinS,
        TransformerType.BibTeX,
        TransformerType.RIS,
        TransformerType.RDS,
    ]

    def __init__(self, mode: TransformerType = TransformerType.ARRAY) -> None:
        # Validate before touching mode.value so a bad argument fails cleanly.
        if mode not in self.valid_modes:
            log.error(f"Mode {mode} not valid")
            raise ValueError(f"Mode {mode} not valid")
        self.mode = mode.value
        self.field = None
        self.signature = None
        self.data = None

    def use_signature(self, signature: str) -> "BibTextTransformer":
        """Use the exact signature to search for the book."""
        self.signature = signature
        return self
def get_data(self, data: Optional[list[str]] = None) -> "BibTextTransformer":
|
||||
RIS_IDENT = "TY -"
|
||||
ARRAY_IDENT = "[kid]"
|
||||
COinS_IDENT = "ctx_ver"
|
||||
BIBTEX_IDENT = "@book"
|
||||
RDS_IDENT = "RDS ---------------------------------- "
|
||||
|
||||
if data is None:
|
||||
self.data = None
|
||||
return self
|
||||
|
||||
if self.mode == "RIS":
|
||||
for line in data:
|
||||
if RIS_IDENT in line:
|
||||
self.data = line
|
||||
elif self.mode == "ARRAY":
|
||||
for line in data:
|
||||
if ARRAY_IDENT in line:
|
||||
self.data = line
|
||||
elif self.mode == "COinS":
|
||||
for line in data:
|
||||
if COinS_IDENT in line:
|
||||
self.data = line
|
||||
elif self.mode == "BibTeX":
|
||||
for line in data:
|
||||
if BIBTEX_IDENT in line:
|
||||
self.data = line
|
||||
elif self.mode == "RDS":
|
||||
for line in data:
|
||||
if RDS_IDENT in line:
|
||||
self.data = line
|
||||
return self
|
||||
|
||||
    def return_data(
        self, option: Any = None
    ) -> Union[
        Optional[BookData],
        Optional[RDS_GENERIC_DATA],
        Optional[RDS_AVAIL_DATA],
        dict[str, Union[RDS_AVAIL_DATA, RDS_GENERIC_DATA]],
    ]:
        """Return the transformed data to the caller.

        Args:
            option (str, optional): Selector for RDS, which has two file types.
                Use "rds_availability" or "rds_data"; anything else returns a
                dict with both responses. Defaults to None.

        Returns:
            BookData: a dataclass containing data about the book, or None if
            no data was collected.
        """
        if self.data is None:
            return None
        match self.mode:
            case "ARRAY":
                return ARRAYData(self.signature).transform(self.data)
            case "COinS":
                return COinSData().transform(self.data)
            case "BibTeX":
                return BibTeXData().transform(self.data)
            case "RIS":
                return RISData().transform(self.data)
            case "RDS":
                return RDSData().transform(self.data).return_data(option)
            case _:
                return None


def cover(isbn):
    """Fetch a medium-size cover image for the given ISBN from buchhandel.de."""
    cover_url = f"https://www.buchhandel.de/cover/{isbn}/{isbn}-cover-m.jpg"
    data = requests.get(cover_url, stream=True)
    return data.content


def get_content(soup, css_class):
    """Return the stripped text of the first <div> with the given CSS class."""
    return soup.find("div", class_=css_class).text.strip()


if __name__ == "__main__":
    signature = "CU 8500 K64"
    data = WebRequest().set_apparat(71).get_ppn(signature).get_data()
    bib = BibTextTransformer(TransformerType.ARRAY).get_data(data).return_data()
    log.debug(bib)
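The two decorators used above come from the ratelimit package: limits raises once the call budget is spent, and sleep_and_retry turns that into a blocking wait. A self-contained sketch of the same pattern (the 2-calls-per-5-seconds budget is illustrative only):

    # standalone demo of the sleep_and_retry + limits combination
    import requests
    from ratelimit import limits, sleep_and_retry

    @sleep_and_retry            # block until the period allows another call
    @limits(calls=2, period=5)  # at most 2 calls per 5 seconds
    def fetch(url: str) -> int:
        return requests.get(url, timeout=5).status_code

    for _ in range(4):          # calls 3 and 4 sleep instead of failing
        print(fetch("https://example.org"))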
340
src/services/zotero.py
Normal file
340
src/services/zotero.py
Normal file
@@ -0,0 +1,340 @@
from dataclasses import dataclass
from typing import Optional

from pyzotero import zotero

from src import settings
from src.services.webrequest import BibTextTransformer, WebRequest
from src.shared.logging import log


@dataclass
class Creator:
    firstName: Optional[str] = None
    lastName: Optional[str] = None
    creatorType: str = "author"

    def from_dict(self, data: dict) -> None:
        for key, value in data.items():
            setattr(self, key, value)

    def from_string(self, data: str) -> "Creator":
        # "Lastname, Firstname" strings are split into the two name fields;
        # callers use creator.__dict__ to get a JSON-serializable dict.
        if "," in data:
            self.firstName = data.split(",")[1]
            self.lastName = data.split(",")[0]
        return self

@dataclass
class Book:
    itemType: str = "book"
    creators: Optional[list[Creator]] = None
    tags: Optional[list] = None
    collections: Optional[list] = None
    relations: Optional[dict] = None
    title: Optional[str] = None
    abstractNote: Optional[str] = None
    series: Optional[str] = None
    seriesNumber: Optional[str] = None
    volume: Optional[str] = None
    numberOfVolumes: Optional[str] = None
    edition: Optional[str] = None
    place: Optional[str] = None
    publisher: Optional[str] = None
    date: Optional[str] = None
    numPages: Optional[str] = None
    language: Optional[str] = None
    ISBN: Optional[str] = None
    shortTitle: Optional[str] = None
    url: Optional[str] = None
    accessDate: Optional[str] = None
    archive: Optional[str] = None
    archiveLocation: Optional[str] = None
    libraryCatalog: Optional[str] = None
    callNumber: Optional[str] = None
    rights: Optional[str] = None
    extra: Optional[str] = None

    def to_dict(self) -> dict:
        """Return only the fields that are set, as Zotero expects."""
        return {key: value for key, value in self.__dict__.items() if value}

@dataclass
class BookSection:
    itemType: str = "bookSection"
    title: Optional[str] = None
    creators: Optional[list[Creator]] = None
    abstractNote: Optional[str] = None
    bookTitle: Optional[str] = None
    series: Optional[str] = None
    seriesNumber: Optional[str] = None
    volume: Optional[str] = None
    numberOfVolumes: Optional[str] = None
    edition: Optional[str] = None
    place: Optional[str] = None
    publisher: Optional[str] = None
    date: Optional[str] = None
    pages: Optional[str] = None
    language: Optional[str] = None
    ISBN: Optional[str] = None
    shortTitle: Optional[str] = None
    url: Optional[str] = None
    accessDate: Optional[str] = None
    archive: Optional[str] = None
    archiveLocation: Optional[str] = None
    libraryCatalog: Optional[str] = None
    callNumber: Optional[str] = None
    rights: Optional[str] = None
    extra: Optional[str] = None
    tags: Optional[list] = None
    collections: Optional[list] = None
    relations: Optional[dict] = None

    def to_dict(self) -> dict:
        """Return only the fields that are set, as Zotero expects."""
        return {key: value for key, value in self.__dict__.items() if value}

    def assign(self, book) -> None:
        """Copy all matching attributes over from a Book instance."""
        for key, value in book.__dict__.items():
            if key in self.__dict__:
                try:
                    setattr(self, key, value)
                except AttributeError:
                    pass

@dataclass
class JournalArticle:
    itemType: str = "journalArticle"
    title: Optional[str] = None
    creators: Optional[list[Creator]] = None
    abstractNote: Optional[str] = None
    publicationTitle: Optional[str] = None
    volume: Optional[str] = None
    issue: Optional[str] = None
    pages: Optional[str] = None
    date: Optional[str] = None
    series: Optional[str] = None
    seriesTitle: Optional[str] = None
    seriesText: Optional[str] = None
    journalAbbreviation: Optional[str] = None
    language: Optional[str] = None
    DOI: Optional[str] = None
    ISSN: Optional[str] = None
    shortTitle: Optional[str] = None
    url: Optional[str] = None
    accessDate: Optional[str] = None
    archive: Optional[str] = None
    archiveLocation: Optional[str] = None
    libraryCatalog: Optional[str] = None
    callNumber: Optional[str] = None
    rights: Optional[str] = None
    extra: Optional[str] = None
    tags: Optional[list] = None
    collections: Optional[list] = None
    relations: Optional[dict] = None

    def to_dict(self) -> dict:
        """Return only the fields that are set, as Zotero expects."""
        return {key: value for key, value in self.__dict__.items() if value}

    def assign(self, book: dict) -> None:
        """Copy all matching attributes over from a Book instance."""
        for key, value in book.__dict__.items():
            if key in self.__dict__:
                try:
                    setattr(self, key, value)
                except AttributeError:
                    pass

class ZoteroController:
    zoterocfg = settings.zotero

    def __init__(self):
        # Without a configured library there is nothing to connect to.
        if self.zoterocfg.library_id is None:
            return
        self.zot = zotero.Zotero(  # type: ignore
            self.zoterocfg.library_id,
            self.zoterocfg.library_type,
            self.zoterocfg.api_key,
        )

    def get_books(self) -> list:
        """Return all top-level items of type "book" from the library."""
        ret = []
        items = self.zot.top()  # type: ignore
        for item in items:
            if item["data"]["itemType"] == "book":
                ret.append(item)
        return ret
    def __get_data(self, isbn):
        """Fetch catalogue data for an ISBN/signature and return the transformed record."""
        web = WebRequest()
        web.get_ppn(isbn)
        data = web.get_data_elsa()
        bib = BibTextTransformer()
        bib.get_data(data)
        book = bib.return_data()
        return book

    # print(zot.item_template("bookSection")) shows the expected field layout
    def createBook(self, isbn) -> Book:
        """Build a Zotero Book item from catalogue data for the given ISBN/signature."""
        book = self.__get_data(isbn)

        bookdata = Book()
        bookdata.title = book.title.split(":")[0]
        bookdata.ISBN = book.isbn
        bookdata.language = book.language
        bookdata.date = book.year
        bookdata.publisher = book.publisher
        bookdata.url = book.link
        bookdata.edition = book.edition
        bookdata.place = book.place
        bookdata.numPages = book.pages
        # Authors arrive as "Last, First; Last, First"; keep only complete names.
        authors = [
            Creator().from_string(author).__dict__ for author in book.author.split(";")
        ]
        authors = [author for author in authors if author["lastName"] is not None]
        bookdata.creators = authors
        return bookdata
    def createItem(self, item) -> Optional[str]:
        """Create an item in Zotero and return its key, or None on failure."""
        resp = self.zot.create_items([item])  # type: ignore
        if "successful" in resp.keys():
            log.debug(resp)
            return resp["successful"]["0"]["key"]
        return None

    def deleteItem(self, key) -> None:
        """Delete the item with the given key from the library."""
        items = self.zot.items()
        for item in items:
            if item["key"] == key:
                self.zot.delete_item(item)  # type: ignore
                break
    def createHGSection(self, book: Book, data: dict) -> Optional[str]:
        """Create a section of an edited volume: the book's creators become editors."""
        log.debug(book)
        chapter = BookSection()
        chapter.assign(book)
        chapter.pages = data["pages"]
        chapter.itemType = "bookSection"
        chapter.ISBN = ""
        chapter.url = ""
        chapter.title = data["chapter_title"]
        creators = chapter.creators
        for creator in creators:
            creator["creatorType"] = "editor"
        chapter.creators = creators
        authors = [
            Creator().from_string(author).__dict__
            for author in data["section_author"].split(";")
        ]
        chapter.creators += authors

        log.debug(chapter.to_dict())
        return self.createItem(chapter.to_dict())

    def createBookSection(self, book: Book, data: dict) -> Optional[str]:
        """Create a plain book section without its own title or authors."""
        chapter = BookSection()
        chapter.assign(book)
        chapter.pages = data["pages"]
        chapter.itemType = "bookSection"
        chapter.ISBN = ""
        chapter.url = ""
        chapter.title = ""
        return self.createItem(chapter.to_dict())
    def createJournalArticle(self, journal, article) -> Optional[str]:
        """Create a journal article item from a journal record and an article dict."""
        journalarticle = JournalArticle()
        journalarticle.assign(journal)
        journalarticle.itemType = "journalArticle"
        journalarticle.creators = [
            Creator().from_string(author).__dict__
            for author in article["section_author"].split(";")
        ]
        journalarticle.date = article["year"]
        journalarticle.title = article["chapter_title"]
        journalarticle.publicationTitle = article["work_title"].split(":")[0].strip()
        journalarticle.pages = article["pages"]
        journalarticle.ISSN = article["isbn"]
        journalarticle.issue = article["issue"]
        journalarticle.url = article["isbn"]

        return self.createItem(journalarticle.to_dict())

    def get_citation(self, item) -> str:
        """Fetch a formatted citation for an item and strip the HTML markup."""
        title = self.zot.item(  # type: ignore
            item,
            content="bib",
            style="deutsche-gesellschaft-fur-psychologie",
        )[0]
        title = (
            title.replace("<i>", "")
            .replace("</i>", "")
            .replace('<div class="csl-entry">', "")
            .replace("</div>", "")
            .replace("&amp;", "&")
        )
        return title

if __name__ == "__main__":
|
||||
zot = ZoteroController()
|
||||
book = zot.createBook("DV 3000 D649 (4)")
|
||||
row = "Döbert, Hans & Hörner, Wolfgang & Kopp, Bortho von & Reuter, Lutz R."
|
||||
zot.createBookSection()
|
||||
|
||||
# book = Book()
|
||||
# # # book.
|
||||
# ISBN = "9783801718718"
|
||||
# book = createBook(isbn=ISBN)
|
||||
# chapter = BookSection()
|
||||
# chapter.title = "Geistige Behinderung"
|
||||
# chapter.bookTitle = book.title
|
||||
# chapter.pages = "511 - 538"
|
||||
# chapter.publisher = book.publisher
|
||||
# authors = [
|
||||
# Creator("Jennifer M.", "Phillips").__dict__,
|
||||
# Creator("Hower", "Kwon").__dict__,
|
||||
# Creator("Carl", "Feinstein").__dict__,
|
||||
# Creator("Inco", "Spintczok von Brisinski").__dict__,
|
||||
# ]
|
||||
# publishers = book.author
|
||||
# if isinstance(publishers, str):
|
||||
# publishers = [publishers]
|
||||
# for publisher in publishers:
|
||||
# # #print(publisher)
|
||||
# creator = Creator().from_string(publisher)
|
||||
# creator.creatorType = "editor"
|
||||
# authors.append(creator.__dict__)
|
||||
|
||||
# chapter.creators = authors
|
||||
# chapter.publisher = book.publisher
|
||||
# # #print(chapter.to_dict())
|
||||
# createBookSection(chapter.to_dict())
|
||||
# get_citation("9ZXH8DDE")
|
||||
# # # #print()
|
||||
# # #print(get_books())
|
||||
# # #print(zot.item_creator_types("bookSection"))
|
||||
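A short sketch of how the controller methods above chain together (the call number is the one from the __main__ block; everything else is illustrative):

    # hypothetical end-to-end use of ZoteroController
    from src.services.zotero import ZoteroController

    zot = ZoteroController()
    book = zot.createBook("DV 3000 D649 (4)")  # build a Book from catalogue data
    key = zot.createItem(book.to_dict())       # push it to Zotero, get the item key
    if key:
        print(zot.get_citation(key))           # formatted, HTML-stripped citation
        zot.deleteItem(key)                    # remove the test item again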
6
src/shared/__init__.py
Normal file
6
src/shared/__init__.py
Normal file
@@ -0,0 +1,6 @@
"""Shared utilities and cross-cutting concerns."""

from .logging import log
from .config import Settings, load_config

__all__ = ["log", "Settings", "load_config"]
66
src/shared/config.py
Normal file
66
src/shared/config.py
Normal file
@@ -0,0 +1,66 @@
"""Application configuration and settings."""

from dataclasses import dataclass, field
from pathlib import Path
from typing import Any

import yaml

from src.shared.logging import log


@dataclass
class Settings:
    """Settings for the application."""

    save_path: str
    database_name: str
    database_path: str
    bib_id: str = ""
    default_apps: bool = True
    custom_applications: list[dict[str, Any]] = field(default_factory=list)

    def save_settings(self, config_path: str | Path = "config.yaml") -> None:
        """Save the settings to the config file.

        Args:
            config_path: Path to the configuration file
        """
        try:
            with open(config_path, "w") as f:
                yaml.dump(self.__dict__, f)
            log.info(f"Settings saved to {config_path}")
        except Exception as e:
            log.error(f"Failed to save settings: {e}")
            raise

    @classmethod
    def load_settings(cls, config_path: str | Path = "config.yaml") -> dict[str, Any]:
        """Load the settings from the config file.

        Args:
            config_path: Path to the configuration file

        Returns:
            Dictionary containing the loaded settings
        """
        try:
            with open(config_path, "r") as f:
                data = yaml.safe_load(f)
            log.info(f"Settings loaded from {config_path}")
            return data
        except Exception as e:
            log.error(f"Failed to load settings: {e}")
            raise


def load_config(config_path: str | Path = "config.yaml") -> dict[str, Any]:
    """Convenience function to load configuration.

    Args:
        config_path: Path to the configuration file

    Returns:
        Dictionary containing the loaded settings
    """
    return Settings.load_settings(config_path)
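A round-trip sketch of the Settings API above (the paths and file name are made up for illustration):

    # hypothetical save/load round trip through Settings
    from src.shared.config import Settings, load_config

    cfg = Settings(
        save_path="~/sam/files",
        database_name="semesterapparate.db",
        database_path="~/sam",
    )
    cfg.save_settings("config.yaml")   # dumps the dataclass fields as YAML

    data = load_config("config.yaml")  # returns a plain dict, not a Settings
    restored = Settings(**data)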
@@ -8,7 +8,7 @@
 from PySide6 import QtCore, QtGui, QtWidgets

-from src.logic.dataclass import BookData
+from src.core.models import BookData


 class Ui_Dialog(object):

@@ -8,7 +8,7 @@
 from PySide6 import QtCore, QtGui, QtWidgets

-from src.logic.webrequest import BibTextTransformer, WebRequest
+from src.services.webrequest import BibTextTransformer, WebRequest


 class Ui_Dialog(object):

@@ -10,8 +10,8 @@ import hashlib
 from PySide6 import QtCore, QtWidgets

-from src.backend.admin_console import AdminCommands
-from src.backend.database import Database
+from src.admin import AdminCommands
+from src.database import Database


 class Ui_Dialog(object):

@@ -1,6 +1,6 @@
 from PySide6 import QtWidgets

-from src.logic.dataclass import BookData
+from src.core.models import BookData

 from .dialog_sources.edit_bookdata_ui import Ui_Dialog

@@ -3,7 +3,7 @@ from typing import Any
 from PySide6 import QtCore, QtWidgets

 from src import Icon
-from src.backend.database import Database
+from src.database import Database

 from .dialog_sources.deletedialog_ui import Ui_Dialog

@@ -2,8 +2,8 @@ from natsort import natsorted
 from PySide6 import QtWidgets

 from src import Icon
-from src.backend import Database
-from src.logic import Semester
+from src.database import Database
+from src.core.models import Semester
 from src.utils.richtext import SemapSchilder, SemesterDocument

 from .dialog_sources.documentprint_ui import Ui_Dialog

@@ -1,8 +1,8 @@
 from PySide6 import QtWidgets

 from src import Icon
-from src.logic.webrequest import BibTextTransformer, WebRequest
-from src.logic.zotero import ZoteroController
+from src.services.webrequest import BibTextTransformer, WebRequest
+from src.services.zotero import ZoteroController
 from src.shared.logging import log
 from src.transformers.transformers import DictToTable

@@ -1,6 +1,6 @@
 from PySide6 import QtWidgets

-from src.logic.webrequest import BibTextTransformer, WebRequest
+from src.services.webrequest import BibTextTransformer, WebRequest

 from .dialog_sources.Ui_fileparser import Ui_Dialog

@@ -5,7 +5,7 @@ import loguru
 from PySide6 import QtCore, QtWidgets

 from src import LOG_DIR, Icon
-from src.backend.database import Database
+from src.database import Database

 from .dialog_sources.login_ui import Ui_Dialog

@@ -75,7 +75,7 @@ class LoginDialog(Ui_Dialog):

         hashed_password = hashlib.sha256(password.encode()).hexdigest()
         if len(self.db.getUsers()) == 0:
-            from src.backend.admin_console import AdminCommands
+            from src.admin import AdminCommands

             AdminCommands().create_admin()
             self.lresult = 1  # Indicate successful login

@@ -1,7 +1,7 @@
 from PySide6 import QtCore, QtWidgets

-from src.backend.catalogue import Catalogue
-from src.backend.database import Database
+from src.services.catalogue import Catalogue
+from src.database import Database
 from src.ui.dialogs.mail import Mail_Dialog

 from .dialog_sources.order_neweditions_ui import Ui_Dialog

@@ -4,7 +4,7 @@ import loguru
 from PySide6 import QtWidgets

 from src import LOG_DIR
-from src.backend import AutoAdder
+from src.background import AutoAdder

 from .dialog_sources.parsed_titles_ui import Ui_Form

@@ -5,9 +5,9 @@ from PySide6 import QtCore
 from PySide6.QtWidgets import QDialog, QPushButton, QVBoxLayout
 from qtqdm import Qtqdm, QtqdmProgressBar

-from src.logic import BookData
-from src.logic.lehmannsapi import LehmannsClient
-from src.logic.SRU import SWB
+from src.core.models import BookData
+from src.services.lehmanns import LehmannsClient
+from src.services.sru import SWB


 class CheckThread(QtCore.QThread):

@@ -15,24 +15,27 @@ from PySide6.QtGui import QRegularExpressionValidator
 from PySide6.QtMultimedia import QAudioOutput, QMediaPlayer

 from src import Icon
-from src.backend import (
+from src.database import Database
+from src.background import (
     AvailChecker,
     BookGrabber,
-    Database,
     DocumentationThread,
     NewEditionCheckerThread,
 )
-from src.backend.create_file import recreateFile
-from src.backend.delete_temp_contents import delete_temp_contents as tempdelete
-from src.logic import (
-    APP_NRS,
+from src.utils.files import recreateFile, delete_temp_contents as tempdelete
+from src.core.models import (
     Apparat,
     ApparatData,
     BookData,
     Prof,
     SemapDocument,
     Semester,
 )
+from src.core.constants import APP_NRS
+from src.parsers import (
+    csv_to_list,
+)
 from src.logic import (
     eml_to_semap,
     pdf_to_semap,
     word_to_semap,

@@ -5,7 +5,7 @@ from PySide6 import QtCore, QtWidgets
 from PySide6.QtCore import QDate
 from PySide6.QtGui import QColor, QPen

-from src.backend import Database
+from src.database import Database
 from src.shared.logging import log

 color = "#ddfb00" if darkdetect.isDark() else "#2204ff"

@@ -1,7 +1,8 @@
 from PySide6 import QtWidgets
 from PySide6.QtCore import Signal
 from .widget_sources.admin_create_user_ui import Ui_Dialog
-from src.backend import AdminCommands, Database
+from src.admin import AdminCommands
+from src.database import Database


 class UserCreate(QtWidgets.QDialog, Ui_Dialog):

@@ -4,8 +4,8 @@ import loguru
 from PySide6 import QtWidgets

 from src import LOG_DIR
-from src.backend import Database
-from src.logic import Prof
+from src.database import Database
+from src.core.models import Prof

 from .widget_sources.admin_edit_prof_ui import Ui_Dialog

@@ -1,6 +1,7 @@
 from PySide6 import QtWidgets

-from src.backend import AdminCommands, Database
+from src.admin import AdminCommands
+from src.database import Database

 from .widget_sources.admin_edit_user_ui import Ui_Dialog

@@ -1,7 +1,7 @@
 from PySide6 import QtCore, QtWidgets

 from src import Icon
-from src.backend import Database
+from src.database import Database

 from .widget_sources. import Ui_Form

@@ -2,7 +2,7 @@ from PySide6 import QtWidgets
 from PySide6.QtCore import Signal

 from src import Icon
-from src.backend.database import Database
+from src.database import Database

 from .widget_sources.calendar_entry_ui import Ui_Dialog

@@ -5,8 +5,10 @@ from PySide6.QtCore import QDate
 from PySide6.QtGui import QRegularExpressionValidator

 from src import Icon
-from src.backend import Database, recreateElsaFile
-from src.logic import Prof, Semester, elsa_word_to_csv
+from src.database import Database
+from src.utils.files import recreateElsaFile
+from src.core.models import Prof, Semester
+from src.logic import elsa_word_to_csv
 from src.shared.logging import log
 from src.ui.dialogs import ElsaAddEntry, popus_confirm
 from src.ui.widgets.filepicker import FilePicker

@@ -5,7 +5,7 @@ from PySide6 import QtCore, QtGui, QtWidgets
 from PySide6.QtCharts import QCategoryAxis, QChart, QChartView, QLineSeries, QValueAxis
 from PySide6.QtGui import QColor, QPainter, QPen

-from src.logic.semester import Semester
+from src.core.models import Semester


 def mergedicts(d1: dict[str, Any], d2: dict[str, Any]):

@@ -101,7 +101,7 @@ class DataQtGraph(QtWidgets.QWidget):

         self.chart.createDefaultAxes()
         for entry in lst:
-            # print("entry:", entry)
+            print("entry:", entry)
             entryseries = QLineSeries()
             for x_val, y_val in zip(entry["x"], entry["y"]):
                 #

@@ -4,8 +4,8 @@ from PySide6 import QtWidgets
 from PySide6.QtCore import Qt

 from src import Icon
-from src.backend.catalogue import Catalogue
-from src.logic import BookData
+from src.services.catalogue import Catalogue
+from src.core.models import BookData

 from .widget_sources.new_edition_check_book_ui import (
     Ui_Dialog as Ui_NewEditionCheckBook,

@@ -4,9 +4,9 @@ from natsort import natsorted
 from PySide6 import QtCore, QtGui, QtWidgets
 from PySide6.QtCore import Signal

-from src.backend import Database
-from src.logic import BookData, Prof, Semester, custom_sort, sort_semesters_list
-from src.logic.dataclass import Apparat
+from src.core.models import Apparat, BookData, Prof, Semester
+from src.database import Database
+from src.logic import custom_sort, sort_semesters_list
 from src.shared.logging import log
 from src.ui.dialogs import ApparatExtendDialog, Mail_Dialog, ReminderDialog
 from src.ui.widgets import DataQtGraph, StatusWidget

@@ -374,6 +374,7 @@ class SearchStatisticPage(QtWidgets.QDialog, Ui_Dialog):
             "x": [i[0] for i in data],
             "y": {"Erstellt": [i[1] for i in data], "Gelöscht": [i[2] for i in data]},
         }
+        log.debug(graph_data)
         graph = DataQtGraph(
             title="Erstellte und gelöschte Apparate",
             data=graph_data,

@@ -6,10 +6,10 @@ from queue import Empty, Queue
 from PySide6 import QtCore, QtWidgets
 from PySide6.QtMultimedia import QAudioOutput, QMediaPlayer

-from src.backend.catalogue import Catalogue
-from src.backend.database import Database
-from src.backend.webadis import get_book_medianr
-from src.logic.SRU import SWB
+from src.services.catalogue import Catalogue
+from src.database import Database
+from src.services.webadis import get_book_medianr
+from src.services.sru import SWB
 from src.shared.logging import log

 from .widget_sources.admin_update_signatures_ui import Ui_Dialog

@@ -5,7 +5,7 @@ from appdirs import AppDirs
 from PySide6 import QtCore, QtWidgets

 from src import settings
-from src.backend import Database
+from src.database import Database
 from src.shared.logging import log

 from .widget_sources.welcome_wizard_ui import Ui_Wizard

@@ -80,7 +80,7 @@ class WelcomeWizard(QtWidgets.QWizard, Ui_Wizard):
         self.settings_database_name.setText("semesterapparate.db")

     def test_login_data(self):
-        from src.backend import AdminCommands
+        from src.admin import AdminCommands

         log.info("Testing login data for SAM user")
         db_path = (

@@ -109,7 +109,7 @@ class WelcomeWizard(QtWidgets.QWizard, Ui_Wizard):

     def create_sam_user(self):
         """Create a SAM user in the database."""
-        from src.backend import AdminCommands
+        from src.admin import AdminCommands

         db_path = (
             self.settings_database.text() + "/" + self.settings_database_name.text()
100
src/utils/files.py
Normal file
100
src/utils/files.py
Normal file
@@ -0,0 +1,100 @@
"""File operations and management utilities."""

import os
import sys
from pathlib import Path

from src import settings
from src.database import Database
from src.shared.logging import log


def _open_in_default_app(path: Path) -> None:
    """Open a file with the platform's default application."""
    if os.getenv("OS") == "Windows_NT":
        os.startfile(path)
    else:
        # "open" only exists on macOS; Linux desktops use "xdg-open".
        opener = "open" if sys.platform == "darwin" else "xdg-open"
        os.system(f'{opener} "{path}"')


def recreate_file(name: str, app_id: int, filetype: str, open_file: bool = True) -> Path:
    """
    Recreate a file from the database and optionally open it.

    Args:
        name: The filename selected by the user.
        app_id: The ID of the apparatus.
        filetype: The extension of the file to be created.
        open_file: Determines if the file should be opened. Defaults to True.

    Returns:
        Absolute path to the file.
    """
    db = Database()
    path = Path(db.recreateFile(name, app_id, filetype=filetype))
    log.info(f"File created: {path}")

    if open_file:
        path = path.resolve()
        _open_in_default_app(path)

    return path


# Legacy name for backwards compatibility
def recreateFile(name: str, app_id: int, filetype: str, open: bool = True) -> Path:
    """Legacy function name - use recreate_file instead."""
    return recreate_file(name, app_id, filetype, open)


def recreate_elsa_file(filename: str, filetype: str, open_file: bool = True) -> Path:
    """
    Recreate an ELSA file from the database and optionally open it.

    Args:
        filename: The filename selected by the user.
        filetype: The file extension.
        open_file: Determines if the file should be opened. Defaults to True.

    Returns:
        Absolute path to the file.
    """
    # Filenames sometimes arrive as a stringified one-element tuple, e.g. "('foo.docx')".
    if filename.startswith("(") and filename.endswith(")"):
        filename = str(filename[1:-1].replace("'", ""))

    if not isinstance(filename, str):
        raise ValueError("filename must be a string")

    db = Database()
    path = Path(db.recreateElsaFile(filename, filetype))

    if open_file:
        path = path.resolve()
        _open_in_default_app(path)

    return path


# Legacy name for backwards compatibility
def recreateElsaFile(filename: str, filetype: str, open: bool = True) -> Path:
    """Legacy function name - use recreate_elsa_file instead."""
    return recreate_elsa_file(filename, filetype, open)


def delete_temp_contents() -> None:
    """Delete the contents of the temp directory."""
    database = settings.database
    path = database.temp.expanduser()

    for root, dirs, files in os.walk(path, topdown=False):
        for file in files:
            try:
                os.remove(os.path.join(root, file))
            except Exception as e:
                log.warning(f"Could not remove file {file}: {e}")
        for directory in dirs:
            try:
                os.rmdir(os.path.join(root, directory))
            except Exception as e:
                log.warning(f"Could not remove directory {directory}: {e}")

    log.info(f"Temp directory cleared: {path}")
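A usage sketch for the helpers above (the filename and apparatus ID are placeholders):

    # hypothetical usage of the file recreation helpers
    from src.utils.files import recreate_file, delete_temp_contents

    path = recreate_file(
        name="apparatliste.docx",  # placeholder filename
        app_id=71,                 # placeholder apparatus ID
        filetype="docx",
        open_file=False,           # just write the file, don't launch an app
    )
    print(path)

    delete_temp_contents()         # clear the temp directory afterwards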