dev #21
@@ -1,11 +1,12 @@
|
||||
from PySide6.QtCore import QThread
|
||||
from PySide6.QtCore import Signal
|
||||
from src.backend import Database
|
||||
|
||||
from src.logic.webrequest import BibTextTransformer, WebRequest
|
||||
import loguru
|
||||
import sys
|
||||
|
||||
import loguru
|
||||
from PySide6.QtCore import QThread, Signal
|
||||
|
||||
from src import LOG_DIR
|
||||
from src.backend import Database
|
||||
from src.logic.webrequest import BibTextTransformer, WebRequest
|
||||
|
||||
log = loguru.logger
|
||||
log.remove()
|
||||
log.add(sys.stdout, level="INFO")
|
||||
@@ -31,9 +32,11 @@ class BookGrabber(QThread):
|
||||
self.book_id = None
|
||||
self.use_any = False
|
||||
self.use_exact = False
|
||||
self.app_id = None
|
||||
self.app_nr = None
|
||||
self.tstate = (self.app_id, self.prof_id, self.mode, self.data)
|
||||
self.request = WebRequest()
|
||||
self.db = Database()
|
||||
|
||||
|
||||
def add_values(
|
||||
self, app_id: int, prof_id: int, mode: str, data, any_book=False, exact=False
|
||||
@@ -45,13 +48,13 @@ class BookGrabber(QThread):
|
||||
self.use_any = any_book
|
||||
self.use_exact = exact
|
||||
log.info(f"Working on {len(self.data)} entries")
|
||||
self.tstate = (self.app_id, self.prof_id, self.mode, self.data)
|
||||
self.tstate = (self.app_nr, self.prof_id, self.mode, self.data)
|
||||
log.debug("State: " + str(self.tstate))
|
||||
self.request.set_apparat(self.app_id)
|
||||
app_nr = self.db.query_db("SELECT appnr FROM semesterapparat WHERE id = ?", (self.app_id,))[0][0]
|
||||
self.request.set_apparat(app_nr)
|
||||
# log.debug(self.tstate)
|
||||
|
||||
def run(self):
|
||||
self.db = Database()
|
||||
item = 0
|
||||
iterdata = self.data
|
||||
# log.debug(iterdata)
|
||||
@@ -91,7 +94,7 @@ class BookGrabber(QThread):
|
||||
state = 0
|
||||
for result in transformer.RDS_DATA:
|
||||
# log.debug(result.RDS_LOCATION)
|
||||
if str(self.app_id) in result.RDS_LOCATION:
|
||||
if str(self.app_nr) in result.RDS_LOCATION:
|
||||
state = 1
|
||||
break
|
||||
|
||||
@@ -126,27 +129,27 @@ class BookGrabberTest(QThread):
|
||||
self.is_Running = True
|
||||
log.info("Starting worker thread")
|
||||
self.data = None
|
||||
self.app_id = None
|
||||
self.app_nr = None
|
||||
self.prof_id = None
|
||||
self.mode = None
|
||||
self.book_id = None
|
||||
self.use_any = False
|
||||
self.use_exact = False
|
||||
self.app_id = appnr
|
||||
self.tstate = (self.app_id, self.prof_id, self.mode, self.data)
|
||||
self.app_nr = appnr
|
||||
self.tstate = (self.app_nr, self.prof_id, self.mode, self.data)
|
||||
self.results = []
|
||||
|
||||
def add_values(
|
||||
self, app_id: int, prof_id: int, mode: str, data, any_book=False, exact=False
|
||||
self, app_nr: int, prof_id: int, mode: str, data, any_book=False, exact=False
|
||||
):
|
||||
self.app_id = app_id
|
||||
self.app_nr = app_nr
|
||||
self.prof_id = prof_id
|
||||
self.mode = mode
|
||||
self.data = data
|
||||
self.use_any = any_book
|
||||
self.use_exact = exact
|
||||
log.info(f"Working on {len(self.data)} entries")
|
||||
self.tstate = (self.app_id, self.prof_id, self.mode, self.data)
|
||||
self.tstate = (self.app_nr, self.prof_id, self.mode, self.data)
|
||||
log.debug("State: " + str(self.tstate))
|
||||
# log.debug(self.tstate)
|
||||
|
||||
@@ -159,7 +162,7 @@ class BookGrabberTest(QThread):
|
||||
signature = str(entry)
|
||||
log.info("Processing entry: " + signature)
|
||||
|
||||
webdata = WebRequest().set_apparat(self.app_id).get_ppn(entry)
|
||||
webdata = WebRequest().set_apparat(self.app_nr).get_ppn(entry)
|
||||
if self.use_any:
|
||||
webdata = webdata.use_any_book
|
||||
webdata = webdata.get_data()
|
||||
@@ -186,7 +189,7 @@ class BookGrabberTest(QThread):
|
||||
state = 0
|
||||
for result in transformer.RDS_DATA:
|
||||
# log.debug(result.RDS_LOCATION)
|
||||
if str(self.app_id) in result.RDS_LOCATION:
|
||||
if str(self.app_nr) in result.RDS_LOCATION:
|
||||
state = 1
|
||||
break
|
||||
|
||||
|
||||
@@ -1,19 +1,26 @@
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from datetime import datetime
|
||||
from math import ceil
|
||||
from queue import Empty, Queue
|
||||
from typing import List, Optional, Set, Union
|
||||
from time import monotonic # <-- NEW
|
||||
from typing import List, Optional
|
||||
|
||||
import loguru
|
||||
from PySide6.QtCore import QThread, Signal
|
||||
|
||||
from src import LOG_DIR
|
||||
|
||||
# from src.logic.webrequest import BibTextTransformer, WebRequest
|
||||
from src.backend.catalogue import Catalogue
|
||||
from src.logic import BookData
|
||||
from src.logic.lehmannsapi import LehmannsClient
|
||||
from src.logic.swb import SWB
|
||||
from src.logic.SRU import SWB
|
||||
|
||||
# Use all available cores minus two, but never fewer than one worker.
# os.cpu_count() returns None when the core count cannot be determined;
# fall back to 1 in that case so the subtraction cannot raise TypeError.
THREAD_COUNT = max((os.cpu_count() or 1) - 2, 1)
# Divisor used when sizing the worker pool: one worker per this many entries.
THREAD_MIN_ITEMS = 5
|
||||
|
||||
log = loguru.logger
|
||||
log.remove()
|
||||
@@ -23,89 +30,136 @@ log.add(f"{LOG_DIR}/application.log", rotation="1 MB", retention="10 days")
|
||||
log.add(
|
||||
f"{LOG_DIR}/{datetime.now().strftime('%Y-%m-%d')}.log",
|
||||
rotation="1 day",
|
||||
retention="1 month",
|
||||
retention="7 days",
|
||||
)
|
||||
|
||||
swb = SWB()
|
||||
dnb = SWB()
|
||||
cat = Catalogue()
|
||||
|
||||
def _norm_text(s: Optional[str]) -> str:
|
||||
if not s:
|
||||
return ""
|
||||
# lowercase, collapse whitespace, drop some punctuation
|
||||
s = s.lower()
|
||||
s = re.sub(r"[\s\-\u2013\u2014]+", " ", s) # spaces/dashes
|
||||
s = re.sub(r"[\"'`:.,;!?()\[\]{}]", "", s)
|
||||
return s.strip()
|
||||
RVK_ALLOWED = r"[A-Z0-9.\-\/]" # conservative RVK character set
|
||||
|
||||
|
||||
def _same_book(a: "BookData", b: "BookData") -> bool:
    """Heuristically decide whether *a* and *b* describe the same book.

    Two records are the same book when their normalised ISBN sets intersect.
    Otherwise their normalised titles must be non-empty and equal; if both
    records carry a year the years must match too, and if either year is
    missing the title match alone is sufficient.

    Returns:
        True when the records are considered the same book, else False.
    """
    # An intersection is only non-empty when both sets are non-empty.
    if _norm_isbns(a.isbn) & _norm_isbns(b.isbn):
        return True

    # A strong (exact, normalised) title match is required from here on.
    title_a, title_b = _norm_text(a.title), _norm_text(b.title)
    if not title_a or title_a != title_b:
        return False

    # NOTE(review): the previous implementation also compared the authors,
    # but both outcomes of that comparison led to the same result, so the
    # author never influenced the decision. That behaviour is kept here;
    # confirm whether an author mismatch should reject the pair.

    # Coerce through str() so integer years do not break on .strip().
    year_a = str(a.year or "").strip()
    year_b = str(b.year or "").strip()
    if year_a and year_b:
        return year_a == year_b
    return True
|
||||
|
||||
|
||||
def _norm_isbns(value: Union[str, List[str], None]) -> Set[str]:
|
||||
"""Return a set of 10/13-digit ISBNs (digits only, keep X for ISBN-10 if present)."""
|
||||
if value is None:
|
||||
return set()
|
||||
vals = value if isinstance(value, list) else [value]
|
||||
out: Set[str] = set()
|
||||
for v in vals:
|
||||
s = str(v)
|
||||
digits = re.sub(r"[^0-9Xx]", "", s)
|
||||
# keep 13-digit or 10-digit tokens
|
||||
m13 = re.findall(r"97[89]\d{10}", digits)
|
||||
if m13:
|
||||
out.update(m13)
|
||||
else:
|
||||
m10 = re.findall(r"\d{9}[0-9Xx]", digits)
|
||||
out.update(x.upper() for x in m10)
|
||||
return out
|
||||
|
||||
|
||||
def filter_prefer_swb(records: List["BookData"]) -> List["BookData"]:
    """
    If an SWB entry with a non-empty signature exists for a book, drop the HTTP(S) duplicate(s).
    Returns a NEW list (does not mutate the input).
    """
    swb_with_sig = [
        r
        for r in records
        if (r.link == "SWB") and (r.signature and r.signature.strip())
    ]
    if not swb_with_sig:
        return list(records)

    to_remove: Set[int] = set()

    # For each URL entry, see if it matches any SWB-with-signature entry
    for idx, rec in enumerate(records):
        if not rec.link or not rec.link.lower().startswith("http"):
            continue
        # Local name chosen so it does not shadow the module-level `swb`.
        for swb_rec in swb_with_sig:
            if _same_book(swb_rec, rec):
                to_remove.add(idx)
                break

    # Build filtered list
    return [rec for i, rec in enumerate(records) if i not in to_remove]


def find_newer_edition(
    swb_result: "BookData", dnb_result: List["BookData"]
) -> Optional[List["BookData"]]:
    """
    Pick the single best newer edition of *swb_result* from *dnb_result*.

    New edition if:
      - year > swb.year OR
      - edition_number > swb.edition_number
    BUT: discard any candidate with year < swb.year (if both years are known).

    Same-work check:
      - Compare RVK roots of signatures (after stripping trailing '+N' and '(N)').
      - If both have signatures and RVKs differ -> skip.

    Preferences (in order):
      1) RVK matches SWB
      2) Print over Online-Ressource
      3) Has signature
      4) Newer: (year desc, edition_number desc)

    Returns:
        A one-element list holding the best candidate, or None when no
        candidate qualifies as a newer edition.
    """

    def strip_copy_and_edition(s: str) -> str:
        s = re.sub(r"\(\s*\d+\s*\)", "", s)  # remove '(N)'
        s = re.sub(r"\s*\+\s*\d+\s*$", "", s)  # remove trailing '+N'
        return s

    def extract_rvk_root(sig: Optional[str]) -> str:
        if not sig:
            return ""
        t = strip_copy_and_edition(sig.upper())
        t = re.sub(r"\s+", " ", t).strip()
        m = re.match(rf"^([A-Z]{{1,3}}\s*{RVK_ALLOWED}*)", t)
        if not m:
            # RVK_ALLOWED is itself a bracketed character class, so it cannot
            # be nested inside another [...] class. Use a negative lookahead
            # to remove every character that is neither allowed nor a space.
            cleaned = re.sub(rf"(?:(?!{RVK_ALLOWED})[^ ])+", "", t).strip()
            return cleaned.split(" ")[0] if cleaned else ""
        return re.sub(r"\s+", " ", m.group(1)).strip()

    def has_sig(b: "BookData") -> bool:
        return bool(getattr(b, "signature", None))

    def is_online(b: "BookData") -> bool:
        return (getattr(b, "media_type", None) or "").strip() == "Online-Ressource"

    def is_print(b: "BookData") -> bool:
        return not is_online(b)

    def rvk_matches_swb(b: "BookData") -> bool:
        if not has_sig(b) or not has_sig(swb_result):
            return False
        return extract_rvk_root(b.signature) == extract_rvk_root(swb_result.signature)

    def strictly_newer(b: "BookData") -> bool:
        # Hard guard: if both years are known and candidate is older, discard
        if (
            b.year is not None
            and swb_result.year is not None
            and b.year < swb_result.year
        ):
            return False

        newer_by_year = (
            b.year is not None
            and swb_result.year is not None
            and b.year > swb_result.year
        )
        newer_by_edition = (
            b.edition_number is not None
            and swb_result.edition_number is not None
            and b.edition_number > swb_result.edition_number
        )
        # Thanks to the guard above, newer_by_edition can't pick something with a smaller year.
        return newer_by_year or newer_by_edition

    swb_has_sig = has_sig(swb_result)
    swb_rvk = extract_rvk_root(getattr(swb_result, "signature", None))

    # 1) Filter: same work (by RVK if both have sigs) AND strictly newer
    candidates: List["BookData"] = []
    for b in dnb_result:
        if has_sig(b) and swb_has_sig:
            if extract_rvk_root(b.signature) != swb_rvk:
                continue  # different work
        if strictly_newer(b):
            candidates.append(b)

    if not candidates:
        return None

    # 2) Dedupe by PPN -> prefer (rvk-match, is-print, has-signature)
    def pref_score(x: "BookData") -> "tuple[int, int, int]":
        return (
            1 if rvk_matches_swb(x) else 0,
            1 if is_print(x) else 0,
            1 if has_sig(x) else 0,
        )

    by_ppn: dict = {}
    for idx, b in enumerate(candidates):
        ppn = getattr(b, "ppn", None)
        # Records without a PPN are not duplicates of one another. Give each
        # its own key, otherwise they would all collapse onto None and a
        # newer edition could be discarded before the final pick.
        key = ppn if ppn is not None else ("<no-ppn>", idx)
        prev = by_ppn.get(key)
        if prev is None or pref_score(b) > pref_score(prev):
            by_ppn[key] = b

    deduped = list(by_ppn.values())

    # 3) Final pick (single best)
    def sort_key(b: "BookData"):
        # Unknown year / edition sorts below every known value.
        year = b.year if b.year is not None else -1
        ed = b.edition_number if b.edition_number is not None else -1
        return (*pref_score(b), year, ed)

    # `candidates` is non-empty here, so `deduped` is non-empty as well.
    best = max(deduped, key=sort_key)
    return [best]
|
||||
|
||||
|
||||
class NewEditionCheckerThread(QThread):
|
||||
@@ -115,8 +169,8 @@ class NewEditionCheckerThread(QThread):
|
||||
resultsSignal = Signal(list) # list[tuple[BookData, list[BookData]]]
|
||||
|
||||
# NEW: metrics signals
|
||||
rateSignal = Signal(float) # items per second ("it/s")
|
||||
etaSignal = Signal(int) # seconds remaining (-1 when unknown)
|
||||
rateSignal = Signal(float) # items per second ("it/s")
|
||||
etaSignal = Signal(int) # seconds remaining (-1 when unknown)
|
||||
|
||||
def __init__(self, entries: Optional[list["BookData"]] = None, parent=None):
|
||||
super().__init__(parent)
|
||||
@@ -157,54 +211,64 @@ class NewEditionCheckerThread(QThread):
|
||||
def _process_book(
|
||||
cls, book: "BookData"
|
||||
) -> tuple["BookData", list["BookData"]] | None:
|
||||
author = (
|
||||
book.author.split(";")[0].replace(" ", "")
|
||||
if (book.author and ";" in book.author)
|
||||
else (book.author or "").replace(" ", "")
|
||||
)
|
||||
title = cls._clean_title(book.title or "")
|
||||
|
||||
# Query SWB
|
||||
response: list[BookData] = SWB().getBooks(
|
||||
[
|
||||
"pica.bib=20735",
|
||||
f"pica.tit={title.split(':')[0].strip()}",
|
||||
# f"pica.per={author}",
|
||||
]
|
||||
)
|
||||
|
||||
# Remove same PPN
|
||||
response = [entry for entry in response if entry.ppn != book.ppn]
|
||||
for respo in response:
|
||||
respo.link = "SWB"
|
||||
|
||||
# Query Lehmanns
|
||||
with LehmannsClient() as client:
|
||||
results = client.search_by_title(title, strict=True)
|
||||
if results:
|
||||
for res in results:
|
||||
response.append(BookData().from_LehmannsSearchResult(res))
|
||||
|
||||
if not response:
|
||||
"""Process one book; returns (original, [found editions]) or None on failure."""
|
||||
if not book.title:
|
||||
return None
|
||||
|
||||
response = filter_prefer_swb(response)
|
||||
|
||||
# Remove entries matching the same ISBN as the current book
|
||||
response = [
|
||||
entry
|
||||
for entry in response
|
||||
if not (_norm_isbns(entry.isbn) & _norm_isbns(book.isbn))
|
||||
]
|
||||
response = [
|
||||
entry
|
||||
for entry in response
|
||||
if book.publisher in entry.publisher
|
||||
response: list["BookData"] = []
|
||||
query = [
|
||||
f"pica.tit={book.title}",
|
||||
f"pica.vlg={book.publisher}",
|
||||
]
|
||||
|
||||
if not response:
|
||||
return None
|
||||
swb_result = swb.getBooks(["pica.bib=20735", f"pica.ppn={book.ppn}"])[0]
|
||||
dnb_results = swb.getBooks(query)
|
||||
new_editions = find_newer_edition(swb_result, dnb_results)
|
||||
|
||||
if new_editions is not None:
|
||||
for new_edition in new_editions:
|
||||
new_edition.library_location = cat.get_location(new_edition.ppn)
|
||||
try:
|
||||
isbn = (
|
||||
str(new_edition.isbn[0])
|
||||
if isinstance(new_edition.isbn, list)
|
||||
else str(new_edition.isbn)
|
||||
)
|
||||
new_edition.link = (
|
||||
f"https://www.lehmanns.de/search/quick?mediatype_id=2&q={isbn}"
|
||||
)
|
||||
except (IndexError, TypeError):
|
||||
isbn = None
|
||||
new_edition.in_library = cat.in_library(new_edition.ppn)
|
||||
response = new_editions
|
||||
|
||||
# client = SWB()
|
||||
# response: list["BookData"] = []
|
||||
# # First, search by title only
|
||||
# results = client.getBooks([f"pica.title={title}", f"pica.vlg={book.publisher}"])
|
||||
|
||||
# lehmanns = LehmannsClient()
|
||||
# results = lehmanns.search_by_title(title)
|
||||
# for result in results:
|
||||
# if "(eBook)" in result.title:
|
||||
# result.title = result.title.replace("(eBook)", "").strip()
|
||||
# swb_results = client.getBooks(
|
||||
# [
|
||||
# f"pica.tit={result.title}",
|
||||
# f"pica.vlg={result.publisher.split(',')[0]}",
|
||||
# ]
|
||||
# )
|
||||
# for swb in swb_results:
|
||||
# if swb.isbn == result.isbn:
|
||||
# result.ppn = swb.ppn
|
||||
# result.signature = swb.signature
|
||||
# response.append(result)
|
||||
# if (result.edition_number < swb.edition_number) and (
|
||||
# swb.year > result.year
|
||||
# ):
|
||||
# response.append(result)
|
||||
if response == []:
|
||||
return None
|
||||
# Remove duplicates based on ppn
|
||||
return (book, response)
|
||||
|
||||
@classmethod
|
||||
@@ -240,7 +304,7 @@ class NewEditionCheckerThread(QThread):
|
||||
return
|
||||
|
||||
# Up to 4 workers; ~20 items per worker
|
||||
num_workers = min(4, max(1, ceil(total / 20)))
|
||||
num_workers = min(THREAD_COUNT, max(1, ceil(total / THREAD_MIN_ITEMS)))
|
||||
chunks = self._split_evenly(self.entries, num_workers)
|
||||
sizes = [len(ch) for ch in chunks]
|
||||
|
||||
|
||||
@@ -1,13 +1,15 @@
|
||||
import sys
|
||||
import time
|
||||
|
||||
import loguru
|
||||
|
||||
# from icecream import ic
|
||||
from PySide6.QtCore import QThread
|
||||
from PySide6.QtCore import Signal as Signal
|
||||
|
||||
from src.backend import Database
|
||||
import loguru
|
||||
import sys
|
||||
from src import LOG_DIR
|
||||
from src.backend import Database
|
||||
|
||||
log = loguru.logger
|
||||
log.remove()
|
||||
log.add(sys.stdout, level="INFO")
|
||||
@@ -29,8 +31,8 @@ class AutoAdder(QThread):
|
||||
self.app_id = app_id
|
||||
self.prof_id = prof_id
|
||||
|
||||
# print("Launched AutoAdder")
|
||||
# print(self.data, self.app_id, self.prof_id)
|
||||
# #print("Launched AutoAdder")
|
||||
# #print(self.data, self.app_id, self.prof_id)
|
||||
|
||||
def run(self):
|
||||
self.db = Database()
|
||||
@@ -46,7 +48,7 @@ class AutoAdder(QThread):
|
||||
time.sleep(1)
|
||||
|
||||
except Exception as e:
|
||||
# print(e)
|
||||
# #print(e)
|
||||
log.exception(
|
||||
f"The query failed with message {e} for signature {entry}"
|
||||
)
|
||||
|
||||
@@ -1,24 +1,23 @@
|
||||
import sys
|
||||
import time
|
||||
|
||||
# from src.transformers import RDS_AVAIL_DATA
|
||||
import loguru
|
||||
|
||||
# from icecream import ic
|
||||
from PySide6.QtCore import QThread
|
||||
from PySide6.QtCore import Signal as Signal
|
||||
|
||||
from src.backend.database import Database
|
||||
from src import LOG_DIR
|
||||
from src.backend.database import Database
|
||||
from src.logic.webrequest import BibTextTransformer, WebRequest
|
||||
|
||||
# from src.transformers import RDS_AVAIL_DATA
|
||||
import loguru
|
||||
import sys
|
||||
|
||||
log = loguru.logger
|
||||
log.remove()
|
||||
log.add(sys.stdout, level="INFO")
|
||||
log.add(f"{LOG_DIR}/application.log", rotation="1 MB", retention="10 days")
|
||||
|
||||
|
||||
|
||||
class AvailChecker(QThread):
|
||||
updateSignal = Signal(str, int)
|
||||
updateProgress = Signal(int, int)
|
||||
@@ -62,8 +61,8 @@ class AvailChecker(QThread):
|
||||
for item in rds.items:
|
||||
sign = item.superlocation
|
||||
loc = item.location
|
||||
# # print(item.location)
|
||||
if self.appnumber in sign or self.appnumber in loc:
|
||||
# # #print(item.location)
|
||||
if str(self.appnumber) in sign or str(self.appnumber) in loc:
|
||||
state = 1
|
||||
break
|
||||
for book in self.books:
|
||||
@@ -71,7 +70,7 @@ class AvailChecker(QThread):
|
||||
book_id = book["id"]
|
||||
break
|
||||
log.info(f"State of {link}: " + str(state))
|
||||
# print("Updating availability of " + str(book_id) + " to " + str(state))
|
||||
# #print("Updating availability of " + str(book_id) + " to " + str(state))
|
||||
self.db.setAvailability(book_id, state)
|
||||
count += 1
|
||||
self.updateProgress.emit(count, len(self.links))
|
||||
|
||||
Reference in New Issue
Block a user