From 7e07bdea0c27094f762c6a5498b56fa88e1aa028 Mon Sep 17 00:00:00 2001 From: WorldTeacher Date: Tue, 7 Oct 2025 14:42:40 +0200 Subject: [PATCH] commit AI suggested performance enhancements --- src/backend/threads_availchecker.py | 18 +++++++++------ src/logic/SRU.py | 34 ++++++++++++++++++++++------- src/ui/dialogs/progress.py | 7 ++++-- 3 files changed, 42 insertions(+), 17 deletions(-) diff --git a/src/backend/threads_availchecker.py b/src/backend/threads_availchecker.py index 9014ccd..f7193c6 100644 --- a/src/backend/threads_availchecker.py +++ b/src/backend/threads_availchecker.py @@ -1,5 +1,4 @@ import sys -import time # from src.transformers import RDS_AVAIL_DATA import loguru @@ -23,7 +22,11 @@ class AvailChecker(QThread): updateProgress = Signal(int, int) def __init__( - self, links: list = None, appnumber: int = None, parent=None, books=list[dict] + self, + links: list[str] | None = None, + appnumber: int | None = None, + parent=None, + books: list[dict] | None = None, ): if links is None: links = [] @@ -38,11 +41,13 @@ class AvailChecker(QThread): ) self.links = links self.appnumber = appnumber - self.books = books + self.books = books or [] log.info( f"Started worker with appnumber: {self.appnumber} and links: {self.links} and {len(self.books)} books..." ) - time.sleep(2) + # Pre-create reusable request and transformer to avoid per-item overhead + self._request = WebRequest().set_apparat(self.appnumber) + self._rds_transformer = BibTextTransformer("RDS") def run(self): self.db = Database() @@ -50,9 +55,8 @@ class AvailChecker(QThread): count = 0 for link in self.links: log.info("Processing entry: " + str(link)) - data = WebRequest().set_apparat(self.appnumber).get_ppn(link).get_data() - transformer = BibTextTransformer("RDS") - rds = transformer.get_data(data).return_data("rds_availability") + data = self._request.get_ppn(link).get_data() + rds = self._rds_transformer.get_data(data).return_data("rds_availability") book_id = None if not rds or not rds.items: diff --git a/src/logic/SRU.py b/src/logic/SRU.py index 0141217..3a5ef9e 100644 --- a/src/logic/SRU.py +++ b/src/logic/SRU.py @@ -1,3 +1,4 @@ +import re import sys import xml.etree.ElementTree as ET from dataclasses import dataclass, field @@ -7,6 +8,7 @@ from typing import Dict, Iterable, List, Optional, Tuple import loguru import requests +from requests.adapters import HTTPAdapter from src import LOG_DIR from src.logic.dataclass import BookData @@ -556,7 +558,22 @@ class Api: self.site = site self.url = url self.prefix = prefix - pass + # Reuse TCP connections across requests for better performance + self._session = requests.Session() + # Slightly larger connection pool for concurrent calls + adapter = HTTPAdapter(pool_connections=10, pool_maxsize=20) + self._session.mount("http://", adapter) + self._session.mount("https://", adapter) + + def close(self): + try: + self._session.close() + except Exception: + pass + + def __del__(self): + # Best-effort cleanup + self.close() def get(self, query_args: Iterable[str]) -> List[Record]: # if any query_arg ends with =, remove it @@ -580,18 +597,18 @@ class Api: "Accept": "application/xml", "Accept-Charset": "latin1,utf-8;q=0.7,*;q=0.3", } - response = requests.get(url, headers=headers) + # Use persistent session and set timeouts to avoid hanging + response = self._session.get(url, headers=headers, timeout=(3.05, 20)) if response.status_code != 200: raise Exception(f"Error fetching data from SWB: {response.status_code}") - data = response.content - - # extract top-level response - response = parse_search_retrieve_response(data) + # extract top-level response (decode to text for the XML parser) + response = parse_search_retrieve_response(response.text) return response.records def getBooks(self, query_args: Iterable[str]) -> List[BookData]: records: List[Record] = self.get(query_args) - print(f"{self.site} found {len(records)} records") + # Avoid printing on hot paths; rely on logger if needed + log.debug(f"{self.site} found {len(records)} records for args={query_args}") books: List[BookData] = [] # extract title from query_args if present title = None @@ -611,7 +628,8 @@ class Api: return books def getLinkForBook(self, book: BookData) -> str: - results = self.getBooks() + # Not implemented: depends on catalog front-end; return empty string for now + return "" class SWB(Api): diff --git a/src/ui/dialogs/progress.py b/src/ui/dialogs/progress.py index 3d83f26..cdeab3b 100644 --- a/src/ui/dialogs/progress.py +++ b/src/ui/dialogs/progress.py @@ -32,6 +32,7 @@ class CheckThread(QtCore.QThread): range(len(self.items)), unit_scale=True, ) + swb_client = SWB() for i in tqdm_object: book: BookData = self.items[i] author = ( @@ -43,7 +44,7 @@ class CheckThread(QtCore.QThread): # remove trailing punctuation from title title = book.title.rstrip(" .:,;!?") response: list[BookData] = [] - response = SWB().getBooks( + response = swb_client.getBooks( [ "pica.bib=20735", f"pica.tit={title.split(':')[0].strip()}", @@ -88,4 +89,6 @@ class ProgressDialog(QDialog): layout.addWidget(self.start_button) def start(self): - + # Start logic is managed externally; keep method for UI wiring + pass +