commit AI suggested performance enhancements

This commit is contained in:
2025-10-07 14:42:40 +02:00
parent 06965db26a
commit 7e07bdea0c
3 changed files with 42 additions and 17 deletions

View File

@@ -1,5 +1,4 @@
import sys
import time
# from src.transformers import RDS_AVAIL_DATA
import loguru
@@ -23,7 +22,11 @@ class AvailChecker(QThread):
updateProgress = Signal(int, int)
def __init__(
self, links: list = None, appnumber: int = None, parent=None, books=list[dict]
self,
links: list[str] | None = None,
appnumber: int | None = None,
parent=None,
books: list[dict] | None = None,
):
if links is None:
links = []
@@ -38,11 +41,13 @@ class AvailChecker(QThread):
)
self.links = links
self.appnumber = appnumber
self.books = books
self.books = books or []
log.info(
f"Started worker with appnumber: {self.appnumber} and links: {self.links} and {len(self.books)} books..."
)
time.sleep(2)
# Pre-create reusable request and transformer to avoid per-item overhead
self._request = WebRequest().set_apparat(self.appnumber)
self._rds_transformer = BibTextTransformer("RDS")
def run(self):
self.db = Database()
@@ -50,9 +55,8 @@ class AvailChecker(QThread):
count = 0
for link in self.links:
log.info("Processing entry: " + str(link))
data = WebRequest().set_apparat(self.appnumber).get_ppn(link).get_data()
transformer = BibTextTransformer("RDS")
rds = transformer.get_data(data).return_data("rds_availability")
data = self._request.get_ppn(link).get_data()
rds = self._rds_transformer.get_data(data).return_data("rds_availability")
book_id = None
if not rds or not rds.items:

View File

@@ -1,3 +1,4 @@
import re
import sys
import xml.etree.ElementTree as ET
from dataclasses import dataclass, field
@@ -7,6 +8,7 @@ from typing import Dict, Iterable, List, Optional, Tuple
import loguru
import requests
from requests.adapters import HTTPAdapter
from src import LOG_DIR
from src.logic.dataclass import BookData
@@ -556,7 +558,22 @@ class Api:
self.site = site
self.url = url
self.prefix = prefix
pass
# Reuse TCP connections across requests for better performance
self._session = requests.Session()
# Slightly larger connection pool for concurrent calls
adapter = HTTPAdapter(pool_connections=10, pool_maxsize=20)
self._session.mount("http://", adapter)
self._session.mount("https://", adapter)
# Close the HTTP session stored in self._session.
# Best-effort: the attribute lookup and the close() call both run inside the
# try, so any Exception (including a missing _session attribute) is swallowed
# and nothing is raised to the caller.
def close(self):
try:
self._session.close()
except Exception:
# Deliberately ignored: this method is also called from __del__.
pass
# Finalizer: delegates to close() when the object is collected.
def __del__(self):
# Best-effort cleanup; close() as written here catches Exception itself,
# so an ordinary failure while closing does not propagate out of the finalizer.
self.close()
def get(self, query_args: Iterable[str]) -> List[Record]:
# if any query_arg ends with =, remove it
@@ -580,18 +597,18 @@ class Api:
"Accept": "application/xml",
"Accept-Charset": "latin1,utf-8;q=0.7,*;q=0.3",
}
response = requests.get(url, headers=headers)
# Use persistent session and set timeouts to avoid hanging
response = self._session.get(url, headers=headers, timeout=(3.05, 20))
if response.status_code != 200:
raise Exception(f"Error fetching data from SWB: {response.status_code}")
data = response.content
# extract top-level response
response = parse_search_retrieve_response(data)
# extract top-level response (decode to text for the XML parser)
response = parse_search_retrieve_response(response.text)
return response.records
def getBooks(self, query_args: Iterable[str]) -> List[BookData]:
records: List[Record] = self.get(query_args)
print(f"{self.site} found {len(records)} records")
# Avoid printing on hot paths; rely on logger if needed
log.debug(f"{self.site} found {len(records)} records for args={query_args}")
books: List[BookData] = []
# extract title from query_args if present
title = None
@@ -611,7 +628,8 @@ class Api:
return books
# Return a link for `book`. Currently a stub: the only return statement
# yields the empty string and `book` is not used.
# NOTE(review): the `results = self.getBooks()` line sits directly next to its
# replacement and looks like the pre-change side of this diff hunk
# (-611,7 +628,8) -- confirm it is no longer in the file. If it were live, it
# would call getBooks() without the `query_args` argument its signature
# requires, and `results` is never read afterwards.
def getLinkForBook(self, book: BookData) -> str:
results = self.getBooks()
# Not implemented: depends on catalog front-end; return empty string for now
return ""
class SWB(Api):

View File

@@ -32,6 +32,7 @@ class CheckThread(QtCore.QThread):
range(len(self.items)),
unit_scale=True,
)
swb_client = SWB()
for i in tqdm_object:
book: BookData = self.items[i]
author = (
@@ -43,7 +44,7 @@ class CheckThread(QtCore.QThread):
# remove trailing punctuation from title
title = book.title.rstrip(" .:,;!?")
response: list[BookData] = []
response = SWB().getBooks(
response = swb_client.getBooks(
[
"pica.bib=20735",
f"pica.tit={title.split(':')[0].strip()}",
@@ -88,4 +89,6 @@ class ProgressDialog(QDialog):
layout.addWidget(self.start_button)
# Intentional no-op: the body is only `pass`.
def start(self):
# Start logic is managed externally; keep method for UI wiring
pass