commit AI suggested performance enhancements
This commit is contained in:
@@ -1,5 +1,4 @@
|
||||
import sys
|
||||
import time
|
||||
|
||||
# from src.transformers import RDS_AVAIL_DATA
|
||||
import loguru
|
||||
@@ -23,7 +22,11 @@ class AvailChecker(QThread):
|
||||
updateProgress = Signal(int, int)
|
||||
|
||||
def __init__(
|
||||
self, links: list = None, appnumber: int = None, parent=None, books=list[dict]
|
||||
self,
|
||||
links: list[str] | None = None,
|
||||
appnumber: int | None = None,
|
||||
parent=None,
|
||||
books: list[dict] | None = None,
|
||||
):
|
||||
if links is None:
|
||||
links = []
|
||||
@@ -38,11 +41,13 @@ class AvailChecker(QThread):
|
||||
)
|
||||
self.links = links
|
||||
self.appnumber = appnumber
|
||||
self.books = books
|
||||
self.books = books or []
|
||||
log.info(
|
||||
f"Started worker with appnumber: {self.appnumber} and links: {self.links} and {len(self.books)} books..."
|
||||
)
|
||||
time.sleep(2)
|
||||
# Pre-create reusable request and transformer to avoid per-item overhead
|
||||
self._request = WebRequest().set_apparat(self.appnumber)
|
||||
self._rds_transformer = BibTextTransformer("RDS")
|
||||
|
||||
def run(self):
|
||||
self.db = Database()
|
||||
@@ -50,9 +55,8 @@ class AvailChecker(QThread):
|
||||
count = 0
|
||||
for link in self.links:
|
||||
log.info("Processing entry: " + str(link))
|
||||
data = WebRequest().set_apparat(self.appnumber).get_ppn(link).get_data()
|
||||
transformer = BibTextTransformer("RDS")
|
||||
rds = transformer.get_data(data).return_data("rds_availability")
|
||||
data = self._request.get_ppn(link).get_data()
|
||||
rds = self._rds_transformer.get_data(data).return_data("rds_availability")
|
||||
|
||||
book_id = None
|
||||
if not rds or not rds.items:
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import re
|
||||
import sys
|
||||
import xml.etree.ElementTree as ET
|
||||
from dataclasses import dataclass, field
|
||||
@@ -7,6 +8,7 @@ from typing import Dict, Iterable, List, Optional, Tuple
|
||||
|
||||
import loguru
|
||||
import requests
|
||||
from requests.adapters import HTTPAdapter
|
||||
|
||||
from src import LOG_DIR
|
||||
from src.logic.dataclass import BookData
|
||||
@@ -556,7 +558,22 @@ class Api:
|
||||
self.site = site
|
||||
self.url = url
|
||||
self.prefix = prefix
|
||||
pass
|
||||
# Reuse TCP connections across requests for better performance
|
||||
self._session = requests.Session()
|
||||
# Slightly larger connection pool for concurrent calls
|
||||
adapter = HTTPAdapter(pool_connections=10, pool_maxsize=20)
|
||||
self._session.mount("http://", adapter)
|
||||
self._session.mount("https://", adapter)
|
||||
|
||||
def close(self):
    """Close the pooled HTTP session held in ``self._session``.

    Cleanup is best-effort: an instance without a ``_session`` attribute
    is treated as a no-op, and any error raised while closing the session
    is suppressed. Always returns ``None``.
    """
    session = getattr(self, "_session", None)
    if session is None:
        # Nothing to release; guard explicitly instead of relying on the
        # broad except below to hide an AttributeError.
        return
    try:
        session.close()
    except Exception:
        # Deliberately swallowed: close() is also invoked from __del__,
        # where there is no caller that could handle the error.
        pass


def __del__(self):
    """Best-effort cleanup: delegate to ``close()`` on finalisation."""
    self.close()
|
||||
|
||||
def get(self, query_args: Iterable[str]) -> List[Record]:
|
||||
# if any query_arg ends with =, remove it
|
||||
@@ -580,18 +597,18 @@ class Api:
|
||||
"Accept": "application/xml",
|
||||
"Accept-Charset": "latin1,utf-8;q=0.7,*;q=0.3",
|
||||
}
|
||||
response = requests.get(url, headers=headers)
|
||||
# Use persistent session and set timeouts to avoid hanging
|
||||
response = self._session.get(url, headers=headers, timeout=(3.05, 20))
|
||||
if response.status_code != 200:
|
||||
raise Exception(f"Error fetching data from SWB: {response.status_code}")
|
||||
data = response.content
|
||||
|
||||
# extract top-level response
|
||||
response = parse_search_retrieve_response(data)
|
||||
# extract top-level response (decode to text for the XML parser)
|
||||
response = parse_search_retrieve_response(response.text)
|
||||
return response.records
|
||||
|
||||
def getBooks(self, query_args: Iterable[str]) -> List[BookData]:
|
||||
records: List[Record] = self.get(query_args)
|
||||
print(f"{self.site} found {len(records)} records")
|
||||
# Avoid printing on hot paths; rely on logger if needed
|
||||
log.debug(f"{self.site} found {len(records)} records for args={query_args}")
|
||||
books: List[BookData] = []
|
||||
# extract title from query_args if present
|
||||
title = None
|
||||
@@ -611,7 +628,8 @@ class Api:
|
||||
return books
|
||||
|
||||
def getLinkForBook(self, book: "BookData") -> str:
    """Return the catalog link for ``book``.

    Not implemented: depends on catalog front-end; returns an empty
    string for now. ``book`` is currently unused and no lookup is made.
    """
    return ""
|
||||
|
||||
|
||||
class SWB(Api):
|
||||
|
||||
@@ -32,6 +32,7 @@ class CheckThread(QtCore.QThread):
|
||||
range(len(self.items)),
|
||||
unit_scale=True,
|
||||
)
|
||||
swb_client = SWB()
|
||||
for i in tqdm_object:
|
||||
book: BookData = self.items[i]
|
||||
author = (
|
||||
@@ -43,7 +44,7 @@ class CheckThread(QtCore.QThread):
|
||||
# remove trailing punctuation from title
|
||||
title = book.title.rstrip(" .:,;!?")
|
||||
response: list[BookData] = []
|
||||
response = SWB().getBooks(
|
||||
response = swb_client.getBooks(
|
||||
[
|
||||
"pica.bib=20735",
|
||||
f"pica.tit={title.split(':')[0].strip()}",
|
||||
@@ -88,4 +89,6 @@ class ProgressDialog(QDialog):
|
||||
layout.addWidget(self.start_button)
|
||||
|
||||
def start(self):
    """No-op hook kept for UI wiring.

    Start logic is managed externally, so this method does nothing and
    returns ``None``.
    """
    # Start logic is managed externally; keep method for UI wiring
    pass
|
||||
|
||||
|
||||
Reference in New Issue
Block a user