commit AI suggested performance enhancements
This commit is contained in:
@@ -1,5 +1,4 @@
|
|||||||
import sys
|
import sys
|
||||||
import time
|
|
||||||
|
|
||||||
# from src.transformers import RDS_AVAIL_DATA
|
# from src.transformers import RDS_AVAIL_DATA
|
||||||
import loguru
|
import loguru
|
||||||
@@ -23,7 +22,11 @@ class AvailChecker(QThread):
|
|||||||
updateProgress = Signal(int, int)
|
updateProgress = Signal(int, int)
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self, links: list = None, appnumber: int = None, parent=None, books=list[dict]
|
self,
|
||||||
|
links: list[str] | None = None,
|
||||||
|
appnumber: int | None = None,
|
||||||
|
parent=None,
|
||||||
|
books: list[dict] | None = None,
|
||||||
):
|
):
|
||||||
if links is None:
|
if links is None:
|
||||||
links = []
|
links = []
|
||||||
@@ -38,11 +41,13 @@ class AvailChecker(QThread):
|
|||||||
)
|
)
|
||||||
self.links = links
|
self.links = links
|
||||||
self.appnumber = appnumber
|
self.appnumber = appnumber
|
||||||
self.books = books
|
self.books = books or []
|
||||||
log.info(
|
log.info(
|
||||||
f"Started worker with appnumber: {self.appnumber} and links: {self.links} and {len(self.books)} books..."
|
f"Started worker with appnumber: {self.appnumber} and links: {self.links} and {len(self.books)} books..."
|
||||||
)
|
)
|
||||||
time.sleep(2)
|
# Pre-create reusable request and transformer to avoid per-item overhead
|
||||||
|
self._request = WebRequest().set_apparat(self.appnumber)
|
||||||
|
self._rds_transformer = BibTextTransformer("RDS")
|
||||||
|
|
||||||
def run(self):
|
def run(self):
|
||||||
self.db = Database()
|
self.db = Database()
|
||||||
@@ -50,9 +55,8 @@ class AvailChecker(QThread):
|
|||||||
count = 0
|
count = 0
|
||||||
for link in self.links:
|
for link in self.links:
|
||||||
log.info("Processing entry: " + str(link))
|
log.info("Processing entry: " + str(link))
|
||||||
data = WebRequest().set_apparat(self.appnumber).get_ppn(link).get_data()
|
data = self._request.get_ppn(link).get_data()
|
||||||
transformer = BibTextTransformer("RDS")
|
rds = self._rds_transformer.get_data(data).return_data("rds_availability")
|
||||||
rds = transformer.get_data(data).return_data("rds_availability")
|
|
||||||
|
|
||||||
book_id = None
|
book_id = None
|
||||||
if not rds or not rds.items:
|
if not rds or not rds.items:
|
||||||
|
|||||||
@@ -1,3 +1,4 @@
|
|||||||
|
import re
|
||||||
import sys
|
import sys
|
||||||
import xml.etree.ElementTree as ET
|
import xml.etree.ElementTree as ET
|
||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass, field
|
||||||
@@ -7,6 +8,7 @@ from typing import Dict, Iterable, List, Optional, Tuple
|
|||||||
|
|
||||||
import loguru
|
import loguru
|
||||||
import requests
|
import requests
|
||||||
|
from requests.adapters import HTTPAdapter
|
||||||
|
|
||||||
from src import LOG_DIR
|
from src import LOG_DIR
|
||||||
from src.logic.dataclass import BookData
|
from src.logic.dataclass import BookData
|
||||||
@@ -556,8 +558,23 @@ class Api:
|
|||||||
self.site = site
|
self.site = site
|
||||||
self.url = url
|
self.url = url
|
||||||
self.prefix = prefix
|
self.prefix = prefix
|
||||||
|
# Reuse TCP connections across requests for better performance
|
||||||
|
self._session = requests.Session()
|
||||||
|
# Slightly larger connection pool for concurrent calls
|
||||||
|
adapter = HTTPAdapter(pool_connections=10, pool_maxsize=20)
|
||||||
|
self._session.mount("http://", adapter)
|
||||||
|
self._session.mount("https://", adapter)
|
||||||
|
|
||||||
|
def close(self):
|
||||||
|
try:
|
||||||
|
self._session.close()
|
||||||
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
def __del__(self):
|
||||||
|
# Best-effort cleanup
|
||||||
|
self.close()
|
||||||
|
|
||||||
def get(self, query_args: Iterable[str]) -> List[Record]:
|
def get(self, query_args: Iterable[str]) -> List[Record]:
|
||||||
# if any query_arg ends with =, remove it
|
# if any query_arg ends with =, remove it
|
||||||
if self.site == "DNB":
|
if self.site == "DNB":
|
||||||
@@ -580,18 +597,18 @@ class Api:
|
|||||||
"Accept": "application/xml",
|
"Accept": "application/xml",
|
||||||
"Accept-Charset": "latin1,utf-8;q=0.7,*;q=0.3",
|
"Accept-Charset": "latin1,utf-8;q=0.7,*;q=0.3",
|
||||||
}
|
}
|
||||||
response = requests.get(url, headers=headers)
|
# Use persistent session and set timeouts to avoid hanging
|
||||||
|
response = self._session.get(url, headers=headers, timeout=(3.05, 20))
|
||||||
if response.status_code != 200:
|
if response.status_code != 200:
|
||||||
raise Exception(f"Error fetching data from SWB: {response.status_code}")
|
raise Exception(f"Error fetching data from SWB: {response.status_code}")
|
||||||
data = response.content
|
# extract top-level response (decode to text for the XML parser)
|
||||||
|
response = parse_search_retrieve_response(response.text)
|
||||||
# extract top-level response
|
|
||||||
response = parse_search_retrieve_response(data)
|
|
||||||
return response.records
|
return response.records
|
||||||
|
|
||||||
def getBooks(self, query_args: Iterable[str]) -> List[BookData]:
|
def getBooks(self, query_args: Iterable[str]) -> List[BookData]:
|
||||||
records: List[Record] = self.get(query_args)
|
records: List[Record] = self.get(query_args)
|
||||||
print(f"{self.site} found {len(records)} records")
|
# Avoid printing on hot paths; rely on logger if needed
|
||||||
|
log.debug(f"{self.site} found {len(records)} records for args={query_args}")
|
||||||
books: List[BookData] = []
|
books: List[BookData] = []
|
||||||
# extract title from query_args if present
|
# extract title from query_args if present
|
||||||
title = None
|
title = None
|
||||||
@@ -611,7 +628,8 @@ class Api:
|
|||||||
return books
|
return books
|
||||||
|
|
||||||
def getLinkForBook(self, book: BookData) -> str:
|
def getLinkForBook(self, book: BookData) -> str:
|
||||||
results = self.getBooks()
|
# Not implemented: depends on catalog front-end; return empty string for now
|
||||||
|
return ""
|
||||||
|
|
||||||
|
|
||||||
class SWB(Api):
|
class SWB(Api):
|
||||||
|
|||||||
@@ -32,6 +32,7 @@ class CheckThread(QtCore.QThread):
|
|||||||
range(len(self.items)),
|
range(len(self.items)),
|
||||||
unit_scale=True,
|
unit_scale=True,
|
||||||
)
|
)
|
||||||
|
swb_client = SWB()
|
||||||
for i in tqdm_object:
|
for i in tqdm_object:
|
||||||
book: BookData = self.items[i]
|
book: BookData = self.items[i]
|
||||||
author = (
|
author = (
|
||||||
@@ -43,7 +44,7 @@ class CheckThread(QtCore.QThread):
|
|||||||
# remove trailing punctuation from title
|
# remove trailing punctuation from title
|
||||||
title = book.title.rstrip(" .:,;!?")
|
title = book.title.rstrip(" .:,;!?")
|
||||||
response: list[BookData] = []
|
response: list[BookData] = []
|
||||||
response = SWB().getBooks(
|
response = swb_client.getBooks(
|
||||||
[
|
[
|
||||||
"pica.bib=20735",
|
"pica.bib=20735",
|
||||||
f"pica.tit={title.split(':')[0].strip()}",
|
f"pica.tit={title.split(':')[0].strip()}",
|
||||||
@@ -88,4 +89,6 @@ class ProgressDialog(QDialog):
|
|||||||
layout.addWidget(self.start_button)
|
layout.addWidget(self.start_button)
|
||||||
|
|
||||||
def start(self):
|
def start(self):
|
||||||
|
# Start logic is managed externally; keep method for UI wiring
|
||||||
|
pass
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user