Files
BibAPI/tests/test_sru.py

389 lines
14 KiB
Python

"""Comprehensive tests for the SRU module."""
import xml.etree.ElementTree as ET
from unittest.mock import MagicMock, patch
import pytest
import requests
from bibapi.schemas.api_types import ALMASchema, DublinCoreSchema, PicaSchema
from bibapi.schemas.bookdata import BookData
from bibapi.sru import (
Api,
QueryTransformer,
book_from_marc,
find_newer_edition,
parse_marc_record,
)
from src.bibapi import SWB
# --- Integration test (requires network) ---
def test_swb_schema() -> None:
    """Query the live SWB endpoint for a known title (needs network access).

    The lookup is restricted with ``LIBRARY=20735`` and is expected to yield
    exactly one book whose title equals the requested one.
    """
    query = ["TITLE=Java ist auch eine Insel", "LIBRARY=20735"]
    books = SWB().getBooks(query)
    assert len(books) == 1
    first_hit = books[0]
    assert first_hit.title == "Java ist auch eine Insel"
# --- Api class tests ---
class TestApiClass:
    """Tests for the Api class.

    Tests that exercise ``get``/``getBooks`` patch ``requests.Session.get`` so
    no real HTTP request is made. Api instances are obtained through the
    ``make_api`` fixture, which closes them during teardown; this guarantees
    ``close()`` runs even when an assertion fails part-way through a test.
    """

    # Constructor arguments shared by all tests. Individual tests add or
    # override entries via ``make_api(**overrides)``.
    _DEFAULT_KWARGS = {
        "site": "TestSite",
        "url": "https://example.com/sru?query={}",
        "prefix": PicaSchema,
        "library_identifier": "924$b",
    }

    @pytest.fixture
    def make_api(self):
        """Yield a factory for Api instances that are closed on teardown.

        The factory is called inside the test body (i.e. while any
        ``patch.object`` decorator is already active), mirroring the point at
        which the tests constructed their Api before.
        """
        created = []

        def _factory(**overrides):
            api = Api(**{**self._DEFAULT_KWARGS, **overrides})
            created.append(api)
            return api

        yield _factory
        # Runs regardless of the test outcome, so no instance is left open.
        for api in created:
            api.close()

    @staticmethod
    def _ok_response(content):
        """Build a mock HTTP response with status 200 and the given content."""
        response = MagicMock()
        response.status_code = 200
        response.content = content
        return response

    def test_api_initialization(self, make_api):
        """Constructor arguments are stored and the default limits are set."""
        api = make_api()
        assert api.site == "TestSite"
        assert api.url == "https://example.com/sru?query={}"
        assert api.prefix == PicaSchema
        assert api.library_identifier == "924$b"
        assert api._rate_limit_seconds == 1.0
        assert api._max_retries == 5
        assert api._overall_timeout_seconds == 30.0

    def test_api_with_notsupported_args(self, make_api):
        """Unsupported argument names passed at construction are kept."""
        api = make_api(notsupported_args=["YEAR", "PPN"])
        assert "YEAR" in api.notsupported_args
        assert "PPN" in api.notsupported_args

    def test_api_with_replace_dict(self, make_api):
        """The replace dictionary passed at construction is kept as given."""
        api = make_api(replace={" ": "+", "&": "%26"})
        assert api.replace == {" ": "+", "&": "%26"}

    @patch.object(requests.Session, "get")
    def test_api_get_success(self, mock_get, make_api, sample_sru_response_xml):
        """A 200 response carrying the sample SRU XML yields one record."""
        mock_get.return_value = self._ok_response(sample_sru_response_xml)
        api = make_api()
        records = api.get(["title=Test"])
        assert len(records) == 1

    @patch.object(requests.Session, "get")
    def test_api_get_with_string_query(
        self, mock_get, make_api, sample_sru_response_xml
    ):
        """``get`` accepts a single query string instead of a list."""
        mock_get.return_value = self._ok_response(sample_sru_response_xml)
        api = make_api()
        records = api.get("title=Test")
        assert len(records) == 1

    @patch.object(requests.Session, "get")
    def test_api_get_filters_notsupported_args(
        self, mock_get, make_api, sample_sru_response_xml
    ):
        """A query containing an unsupported argument still returns the record."""
        mock_get.return_value = self._ok_response(sample_sru_response_xml)
        api = make_api(notsupported_args=["YEAR"])
        # YEAR is declared unsupported and should be filtered out of the query.
        records = api.get(["title=Test", "YEAR=2023"])
        assert len(records) == 1

    @patch.object(requests.Session, "get")
    def test_api_get_http_error_retries(self, mock_get, make_api):
        """Persistent HTTP 500 responses end in an exception naming the status."""
        failing_response = MagicMock()
        failing_response.status_code = 500
        mock_get.return_value = failing_response
        api = make_api()
        # Shrink retry count and delays so the test finishes quickly.
        api._max_retries = 2
        api._rate_limit_seconds = 0.01
        api._overall_timeout_seconds = 5.0
        with pytest.raises(Exception, match="HTTP 500"):
            api.get(["title=Test"])

    @patch.object(requests.Session, "get")
    def test_api_get_timeout_returns_empty_bookdata(self, mock_get, make_api):
        """A ReadTimeout makes ``getBooks`` return one BookData without a PPN."""
        mock_get.side_effect = requests.exceptions.ReadTimeout("Timeout")
        api = make_api()
        api._max_retries = 1
        api._rate_limit_seconds = 0.01
        books = api.getBooks(["title=Test"])
        assert len(books) == 1
        assert books[0].ppn is None  # Empty BookData

    @patch.object(requests.Session, "get")
    def test_api_getbooks_filters_by_title(
        self, mock_get, make_api, sample_sru_response_xml
    ):
        """``getBooks`` keeps only results whose title matches the query."""
        mock_get.return_value = self._ok_response(sample_sru_response_xml)
        api = make_api()
        # Title in sample is "Test Book" - filtering for "Test" should match
        books = api.getBooks(["pica.tit=Test"])
        assert len(books) == 1
        # Filtering for "NonExistent" should not match
        books = api.getBooks(["pica.tit=NonExistent"])
        assert len(books) == 0

    def test_api_close(self):
        """Calling ``close`` twice in a row must not raise."""
        # Built directly (not via make_api) so exactly two close() calls are
        # made, which is the behaviour under test.
        api = Api(**self._DEFAULT_KWARGS)
        api.close()
        api.close()  # Double close should be safe
# --- QueryTransformer tests ---
class TestQueryTransformer:
    """Tests for the QueryTransformer class."""

    def test_transform_pica_schema(self):
        """PicaSchema: TITLE/AUTHOR produce a pica.tit and a pica.per term."""
        query = ["TITLE=Test Book", "AUTHOR=Smith, John"]
        terms = QueryTransformer(PicaSchema, query).transform()
        assert len(terms) == 2
        # Only the key prefixes are verified; the formatting of the values
        # (e.g. how the author's comma is normalised) is not asserted here.
        assert any(term.startswith("pica.tit=") for term in terms)
        assert any(term.startswith("pica.per=") for term in terms)

    def test_transform_alma_schema(self):
        """ALMASchema: the title value is emitted wrapped in double quotes."""
        query = ["TITLE=Test Book", "AUTHOR=Smith, John"]
        terms = QueryTransformer(ALMASchema, query).transform()
        assert len(terms) == 2
        assert any('alma.title="Test Book"' in term for term in terms)

    def test_transform_dublin_core_schema(self):
        """DublinCoreSchema: TITLE/AUTHOR produce dc.title and dc.creator terms."""
        query = ["TITLE=Test Book", "AUTHOR=Smith,John"]
        terms = QueryTransformer(DublinCoreSchema, query).transform()
        assert len(terms) == 2
        # Only the key prefixes are verified; the spacing after the author's
        # comma is not asserted here.
        assert any(term.startswith("dc.title=") for term in terms)
        assert any(term.startswith("dc.creator=") for term in terms)

    def test_transform_string_input(self):
        """A single query string (not a list) is accepted and yields one term."""
        terms = QueryTransformer(PicaSchema, "TITLE=Test Book").transform()
        assert len(terms) == 1

    def test_transform_drops_empty_values(self):
        """An argument with an empty value (``AUTHOR=``) is not emitted.

        Presumably this relies on the transformer's default ``drop_empty``
        setting; the test does not pass that option explicitly.
        """
        query = ["TITLE=Test Book", "AUTHOR="]
        terms = QueryTransformer(PicaSchema, query).transform()
        assert len(terms) == 1

    def test_transform_invalid_format_ignored(self):
        """An argument lacking ``=`` is skipped; the valid ones remain."""
        query = ["TITLE=Test Book", "InvalidArg", "AUTHOR=Smith"]
        terms = QueryTransformer(PicaSchema, query).transform()
        assert len(terms) == 2

    def test_transform_unknown_key_ignored(self):
        """An argument whose key the schema does not know is skipped."""
        query = ["TITLE=Test Book", "UNKNOWNKEY=value"]
        terms = QueryTransformer(PicaSchema, query).transform()
        assert len(terms) == 1
# --- book_from_marc tests ---
class TestBookFromMarc:
    """Tests for the book_from_marc function."""

    @staticmethod
    def _book_from_xml(marc_xml):
        """Parse the MARC XML and convert it using the "924$b" identifier."""
        marc_record = parse_marc_record(ET.fromstring(marc_xml))
        return book_from_marc(marc_record, "924$b")

    def test_book_from_marc_basic(self, sample_marc_record_xml):
        """The core bibliographic fields are extracted from the sample record."""
        extracted = self._book_from_xml(sample_marc_record_xml)
        assert extracted.ppn == "123456789"
        assert extracted.title == "Test Book Title"
        assert extracted.edition == "2nd edition"
        assert extracted.year == "2023"
        assert extracted.publisher == "Test Publisher"
        assert "9783123456789" in extracted.isbn
        assert extracted.pages == "456 pages"
        assert extracted.media_type == "Band"
        assert extracted.author == "Author, Test"

    def test_book_from_marc_signature(self, sample_marc_record_xml):
        """The signature is taken from the record's Frei 129 holding."""
        extracted = self._book_from_xml(sample_marc_record_xml)
        # Expected source: field 924 where $9 == "Frei 129", subfield $g.
        assert extracted.signature == "ABC 123"

    def test_book_from_marc_libraries(self, sample_marc_record_xml):
        """The holding library's identifier appears in ``libraries``."""
        extracted = self._book_from_xml(sample_marc_record_xml)
        assert "DE-Frei129" in extracted.libraries
# --- find_newer_edition tests ---
class TestFindNewerEdition:
    """Tests for the find_newer_edition function.

    In every test ``swb`` is the reference BookData and ``dnb`` the list of
    candidate BookData objects that is searched for newer editions.
    """

    def test_find_newer_edition_by_year(self):
        """A candidate with a later year is returned; an older one is not."""
        swb = BookData(ppn="1", year=2020, edition="1st edition")
        dnb = [
            BookData(ppn="2", year=2023, edition="3rd edition"),
            BookData(ppn="3", year=2019, edition="1st edition"),
        ]
        result = find_newer_edition(swb, dnb)
        assert result is not None
        assert len(result) == 1
        # Year is stored as string after post_init
        assert result[0].year == "2023"

    def test_find_newer_edition_by_edition_number(self):
        """With equal years, a higher edition number counts as newer."""
        swb = BookData(ppn="1", year=2020, edition="1st edition")
        dnb = [
            BookData(ppn="2", year=2020, edition="3rd edition"),
        ]
        result = find_newer_edition(swb, dnb)
        assert result is not None
        assert len(result) == 1
        assert result[0].edition_number == 3

    def test_find_newer_edition_none_found(self):
        """None is returned when every candidate is older than the reference."""
        swb = BookData(ppn="1", year=2023, edition="5th edition")
        dnb = [
            BookData(ppn="2", year=2020, edition="1st edition"),
            BookData(ppn="3", year=2019, edition="2nd edition"),
        ]
        result = find_newer_edition(swb, dnb)
        assert result is None

    def test_find_newer_edition_empty_list(self):
        """None is returned for an empty candidate list."""
        swb = BookData(ppn="1", year=2020)
        result = find_newer_edition(swb, [])
        assert result is None

    def test_find_newer_edition_prefers_matching_signature(self):
        """Among equally new candidates, the one sharing the signature wins."""
        swb = BookData(ppn="1", year=2020, signature="ABC 123")
        dnb = [
            BookData(ppn="2", year=2023, signature="ABC 123"),
            BookData(ppn="3", year=2023, signature="XYZ 789"),
        ]
        result = find_newer_edition(swb, dnb)
        assert result is not None
        assert len(result) == 1
        # Both candidates are newer, so the single survivor must be the one
        # whose signature matches the reference; the "XYZ 789" record is
        # expected to be filtered out.
        assert result[0].signature == "ABC 123"

    def test_find_newer_edition_deduplicates_by_ppn(self):
        """Candidates sharing a PPN collapse into one result."""
        swb = BookData(ppn="1", year=2020)
        dnb = [
            BookData(ppn="2", year=2023, signature="ABC"),
            BookData(ppn="2", year=2023),  # Duplicate PPN, no signature
        ]
        result = find_newer_edition(swb, dnb)
        assert result is not None
        assert len(result) == 1
        # Should prefer the one with signature
        assert result[0].signature == "ABC"