390 lines
14 KiB
Python
390 lines
14 KiB
Python
"""Comprehensive tests for the SRU module."""
|
|
|
|
import xml.etree.ElementTree as ET
|
|
from unittest.mock import MagicMock, patch
|
|
|
|
import pytest
|
|
import requests
|
|
|
|
from bibapi.schemas.api_types import ALMASchema, DublinCoreSchema, PicaSchema
|
|
from bibapi.schemas.bookdata import BookData
|
|
from bibapi.sru import (
|
|
Api,
|
|
QueryTransformer,
|
|
book_from_marc,
|
|
find_newer_edition,
|
|
parse_marc_record,
|
|
)
|
|
from src.bibapi import SWB
|
|
|
|
# --- Integration test (requires network) ---
|
|
|
|
|
|
@pytest.mark.integration
def test_swb_schema() -> None:
    """Look up a known title through ``SWB`` and check the single hit.

    Marked ``integration`` because it needs network access.
    """
    query = ["pica.tit=Java ist auch eine Insel", "pica.bib=20735"]
    books = SWB().getBooks(query)

    # Exactly one record is expected for this title/library combination.
    assert len(books) == 1
    only_hit = books[0]
    assert only_hit.title == "Java ist auch eine Insel"
|
|
|
|
|
|
# --- Api class tests ---
|
|
|
|
|
|
class TestApiClass:
    """Tests for the Api class.

    HTTP traffic is stubbed by patching ``requests.Session.get``. Every test
    closes the Api it creates in a ``finally`` clause so that a failing
    assertion does not leave the instance unclosed.
    """

    @staticmethod
    def _make_api(**overrides):
        """Build an Api with the configuration shared by all tests.

        Args:
            **overrides: Extra or replacement keyword arguments passed to ``Api``.

        Returns:
            The freshly constructed ``Api`` instance; the caller must close it.
        """
        params = {
            "site": "TestSite",
            "url": "https://example.com/sru?query={}",
            "prefix": PicaSchema,
            "library_identifier": "924$b",
        }
        params.update(overrides)
        return Api(**params)

    @staticmethod
    def _ok_response(payload):
        """Return a mocked HTTP 200 response whose ``content`` is *payload*."""
        response = MagicMock()
        response.status_code = 200
        response.content = payload
        return response

    def test_api_initialization(self):
        """Test Api class initialization and its default tuning attributes."""
        api = self._make_api()
        try:
            assert api.site == "TestSite"
            assert api.url == "https://example.com/sru?query={}"
            assert api.prefix == PicaSchema
            assert api.library_identifier == "924$b"
            assert api._rate_limit_seconds == 1.0
            assert api._max_retries == 5
            assert api._overall_timeout_seconds == 30.0
        finally:
            api.close()

    def test_api_with_notsupported_args(self):
        """Test Api initialization with unsupported arguments."""
        api = self._make_api(notsupported_args=["YEAR", "PPN"])
        try:
            assert "YEAR" in api.notsupported_args
            assert "PPN" in api.notsupported_args
        finally:
            api.close()

    def test_api_with_replace_dict(self):
        """Test Api initialization with replace dictionary."""
        api = self._make_api(replace={" ": "+", "&": "%26"})
        try:
            assert api.replace == {" ": "+", "&": "%26"}
        finally:
            api.close()

    @patch.object(requests.Session, "get")
    def test_api_get_success(self, mock_get, sample_sru_response_xml):
        """Test successful API get request."""
        mock_get.return_value = self._ok_response(sample_sru_response_xml)

        api = self._make_api()
        try:
            records = api.get(["title=Test"])
            assert len(records) == 1
        finally:
            api.close()

    @patch.object(requests.Session, "get")
    def test_api_get_with_string_query(self, mock_get, sample_sru_response_xml):
        """Test API get with string query (not list)."""
        mock_get.return_value = self._ok_response(sample_sru_response_xml)

        api = self._make_api()
        try:
            records = api.get("title=Test")
            assert len(records) == 1
        finally:
            api.close()

    @patch.object(requests.Session, "get")
    def test_api_get_filters_notsupported_args(self, mock_get, sample_sru_response_xml):
        """Test that a query containing an unsupported arg still succeeds."""
        mock_get.return_value = self._ok_response(sample_sru_response_xml)

        api = self._make_api(notsupported_args=["YEAR"])
        try:
            # YEAR should be filtered out
            records = api.get(["title=Test", "YEAR=2023"])
            assert len(records) == 1
        finally:
            api.close()

    @patch.object(requests.Session, "get")
    def test_api_get_http_error_retries(self, mock_get):
        """Test that a persistent HTTP 500 ends in an "HTTP 500" exception."""
        failing_response = MagicMock()
        failing_response.status_code = 500
        mock_get.return_value = failing_response

        api = self._make_api()
        try:
            api._max_retries = 2
            api._rate_limit_seconds = 0.01  # Speed up test
            api._overall_timeout_seconds = 5.0

            with pytest.raises(Exception, match="HTTP 500"):
                api.get(["title=Test"])
        finally:
            api.close()

    @patch.object(requests.Session, "get")
    def test_api_get_timeout_returns_empty_bookdata(self, mock_get):
        """Test that a read timeout yields a single empty BookData entry."""
        mock_get.side_effect = requests.exceptions.ReadTimeout("Timeout")

        api = self._make_api()
        try:
            api._max_retries = 1
            api._rate_limit_seconds = 0.01

            books = api.getBooks(["title=Test"])
            assert len(books) == 1
            assert books[0].ppn is None  # Empty BookData
        finally:
            api.close()

    @patch.object(requests.Session, "get")
    def test_api_getbooks_filters_by_title(self, mock_get, sample_sru_response_xml):
        """Test that getBooks filters results by title prefix."""
        mock_get.return_value = self._ok_response(sample_sru_response_xml)

        api = self._make_api()
        try:
            # Title in sample is "Test Book" - filtering for "Test" should match
            books = api.getBooks(["pica.tit=Test"])
            assert len(books) == 1

            # Filtering for "NonExistent" should not match
            books = api.getBooks(["pica.tit=NonExistent"])
            assert len(books) == 0
        finally:
            api.close()

    def test_api_close(self):
        """Test Api close method."""
        api = self._make_api()
        # Should not raise
        api.close()
        api.close()  # Double close should be safe
|
|
|
|
|
|
# --- QueryTransformer tests ---
|
|
|
|
|
|
class TestQueryTransformer:
    """Checks for QueryTransformer across schemas and malformed input."""

    @staticmethod
    def _transform(schema, args):
        """Run *args* through a QueryTransformer for *schema* and return the result."""
        return QueryTransformer(schema, args).transform()

    def test_transform_pica_schema(self):
        """PicaSchema: TITLE and AUTHOR each produce one clause."""
        clauses = self._transform(PicaSchema, ["TITLE=Test Book", "AUTHOR=Smith, John"])

        assert len(clauses) == 2
        # One clause per expected index prefix: title first, then author.
        for prefix in ("pica.tit=", "pica.per="):
            assert any(clause.startswith(prefix) for clause in clauses)

    def test_transform_alma_schema(self):
        """ALMASchema: the title value is emitted wrapped in double quotes."""
        clauses = self._transform(ALMASchema, ["TITLE=Test Book", "AUTHOR=Smith, John"])

        assert len(clauses) == 2
        quoted_title = 'alma.title="Test Book"'
        assert any(quoted_title in clause for clause in clauses)

    def test_transform_dublin_core_schema(self):
        """DublinCoreSchema: TITLE and AUTHOR each produce one clause."""
        clauses = self._transform(DublinCoreSchema, ["TITLE=Test Book", "AUTHOR=Smith,John"])

        assert len(clauses) == 2
        # One clause per expected index prefix: title first, then creator.
        for prefix in ("dc.title=", "dc.creator="):
            assert any(clause.startswith(prefix) for clause in clauses)

    def test_transform_string_input(self):
        """A bare string is accepted in place of a list of arguments."""
        clauses = self._transform(PicaSchema, "TITLE=Test Book")
        assert len(clauses) == 1

    def test_transform_drops_empty_values(self):
        """An argument with an empty value ("AUTHOR=") yields no clause."""
        clauses = self._transform(PicaSchema, ["TITLE=Test Book", "AUTHOR="])
        assert len(clauses) == 1

    def test_transform_invalid_format_ignored(self):
        """An argument lacking "=" is skipped; the valid ones remain."""
        clauses = self._transform(PicaSchema, ["TITLE=Test Book", "InvalidArg", "AUTHOR=Smith"])
        assert len(clauses) == 2

    def test_transform_unknown_key_ignored(self):
        """A key the schema does not know is skipped."""
        clauses = self._transform(PicaSchema, ["TITLE=Test Book", "UNKNOWNKEY=value"])
        assert len(clauses) == 1
|
|
|
|
|
|
# --- book_from_marc tests ---
|
|
|
|
|
|
class TestBookFromMarc:
    """Checks for book_from_marc using the shared sample MARC record."""

    @staticmethod
    def _load_book(marc_xml):
        """Parse *marc_xml* and convert it with library identifier "924$b"."""
        element = ET.fromstring(marc_xml)
        parsed = parse_marc_record(element)
        return book_from_marc(parsed, "924$b")

    def test_book_from_marc_basic(self, sample_marc_record_xml):
        """Core bibliographic fields are extracted from the sample record."""
        book = self._load_book(sample_marc_record_xml)

        leading_fields = (
            ("ppn", "123456789"),
            ("title", "Test Book Title"),
            ("edition", "2nd edition"),
            ("year", "2023"),
            ("publisher", "Test Publisher"),
        )
        for attribute, expected in leading_fields:
            assert getattr(book, attribute) == expected

        # isbn is checked by membership rather than equality.
        assert "9783123456789" in book.isbn

        trailing_fields = (
            ("pages", "456 pages"),
            ("media_type", "Band"),
            ("author", "Author, Test"),
        )
        for attribute, expected in trailing_fields:
            assert getattr(book, attribute) == expected

    def test_book_from_marc_signature(self, sample_marc_record_xml):
        """The signature of the sample record is extracted."""
        book = self._load_book(sample_marc_record_xml)

        # Expected source: the 924 field whose $9 is "Frei 129", subfield $g
        # (per the fixture, which is defined outside this file).
        assert book.signature == "ABC 123"

    def test_book_from_marc_libraries(self, sample_marc_record_xml):
        """The holding library identifier appears in ``libraries``."""
        book = self._load_book(sample_marc_record_xml)

        assert "DE-Frei129" in book.libraries
|
|
|
|
|
|
# --- find_newer_edition tests ---
|
|
|
|
|
|
class TestFindNewerEdition:
    """Tests for the find_newer_edition function.

    Each test builds a reference ``BookData`` (``swb``) and a candidate list
    (``dnb``) and checks which candidates are reported as newer.
    """

    def test_find_newer_edition_by_year(self):
        """Test finding newer edition by year."""
        swb = BookData(ppn="1", year=2020, edition="1st edition")
        dnb = [
            BookData(ppn="2", year=2023, edition="3rd edition"),
            BookData(ppn="3", year=2019, edition="1st edition"),
        ]
        result = find_newer_edition(swb, dnb)
        assert result is not None
        assert len(result) == 1
        # Year is stored as string after post_init
        assert result[0].year == "2023"

    def test_find_newer_edition_by_edition_number(self):
        """Test finding newer edition by edition number when the years tie."""
        swb = BookData(ppn="1", year=2020, edition="1st edition")
        dnb = [
            BookData(ppn="2", year=2020, edition="3rd edition"),
        ]
        result = find_newer_edition(swb, dnb)
        assert result is not None
        assert len(result) == 1
        assert result[0].edition_number == 3

    def test_find_newer_edition_none_found(self):
        """Test when no newer edition exists."""
        swb = BookData(ppn="1", year=2023, edition="5th edition")
        dnb = [
            BookData(ppn="2", year=2020, edition="1st edition"),
            BookData(ppn="3", year=2019, edition="2nd edition"),
        ]
        result = find_newer_edition(swb, dnb)
        assert result is None

    def test_find_newer_edition_empty_list(self):
        """Test with empty DNB result list."""
        swb = BookData(ppn="1", year=2020)
        result = find_newer_edition(swb, [])
        assert result is None

    def test_find_newer_edition_prefers_matching_signature(self):
        """Test that matching signature is preferred."""
        swb = BookData(ppn="1", year=2020, signature="ABC 123")
        dnb = [
            BookData(ppn="2", year=2023, signature="ABC 123"),
            BookData(ppn="3", year=2023, signature="XYZ 789"),
        ]
        result = find_newer_edition(swb, dnb)
        assert result is not None
        assert len(result) == 1
        # The candidate with the differing signature ("XYZ 789") is expected
        # to be filtered out, leaving the one that matches the reference.
        assert result[0].signature == "ABC 123"

    def test_find_newer_edition_deduplicates_by_ppn(self):
        """Test that results are deduplicated by PPN."""
        swb = BookData(ppn="1", year=2020)
        dnb = [
            BookData(ppn="2", year=2023, signature="ABC"),
            BookData(ppn="2", year=2023),  # Duplicate PPN, no signature
        ]
        result = find_newer_edition(swb, dnb)
        assert result is not None
        assert len(result) == 1
        # Should prefer the one with signature
        assert result[0].signature == "ABC"
|