"""Comprehensive tests for the SRU module.""" import xml.etree.ElementTree as ET from unittest.mock import MagicMock, patch import pytest import requests from bibapi.schemas.api_types import ALMASchema, DublinCoreSchema, PicaSchema from bibapi.schemas.bookdata import BookData from bibapi.sru import ( Api, QueryTransformer, book_from_marc, find_newer_edition, parse_marc_record, ) from src.bibapi import SWB # --- Integration test (requires network) --- def test_swb_schema() -> None: """Integration test that requires network access.""" result = SWB().getBooks(["TITLE=Java ist auch eine Insel", "LIBRARY=20735"]) assert len(result) == 1 assert result[0].title == "Java ist auch eine Insel" # --- Api class tests --- class TestApiClass: """Tests for the Api class.""" def test_api_initialization(self): """Test Api class initialization.""" api = Api( site="TestSite", url="https://example.com/sru?query={}", prefix=PicaSchema, library_identifier="924$b", ) assert api.site == "TestSite" assert api.url == "https://example.com/sru?query={}" assert api.prefix == PicaSchema assert api.library_identifier == "924$b" assert api._rate_limit_seconds == 1.0 assert api._max_retries == 5 assert api._overall_timeout_seconds == 30.0 api.close() def test_api_with_notsupported_args(self): """Test Api initialization with unsupported arguments.""" api = Api( site="TestSite", url="https://example.com/sru?query={}", prefix=PicaSchema, library_identifier="924$b", notsupported_args=["YEAR", "PPN"], ) assert "YEAR" in api.notsupported_args assert "PPN" in api.notsupported_args api.close() def test_api_with_replace_dict(self): """Test Api initialization with replace dictionary.""" api = Api( site="TestSite", url="https://example.com/sru?query={}", prefix=PicaSchema, library_identifier="924$b", replace={" ": "+", "&": "%26"}, ) assert api.replace == {" ": "+", "&": "%26"} api.close() @patch.object(requests.Session, "get") def test_api_get_success(self, mock_get, sample_sru_response_xml): """Test successful API get request.""" mock_response = MagicMock() mock_response.status_code = 200 mock_response.content = sample_sru_response_xml mock_get.return_value = mock_response api = Api( site="TestSite", url="https://example.com/sru?query={}", prefix=PicaSchema, library_identifier="924$b", ) records = api.get(["title=Test"]) assert len(records) == 1 api.close() @patch.object(requests.Session, "get") def test_api_get_with_string_query(self, mock_get, sample_sru_response_xml): """Test API get with string query (not list).""" mock_response = MagicMock() mock_response.status_code = 200 mock_response.content = sample_sru_response_xml mock_get.return_value = mock_response api = Api( site="TestSite", url="https://example.com/sru?query={}", prefix=PicaSchema, library_identifier="924$b", ) records = api.get("title=Test") assert len(records) == 1 api.close() @patch.object(requests.Session, "get") def test_api_get_filters_notsupported_args(self, mock_get, sample_sru_response_xml): """Test that unsupported args are filtered out.""" mock_response = MagicMock() mock_response.status_code = 200 mock_response.content = sample_sru_response_xml mock_get.return_value = mock_response api = Api( site="TestSite", url="https://example.com/sru?query={}", prefix=PicaSchema, library_identifier="924$b", notsupported_args=["YEAR"], ) # YEAR should be filtered out records = api.get(["title=Test", "YEAR=2023"]) assert len(records) == 1 api.close() @patch.object(requests.Session, "get") def test_api_get_http_error_retries(self, mock_get): """Test that API retries on HTTP errors.""" mock_response = MagicMock() mock_response.status_code = 500 mock_get.return_value = mock_response api = Api( site="TestSite", url="https://example.com/sru?query={}", prefix=PicaSchema, library_identifier="924$b", ) api._max_retries = 2 api._rate_limit_seconds = 0.01 # Speed up test api._overall_timeout_seconds = 5.0 with pytest.raises(Exception, match="HTTP 500"): api.get(["title=Test"]) api.close() @patch.object(requests.Session, "get") def test_api_get_timeout_returns_empty_bookdata(self, mock_get): """Test that timeout returns empty BookData list.""" mock_get.side_effect = requests.exceptions.ReadTimeout("Timeout") api = Api( site="TestSite", url="https://example.com/sru?query={}", prefix=PicaSchema, library_identifier="924$b", ) api._max_retries = 1 api._rate_limit_seconds = 0.01 books = api.getBooks(["title=Test"]) assert len(books) == 1 assert books[0].ppn is None # Empty BookData api.close() @patch.object(requests.Session, "get") def test_api_getbooks_filters_by_title(self, mock_get, sample_sru_response_xml): """Test that getBooks filters results by title prefix.""" mock_response = MagicMock() mock_response.status_code = 200 mock_response.content = sample_sru_response_xml mock_get.return_value = mock_response api = Api( site="TestSite", url="https://example.com/sru?query={}", prefix=PicaSchema, library_identifier="924$b", ) # Title in sample is "Test Book" - filtering for "Test" should match books = api.getBooks(["pica.tit=Test"]) assert len(books) == 1 # Filtering for "NonExistent" should not match books = api.getBooks(["pica.tit=NonExistent"]) assert len(books) == 0 api.close() def test_api_close(self): """Test Api close method.""" api = Api( site="TestSite", url="https://example.com/sru?query={}", prefix=PicaSchema, library_identifier="924$b", ) # Should not raise api.close() api.close() # Double close should be safe # --- QueryTransformer tests --- class TestQueryTransformer: """Tests for the QueryTransformer class.""" def test_transform_pica_schema(self): """Test transformation with PicaSchema.""" args = ["TITLE=Test Book", "AUTHOR=Smith, John"] transformer = QueryTransformer(PicaSchema, args) result = transformer.transform() assert len(result) == 2 # Check that pica.tit is in the result assert any(r.startswith("pica.tit=") for r in result) # Author should have comma without space assert any(r.startswith("pica.per=") for r in result) def test_transform_alma_schema(self): """Test transformation with ALMASchema.""" args = ["TITLE=Test Book", "AUTHOR=Smith, John"] transformer = QueryTransformer(ALMASchema, args) result = transformer.transform() assert len(result) == 2 # Title should be enclosed in quotes assert any('alma.title="Test Book"' in r for r in result) def test_transform_dublin_core_schema(self): """Test transformation with DublinCoreSchema.""" args = ["TITLE=Test Book", "AUTHOR=Smith,John"] transformer = QueryTransformer(DublinCoreSchema, args) result = transformer.transform() assert len(result) == 2 # Check that dc.title is in the result assert any(r.startswith("dc.title=") for r in result) # Author should have space after comma assert any(r.startswith("dc.creator=") for r in result) def test_transform_string_input(self): """Test transformation with string input instead of list.""" transformer = QueryTransformer(PicaSchema, "TITLE=Test Book") result = transformer.transform() assert len(result) == 1 def test_transform_drops_empty_values(self): """Test that empty values are dropped when drop_empty is True.""" args = ["TITLE=Test Book", "AUTHOR="] transformer = QueryTransformer(PicaSchema, args) result = transformer.transform() assert len(result) == 1 def test_transform_invalid_format_ignored(self): """Test that arguments without = are ignored.""" args = ["TITLE=Test Book", "InvalidArg", "AUTHOR=Smith"] transformer = QueryTransformer(PicaSchema, args) result = transformer.transform() assert len(result) == 2 def test_transform_unknown_key_ignored(self): """Test that unknown keys are ignored.""" args = ["TITLE=Test Book", "UNKNOWNKEY=value"] transformer = QueryTransformer(PicaSchema, args) result = transformer.transform() assert len(result) == 1 # --- book_from_marc tests --- class TestBookFromMarc: """Tests for the book_from_marc function.""" def test_book_from_marc_basic(self, sample_marc_record_xml): """Test basic book extraction from MARC record.""" root = ET.fromstring(sample_marc_record_xml) record = parse_marc_record(root) book = book_from_marc(record, "924$b") assert book.ppn == "123456789" assert book.title == "Test Book Title" assert book.edition == "2nd edition" assert book.year == "2023" assert book.publisher == "Test Publisher" assert "9783123456789" in book.isbn assert book.pages == "456 pages" assert book.media_type == "Band" assert book.author == "Author, Test" def test_book_from_marc_signature(self, sample_marc_record_xml): """Test signature extraction from MARC record with Frei 129.""" root = ET.fromstring(sample_marc_record_xml) record = parse_marc_record(root) book = book_from_marc(record, "924$b") # Signature should be from 924 where $9 == "Frei 129" -> $g assert book.signature == "ABC 123" def test_book_from_marc_libraries(self, sample_marc_record_xml): """Test library extraction from MARC record.""" root = ET.fromstring(sample_marc_record_xml) record = parse_marc_record(root) book = book_from_marc(record, "924$b") assert "DE-Frei129" in book.libraries # --- find_newer_edition tests --- class TestFindNewerEdition: """Tests for the find_newer_edition function.""" def test_find_newer_edition_by_year(self): """Test finding newer edition by year.""" swb = BookData(ppn="1", year=2020, edition="1st edition") dnb = [ BookData(ppn="2", year=2023, edition="3rd edition"), BookData(ppn="3", year=2019, edition="1st edition"), ] result = find_newer_edition(swb, dnb) assert result is not None assert len(result) == 1 # Year is stored as string after post_init assert result[0].year == "2023" def test_find_newer_edition_by_edition_number(self): """Test finding newer edition by edition number.""" swb = BookData(ppn="1", year=2020, edition="1st edition") dnb = [ BookData(ppn="2", year=2020, edition="3rd edition"), ] result = find_newer_edition(swb, dnb) assert result is not None assert len(result) == 1 assert result[0].edition_number == 3 def test_find_newer_edition_none_found(self): """Test when no newer edition exists.""" swb = BookData(ppn="1", year=2023, edition="5th edition") dnb = [ BookData(ppn="2", year=2020, edition="1st edition"), BookData(ppn="3", year=2019, edition="2nd edition"), ] result = find_newer_edition(swb, dnb) assert result is None def test_find_newer_edition_empty_list(self): """Test with empty DNB result list.""" swb = BookData(ppn="1", year=2020) result = find_newer_edition(swb, []) assert result is None def test_find_newer_edition_prefers_matching_signature(self): """Test that matching signature is preferred.""" swb = BookData(ppn="1", year=2020, signature="ABC 123") dnb = [ BookData(ppn="2", year=2023, signature="ABC 123"), BookData(ppn="3", year=2023, signature="XYZ 789"), ] result = find_newer_edition(swb, dnb) assert result is not None assert len(result) == 1 # Should prefer matching signature (first one) but XYZ 789 differs # so it's filtered out. Result should be the matching one. def test_find_newer_edition_deduplicates_by_ppn(self): """Test that results are deduplicated by PPN.""" swb = BookData(ppn="1", year=2020) dnb = [ BookData(ppn="2", year=2023, signature="ABC"), BookData(ppn="2", year=2023), # Duplicate PPN, no signature ] result = find_newer_edition(swb, dnb) assert result is not None assert len(result) == 1 # Should prefer the one with signature assert result[0].signature == "ABC"