# BibAPI/tests/test_sru.py

"""Comprehensive tests for the SRU module."""

import xml.etree.ElementTree as ET
from unittest.mock import MagicMock, patch

import pytest
import requests

from bibapi.schemas.api_types import ALMASchema, DublinCoreSchema, PicaSchema
from bibapi.schemas.bookdata import BookData
from bibapi.sru import (
    Api,
    QueryTransformer,
    book_from_marc,
    find_newer_edition,
    parse_marc_record,
)
from src.bibapi import SWB

# --- Integration test (requires network) ---


def test_swb_schema() -> None:
    """Query the live SWB endpoint for one known title (needs network access)."""
    query = ["TITLE=Java ist auch eine Insel", "LIBRARY=20735"]
    books = SWB().getBooks(query)
    # Exactly one hit is expected, and it must carry the requested title.
    assert len(books) == 1
    assert books[0].title == "Java ist auch eine Insel"


# --- Api class tests ---


class TestApiClass:
    """Tests for the Api class.

    Every test that exercises an ``Api`` instance releases it in a ``finally``
    clause, so a failing assertion cannot leave the instance unclosed.
    """

    @staticmethod
    def _make_api(**overrides):
        """Return an ``Api`` built from the settings shared by these tests.

        Args:
            **overrides: Additional or replacement keyword arguments that are
                forwarded to the ``Api`` constructor.
        """
        settings = {
            "site": "TestSite",
            "url": "https://example.com/sru?query={}",
            "prefix": PicaSchema,
            "library_identifier": "924$b",
        }
        settings.update(overrides)
        return Api(**settings)

    @staticmethod
    def _ok_response(payload):
        """Return a mocked response with status 200 and ``content`` set to *payload*."""
        response = MagicMock()
        response.status_code = 200
        response.content = payload
        return response

    def test_api_initialization(self):
        """Constructor arguments and default tuning values are stored on the instance."""
        api = self._make_api()
        try:
            assert api.site == "TestSite"
            assert api.url == "https://example.com/sru?query={}"
            assert api.prefix == PicaSchema
            assert api.library_identifier == "924$b"
            assert api._rate_limit_seconds == 1.0
            assert api._max_retries == 5
            assert api._overall_timeout_seconds == 30.0
        finally:
            api.close()

    def test_api_with_notsupported_args(self):
        """``notsupported_args`` passed to the constructor is kept on the instance."""
        api = self._make_api(notsupported_args=["YEAR", "PPN"])
        try:
            assert "YEAR" in api.notsupported_args
            assert "PPN" in api.notsupported_args
        finally:
            api.close()

    def test_api_with_replace_dict(self):
        """The ``replace`` mapping passed to the constructor is kept unchanged."""
        api = self._make_api(replace={" ": "+", "&": "%26"})
        try:
            assert api.replace == {" ": "+", "&": "%26"}
        finally:
            api.close()

    @patch.object(requests.Session, "get")
    def test_api_get_success(self, mock_get, sample_sru_response_xml):
        """A mocked 200 response with the sample payload yields one record."""
        mock_get.return_value = self._ok_response(sample_sru_response_xml)

        api = self._make_api()
        try:
            records = api.get(["title=Test"])
            assert len(records) == 1
        finally:
            api.close()

    @patch.object(requests.Session, "get")
    def test_api_get_with_string_query(self, mock_get, sample_sru_response_xml):
        """``get`` accepts a plain string query as well as a list."""
        mock_get.return_value = self._ok_response(sample_sru_response_xml)

        api = self._make_api()
        try:
            records = api.get("title=Test")
            assert len(records) == 1
        finally:
            api.close()

    @patch.object(requests.Session, "get")
    def test_api_get_filters_notsupported_args(self, mock_get, sample_sru_response_xml):
        """A query containing an unsupported argument still returns the record."""
        mock_get.return_value = self._ok_response(sample_sru_response_xml)

        api = self._make_api(notsupported_args=["YEAR"])
        try:
            # YEAR is listed as unsupported and is expected to be filtered out.
            # NOTE(review): only the record count is checked; the outgoing
            # request is not inspected here.
            records = api.get(["title=Test", "YEAR=2023"])
            assert len(records) == 1
        finally:
            api.close()

    @patch.object(requests.Session, "get")
    def test_api_get_http_error_retries(self, mock_get):
        """A persistent HTTP 500 ends in an exception mentioning ``HTTP 500``."""
        failing = MagicMock()
        failing.status_code = 500
        mock_get.return_value = failing

        api = self._make_api()
        try:
            api._max_retries = 2
            api._rate_limit_seconds = 0.01  # Speed up test
            api._overall_timeout_seconds = 5.0

            with pytest.raises(Exception, match="HTTP 500"):
                api.get(["title=Test"])
        finally:
            api.close()

    @patch.object(requests.Session, "get")
    def test_api_get_timeout_returns_empty_bookdata(self, mock_get):
        """When every request raises ``ReadTimeout``, ``getBooks`` returns one empty entry."""
        mock_get.side_effect = requests.exceptions.ReadTimeout("Timeout")

        api = self._make_api()
        try:
            api._max_retries = 1
            api._rate_limit_seconds = 0.01  # Speed up test

            books = api.getBooks(["title=Test"])
            assert len(books) == 1
            # The single entry is an empty BookData placeholder: no PPN set.
            assert books[0].ppn is None
        finally:
            api.close()

    @patch.object(requests.Session, "get")
    def test_api_getbooks_filters_by_title(self, mock_get, sample_sru_response_xml):
        """``getBooks`` keeps only results whose title matches the queried title."""
        mock_get.return_value = self._ok_response(sample_sru_response_xml)

        api = self._make_api()
        try:
            # Title in sample is "Test Book" - filtering for "Test" should match
            books = api.getBooks(["pica.tit=Test"])
            assert len(books) == 1

            # Filtering for "NonExistent" should not match
            books = api.getBooks(["pica.tit=NonExistent"])
            assert len(books) == 0
        finally:
            api.close()

    def test_api_close(self):
        """``close`` can be called twice in a row without raising."""
        api = self._make_api()
        # Should not raise
        api.close()
        api.close()  # Double close should be safe


# --- QueryTransformer tests ---


class TestQueryTransformer:
    """Checks QueryTransformer.transform() for each schema and for malformed input."""

    def test_transform_pica_schema(self):
        """PicaSchema maps TITLE and AUTHOR to ``pica.tit`` and ``pica.per`` terms."""
        transformed = QueryTransformer(
            PicaSchema,
            ["TITLE=Test Book", "AUTHOR=Smith, John"],
        ).transform()

        assert len(transformed) == 2
        # Only the field prefixes are verified, not how the values are formatted.
        title_terms = [term for term in transformed if term.startswith("pica.tit=")]
        assert title_terms
        author_terms = [term for term in transformed if term.startswith("pica.per=")]
        assert author_terms

    def test_transform_alma_schema(self):
        """ALMASchema emits the title as a quoted ``alma.title`` term."""
        transformed = QueryTransformer(
            ALMASchema,
            ["TITLE=Test Book", "AUTHOR=Smith, John"],
        ).transform()

        assert len(transformed) == 2
        # The title value must appear wrapped in double quotes.
        quoted_title_terms = [
            term for term in transformed if 'alma.title="Test Book"' in term
        ]
        assert quoted_title_terms

    def test_transform_dublin_core_schema(self):
        """DublinCoreSchema maps TITLE and AUTHOR to ``dc.title`` and ``dc.creator``."""
        transformed = QueryTransformer(
            DublinCoreSchema,
            ["TITLE=Test Book", "AUTHOR=Smith,John"],
        ).transform()

        assert len(transformed) == 2
        # Only the field prefixes are verified, not how the values are formatted.
        title_terms = [term for term in transformed if term.startswith("dc.title=")]
        assert title_terms
        creator_terms = [term for term in transformed if term.startswith("dc.creator=")]
        assert creator_terms

    def test_transform_string_input(self):
        """A single string argument is accepted in place of a list."""
        transformed = QueryTransformer(PicaSchema, "TITLE=Test Book").transform()
        assert len(transformed) == 1

    def test_transform_drops_empty_values(self):
        """An argument with an empty value (``AUTHOR=``) is left out of the result."""
        transformed = QueryTransformer(
            PicaSchema,
            ["TITLE=Test Book", "AUTHOR="],
        ).transform()
        assert len(transformed) == 1

    def test_transform_invalid_format_ignored(self):
        """An argument without ``=`` is skipped; the valid ones remain."""
        transformed = QueryTransformer(
            PicaSchema,
            ["TITLE=Test Book", "InvalidArg", "AUTHOR=Smith"],
        ).transform()
        assert len(transformed) == 2

    def test_transform_unknown_key_ignored(self):
        """An argument whose key is not recognised is skipped."""
        transformed = QueryTransformer(
            PicaSchema,
            ["TITLE=Test Book", "UNKNOWNKEY=value"],
        ).transform()
        assert len(transformed) == 1


# --- book_from_marc tests ---


class TestBookFromMarc:
    """Checks the fields that book_from_marc extracts from the sample MARC record."""

    @staticmethod
    def _load_book(marc_xml):
        """Parse *marc_xml* and convert it with the ``924$b`` library identifier."""
        parsed = parse_marc_record(ET.fromstring(marc_xml))
        return book_from_marc(parsed, "924$b")

    def test_book_from_marc_basic(self, sample_marc_record_xml):
        """The core bibliographic fields are populated from the sample record."""
        extracted = self._load_book(sample_marc_record_xml)

        assert extracted.ppn == "123456789"
        assert extracted.title == "Test Book Title"
        assert extracted.edition == "2nd edition"
        assert extracted.year == "2023"
        assert extracted.publisher == "Test Publisher"
        assert "9783123456789" in extracted.isbn
        assert extracted.pages == "456 pages"
        assert extracted.media_type == "Band"
        assert extracted.author == "Author, Test"

    def test_book_from_marc_signature(self, sample_marc_record_xml):
        """The signature is extracted from the sample record (Frei 129 holding)."""
        extracted = self._load_book(sample_marc_record_xml)

        # Expected source: the 924 field whose $9 equals "Frei 129", subfield $g.
        assert extracted.signature == "ABC 123"

    def test_book_from_marc_libraries(self, sample_marc_record_xml):
        """The holding library of the sample record appears in ``libraries``."""
        extracted = self._load_book(sample_marc_record_xml)

        assert "DE-Frei129" in extracted.libraries


# --- find_newer_edition tests ---


class TestFindNewerEdition:
    """Tests for the find_newer_edition function.

    Each test compares one reference ``BookData`` against a list of candidate
    ``BookData`` entries and checks what ``find_newer_edition`` returns.
    """

    def test_find_newer_edition_by_year(self):
        """A candidate with a later year is returned; an older one is not."""
        current = BookData(ppn="1", year=2020, edition="1st edition")
        candidates = [
            BookData(ppn="2", year=2023, edition="3rd edition"),
            BookData(ppn="3", year=2019, edition="1st edition"),
        ]
        newer = find_newer_edition(current, candidates)
        assert newer is not None
        assert len(newer) == 1
        # Year is stored as string after post_init
        assert newer[0].year == "2023"

    def test_find_newer_edition_by_edition_number(self):
        """With equal years, a higher edition number counts as newer."""
        current = BookData(ppn="1", year=2020, edition="1st edition")
        candidates = [
            BookData(ppn="2", year=2020, edition="3rd edition"),
        ]
        newer = find_newer_edition(current, candidates)
        assert newer is not None
        assert len(newer) == 1
        assert newer[0].edition_number == 3

    def test_find_newer_edition_none_found(self):
        """``None`` is returned when every candidate is older than the reference."""
        current = BookData(ppn="1", year=2023, edition="5th edition")
        candidates = [
            BookData(ppn="2", year=2020, edition="1st edition"),
            BookData(ppn="3", year=2019, edition="2nd edition"),
        ]
        assert find_newer_edition(current, candidates) is None

    def test_find_newer_edition_empty_list(self):
        """``None`` is returned for an empty candidate list."""
        current = BookData(ppn="1", year=2020)
        assert find_newer_edition(current, []) is None

    def test_find_newer_edition_prefers_matching_signature(self):
        """Among equally new candidates, the one sharing the signature is kept."""
        current = BookData(ppn="1", year=2020, signature="ABC 123")
        candidates = [
            BookData(ppn="2", year=2023, signature="ABC 123"),
            BookData(ppn="3", year=2023, signature="XYZ 789"),
        ]
        newer = find_newer_edition(current, candidates)
        assert newer is not None
        assert len(newer) == 1
        # The candidate with the differing signature (XYZ 789) is filtered
        # out, so the single survivor must be the matching one.
        assert newer[0].signature == "ABC 123"

    def test_find_newer_edition_deduplicates_by_ppn(self):
        """Candidates sharing a PPN collapse to one, keeping the one with a signature."""
        current = BookData(ppn="1", year=2020)
        candidates = [
            BookData(ppn="2", year=2023, signature="ABC"),
            BookData(ppn="2", year=2023),  # Duplicate PPN, no signature
        ]
        newer = find_newer_edition(current, candidates)
        assert newer is not None
        assert len(newer) == 1
        # Should prefer the one with signature
        assert newer[0].signature == "ABC"