"""Tests for MARCXML parsing functions in sru.py."""
import xml.etree.ElementTree as ET
import pytest
from bibapi.schemas.marcxml import (
DataField,
SubField,
)
from bibapi.sru import (
_smart_join_title,
_text,
controlfield_value,
datafield_value,
datafields_value,
find_datafields_with_subfields,
first_subfield_value,
first_subfield_value_from_fields,
iter_datafields,
parse_marc_record,
parse_search_retrieve_response,
subfield_values,
subfield_values_from_fields,
)
# --- Fixtures for sample XML data ---
@pytest.fixture
def minimal_marc_xml() -> str:
    """Minimal MARC record XML string.

    Contains a leader, two control fields (001, 005) and a single 245
    data field (ind1="1", ind2="0") carrying subfields $a and $b.

    NOTE(review): the element markup was reconstructed from the assertions
    in this module; the MARC21 "slim" namespace is assumed to be what
    ``parse_marc_record`` expects -- confirm against ``bibapi.sru``.
    """
    return """
    <marc:record xmlns:marc="http://www.loc.gov/MARC21/slim">
        <marc:leader>00000nam a22000001i 4500</marc:leader>
        <marc:controlfield tag="001">PPN12345</marc:controlfield>
        <marc:controlfield tag="005">20230101120000.0</marc:controlfield>
        <marc:datafield tag="245" ind1="1" ind2="0">
            <marc:subfield code="a">Test Title</marc:subfield>
            <marc:subfield code="b">A Subtitle</marc:subfield>
        </marc:datafield>
    </marc:record>
    """
@pytest.fixture
def full_marc_xml() -> str:
    """More complete MARC record for testing.

    Values pinned by the tests in this module: three control fields with
    001 = "PPN98765", two 020 fields with $a ISBNs, a 041 field with two
    $a subfields ("ger", "eng"), 245 $a, a 264 field with ind2="1" and
    $c "2023", at least one 700 field, and a 924 field whose $9 is
    "Frei 129".

    NOTE(review): the element markup was reconstructed from those
    assertions. The MARC21 "slim" namespace, the remaining indicators, and
    the tags/codes that no test pins (250, 300, 338, 924 $g / $b) are
    assumptions -- confirm against ``bibapi.sru`` and the original fixture.
    """
    return """
    <marc:record xmlns:marc="http://www.loc.gov/MARC21/slim">
        <marc:leader>00000nam a22000001i 4500</marc:leader>
        <marc:controlfield tag="001">PPN98765</marc:controlfield>
        <marc:controlfield tag="005">20231215150000.0</marc:controlfield>
        <marc:controlfield tag="008">230101s2023 gw 000 0 ger d</marc:controlfield>
        <marc:datafield tag="020" ind1=" " ind2=" ">
            <marc:subfield code="a">9783123456789</marc:subfield>
        </marc:datafield>
        <marc:datafield tag="020" ind1=" " ind2=" ">
            <marc:subfield code="a">9783987654321</marc:subfield>
        </marc:datafield>
        <marc:datafield tag="041" ind1=" " ind2=" ">
            <marc:subfield code="a">ger</marc:subfield>
            <marc:subfield code="a">eng</marc:subfield>
        </marc:datafield>
        <marc:datafield tag="245" ind1="1" ind2="0">
            <marc:subfield code="a">Comprehensive Test Book</marc:subfield>
            <marc:subfield code="b">With Many Details</marc:subfield>
            <marc:subfield code="c">by Author Name</marc:subfield>
        </marc:datafield>
        <marc:datafield tag="250" ind1=" " ind2=" ">
            <marc:subfield code="a">3rd edition</marc:subfield>
        </marc:datafield>
        <marc:datafield tag="264" ind1=" " ind2="1">
            <marc:subfield code="a">Berlin</marc:subfield>
            <marc:subfield code="b">Test Publisher</marc:subfield>
            <marc:subfield code="c">2023</marc:subfield>
        </marc:datafield>
        <marc:datafield tag="300" ind1=" " ind2=" ">
            <marc:subfield code="a">456 pages</marc:subfield>
        </marc:datafield>
        <marc:datafield tag="338" ind1=" " ind2=" ">
            <marc:subfield code="a">Band</marc:subfield>
        </marc:datafield>
        <marc:datafield tag="700" ind1="1" ind2=" ">
            <marc:subfield code="a">Author, First</marc:subfield>
        </marc:datafield>
        <marc:datafield tag="700" ind1="1" ind2=" ">
            <marc:subfield code="a">Author, Second</marc:subfield>
        </marc:datafield>
        <marc:datafield tag="924" ind1=" " ind2=" ">
            <marc:subfield code="9">Frei 129</marc:subfield>
            <marc:subfield code="g">ABC 123</marc:subfield>
            <marc:subfield code="b">DE-Frei129</marc:subfield>
        </marc:datafield>
    </marc:record>
    """
@pytest.fixture
def sru_response_xml() -> bytes:
    """Complete SRU searchRetrieveResponse XML.

    Holds two MARC records (001 = "PPN001" at position 1 and "PPN002" at
    position 2) plus an echoed request with version "1.1", query
    "pica.tit=Test", maximumRecords 100 and recordSchema "marcxml".

    NOTE(review): the element markup was reconstructed from the assertions
    in this module. The SRU (``zs``) and MARC21 slim (``marc``) namespaces
    and the 245 tag/indicators used for the titles are assumptions --
    confirm against ``bibapi.sru``.
    """
    return b"""
    <zs:searchRetrieveResponse
        xmlns:zs="http://www.loc.gov/zing/srw/"
        xmlns:marc="http://www.loc.gov/MARC21/slim">
        <zs:version>1.1</zs:version>
        <zs:numberOfRecords>2</zs:numberOfRecords>
        <zs:records>
            <zs:record>
                <zs:recordSchema>marcxml</zs:recordSchema>
                <zs:recordPacking>xml</zs:recordPacking>
                <zs:recordData>
                    <marc:record>
                        <marc:leader>00000nam a22</marc:leader>
                        <marc:controlfield tag="001">PPN001</marc:controlfield>
                        <marc:datafield tag="245" ind1="1" ind2="0">
                            <marc:subfield code="a">First Book</marc:subfield>
                        </marc:datafield>
                    </marc:record>
                </zs:recordData>
                <zs:recordPosition>1</zs:recordPosition>
            </zs:record>
            <zs:record>
                <zs:recordSchema>marcxml</zs:recordSchema>
                <zs:recordPacking>xml</zs:recordPacking>
                <zs:recordData>
                    <marc:record>
                        <marc:leader>00000nam a22</marc:leader>
                        <marc:controlfield tag="001">PPN002</marc:controlfield>
                        <marc:datafield tag="245" ind1="1" ind2="0">
                            <marc:subfield code="a">Second Book</marc:subfield>
                        </marc:datafield>
                    </marc:record>
                </zs:recordData>
                <zs:recordPosition>2</zs:recordPosition>
            </zs:record>
        </zs:records>
        <zs:echoedSearchRetrieveRequest>
            <zs:version>1.1</zs:version>
            <zs:query>pica.tit=Test</zs:query>
            <zs:maximumRecords>100</zs:maximumRecords>
            <zs:recordPacking>xml</zs:recordPacking>
            <zs:recordSchema>marcxml</zs:recordSchema>
        </zs:echoedSearchRetrieveRequest>
    </zs:searchRetrieveResponse>
    """
@pytest.fixture
def sru_response_no_records() -> bytes:
    """SRU response with zero records.

    Carries only the version ("1.1") and a numberOfRecords of 0; there is
    no records container and no echoed request.

    NOTE(review): the element markup was reconstructed; the SRU namespace
    is assumed to be what ``parse_search_retrieve_response`` expects --
    confirm against ``bibapi.sru``.
    """
    return b"""
    <zs:searchRetrieveResponse xmlns:zs="http://www.loc.gov/zing/srw/">
        <zs:version>1.1</zs:version>
        <zs:numberOfRecords>0</zs:numberOfRecords>
    </zs:searchRetrieveResponse>
    """
# --- Tests for _text helper ---
class TestTextHelper:
    """Tests for the ``_text`` helper.

    Each XML snippet is wrapped in a ``<root>`` element because
    ``ET.fromstring`` only accepts well-formed documents; bare text or an
    empty string raises ``ParseError`` before ``_text`` is ever called.
    """

    def test_text_with_element_and_text(self):
        """An element with text yields that text."""
        elem = ET.fromstring("<root>Hello</root>")
        assert _text(elem) == "Hello"

    def test_text_with_element_no_text(self):
        """An element without text yields the empty string."""
        elem = ET.fromstring("<root></root>")
        assert _text(elem) == ""

    def test_text_with_none(self):
        """``None`` in place of an element yields the empty string."""
        assert _text(None) == ""

    def test_text_with_whitespace(self):
        """Surrounding whitespace in the element text is kept as-is."""
        elem = ET.fromstring("<root> spaced </root>")
        assert _text(elem) == " spaced "
# --- Tests for parse_marc_record ---
class TestParseMarcRecord:
    """Checks ``parse_marc_record`` against the MARC record fixtures."""

    def test_parse_minimal_record(self, minimal_marc_xml):
        """Leader and control fields of the minimal record are read."""
        parsed = parse_marc_record(ET.fromstring(minimal_marc_xml))

        assert parsed.leader == "00000nam a22000001i 4500"
        assert len(parsed.controlfields) == 2
        first_control = parsed.controlfields[0]
        assert first_control.tag == "001"
        assert first_control.value == "PPN12345"

    def test_parse_datafields(self, minimal_marc_xml):
        """The single data field exposes tag, indicators and subfields."""
        parsed = parse_marc_record(ET.fromstring(minimal_marc_xml))

        assert len(parsed.datafields) == 1
        title_field = parsed.datafields[0]
        assert title_field.tag == "245"
        assert title_field.ind1 == "1"
        assert title_field.ind2 == "0"
        assert len(title_field.subfields) == 2
        leading_subfield = title_field.subfields[0]
        assert leading_subfield.code == "a"
        assert leading_subfield.value == "Test Title"

    def test_parse_full_record(self, full_marc_xml):
        """The full record yields three control fields and the expected tags."""
        parsed = parse_marc_record(ET.fromstring(full_marc_xml))

        assert len(parsed.controlfields) == 3
        # Several distinct data field tags must be present.
        seen_tags = [field.tag for field in parsed.datafields]
        for expected_tag in ("020", "245", "700", "924"):
            assert expected_tag in seen_tags

    def test_parse_multiple_subfields_same_code(self, full_marc_xml):
        """Repeated subfield codes within one field are all kept, in order."""
        parsed = parse_marc_record(ET.fromstring(full_marc_xml))

        # The 041 field carries more than one $a subfield.
        language_field = next(
            field for field in parsed.datafields if field.tag == "041"
        )
        codes_a = [
            sub.value for sub in language_field.subfields if sub.code == "a"
        ]
        assert codes_a == ["ger", "eng"]
# --- Tests for parse_search_retrieve_response ---
class TestParseSearchRetrieveResponse:
    """Checks ``parse_search_retrieve_response`` against the SRU fixtures."""

    def test_parse_response_with_records(self, sru_response_xml):
        """Version, hit count and record list are read from the response."""
        parsed = parse_search_retrieve_response(sru_response_xml)

        assert parsed.version == "1.1"
        assert parsed.numberOfRecords == 2
        assert len(parsed.records) == 2

    def test_parse_response_record_details(self, sru_response_xml):
        """The first record exposes schema, packing, position and data."""
        first = parse_search_retrieve_response(sru_response_xml).records[0]

        assert first.recordSchema == "marcxml"
        assert first.recordPacking == "xml"
        assert first.recordPosition == 1
        assert controlfield_value(first.recordData, "001") == "PPN001"

    def test_parse_response_no_records(self, sru_response_no_records):
        """A zero-hit response parses to an empty record list."""
        parsed = parse_search_retrieve_response(sru_response_no_records)

        assert parsed.version == "1.1"
        assert parsed.numberOfRecords == 0
        assert len(parsed.records) == 0

    def test_parse_echoed_request(self, sru_response_xml):
        """The echoed request block is parsed along with the response."""
        parsed = parse_search_retrieve_response(sru_response_xml)
        request_echo = parsed.echoedSearchRetrieveRequest

        assert request_echo is not None
        assert request_echo.version == "1.1"
        assert request_echo.query == "pica.tit=Test"
        assert request_echo.maximumRecords == 100
        assert request_echo.recordSchema == "marcxml"

    def test_parse_response_as_string(self, sru_response_xml):
        """Decoded ``str`` input is accepted as well as ``bytes``."""
        as_text = sru_response_xml.decode("utf-8")
        parsed = parse_search_retrieve_response(as_text)

        assert parsed.numberOfRecords == 2
# --- Tests for query helper functions ---
class TestIterDatafields:
    """Checks ``iter_datafields`` with and without filters."""

    def test_iter_all_datafields(self, full_marc_xml):
        """Without filters every data field of the record is yielded."""
        parsed = parse_marc_record(ET.fromstring(full_marc_xml))
        yielded = list(iter_datafields(parsed))

        assert len(yielded) == len(parsed.datafields)

    def test_iter_datafields_by_tag(self, full_marc_xml):
        """Filtering by tag returns both ISBN (020) fields."""
        parsed = parse_marc_record(ET.fromstring(full_marc_xml))
        isbn_fields = list(iter_datafields(parsed, tag="020"))

        assert len(isbn_fields) == 2  # Two ISBN fields

    def test_iter_datafields_by_indicator(self, full_marc_xml):
        """Filtering by tag and second indicator narrows to one field."""
        parsed = parse_marc_record(ET.fromstring(full_marc_xml))
        matching = list(iter_datafields(parsed, tag="264", ind2="1"))

        assert len(matching) == 1
class TestSubfieldValues:
    """Checks ``subfield_values`` on the full MARC record fixture."""

    def test_subfield_values_single(self, full_marc_xml):
        """A tag/code pair that occurs once yields a one-element list."""
        parsed = parse_marc_record(ET.fromstring(full_marc_xml))

        assert subfield_values(parsed, "245", "a") == ["Comprehensive Test Book"]

    def test_subfield_values_multiple(self, full_marc_xml):
        """Values are collected across repeated fields (two ISBNs)."""
        parsed = parse_marc_record(ET.fromstring(full_marc_xml))
        isbns = subfield_values(parsed, "020", "a")

        assert len(isbns) == 2
        assert "9783123456789" in isbns
        assert "9783987654321" in isbns

    def test_subfield_values_empty(self, full_marc_xml):
        """An absent tag yields an empty list."""
        parsed = parse_marc_record(ET.fromstring(full_marc_xml))

        assert subfield_values(parsed, "999", "x") == []
class TestFirstSubfieldValue:
    """Checks ``first_subfield_value`` on the full MARC record fixture."""

    def test_first_subfield_value_found(self, full_marc_xml):
        """An existing tag/code pair returns its value."""
        parsed = parse_marc_record(ET.fromstring(full_marc_xml))
        title = first_subfield_value(parsed, "245", "a")

        assert title == "Comprehensive Test Book"

    def test_first_subfield_value_not_found(self, full_marc_xml):
        """A missing tag returns ``None`` when no default is given."""
        parsed = parse_marc_record(ET.fromstring(full_marc_xml))
        missing = first_subfield_value(parsed, "999", "x")

        assert missing is None

    def test_first_subfield_value_with_default(self, full_marc_xml):
        """A missing tag returns the supplied default."""
        parsed = parse_marc_record(ET.fromstring(full_marc_xml))
        fallback = first_subfield_value(parsed, "999", "x", default="N/A")

        assert fallback == "N/A"

    def test_first_subfield_value_with_indicator(self, full_marc_xml):
        """The lookup can be narrowed by the second indicator."""
        parsed = parse_marc_record(ET.fromstring(full_marc_xml))
        year = first_subfield_value(parsed, "264", "c", ind2="1")

        assert year == "2023"
class TestControlFieldValue:
    """Checks ``controlfield_value`` on the full MARC record fixture."""

    def test_controlfield_value_found(self, full_marc_xml):
        """An existing control field tag returns its value."""
        parsed = parse_marc_record(ET.fromstring(full_marc_xml))

        assert controlfield_value(parsed, "001") == "PPN98765"

    def test_controlfield_value_not_found(self, full_marc_xml):
        """A missing control field tag returns ``None``."""
        parsed = parse_marc_record(ET.fromstring(full_marc_xml))

        assert controlfield_value(parsed, "999") is None

    def test_controlfield_value_with_default(self, full_marc_xml):
        """A missing control field tag returns the supplied default."""
        parsed = parse_marc_record(ET.fromstring(full_marc_xml))

        assert controlfield_value(parsed, "999", default="unknown") == "unknown"
class TestFindDatafieldsWithSubfields:
    """Checks ``find_datafields_with_subfields`` on the 924 field."""

    def test_find_with_where_all(self, full_marc_xml):
        """A matching ``where_all`` condition returns the 924 field."""
        parsed = parse_marc_record(ET.fromstring(full_marc_xml))
        condition = {"9": "Frei 129"}
        matches = find_datafields_with_subfields(
            parsed, "924", where_all=condition
        )

        assert len(matches) == 1
        assert matches[0].tag == "924"

    def test_find_with_where_all_not_found(self, full_marc_xml):
        """A non-matching ``where_all`` condition returns nothing."""
        parsed = parse_marc_record(ET.fromstring(full_marc_xml))
        condition = {"9": "NonExistent"}
        matches = find_datafields_with_subfields(
            parsed, "924", where_all=condition
        )

        assert len(matches) == 0

    def test_find_with_casefold(self, full_marc_xml):
        """With ``casefold=True`` a lowercase condition still matches."""
        parsed = parse_marc_record(ET.fromstring(full_marc_xml))
        condition = {"9": "frei 129"}  # lowercase
        matches = find_datafields_with_subfields(
            parsed, "924", where_all=condition, casefold=True
        )

        assert len(matches) == 1
class TestDatafieldValue:
    """Checks ``datafield_value`` on hand-built ``DataField`` objects."""

    def test_datafield_value_found(self):
        """Each present subfield code returns its own value."""
        title_parts = [
            SubField(code="a", value="Title"),
            SubField(code="b", value="Subtitle"),
        ]
        field = DataField(tag="245", subfields=title_parts)

        assert datafield_value(field, "a") == "Title"
        assert datafield_value(field, "b") == "Subtitle"

    def test_datafield_value_not_found(self):
        """An absent subfield code returns ``None``."""
        only_title = [SubField(code="a", value="Title")]
        field = DataField(tag="245", subfields=only_title)

        assert datafield_value(field, "z") is None

    def test_datafield_value_with_default(self):
        """A field without subfields returns the supplied default."""
        bare_field = DataField(tag="245", subfields=[])

        assert datafield_value(bare_field, "a", default="N/A") == "N/A"
class TestDatafieldsValue:
    """Checks ``datafields_value`` on lists of ``DataField`` objects."""

    def test_datafields_value_found(self):
        """The value from the first field in the list is returned."""
        authors = [
            DataField(tag="700", subfields=[SubField(code="a", value=name)])
            for name in ("Author One", "Author Two")
        ]

        assert datafields_value(authors, "a") == "Author One"

    def test_datafields_value_empty_list(self):
        """An empty list of fields returns ``None``."""
        no_fields = []

        assert datafields_value(no_fields, "a") is None
class TestSubfieldValuesFromFields:
    """Checks ``subfield_values_from_fields`` on a list of fields."""

    def test_values_from_multiple_fields(self):
        """One value per field is collected, in field order."""
        authors = [
            DataField(tag="700", subfields=[SubField(code="a", value=name)])
            for name in ("Author One", "Author Two")
        ]

        collected = subfield_values_from_fields(authors, "a")
        assert collected == ["Author One", "Author Two"]
class TestFirstSubfieldValueFromFields:
    """Checks ``first_subfield_value_from_fields`` on a list of fields."""

    def test_first_value_from_fields(self):
        """The value from the first field in the list wins."""
        candidates = [
            DataField(tag="700", subfields=[SubField(code="a", value=text)])
            for text in ("First", "Second")
        ]

        assert first_subfield_value_from_fields(candidates, "a") == "First"
# --- Tests for _smart_join_title ---
class TestSmartJoinTitle:
    """Checks how ``_smart_join_title`` combines a title and a subtitle."""

    def test_join_with_subtitle(self):
        """Title and subtitle are joined with " : "."""
        joined = _smart_join_title("Main Title", "Subtitle")
        assert joined == "Main Title : Subtitle"

    def test_join_without_subtitle(self):
        """A ``None`` subtitle leaves the title unchanged."""
        joined = _smart_join_title("Main Title", None)
        assert joined == "Main Title"

    def test_join_with_empty_subtitle(self):
        """An empty subtitle leaves the title unchanged."""
        joined = _smart_join_title("Main Title", "")
        assert joined == "Main Title"

    def test_join_with_existing_colon(self):
        """A title already ending in ":" gets no second separator."""
        joined = _smart_join_title("Main Title:", "Subtitle")
        assert joined == "Main Title: Subtitle"

    def test_join_with_existing_semicolon(self):
        """A title already ending in ";" gets no extra separator."""
        joined = _smart_join_title("Main Title;", "More")
        assert joined == "Main Title; More"

    def test_join_strips_whitespace(self):
        """Surrounding whitespace on both parts is removed before joining."""
        joined = _smart_join_title(" Main Title ", " Subtitle ")
        assert joined == "Main Title : Subtitle"