# Listing metadata from the code viewer: 487 lines, 17 KiB, Python.
"""Tests for MARCXML parsing functions in sru.py."""
|
|
|
|
import xml.etree.ElementTree as ET
|
|
|
|
import pytest
|
|
|
|
from bibapi.schemas.marcxml import (
|
|
DataField,
|
|
SubField,
|
|
)
|
|
from bibapi.sru import (
|
|
_smart_join_title,
|
|
_text,
|
|
controlfield_value,
|
|
datafield_value,
|
|
datafields_value,
|
|
find_datafields_with_subfields,
|
|
first_subfield_value,
|
|
first_subfield_value_from_fields,
|
|
iter_datafields,
|
|
parse_marc_record,
|
|
parse_search_retrieve_response,
|
|
subfield_values,
|
|
subfield_values_from_fields,
|
|
)
|
|
|
|
# --- Fixtures for sample XML data ---
|
|
|
|
|
|
@pytest.fixture
def minimal_marc_xml() -> str:
    """Return a minimal MARCXML record as a string.

    The record contains a leader, two control fields (001 and 005) and one
    245 data field (ind1="1", ind2="0") with subfields ``a`` and ``b``.
    """
    # The XML declaration must be the very first characters of the document,
    # so the literal starts on the same line as the opening quotes.
    return """<?xml version="1.0" encoding="UTF-8"?>
<marc:record xmlns:marc="http://www.loc.gov/MARC21/slim">
  <marc:leader>00000nam a22000001i 4500</marc:leader>
  <marc:controlfield tag="001">PPN12345</marc:controlfield>
  <marc:controlfield tag="005">20230101120000.0</marc:controlfield>
  <marc:datafield tag="245" ind1="1" ind2="0">
    <marc:subfield code="a">Test Title</marc:subfield>
    <marc:subfield code="b">A Subtitle</marc:subfield>
  </marc:datafield>
</marc:record>"""
|
|
|
|
|
|
@pytest.fixture
def full_marc_xml() -> str:
    """Return a richer MARCXML record as a string.

    Besides the leader and three control fields (001, 005, 008) the record
    carries repeated data fields (two 020, two 700), a 041 field with a
    repeated ``a`` subfield, and single 245, 250, 264, 300, 338 and 924
    fields, so that the lookup helpers can be exercised against repeated
    tags, repeated subfield codes and indicator filters.
    """
    # NOTE(review): the 008 value below is 26 characters long, whereas MARC 21
    # defines 008 as a fixed-length 40-character field; runs of blanks may
    # have been collapsed in this copy - confirm against the upstream fixture.
    return """<?xml version="1.0" encoding="UTF-8"?>
<marc:record xmlns:marc="http://www.loc.gov/MARC21/slim">
  <marc:leader>00000nam a22000001i 4500</marc:leader>
  <marc:controlfield tag="001">PPN98765</marc:controlfield>
  <marc:controlfield tag="005">20231215150000.0</marc:controlfield>
  <marc:controlfield tag="008">230101s2023 gw 000 0 ger d</marc:controlfield>
  <marc:datafield tag="020" ind1=" " ind2=" ">
    <marc:subfield code="a">9783123456789</marc:subfield>
  </marc:datafield>
  <marc:datafield tag="020" ind1=" " ind2=" ">
    <marc:subfield code="a">9783987654321</marc:subfield>
  </marc:datafield>
  <marc:datafield tag="041" ind1=" " ind2=" ">
    <marc:subfield code="a">ger</marc:subfield>
    <marc:subfield code="a">eng</marc:subfield>
  </marc:datafield>
  <marc:datafield tag="245" ind1="1" ind2="0">
    <marc:subfield code="a">Comprehensive Test Book</marc:subfield>
    <marc:subfield code="b">With Many Details</marc:subfield>
    <marc:subfield code="c">by Author Name</marc:subfield>
  </marc:datafield>
  <marc:datafield tag="250" ind1=" " ind2=" ">
    <marc:subfield code="a">3rd edition</marc:subfield>
  </marc:datafield>
  <marc:datafield tag="264" ind1=" " ind2="1">
    <marc:subfield code="a">Berlin</marc:subfield>
    <marc:subfield code="b">Test Publisher</marc:subfield>
    <marc:subfield code="c">2023</marc:subfield>
  </marc:datafield>
  <marc:datafield tag="300" ind1=" " ind2=" ">
    <marc:subfield code="a">456 pages</marc:subfield>
  </marc:datafield>
  <marc:datafield tag="338" ind1=" " ind2=" ">
    <marc:subfield code="a">Band</marc:subfield>
  </marc:datafield>
  <marc:datafield tag="700" ind1="1" ind2=" ">
    <marc:subfield code="a">Author, First</marc:subfield>
  </marc:datafield>
  <marc:datafield tag="700" ind1="1" ind2=" ">
    <marc:subfield code="a">Author, Second</marc:subfield>
  </marc:datafield>
  <marc:datafield tag="924" ind1=" " ind2=" ">
    <marc:subfield code="9">Frei 129</marc:subfield>
    <marc:subfield code="g">ABC 123</marc:subfield>
    <marc:subfield code="b">DE-Frei129</marc:subfield>
  </marc:datafield>
</marc:record>"""
|
|
|
|
|
|
@pytest.fixture
def sru_response_xml() -> bytes:
    """Return a complete SRU ``searchRetrieveResponse`` document as bytes.

    The response announces two records, contains two ``zs:record`` entries
    (PPN001 / "First Book" and PPN002 / "Second Book", positions 1 and 2)
    and an ``echoedSearchRetrieveRequest`` section.
    """
    return b"""<?xml version="1.0" encoding="UTF-8"?>
<zs:searchRetrieveResponse xmlns:zs="http://www.loc.gov/zing/srw/"
                           xmlns:marc="http://www.loc.gov/MARC21/slim">
  <zs:version>1.1</zs:version>
  <zs:numberOfRecords>2</zs:numberOfRecords>
  <zs:records>
    <zs:record>
      <zs:recordSchema>marcxml</zs:recordSchema>
      <zs:recordPacking>xml</zs:recordPacking>
      <zs:recordData>
        <marc:record>
          <marc:leader>00000nam a22</marc:leader>
          <marc:controlfield tag="001">PPN001</marc:controlfield>
          <marc:datafield tag="245" ind1=" " ind2=" ">
            <marc:subfield code="a">First Book</marc:subfield>
          </marc:datafield>
        </marc:record>
      </zs:recordData>
      <zs:recordPosition>1</zs:recordPosition>
    </zs:record>
    <zs:record>
      <zs:recordSchema>marcxml</zs:recordSchema>
      <zs:recordPacking>xml</zs:recordPacking>
      <zs:recordData>
        <marc:record>
          <marc:leader>00000nam a22</marc:leader>
          <marc:controlfield tag="001">PPN002</marc:controlfield>
          <marc:datafield tag="245" ind1=" " ind2=" ">
            <marc:subfield code="a">Second Book</marc:subfield>
          </marc:datafield>
        </marc:record>
      </zs:recordData>
      <zs:recordPosition>2</zs:recordPosition>
    </zs:record>
  </zs:records>
  <zs:echoedSearchRetrieveRequest>
    <zs:version>1.1</zs:version>
    <zs:query>pica.tit=Test</zs:query>
    <zs:maximumRecords>100</zs:maximumRecords>
    <zs:recordPacking>xml</zs:recordPacking>
    <zs:recordSchema>marcxml</zs:recordSchema>
  </zs:echoedSearchRetrieveRequest>
</zs:searchRetrieveResponse>"""
|
|
|
|
|
|
@pytest.fixture
def sru_response_no_records() -> bytes:
    """Return an SRU response (bytes) that reports zero records.

    The document has a version and ``numberOfRecords`` of 0, and no
    ``zs:records`` element at all.
    """
    return b"""<?xml version="1.0" encoding="UTF-8"?>
<zs:searchRetrieveResponse xmlns:zs="http://www.loc.gov/zing/srw/">
  <zs:version>1.1</zs:version>
  <zs:numberOfRecords>0</zs:numberOfRecords>
</zs:searchRetrieveResponse>"""
|
|
|
|
|
|
# --- Tests for _text helper ---
|
|
|
|
|
|
class TestTextHelper:
    """Checks for the ``_text`` helper on elements with and without text."""

    def test_text_with_element_and_text(self):
        """An element with text yields that text."""
        elem = ET.fromstring("<tag>Hello</tag>")
        assert _text(elem) == "Hello"

    def test_text_with_element_no_text(self):
        """An empty element yields the empty string."""
        elem = ET.fromstring("<tag></tag>")
        assert _text(elem) == ""

    def test_text_with_none(self):
        """Passing ``None`` instead of an element yields the empty string."""
        assert _text(None) == ""

    def test_text_with_whitespace(self):
        """Surrounding whitespace in the element text is kept as-is."""
        elem = ET.fromstring("<tag> spaced </tag>")
        assert _text(elem) == " spaced "
|
|
|
|
|
|
# --- Tests for parse_marc_record ---
|
|
|
|
|
|
class TestParseMarcRecord:
    """Checks for ``parse_marc_record`` against the MARCXML fixtures."""

    def test_parse_minimal_record(self, minimal_marc_xml):
        """Leader and control fields of the minimal record are parsed."""
        root = ET.fromstring(minimal_marc_xml)
        record = parse_marc_record(root)

        assert record.leader == "00000nam a22000001i 4500"
        assert len(record.controlfields) == 2
        assert record.controlfields[0].tag == "001"
        assert record.controlfields[0].value == "PPN12345"

    def test_parse_datafields(self, minimal_marc_xml):
        """The single 245 field keeps its tag, indicators and subfields."""
        root = ET.fromstring(minimal_marc_xml)
        record = parse_marc_record(root)

        assert len(record.datafields) == 1
        df = record.datafields[0]
        assert df.tag == "245"
        assert df.ind1 == "1"
        assert df.ind2 == "0"
        assert len(df.subfields) == 2
        assert df.subfields[0].code == "a"
        assert df.subfields[0].value == "Test Title"

    def test_parse_full_record(self, full_marc_xml):
        """All control fields and a sample of data field tags are present."""
        root = ET.fromstring(full_marc_xml)
        record = parse_marc_record(root)

        assert len(record.controlfields) == 3
        # Check multiple datafields
        tags = [df.tag for df in record.datafields]
        assert "020" in tags
        assert "245" in tags
        assert "700" in tags
        assert "924" in tags

    def test_parse_multiple_subfields_same_code(self, full_marc_xml):
        """Repeated subfield codes inside one field are kept in order."""
        root = ET.fromstring(full_marc_xml)
        record = parse_marc_record(root)

        # Find 041 field with multiple $a subfields
        df_041 = next(df for df in record.datafields if df.tag == "041")
        a_values = [sf.value for sf in df_041.subfields if sf.code == "a"]
        assert a_values == ["ger", "eng"]
|
|
|
|
|
|
# --- Tests for parse_search_retrieve_response ---
|
|
|
|
|
|
class TestParseSearchRetrieveResponse:
    """Checks for ``parse_search_retrieve_response`` on SRU fixtures."""

    def test_parse_response_with_records(self, sru_response_xml):
        """Version, announced record count and parsed record count match."""
        response = parse_search_retrieve_response(sru_response_xml)

        assert response.version == "1.1"
        assert response.numberOfRecords == 2
        assert len(response.records) == 2

    def test_parse_response_record_details(self, sru_response_xml):
        """The first record exposes schema, packing, position and its 001."""
        response = parse_search_retrieve_response(sru_response_xml)

        rec1 = response.records[0]
        assert rec1.recordSchema == "marcxml"
        assert rec1.recordPacking == "xml"
        assert rec1.recordPosition == 1
        assert controlfield_value(rec1.recordData, "001") == "PPN001"

    def test_parse_response_no_records(self, sru_response_no_records):
        """A response without a records element yields no records."""
        response = parse_search_retrieve_response(sru_response_no_records)

        assert response.version == "1.1"
        assert response.numberOfRecords == 0
        assert len(response.records) == 0

    def test_parse_echoed_request(self, sru_response_xml):
        """The echoed request section is parsed into typed attributes."""
        response = parse_search_retrieve_response(sru_response_xml)

        echoed = response.echoedSearchRetrieveRequest
        assert echoed is not None
        assert echoed.version == "1.1"
        assert echoed.query == "pica.tit=Test"
        assert echoed.maximumRecords == 100
        assert echoed.recordSchema == "marcxml"

    def test_parse_response_as_string(self, sru_response_xml):
        """The parser accepts ``str`` input as well as ``bytes``."""
        # Should also work with string input
        response = parse_search_retrieve_response(sru_response_xml.decode("utf-8"))
        assert response.numberOfRecords == 2
|
|
|
|
|
|
# --- Tests for query helper functions ---
|
|
|
|
|
|
class TestIterDatafields:
    """Checks for ``iter_datafields`` with and without filters."""

    def test_iter_all_datafields(self, full_marc_xml):
        """Without filters every data field of the record is yielded."""
        root = ET.fromstring(full_marc_xml)
        record = parse_marc_record(root)

        all_fields = list(iter_datafields(record))
        assert len(all_fields) == len(record.datafields)

    def test_iter_datafields_by_tag(self, full_marc_xml):
        """Filtering by tag yields only the fields carrying that tag."""
        root = ET.fromstring(full_marc_xml)
        record = parse_marc_record(root)

        fields_020 = list(iter_datafields(record, tag="020"))
        assert len(fields_020) == 2  # Two ISBN fields

    def test_iter_datafields_by_indicator(self, full_marc_xml):
        """Filtering by tag and second indicator yields the one 264 field."""
        root = ET.fromstring(full_marc_xml)
        record = parse_marc_record(root)

        fields = list(iter_datafields(record, tag="264", ind2="1"))
        assert len(fields) == 1
|
|
|
|
|
|
class TestSubfieldValues:
    """Checks for ``subfield_values`` (all values for a tag/code pair)."""

    def test_subfield_values_single(self, full_marc_xml):
        """A tag/code pair that occurs once yields a one-element list."""
        root = ET.fromstring(full_marc_xml)
        record = parse_marc_record(root)

        values = subfield_values(record, "245", "a")
        assert values == ["Comprehensive Test Book"]

    def test_subfield_values_multiple(self, full_marc_xml):
        """Values are collected across repeated fields with the same tag."""
        root = ET.fromstring(full_marc_xml)
        record = parse_marc_record(root)

        # Multiple ISBN values
        values = subfield_values(record, "020", "a")
        assert len(values) == 2
        assert "9783123456789" in values
        assert "9783987654321" in values

    def test_subfield_values_empty(self, full_marc_xml):
        """An unknown tag yields an empty list."""
        root = ET.fromstring(full_marc_xml)
        record = parse_marc_record(root)

        values = subfield_values(record, "999", "x")
        assert values == []
|
|
|
|
|
|
class TestFirstSubfieldValue:
    """Checks for ``first_subfield_value`` including default and indicators."""

    def test_first_subfield_value_found(self, full_marc_xml):
        """An existing tag/code pair yields its first value."""
        root = ET.fromstring(full_marc_xml)
        record = parse_marc_record(root)

        value = first_subfield_value(record, "245", "a")
        assert value == "Comprehensive Test Book"

    def test_first_subfield_value_not_found(self, full_marc_xml):
        """A missing tag yields ``None`` when no default is given."""
        root = ET.fromstring(full_marc_xml)
        record = parse_marc_record(root)

        value = first_subfield_value(record, "999", "x")
        assert value is None

    def test_first_subfield_value_with_default(self, full_marc_xml):
        """A missing tag yields the supplied ``default``."""
        root = ET.fromstring(full_marc_xml)
        record = parse_marc_record(root)

        value = first_subfield_value(record, "999", "x", default="N/A")
        assert value == "N/A"

    def test_first_subfield_value_with_indicator(self, full_marc_xml):
        """The lookup can be narrowed by the second indicator."""
        root = ET.fromstring(full_marc_xml)
        record = parse_marc_record(root)

        value = first_subfield_value(record, "264", "c", ind2="1")
        assert value == "2023"
|
|
|
|
|
|
class TestControlFieldValue:
    """Checks for ``controlfield_value`` lookups by tag."""

    def test_controlfield_value_found(self, full_marc_xml):
        """An existing control field tag yields its value."""
        root = ET.fromstring(full_marc_xml)
        record = parse_marc_record(root)

        value = controlfield_value(record, "001")
        assert value == "PPN98765"

    def test_controlfield_value_not_found(self, full_marc_xml):
        """A missing control field tag yields ``None`` without a default."""
        root = ET.fromstring(full_marc_xml)
        record = parse_marc_record(root)

        value = controlfield_value(record, "999")
        assert value is None

    def test_controlfield_value_with_default(self, full_marc_xml):
        """A missing control field tag yields the supplied ``default``."""
        root = ET.fromstring(full_marc_xml)
        record = parse_marc_record(root)

        value = controlfield_value(record, "999", default="unknown")
        assert value == "unknown"
|
|
|
|
|
|
class TestFindDatafieldsWithSubfields:
    """Checks for ``find_datafields_with_subfields`` using ``where_all``."""

    def test_find_with_where_all(self, full_marc_xml):
        """A field whose subfield matches the required value is returned."""
        root = ET.fromstring(full_marc_xml)
        record = parse_marc_record(root)

        fields = find_datafields_with_subfields(
            record,
            "924",
            where_all={"9": "Frei 129"},
        )
        assert len(fields) == 1
        assert fields[0].tag == "924"

    def test_find_with_where_all_not_found(self, full_marc_xml):
        """No field is returned when the required value does not occur."""
        root = ET.fromstring(full_marc_xml)
        record = parse_marc_record(root)

        fields = find_datafields_with_subfields(
            record,
            "924",
            where_all={"9": "NonExistent"},
        )
        assert len(fields) == 0

    def test_find_with_casefold(self, full_marc_xml):
        """With ``casefold=True`` a differently cased value still matches."""
        root = ET.fromstring(full_marc_xml)
        record = parse_marc_record(root)

        fields = find_datafields_with_subfields(
            record,
            "924",
            where_all={"9": "frei 129"},  # lowercase
            casefold=True,
        )
        assert len(fields) == 1
|
|
|
|
|
|
class TestDatafieldValue:
    """Checks for ``datafield_value`` on hand-built ``DataField`` objects."""

    def test_datafield_value_found(self):
        """Each present subfield code yields its own value."""
        df = DataField(
            tag="245",
            subfields=[
                SubField(code="a", value="Title"),
                SubField(code="b", value="Subtitle"),
            ],
        )
        assert datafield_value(df, "a") == "Title"
        assert datafield_value(df, "b") == "Subtitle"

    def test_datafield_value_not_found(self):
        """An absent subfield code yields ``None`` without a default."""
        df = DataField(tag="245", subfields=[SubField(code="a", value="Title")])
        assert datafield_value(df, "z") is None

    def test_datafield_value_with_default(self):
        """A field without subfields yields the supplied ``default``."""
        df = DataField(tag="245", subfields=[])
        assert datafield_value(df, "a", default="N/A") == "N/A"
|
|
|
|
|
|
class TestDatafieldsValue:
    """Checks for ``datafields_value`` over a list of ``DataField`` objects."""

    def test_datafields_value_found(self):
        """The value from the first field in the list is returned."""
        fields = [
            DataField(tag="700", subfields=[SubField(code="a", value="Author One")]),
            DataField(tag="700", subfields=[SubField(code="a", value="Author Two")]),
        ]
        assert datafields_value(fields, "a") == "Author One"

    def test_datafields_value_empty_list(self):
        """An empty field list yields ``None``."""
        assert datafields_value([], "a") is None
|
|
|
|
|
|
class TestSubfieldValuesFromFields:
    """Checks for ``subfield_values_from_fields``."""

    def test_values_from_multiple_fields(self):
        """Values are gathered from every field, in list order."""
        fields = [
            DataField(tag="700", subfields=[SubField(code="a", value="Author One")]),
            DataField(tag="700", subfields=[SubField(code="a", value="Author Two")]),
        ]
        values = subfield_values_from_fields(fields, "a")
        assert values == ["Author One", "Author Two"]
|
|
|
|
|
|
class TestFirstSubfieldValueFromFields:
    """Checks for ``first_subfield_value_from_fields``."""

    def test_first_value_from_fields(self):
        """Only the value from the first field in the list is returned."""
        fields = [
            DataField(tag="700", subfields=[SubField(code="a", value="First")]),
            DataField(tag="700", subfields=[SubField(code="a", value="Second")]),
        ]
        assert first_subfield_value_from_fields(fields, "a") == "First"
|
|
|
|
|
|
# --- Tests for _smart_join_title ---
|
|
|
|
|
|
class TestSmartJoinTitle:
    """Checks for ``_smart_join_title`` (joining a title and a subtitle)."""

    def test_join_with_subtitle(self):
        """Title and subtitle are joined with `` : ``."""
        result = _smart_join_title("Main Title", "Subtitle")
        assert result == "Main Title : Subtitle"

    def test_join_without_subtitle(self):
        """A ``None`` subtitle leaves the title unchanged."""
        result = _smart_join_title("Main Title", None)
        assert result == "Main Title"

    def test_join_with_empty_subtitle(self):
        """An empty subtitle leaves the title unchanged."""
        result = _smart_join_title("Main Title", "")
        assert result == "Main Title"

    def test_join_with_existing_colon(self):
        """A title already ending in a colon gets no second separator."""
        result = _smart_join_title("Main Title:", "Subtitle")
        assert result == "Main Title: Subtitle"

    def test_join_with_existing_semicolon(self):
        """A title already ending in a semicolon gets no extra separator."""
        result = _smart_join_title("Main Title;", "More")
        assert result == "Main Title; More"

    def test_join_strips_whitespace(self):
        """Leading and trailing whitespace on both parts is removed."""
        result = _smart_join_title(" Main Title ", " Subtitle ")
        assert result == "Main Title : Subtitle"
|