"""Tests for MARCXML parsing functions in sru.py."""

import xml.etree.ElementTree as ET

import pytest

from bibapi.schemas.marcxml import (
    DataField,
    SubField,
)
from bibapi.sru import (
    _smart_join_title,
    _text,
    controlfield_value,
    datafield_value,
    datafields_value,
    find_datafields_with_subfields,
    first_subfield_value,
    first_subfield_value_from_fields,
    iter_datafields,
    parse_marc_record,
    parse_search_retrieve_response,
    subfield_values,
    subfield_values_from_fields,
)


def _parse_record(xml: str):
    """Parse a MARCXML ``<record>`` string with ``parse_marc_record``."""
    return parse_marc_record(ET.fromstring(xml))


# --- Fixtures for sample XML data ---
#
# NOTE(review): the XML markup in this module had been stripped, leaving only
# the text nodes, so every ET.fromstring() call raised ParseError. The markup
# below was rebuilt from the values asserted in the tests (tags, indicators,
# subfield codes) and from the standard MARC21-slim / SRU 1.1 namespaces.
# Confirm the namespace URIs against the ones bibapi.sru looks up, and adjust
# the indicators / subfield codes of fields that no test asserts on
# (250, 300, 338, 924 $g/$b) if the real fixtures differed.


@pytest.fixture
def minimal_marc_xml() -> str:
    """Minimal MARC record XML string."""
    return """
<record xmlns="http://www.loc.gov/MARC21/slim">
  <leader>00000nam a22000001i 4500</leader>
  <controlfield tag="001">PPN12345</controlfield>
  <controlfield tag="005">20230101120000.0</controlfield>
  <datafield tag="245" ind1="1" ind2="0">
    <subfield code="a">Test Title</subfield>
    <subfield code="b">A Subtitle</subfield>
  </datafield>
</record>
"""


@pytest.fixture
def full_marc_xml() -> str:
    """More complete MARC record for testing."""
    return """
<record xmlns="http://www.loc.gov/MARC21/slim">
  <leader>00000nam a22000001i 4500</leader>
  <controlfield tag="001">PPN98765</controlfield>
  <controlfield tag="005">20231215150000.0</controlfield>
  <controlfield tag="008">230101s2023 gw 000 0 ger d</controlfield>
  <datafield tag="020" ind1=" " ind2=" ">
    <subfield code="a">9783123456789</subfield>
  </datafield>
  <datafield tag="020" ind1=" " ind2=" ">
    <subfield code="a">9783987654321</subfield>
  </datafield>
  <datafield tag="041" ind1=" " ind2=" ">
    <subfield code="a">ger</subfield>
    <subfield code="a">eng</subfield>
  </datafield>
  <datafield tag="245" ind1="1" ind2="0">
    <subfield code="a">Comprehensive Test Book</subfield>
    <subfield code="b">With Many Details</subfield>
    <subfield code="c">by Author Name</subfield>
  </datafield>
  <datafield tag="250" ind1=" " ind2=" ">
    <subfield code="a">3rd edition</subfield>
  </datafield>
  <datafield tag="264" ind1=" " ind2="1">
    <subfield code="a">Berlin</subfield>
    <subfield code="b">Test Publisher</subfield>
    <subfield code="c">2023</subfield>
  </datafield>
  <datafield tag="300" ind1=" " ind2=" ">
    <subfield code="a">456 pages</subfield>
  </datafield>
  <datafield tag="338" ind1=" " ind2=" ">
    <subfield code="a">Band</subfield>
  </datafield>
  <datafield tag="700" ind1="1" ind2=" ">
    <subfield code="a">Author, First</subfield>
  </datafield>
  <datafield tag="700" ind1="1" ind2=" ">
    <subfield code="a">Author, Second</subfield>
  </datafield>
  <datafield tag="924" ind1="0" ind2=" ">
    <subfield code="9">Frei 129</subfield>
    <subfield code="g">ABC 123</subfield>
    <subfield code="b">DE-Frei129</subfield>
  </datafield>
</record>
"""


@pytest.fixture
def sru_response_xml() -> bytes:
    """Complete SRU searchRetrieveResponse XML."""
    return b"""
<zs:searchRetrieveResponse xmlns:zs="http://www.loc.gov/zing/srw/">
  <zs:version>1.1</zs:version>
  <zs:numberOfRecords>2</zs:numberOfRecords>
  <zs:records>
    <zs:record>
      <zs:recordSchema>marcxml</zs:recordSchema>
      <zs:recordPacking>xml</zs:recordPacking>
      <zs:recordData>
        <record xmlns="http://www.loc.gov/MARC21/slim">
          <leader>00000nam a22</leader>
          <controlfield tag="001">PPN001</controlfield>
          <datafield tag="245" ind1="1" ind2="0">
            <subfield code="a">First Book</subfield>
          </datafield>
        </record>
      </zs:recordData>
      <zs:recordPosition>1</zs:recordPosition>
    </zs:record>
    <zs:record>
      <zs:recordSchema>marcxml</zs:recordSchema>
      <zs:recordPacking>xml</zs:recordPacking>
      <zs:recordData>
        <record xmlns="http://www.loc.gov/MARC21/slim">
          <leader>00000nam a22</leader>
          <controlfield tag="001">PPN002</controlfield>
          <datafield tag="245" ind1="1" ind2="0">
            <subfield code="a">Second Book</subfield>
          </datafield>
        </record>
      </zs:recordData>
      <zs:recordPosition>2</zs:recordPosition>
    </zs:record>
  </zs:records>
  <zs:echoedSearchRetrieveRequest>
    <zs:version>1.1</zs:version>
    <zs:query>pica.tit=Test</zs:query>
    <zs:maximumRecords>100</zs:maximumRecords>
    <zs:recordPacking>xml</zs:recordPacking>
    <zs:recordSchema>marcxml</zs:recordSchema>
  </zs:echoedSearchRetrieveRequest>
</zs:searchRetrieveResponse>
"""


@pytest.fixture
def sru_response_no_records() -> bytes:
    """SRU response with zero records."""
    return b"""
<zs:searchRetrieveResponse xmlns:zs="http://www.loc.gov/zing/srw/">
  <zs:version>1.1</zs:version>
  <zs:numberOfRecords>0</zs:numberOfRecords>
</zs:searchRetrieveResponse>
"""


# --- Tests for _text helper ---


class TestTextHelper:
    """Expectations for ``_text``: element text, or "" when there is none."""

    def test_text_with_element_and_text(self):
        elem = ET.fromstring("<a>Hello</a>")
        assert _text(elem) == "Hello"

    def test_text_with_element_no_text(self):
        elem = ET.fromstring("<a/>")
        assert _text(elem) == ""

    def test_text_with_none(self):
        assert _text(None) == ""

    def test_text_with_whitespace(self):
        # Surrounding whitespace is expected to be preserved, not stripped.
        elem = ET.fromstring("<a> spaced </a>")
        assert _text(elem) == " spaced "


# --- Tests for parse_marc_record ---


class TestParseMarcRecord:
    """Expectations for ``parse_marc_record`` on the MARC fixtures."""

    def test_parse_minimal_record(self, minimal_marc_xml):
        record = _parse_record(minimal_marc_xml)

        assert record.leader == "00000nam a22000001i 4500"
        assert len(record.controlfields) == 2
        assert record.controlfields[0].tag == "001"
        assert record.controlfields[0].value == "PPN12345"

    def test_parse_datafields(self, minimal_marc_xml):
        record = _parse_record(minimal_marc_xml)

        assert len(record.datafields) == 1
        df = record.datafields[0]
        assert df.tag == "245"
        assert df.ind1 == "1"
        assert df.ind2 == "0"
        assert len(df.subfields) == 2
        assert df.subfields[0].code == "a"
        assert df.subfields[0].value == "Test Title"

    def test_parse_full_record(self, full_marc_xml):
        record = _parse_record(full_marc_xml)

        assert len(record.controlfields) == 3
        # Check multiple datafields
        tags = [df.tag for df in record.datafields]
        assert "020" in tags
        assert "245" in tags
        assert "700" in tags
        assert "924" in tags

    def test_parse_multiple_subfields_same_code(self, full_marc_xml):
        record = _parse_record(full_marc_xml)

        # Find 041 field with multiple $a subfields
        df_041 = next(df for df in record.datafields if df.tag == "041")
        a_values = [sf.value for sf in df_041.subfields if sf.code == "a"]
        assert a_values == ["ger", "eng"]


# --- Tests for parse_search_retrieve_response ---


class TestParseSearchRetrieveResponse:
    """Expectations for ``parse_search_retrieve_response`` on SRU fixtures."""

    def test_parse_response_with_records(self, sru_response_xml):
        response = parse_search_retrieve_response(sru_response_xml)

        assert response.version == "1.1"
        assert response.numberOfRecords == 2
        assert len(response.records) == 2

    def test_parse_response_record_details(self, sru_response_xml):
        response = parse_search_retrieve_response(sru_response_xml)

        rec1 = response.records[0]
        assert rec1.recordSchema == "marcxml"
        assert rec1.recordPacking == "xml"
        assert rec1.recordPosition == 1
        assert controlfield_value(rec1.recordData, "001") == "PPN001"

    def test_parse_response_no_records(self, sru_response_no_records):
        response = parse_search_retrieve_response(sru_response_no_records)

        assert response.version == "1.1"
        assert response.numberOfRecords == 0
        assert len(response.records) == 0

    def test_parse_echoed_request(self, sru_response_xml):
        response = parse_search_retrieve_response(sru_response_xml)

        echoed = response.echoedSearchRetrieveRequest
        assert echoed is not None
        assert echoed.version == "1.1"
        assert echoed.query == "pica.tit=Test"
        assert echoed.maximumRecords == 100
        assert echoed.recordSchema == "marcxml"

    def test_parse_response_as_string(self, sru_response_xml):
        # Should also work with string input
        response = parse_search_retrieve_response(sru_response_xml.decode("utf-8"))
        assert response.numberOfRecords == 2


# --- Tests for query helper functions ---


class TestIterDatafields:
    """Expectations for ``iter_datafields`` filtering by tag and indicator."""

    def test_iter_all_datafields(self, full_marc_xml):
        record = _parse_record(full_marc_xml)

        all_fields = list(iter_datafields(record))
        assert len(all_fields) == len(record.datafields)

    def test_iter_datafields_by_tag(self, full_marc_xml):
        record = _parse_record(full_marc_xml)

        fields_020 = list(iter_datafields(record, tag="020"))
        assert len(fields_020) == 2  # Two ISBN fields

    def test_iter_datafields_by_indicator(self, full_marc_xml):
        record = _parse_record(full_marc_xml)

        fields = list(iter_datafields(record, tag="264", ind2="1"))
        assert len(fields) == 1


class TestSubfieldValues:
    """Expectations for ``subfield_values`` on a parsed record."""

    def test_subfield_values_single(self, full_marc_xml):
        record = _parse_record(full_marc_xml)

        values = subfield_values(record, "245", "a")
        assert values == ["Comprehensive Test Book"]

    def test_subfield_values_multiple(self, full_marc_xml):
        record = _parse_record(full_marc_xml)

        # Multiple ISBN values
        values = subfield_values(record, "020", "a")
        assert len(values) == 2
        assert "9783123456789" in values
        assert "9783987654321" in values

    def test_subfield_values_empty(self, full_marc_xml):
        record = _parse_record(full_marc_xml)

        values = subfield_values(record, "999", "x")
        assert values == []


class TestFirstSubfieldValue:
    """Expectations for ``first_subfield_value`` including defaults."""

    def test_first_subfield_value_found(self, full_marc_xml):
        record = _parse_record(full_marc_xml)

        value = first_subfield_value(record, "245", "a")
        assert value == "Comprehensive Test Book"

    def test_first_subfield_value_not_found(self, full_marc_xml):
        record = _parse_record(full_marc_xml)

        value = first_subfield_value(record, "999", "x")
        assert value is None

    def test_first_subfield_value_with_default(self, full_marc_xml):
        record = _parse_record(full_marc_xml)

        value = first_subfield_value(record, "999", "x", default="N/A")
        assert value == "N/A"

    def test_first_subfield_value_with_indicator(self, full_marc_xml):
        record = _parse_record(full_marc_xml)

        value = first_subfield_value(record, "264", "c", ind2="1")
        assert value == "2023"


class TestControlFieldValue:
    """Expectations for ``controlfield_value`` including defaults."""

    def test_controlfield_value_found(self, full_marc_xml):
        record = _parse_record(full_marc_xml)

        value = controlfield_value(record, "001")
        assert value == "PPN98765"

    def test_controlfield_value_not_found(self, full_marc_xml):
        record = _parse_record(full_marc_xml)

        value = controlfield_value(record, "999")
        assert value is None

    def test_controlfield_value_with_default(self, full_marc_xml):
        record = _parse_record(full_marc_xml)

        value = controlfield_value(record, "999", default="unknown")
        assert value == "unknown"


class TestFindDatafieldsWithSubfields:
    """Expectations for ``find_datafields_with_subfields`` matching."""

    def test_find_with_where_all(self, full_marc_xml):
        record = _parse_record(full_marc_xml)

        fields = find_datafields_with_subfields(
            record,
            "924",
            where_all={"9": "Frei 129"},
        )
        assert len(fields) == 1
        assert fields[0].tag == "924"

    def test_find_with_where_all_not_found(self, full_marc_xml):
        record = _parse_record(full_marc_xml)

        fields = find_datafields_with_subfields(
            record,
            "924",
            where_all={"9": "NonExistent"},
        )
        assert len(fields) == 0

    def test_find_with_casefold(self, full_marc_xml):
        record = _parse_record(full_marc_xml)

        fields = find_datafields_with_subfields(
            record,
            "924",
            where_all={"9": "frei 129"},  # lowercase
            casefold=True,
        )
        assert len(fields) == 1


class TestDatafieldValue:
    """Expectations for ``datafield_value`` on a single ``DataField``."""

    def test_datafield_value_found(self):
        df = DataField(
            tag="245",
            subfields=[
                SubField(code="a", value="Title"),
                SubField(code="b", value="Subtitle"),
            ],
        )
        assert datafield_value(df, "a") == "Title"
        assert datafield_value(df, "b") == "Subtitle"

    def test_datafield_value_not_found(self):
        df = DataField(tag="245", subfields=[SubField(code="a", value="Title")])
        assert datafield_value(df, "z") is None

    def test_datafield_value_with_default(self):
        df = DataField(tag="245", subfields=[])
        assert datafield_value(df, "a", default="N/A") == "N/A"


class TestDatafieldsValue:
    """Expectations for ``datafields_value`` across several fields."""

    def test_datafields_value_found(self):
        fields = [
            DataField(tag="700", subfields=[SubField(code="a", value="Author One")]),
            DataField(tag="700", subfields=[SubField(code="a", value="Author Two")]),
        ]
        assert datafields_value(fields, "a") == "Author One"

    def test_datafields_value_empty_list(self):
        assert datafields_value([], "a") is None


class TestSubfieldValuesFromFields:
    """Expectations for ``subfield_values_from_fields``."""

    def test_values_from_multiple_fields(self):
        fields = [
            DataField(tag="700", subfields=[SubField(code="a", value="Author One")]),
            DataField(tag="700", subfields=[SubField(code="a", value="Author Two")]),
        ]
        values = subfield_values_from_fields(fields, "a")
        assert values == ["Author One", "Author Two"]


class TestFirstSubfieldValueFromFields:
    """Expectations for ``first_subfield_value_from_fields``."""

    def test_first_value_from_fields(self):
        fields = [
            DataField(tag="700", subfields=[SubField(code="a", value="First")]),
            DataField(tag="700", subfields=[SubField(code="a", value="Second")]),
        ]
        assert first_subfield_value_from_fields(fields, "a") == "First"


# --- Tests for _smart_join_title ---


class TestSmartJoinTitle:
    """Expectations for ``_smart_join_title`` separator handling."""

    def test_join_with_subtitle(self):
        result = _smart_join_title("Main Title", "Subtitle")
        assert result == "Main Title : Subtitle"

    def test_join_without_subtitle(self):
        result = _smart_join_title("Main Title", None)
        assert result == "Main Title"

    def test_join_with_empty_subtitle(self):
        result = _smart_join_title("Main Title", "")
        assert result == "Main Title"

    def test_join_with_existing_colon(self):
        result = _smart_join_title("Main Title:", "Subtitle")
        assert result == "Main Title: Subtitle"

    def test_join_with_existing_semicolon(self):
        result = _smart_join_title("Main Title;", "More")
        assert result == "Main Title; More"

    def test_join_strips_whitespace(self):
        result = _smart_join_title(" Main Title ", " Subtitle ")
        assert result == "Main Title : Subtitle"