From 30e4cded8f1417d91daea7524b132f66d4a1c1a0 Mon Sep 17 00:00:00 2001 From: WorldTeacher Date: Fri, 5 Dec 2025 11:21:41 +0100 Subject: [PATCH] chore: move dependencies, add more tests --- .coveragerc | 7 + pyproject.toml | 10 +- src/bibapi/__init__.py | 3 +- src/bibapi/catalogue.py | 4 + src/bibapi/schemas/__init__.py | 1 + src/bibapi/schemas/errors.py | 10 + src/bibapi/schemas/marcxml.py | 1 - tests/test_catalogue.py | 48 ++++ tests/test_marcxml_parser.py | 493 +++++++++++++++++++++++++++++++++ tests/test_schemas.py | 37 +++ 10 files changed, 608 insertions(+), 6 deletions(-) create mode 100644 .coveragerc create mode 100644 src/bibapi/schemas/__init__.py create mode 100644 src/bibapi/schemas/errors.py create mode 100644 tests/test_catalogue.py create mode 100644 tests/test_marcxml_parser.py create mode 100644 tests/test_schemas.py diff --git a/.coveragerc b/.coveragerc new file mode 100644 index 0000000..120d0b8 --- /dev/null +++ b/.coveragerc @@ -0,0 +1,7 @@ +[run] +branch = True +omit = */build/*,tests/*,main.py +[report] +exclude_lines = + pragma: no cover + raise NotImplementedError.* diff --git a/pyproject.toml b/pyproject.toml index 768dc33..30fc028 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,7 +5,10 @@ description = "Add your description here" readme = "README.md" authors = [{ name = "WorldTeacher", email = "coding_contact@pm.me" }] requires-python = ">=3.13" -dependencies = ["regex>=2025.9.18"] +dependencies = [ + "regex>=2025.9.18", + "requests>=2.32.5", +] [project.optional-dependencies] # SRU API feature: for accessing library catalogs via SRU protocol @@ -46,8 +49,9 @@ pre_commit_hooks = [] post_commit_hooks = [] [dependency-groups] -dev = ["pylint>=4.0.3"] -test = [ +dev = [ + "pylint>=4.0.3", + "pytest-mock>=3.15.1", "types-pysocks>=1.7.1.20251001", "types-regex>=2025.9.18.20250921", "types-requests>=2.32.4.20250913", diff --git a/src/bibapi/__init__.py b/src/bibapi/__init__.py index 1978e7f..e698ffa 100644 --- a/src/bibapi/__init__.py +++ b/src/bibapi/__init__.py @@ -1,4 +1,5 @@ from .schemas.api_types import ( + ALMASchema, DNBSchema, HBZSchema, HebisSchema, @@ -89,5 +90,3 @@ class HBZ(_Api): self.prefix = HBZSchema.ARGSCHEMA.value self.library_identifier = HBZSchema.LIBRARY_NAME_LOCATION_FIELD.value super().__init__(self.site, self.url, self.prefix, self.library_identifier) - - diff --git a/src/bibapi/catalogue.py b/src/bibapi/catalogue.py index b5759d9..ac81666 100644 --- a/src/bibapi/catalogue.py +++ b/src/bibapi/catalogue.py @@ -327,3 +327,7 @@ class Catalogue: if link is None: return None return link.library_location + + def check_book_exists(self, searchterm: str) -> bool: + links = self.get_book_links(searchterm) + return len(links) > 0 diff --git a/src/bibapi/schemas/__init__.py b/src/bibapi/schemas/__init__.py new file mode 100644 index 0000000..c142f90 --- /dev/null +++ b/src/bibapi/schemas/__init__.py @@ -0,0 +1 @@ +"""Schemas for the provided APIs.""" diff --git a/src/bibapi/schemas/errors.py b/src/bibapi/schemas/errors.py new file mode 100644 index 0000000..99a7cc9 --- /dev/null +++ b/src/bibapi/schemas/errors.py @@ -0,0 +1,10 @@ +class BibAPIError(Exception): + """Base class for all BibAPI errors.""" + + +class CatalogueError(BibAPIError): + """Raised when there is an error with the library catalogue API.""" + + +class NetworkError(BibAPIError): + """Raised when there is a network-related error.""" diff --git a/src/bibapi/schemas/marcxml.py b/src/bibapi/schemas/marcxml.py index 89cbf15..f92a20c 100644 --- a/src/bibapi/schemas/marcxml.py +++ b/src/bibapi/schemas/marcxml.py @@ -1,5 +1,4 @@ from dataclasses import dataclass, field -from typing import list # --- MARC XML structures --- diff --git a/tests/test_catalogue.py b/tests/test_catalogue.py new file mode 100644 index 0000000..aecc148 --- /dev/null +++ b/tests/test_catalogue.py @@ -0,0 +1,48 @@ +"""Tests for the Catalogue class, which interacts with the library catalogue.""" + +import pytest +from pytest_mock import MockerFixture + +from src.bibapi.catalogue import Catalogue + + +class TestCatalogue: + """Tests for the Catalogue class.""" + + def test_check_book_exists(self, mocker: MockerFixture): + """Test the check_book_exists method of the Catalogue class.""" + catalogue = Catalogue() + + # Mock the get_book_links method to control its output + mocker.patch.object( + catalogue, + "get_book_links", + return_value=["link1", "link2"], + ) + + # Test with a known existing book + existing_book_searchterm = "1693321114" + assert catalogue.check_book_exists(existing_book_searchterm) is True + + # Change the mock to return an empty list for non-existing book + mocker.patch.object( + catalogue, + "get_book_links", + return_value=[], + ) + + # Test with a known non-existing book + non_existing_book_searchterm = "00000000009" + assert catalogue.check_book_exists(non_existing_book_searchterm) is False + + def test_no_connection_raises_error(self, mocker: MockerFixture): + """Test that a ConnectionError is raised when there is no internet connection.""" + # Mock the check_connection method to simulate no internet connection + mocker.patch.object( + Catalogue, + "check_connection", + return_value=False, + ) + + with pytest.raises(ConnectionError, match="No internet connection available."): + Catalogue() diff --git a/tests/test_marcxml_parser.py b/tests/test_marcxml_parser.py new file mode 100644 index 0000000..5cb4e8d --- /dev/null +++ b/tests/test_marcxml_parser.py @@ -0,0 +1,493 @@ +"""Tests for MARCXML parsing functions in sru.py.""" + +import xml.etree.ElementTree as ET + +import pytest + +from bibapi.sru import ( + NS, + _smart_join_title, + _text, + controlfield_value, + datafield_value, + datafields_value, + find_datafields_with_subfields, + first_subfield_value, + first_subfield_value_from_fields, + iter_datafields, + parse_echoed_request, + parse_marc_record, + parse_record, + parse_search_retrieve_response, + subfield_values, + subfield_values_from_fields, +) +from bibapi.schemas.marcxml import ( + ControlField, + DataField, + MarcRecord, + SubField, +) + + +# --- Fixtures for sample XML data --- + + +@pytest.fixture +def minimal_marc_xml() -> str: + """Minimal MARC record XML string.""" + return """ + + 00000nam a22000001i 4500 + PPN12345 + 20230101120000.0 + + Test Title + A Subtitle + + """ + + +@pytest.fixture +def full_marc_xml() -> str: + """More complete MARC record for testing.""" + return """ + + 00000nam a22000001i 4500 + PPN98765 + 20231215150000.0 + 230101s2023 gw 000 0 ger d + + 9783123456789 + + + 9783987654321 + + + ger + eng + + + Comprehensive Test Book + With Many Details + by Author Name + + + 3rd edition + + + Berlin + Test Publisher + 2023 + + + 456 pages + + + Band + + + Author, First + + + Author, Second + + + Frei 129 + ABC 123 + DE-Frei129 + + """ + + +@pytest.fixture +def sru_response_xml() -> bytes: + """Complete SRU searchRetrieveResponse XML.""" + return b""" + + 1.1 + 2 + + + marcxml + xml + + + 00000nam a22 + PPN001 + + First Book + + + + 1 + + + marcxml + xml + + + 00000nam a22 + PPN002 + + Second Book + + + + 2 + + + + 1.1 + pica.tit=Test + 100 + xml + marcxml + + """ + + +@pytest.fixture +def sru_response_no_records() -> bytes: + """SRU response with zero records.""" + return b""" + + 1.1 + 0 + """ + + +# --- Tests for _text helper --- + + +class TestTextHelper: + def test_text_with_element_and_text(self): + elem = ET.fromstring("Hello") + assert _text(elem) == "Hello" + + def test_text_with_element_no_text(self): + elem = ET.fromstring("") + assert _text(elem) == "" + + def test_text_with_none(self): + assert _text(None) == "" + + def test_text_with_whitespace(self): + elem = ET.fromstring(" spaced ") + assert _text(elem) == " spaced " + + +# --- Tests for parse_marc_record --- + + +class TestParseMarcRecord: + def test_parse_minimal_record(self, minimal_marc_xml): + root = ET.fromstring(minimal_marc_xml) + record = parse_marc_record(root) + + assert record.leader == "00000nam a22000001i 4500" + assert len(record.controlfields) == 2 + assert record.controlfields[0].tag == "001" + assert record.controlfields[0].value == "PPN12345" + + def test_parse_datafields(self, minimal_marc_xml): + root = ET.fromstring(minimal_marc_xml) + record = parse_marc_record(root) + + assert len(record.datafields) == 1 + df = record.datafields[0] + assert df.tag == "245" + assert df.ind1 == "1" + assert df.ind2 == "0" + assert len(df.subfields) == 2 + assert df.subfields[0].code == "a" + assert df.subfields[0].value == "Test Title" + + def test_parse_full_record(self, full_marc_xml): + root = ET.fromstring(full_marc_xml) + record = parse_marc_record(root) + + assert len(record.controlfields) == 3 + # Check multiple datafields + tags = [df.tag for df in record.datafields] + assert "020" in tags + assert "245" in tags + assert "700" in tags + assert "924" in tags + + def test_parse_multiple_subfields_same_code(self, full_marc_xml): + root = ET.fromstring(full_marc_xml) + record = parse_marc_record(root) + + # Find 041 field with multiple $a subfields + df_041 = next(df for df in record.datafields if df.tag == "041") + a_values = [sf.value for sf in df_041.subfields if sf.code == "a"] + assert a_values == ["ger", "eng"] + + +# --- Tests for parse_search_retrieve_response --- + + +class TestParseSearchRetrieveResponse: + def test_parse_response_with_records(self, sru_response_xml): + response = parse_search_retrieve_response(sru_response_xml) + + assert response.version == "1.1" + assert response.numberOfRecords == 2 + assert len(response.records) == 2 + + def test_parse_response_record_details(self, sru_response_xml): + response = parse_search_retrieve_response(sru_response_xml) + + rec1 = response.records[0] + assert rec1.recordSchema == "marcxml" + assert rec1.recordPacking == "xml" + assert rec1.recordPosition == 1 + assert controlfield_value(rec1.recordData, "001") == "PPN001" + + def test_parse_response_no_records(self, sru_response_no_records): + response = parse_search_retrieve_response(sru_response_no_records) + + assert response.version == "1.1" + assert response.numberOfRecords == 0 + assert len(response.records) == 0 + + def test_parse_echoed_request(self, sru_response_xml): + response = parse_search_retrieve_response(sru_response_xml) + + echoed = response.echoedSearchRetrieveRequest + assert echoed is not None + assert echoed.version == "1.1" + assert echoed.query == "pica.tit=Test" + assert echoed.maximumRecords == 100 + assert echoed.recordSchema == "marcxml" + + def test_parse_response_as_string(self, sru_response_xml): + # Should also work with string input + response = parse_search_retrieve_response(sru_response_xml.decode("utf-8")) + assert response.numberOfRecords == 2 + + +# --- Tests for query helper functions --- + + +class TestIterDatafields: + def test_iter_all_datafields(self, full_marc_xml): + root = ET.fromstring(full_marc_xml) + record = parse_marc_record(root) + + all_fields = list(iter_datafields(record)) + assert len(all_fields) == len(record.datafields) + + def test_iter_datafields_by_tag(self, full_marc_xml): + root = ET.fromstring(full_marc_xml) + record = parse_marc_record(root) + + fields_020 = list(iter_datafields(record, tag="020")) + assert len(fields_020) == 2 # Two ISBN fields + + def test_iter_datafields_by_indicator(self, full_marc_xml): + root = ET.fromstring(full_marc_xml) + record = parse_marc_record(root) + + fields = list(iter_datafields(record, tag="264", ind2="1")) + assert len(fields) == 1 + + +class TestSubfieldValues: + def test_subfield_values_single(self, full_marc_xml): + root = ET.fromstring(full_marc_xml) + record = parse_marc_record(root) + + values = subfield_values(record, "245", "a") + assert values == ["Comprehensive Test Book"] + + def test_subfield_values_multiple(self, full_marc_xml): + root = ET.fromstring(full_marc_xml) + record = parse_marc_record(root) + + # Multiple ISBN values + values = subfield_values(record, "020", "a") + assert len(values) == 2 + assert "9783123456789" in values + assert "9783987654321" in values + + def test_subfield_values_empty(self, full_marc_xml): + root = ET.fromstring(full_marc_xml) + record = parse_marc_record(root) + + values = subfield_values(record, "999", "x") + assert values == [] + + +class TestFirstSubfieldValue: + def test_first_subfield_value_found(self, full_marc_xml): + root = ET.fromstring(full_marc_xml) + record = parse_marc_record(root) + + value = first_subfield_value(record, "245", "a") + assert value == "Comprehensive Test Book" + + def test_first_subfield_value_not_found(self, full_marc_xml): + root = ET.fromstring(full_marc_xml) + record = parse_marc_record(root) + + value = first_subfield_value(record, "999", "x") + assert value is None + + def test_first_subfield_value_with_default(self, full_marc_xml): + root = ET.fromstring(full_marc_xml) + record = parse_marc_record(root) + + value = first_subfield_value(record, "999", "x", default="N/A") + assert value == "N/A" + + def test_first_subfield_value_with_indicator(self, full_marc_xml): + root = ET.fromstring(full_marc_xml) + record = parse_marc_record(root) + + value = first_subfield_value(record, "264", "c", ind2="1") + assert value == "2023" + + +class TestControlFieldValue: + def test_controlfield_value_found(self, full_marc_xml): + root = ET.fromstring(full_marc_xml) + record = parse_marc_record(root) + + value = controlfield_value(record, "001") + assert value == "PPN98765" + + def test_controlfield_value_not_found(self, full_marc_xml): + root = ET.fromstring(full_marc_xml) + record = parse_marc_record(root) + + value = controlfield_value(record, "999") + assert value is None + + def test_controlfield_value_with_default(self, full_marc_xml): + root = ET.fromstring(full_marc_xml) + record = parse_marc_record(root) + + value = controlfield_value(record, "999", default="unknown") + assert value == "unknown" + + +class TestFindDatafieldsWithSubfields: + def test_find_with_where_all(self, full_marc_xml): + root = ET.fromstring(full_marc_xml) + record = parse_marc_record(root) + + fields = find_datafields_with_subfields( + record, + "924", + where_all={"9": "Frei 129"}, + ) + assert len(fields) == 1 + assert fields[0].tag == "924" + + def test_find_with_where_all_not_found(self, full_marc_xml): + root = ET.fromstring(full_marc_xml) + record = parse_marc_record(root) + + fields = find_datafields_with_subfields( + record, + "924", + where_all={"9": "NonExistent"}, + ) + assert len(fields) == 0 + + def test_find_with_casefold(self, full_marc_xml): + root = ET.fromstring(full_marc_xml) + record = parse_marc_record(root) + + fields = find_datafields_with_subfields( + record, + "924", + where_all={"9": "frei 129"}, # lowercase + casefold=True, + ) + assert len(fields) == 1 + + +class TestDatafieldValue: + def test_datafield_value_found(self): + df = DataField( + tag="245", + subfields=[ + SubField(code="a", value="Title"), + SubField(code="b", value="Subtitle"), + ], + ) + assert datafield_value(df, "a") == "Title" + assert datafield_value(df, "b") == "Subtitle" + + def test_datafield_value_not_found(self): + df = DataField(tag="245", subfields=[SubField(code="a", value="Title")]) + assert datafield_value(df, "z") is None + + def test_datafield_value_with_default(self): + df = DataField(tag="245", subfields=[]) + assert datafield_value(df, "a", default="N/A") == "N/A" + + +class TestDatafieldsValue: + def test_datafields_value_found(self): + fields = [ + DataField(tag="700", subfields=[SubField(code="a", value="Author One")]), + DataField(tag="700", subfields=[SubField(code="a", value="Author Two")]), + ] + assert datafields_value(fields, "a") == "Author One" + + def test_datafields_value_empty_list(self): + assert datafields_value([], "a") is None + + +class TestSubfieldValuesFromFields: + def test_values_from_multiple_fields(self): + fields = [ + DataField(tag="700", subfields=[SubField(code="a", value="Author One")]), + DataField(tag="700", subfields=[SubField(code="a", value="Author Two")]), + ] + values = subfield_values_from_fields(fields, "a") + assert values == ["Author One", "Author Two"] + + +class TestFirstSubfieldValueFromFields: + def test_first_value_from_fields(self): + fields = [ + DataField(tag="700", subfields=[SubField(code="a", value="First")]), + DataField(tag="700", subfields=[SubField(code="a", value="Second")]), + ] + assert first_subfield_value_from_fields(fields, "a") == "First" + + +# --- Tests for _smart_join_title --- + + +class TestSmartJoinTitle: + def test_join_with_subtitle(self): + result = _smart_join_title("Main Title", "Subtitle") + assert result == "Main Title : Subtitle" + + def test_join_without_subtitle(self): + result = _smart_join_title("Main Title", None) + assert result == "Main Title" + + def test_join_with_empty_subtitle(self): + result = _smart_join_title("Main Title", "") + assert result == "Main Title" + + def test_join_with_existing_colon(self): + result = _smart_join_title("Main Title:", "Subtitle") + assert result == "Main Title: Subtitle" + + def test_join_with_existing_semicolon(self): + result = _smart_join_title("Main Title;", "More") + assert result == "Main Title; More" + + def test_join_strips_whitespace(self): + result = _smart_join_title(" Main Title ", " Subtitle ") + assert result == "Main Title : Subtitle" + diff --git a/tests/test_schemas.py b/tests/test_schemas.py new file mode 100644 index 0000000..007370a --- /dev/null +++ b/tests/test_schemas.py @@ -0,0 +1,37 @@ +from src.bibapi.schemas.api_types import ALMASchema, DublinCoreSchema, PicaSchema +from src.bibapi.sru import QueryTransformer + +arguments = [ + "TITLE=Java ist auch eine Insel", + "AUTHOR=Ullenboom, Christian", + "YEAR=2020", + "PPN=1693321114", +] + + +def test_pica_schema(): + transformer = QueryTransformer(PicaSchema, arguments) + transformed = transformer.transform() + assert len(transformed) == 4 + assert transformed[0].startswith(PicaSchema.TITLE.value) + assert transformed[1].startswith(PicaSchema.AUTHOR.value) + assert transformed[2].startswith(PicaSchema.YEAR.value) + assert transformed[3].startswith(PicaSchema.PPN.value) + + +def test_alma_schema(): + transformer = QueryTransformer(ALMASchema, arguments) + transformed = transformer.transform() + assert len(transformed) == 3 # PPN is not supported + assert transformed[0].startswith(ALMASchema.TITLE.value) + assert transformed[1].startswith(ALMASchema.AUTHOR.value) + assert transformed[2].startswith(ALMASchema.YEAR.value) + + +def test_dublin_core_schema(): + transformer = QueryTransformer(DublinCoreSchema, arguments) + transformed = transformer.transform() + assert len(transformed) == 3 # YEAR is supported, PPN is not + assert transformed[0].startswith(DublinCoreSchema.TITLE.value) + assert transformed[1].startswith(DublinCoreSchema.AUTHOR.value) + assert transformed[2].startswith(DublinCoreSchema.YEAR.value)