chore: move dependencies, add more tests

This commit is contained in:
2025-12-05 11:21:41 +01:00
parent 8455322af4
commit 30e4cded8f
10 changed files with 608 additions and 6 deletions

7
.coveragerc Normal file
View File

@@ -0,0 +1,7 @@
[run]
branch = True
omit = */build/*,tests/*,main.py
[report]
exclude_lines =
pragma: no cover
raise NotImplementedError.*

View File

@@ -5,7 +5,10 @@ description = "Add your description here"
readme = "README.md"
authors = [{ name = "WorldTeacher", email = "coding_contact@pm.me" }]
requires-python = ">=3.13"
dependencies = ["regex>=2025.9.18"]
dependencies = [
"regex>=2025.9.18",
"requests>=2.32.5",
]
[project.optional-dependencies]
# SRU API feature: for accessing library catalogs via SRU protocol
@@ -46,8 +49,9 @@ pre_commit_hooks = []
post_commit_hooks = []
[dependency-groups]
dev = ["pylint>=4.0.3"]
test = [
dev = [
"pylint>=4.0.3",
"pytest-mock>=3.15.1",
"types-pysocks>=1.7.1.20251001",
"types-regex>=2025.9.18.20250921",
"types-requests>=2.32.4.20250913",

View File

@@ -1,4 +1,5 @@
from .schemas.api_types import (
ALMASchema,
DNBSchema,
HBZSchema,
HebisSchema,
@@ -89,5 +90,3 @@ class HBZ(_Api):
self.prefix = HBZSchema.ARGSCHEMA.value
self.library_identifier = HBZSchema.LIBRARY_NAME_LOCATION_FIELD.value
super().__init__(self.site, self.url, self.prefix, self.library_identifier)

View File

@@ -327,3 +327,7 @@ class Catalogue:
if link is None:
return None
return link.library_location
def check_book_exists(self, searchterm: str) -> bool:
    """Report whether the catalogue returns at least one link for a search term.

    Args:
        searchterm: Query string forwarded unchanged to ``get_book_links``.

    Returns:
        ``True`` when ``get_book_links`` returns a non-empty result,
        ``False`` when the result is empty or ``None``.
    """
    # Truthiness instead of ``len(...) > 0``: same answer for lists, and a
    # ``None`` result is reported as "not found" rather than raising TypeError.
    return bool(self.get_book_links(searchterm))

View File

@@ -0,0 +1 @@
"""Schemas for the provided APIs."""

View File

@@ -0,0 +1,10 @@
class BibAPIError(Exception):
    """Root of the BibAPI exception hierarchy; all BibAPI errors derive from it."""
class CatalogueError(BibAPIError):
    """Signals a failure reported by, or while using, the library catalogue API."""
class NetworkError(BibAPIError):
    """Signals a network-related failure."""

View File

@@ -1,5 +1,4 @@
from dataclasses import dataclass, field
from typing import list
# --- MARC XML structures ---

48
tests/test_catalogue.py Normal file
View File

@@ -0,0 +1,48 @@
"""Tests for the Catalogue class, which interacts with the library catalogue."""
import pytest
from pytest_mock import MockerFixture
from src.bibapi.catalogue import Catalogue
class TestCatalogue:
    """Tests for the Catalogue class."""

    def test_check_book_exists(self, mocker: MockerFixture):
        """check_book_exists is True for a non-empty link list and False for an empty one."""
        # The constructor consults check_connection (the sibling test relies on
        # that), so stub it out: this test must not depend on real network access.
        mocker.patch.object(Catalogue, "check_connection", return_value=True)
        catalogue = Catalogue()

        # Replace get_book_links once and steer its result via return_value.
        get_book_links = mocker.patch.object(
            catalogue,
            "get_book_links",
            return_value=["link1", "link2"],
        )

        # Search term of a book that is treated as existing
        existing_book_searchterm = "1693321114"
        assert catalogue.check_book_exists(existing_book_searchterm) is True

        # An empty link list stands for a book that cannot be found
        get_book_links.return_value = []
        non_existing_book_searchterm = "00000000009"
        assert catalogue.check_book_exists(non_existing_book_searchterm) is False

    def test_no_connection_raises_error(self, mocker: MockerFixture):
        """Constructing a Catalogue raises ConnectionError when check_connection is False."""
        # Simulate a missing internet connection at class level so the
        # constructor sees the stub.
        mocker.patch.object(
            Catalogue,
            "check_connection",
            return_value=False,
        )
        with pytest.raises(ConnectionError, match="No internet connection available."):
            Catalogue()

View File

@@ -0,0 +1,493 @@
"""Tests for MARCXML parsing functions in sru.py."""
import xml.etree.ElementTree as ET
import pytest
from bibapi.sru import (
NS,
_smart_join_title,
_text,
controlfield_value,
datafield_value,
datafields_value,
find_datafields_with_subfields,
first_subfield_value,
first_subfield_value_from_fields,
iter_datafields,
parse_echoed_request,
parse_marc_record,
parse_record,
parse_search_retrieve_response,
subfield_values,
subfield_values_from_fields,
)
from bibapi.schemas.marcxml import (
ControlField,
DataField,
MarcRecord,
SubField,
)
# --- Fixtures for sample XML data ---
@pytest.fixture
def minimal_marc_xml() -> str:
    """Small MARC record: a leader, controlfields 001/005 and one 245 datafield."""
    xml_document = """<?xml version="1.0" encoding="UTF-8"?>
<marc:record xmlns:marc="http://www.loc.gov/MARC21/slim">
<marc:leader>00000nam a22000001i 4500</marc:leader>
<marc:controlfield tag="001">PPN12345</marc:controlfield>
<marc:controlfield tag="005">20230101120000.0</marc:controlfield>
<marc:datafield tag="245" ind1="1" ind2="0">
<marc:subfield code="a">Test Title</marc:subfield>
<marc:subfield code="b">A Subtitle</marc:subfield>
</marc:datafield>
</marc:record>"""
    return xml_document
@pytest.fixture
def full_marc_xml() -> str:
    """Richer MARC record with three controlfields and repeated datafields.

    Contains two 020 fields, a 041 field with two ``a`` subfields, two 700
    fields and a 924 holdings-style field, among others.
    """
    xml_document = """<?xml version="1.0" encoding="UTF-8"?>
<marc:record xmlns:marc="http://www.loc.gov/MARC21/slim">
<marc:leader>00000nam a22000001i 4500</marc:leader>
<marc:controlfield tag="001">PPN98765</marc:controlfield>
<marc:controlfield tag="005">20231215150000.0</marc:controlfield>
<marc:controlfield tag="008">230101s2023 gw 000 0 ger d</marc:controlfield>
<marc:datafield tag="020" ind1=" " ind2=" ">
<marc:subfield code="a">9783123456789</marc:subfield>
</marc:datafield>
<marc:datafield tag="020" ind1=" " ind2=" ">
<marc:subfield code="a">9783987654321</marc:subfield>
</marc:datafield>
<marc:datafield tag="041" ind1=" " ind2=" ">
<marc:subfield code="a">ger</marc:subfield>
<marc:subfield code="a">eng</marc:subfield>
</marc:datafield>
<marc:datafield tag="245" ind1="1" ind2="0">
<marc:subfield code="a">Comprehensive Test Book</marc:subfield>
<marc:subfield code="b">With Many Details</marc:subfield>
<marc:subfield code="c">by Author Name</marc:subfield>
</marc:datafield>
<marc:datafield tag="250" ind1=" " ind2=" ">
<marc:subfield code="a">3rd edition</marc:subfield>
</marc:datafield>
<marc:datafield tag="264" ind1=" " ind2="1">
<marc:subfield code="a">Berlin</marc:subfield>
<marc:subfield code="b">Test Publisher</marc:subfield>
<marc:subfield code="c">2023</marc:subfield>
</marc:datafield>
<marc:datafield tag="300" ind1=" " ind2=" ">
<marc:subfield code="a">456 pages</marc:subfield>
</marc:datafield>
<marc:datafield tag="338" ind1=" " ind2=" ">
<marc:subfield code="a">Band</marc:subfield>
</marc:datafield>
<marc:datafield tag="700" ind1="1" ind2=" ">
<marc:subfield code="a">Author, First</marc:subfield>
</marc:datafield>
<marc:datafield tag="700" ind1="1" ind2=" ">
<marc:subfield code="a">Author, Second</marc:subfield>
</marc:datafield>
<marc:datafield tag="924" ind1=" " ind2=" ">
<marc:subfield code="9">Frei 129</marc:subfield>
<marc:subfield code="g">ABC 123</marc:subfield>
<marc:subfield code="b">DE-Frei129</marc:subfield>
</marc:datafield>
</marc:record>"""
    return xml_document
@pytest.fixture
def sru_response_xml() -> bytes:
    """SRU searchRetrieveResponse as bytes: two MARC records plus the echoed request."""
    payload = b"""<?xml version="1.0" encoding="UTF-8"?>
<zs:searchRetrieveResponse xmlns:zs="http://www.loc.gov/zing/srw/"
xmlns:marc="http://www.loc.gov/MARC21/slim">
<zs:version>1.1</zs:version>
<zs:numberOfRecords>2</zs:numberOfRecords>
<zs:records>
<zs:record>
<zs:recordSchema>marcxml</zs:recordSchema>
<zs:recordPacking>xml</zs:recordPacking>
<zs:recordData>
<marc:record>
<marc:leader>00000nam a22</marc:leader>
<marc:controlfield tag="001">PPN001</marc:controlfield>
<marc:datafield tag="245" ind1=" " ind2=" ">
<marc:subfield code="a">First Book</marc:subfield>
</marc:datafield>
</marc:record>
</zs:recordData>
<zs:recordPosition>1</zs:recordPosition>
</zs:record>
<zs:record>
<zs:recordSchema>marcxml</zs:recordSchema>
<zs:recordPacking>xml</zs:recordPacking>
<zs:recordData>
<marc:record>
<marc:leader>00000nam a22</marc:leader>
<marc:controlfield tag="001">PPN002</marc:controlfield>
<marc:datafield tag="245" ind1=" " ind2=" ">
<marc:subfield code="a">Second Book</marc:subfield>
</marc:datafield>
</marc:record>
</zs:recordData>
<zs:recordPosition>2</zs:recordPosition>
</zs:record>
</zs:records>
<zs:echoedSearchRetrieveRequest>
<zs:version>1.1</zs:version>
<zs:query>pica.tit=Test</zs:query>
<zs:maximumRecords>100</zs:maximumRecords>
<zs:recordPacking>xml</zs:recordPacking>
<zs:recordSchema>marcxml</zs:recordSchema>
</zs:echoedSearchRetrieveRequest>
</zs:searchRetrieveResponse>"""
    return payload
@pytest.fixture
def sru_response_no_records() -> bytes:
    """SRU searchRetrieveResponse as bytes that reports zero records."""
    payload = b"""<?xml version="1.0" encoding="UTF-8"?>
<zs:searchRetrieveResponse xmlns:zs="http://www.loc.gov/zing/srw/">
<zs:version>1.1</zs:version>
<zs:numberOfRecords>0</zs:numberOfRecords>
</zs:searchRetrieveResponse>"""
    return payload
# --- Tests for _text helper ---
class TestTextHelper:
    """Checks ``_text`` against elements with text, without text, and ``None``."""

    def test_text_with_element_and_text(self):
        """An element's text content is returned as-is."""
        assert _text(ET.fromstring("<tag>Hello</tag>")) == "Hello"

    def test_text_with_element_no_text(self):
        """An element without text yields the empty string."""
        assert _text(ET.fromstring("<tag></tag>")) == ""

    def test_text_with_none(self):
        """A missing element (``None``) yields the empty string."""
        missing = None
        assert _text(missing) == ""

    def test_text_with_whitespace(self):
        """Surrounding whitespace in the text is kept."""
        assert _text(ET.fromstring("<tag> spaced </tag>")) == " spaced "
# --- Tests for parse_marc_record ---
class TestParseMarcRecord:
    """Checks ``parse_marc_record`` on the minimal and the full sample record."""

    def test_parse_minimal_record(self, minimal_marc_xml):
        """Leader and controlfields of the minimal record are parsed."""
        record = parse_marc_record(ET.fromstring(minimal_marc_xml))

        assert record.leader == "00000nam a22000001i 4500"
        assert len(record.controlfields) == 2
        first_control = record.controlfields[0]
        assert first_control.tag == "001"
        assert first_control.value == "PPN12345"

    def test_parse_datafields(self, minimal_marc_xml):
        """The single 245 datafield keeps its indicators and subfields."""
        record = parse_marc_record(ET.fromstring(minimal_marc_xml))

        assert len(record.datafields) == 1
        title_field = record.datafields[0]
        assert title_field.tag == "245"
        assert title_field.ind1 == "1"
        assert title_field.ind2 == "0"
        assert len(title_field.subfields) == 2
        leading_subfield = title_field.subfields[0]
        assert leading_subfield.code == "a"
        assert leading_subfield.value == "Test Title"

    def test_parse_full_record(self, full_marc_xml):
        """The full record exposes three controlfields and the expected tags."""
        record = parse_marc_record(ET.fromstring(full_marc_xml))

        assert len(record.controlfields) == 3
        # Every one of these tags must occur among the parsed datafields
        seen_tags = [datafield.tag for datafield in record.datafields]
        for expected_tag in ("020", "245", "700", "924"):
            assert expected_tag in seen_tags

    def test_parse_multiple_subfields_same_code(self, full_marc_xml):
        """Repeated subfield codes in one field are all kept, in order."""
        record = parse_marc_record(ET.fromstring(full_marc_xml))

        # Field 041 carries two $a subfields
        language_field = next(df for df in record.datafields if df.tag == "041")
        collected = [sub.value for sub in language_field.subfields if sub.code == "a"]
        assert collected == ["ger", "eng"]
# --- Tests for parse_search_retrieve_response ---
class TestParseSearchRetrieveResponse:
    """Checks ``parse_search_retrieve_response`` on the sample SRU responses."""

    def test_parse_response_with_records(self, sru_response_xml):
        """Version, record count and record list are populated."""
        parsed = parse_search_retrieve_response(sru_response_xml)
        assert parsed.version == "1.1"
        assert parsed.numberOfRecords == 2
        assert len(parsed.records) == 2

    def test_parse_response_record_details(self, sru_response_xml):
        """The first record carries schema, packing, position and MARC data."""
        first_record = parse_search_retrieve_response(sru_response_xml).records[0]
        assert first_record.recordSchema == "marcxml"
        assert first_record.recordPacking == "xml"
        assert first_record.recordPosition == 1
        assert controlfield_value(first_record.recordData, "001") == "PPN001"

    def test_parse_response_no_records(self, sru_response_no_records):
        """A response reporting zero records yields an empty record list."""
        parsed = parse_search_retrieve_response(sru_response_no_records)
        assert parsed.version == "1.1"
        assert parsed.numberOfRecords == 0
        assert len(parsed.records) == 0

    def test_parse_echoed_request(self, sru_response_xml):
        """The echoed request is exposed with its query parameters."""
        echoed_request = parse_search_retrieve_response(
            sru_response_xml
        ).echoedSearchRetrieveRequest
        assert echoed_request is not None
        assert echoed_request.version == "1.1"
        assert echoed_request.query == "pica.tit=Test"
        assert echoed_request.maximumRecords == 100
        assert echoed_request.recordSchema == "marcxml"

    def test_parse_response_as_string(self, sru_response_xml):
        """Text input is accepted as well as bytes."""
        as_text = sru_response_xml.decode("utf-8")
        parsed = parse_search_retrieve_response(as_text)
        assert parsed.numberOfRecords == 2
# --- Tests for query helper functions ---
class TestIterDatafields:
    """Checks ``iter_datafields`` without filter, by tag, and by indicator."""

    def test_iter_all_datafields(self, full_marc_xml):
        """Without a filter every datafield of the record is yielded."""
        record = parse_marc_record(ET.fromstring(full_marc_xml))
        yielded = list(iter_datafields(record))
        assert len(yielded) == len(record.datafields)

    def test_iter_datafields_by_tag(self, full_marc_xml):
        """Filtering by tag 020 yields both ISBN fields."""
        record = parse_marc_record(ET.fromstring(full_marc_xml))
        isbn_fields = list(iter_datafields(record, tag="020"))
        assert len(isbn_fields) == 2  # Two ISBN fields

    def test_iter_datafields_by_indicator(self, full_marc_xml):
        """Filtering by tag 264 and ind2 "1" yields exactly one field."""
        record = parse_marc_record(ET.fromstring(full_marc_xml))
        matches = list(iter_datafields(record, tag="264", ind2="1"))
        assert len(matches) == 1
class TestSubfieldValues:
    """Checks ``subfield_values`` for single, repeated and absent fields."""

    def test_subfield_values_single(self, full_marc_xml):
        """A tag that occurs once yields a one-element list."""
        record = parse_marc_record(ET.fromstring(full_marc_xml))
        assert subfield_values(record, "245", "a") == ["Comprehensive Test Book"]

    def test_subfield_values_multiple(self, full_marc_xml):
        """Values from repeated 020 fields are all collected."""
        record = parse_marc_record(ET.fromstring(full_marc_xml))
        # Both ISBNs must be present
        isbns = subfield_values(record, "020", "a")
        assert len(isbns) == 2
        for expected_isbn in ("9783123456789", "9783987654321"):
            assert expected_isbn in isbns

    def test_subfield_values_empty(self, full_marc_xml):
        """An unknown tag yields an empty list."""
        record = parse_marc_record(ET.fromstring(full_marc_xml))
        assert subfield_values(record, "999", "x") == []
class TestFirstSubfieldValue:
    """Checks ``first_subfield_value`` for hits, misses, defaults and indicators."""

    def test_first_subfield_value_found(self, full_marc_xml):
        """An existing tag/code pair returns its value."""
        record = parse_marc_record(ET.fromstring(full_marc_xml))
        assert first_subfield_value(record, "245", "a") == "Comprehensive Test Book"

    def test_first_subfield_value_not_found(self, full_marc_xml):
        """A missing tag returns ``None`` when no default is given."""
        record = parse_marc_record(ET.fromstring(full_marc_xml))
        assert first_subfield_value(record, "999", "x") is None

    def test_first_subfield_value_with_default(self, full_marc_xml):
        """A missing tag returns the supplied default."""
        record = parse_marc_record(ET.fromstring(full_marc_xml))
        assert first_subfield_value(record, "999", "x", default="N/A") == "N/A"

    def test_first_subfield_value_with_indicator(self, full_marc_xml):
        """An indicator filter still finds the 264 $c value."""
        record = parse_marc_record(ET.fromstring(full_marc_xml))
        assert first_subfield_value(record, "264", "c", ind2="1") == "2023"
class TestControlFieldValue:
    """Checks ``controlfield_value`` for hits, misses and defaults."""

    def test_controlfield_value_found(self, full_marc_xml):
        """Controlfield 001 of the full record is returned."""
        record = parse_marc_record(ET.fromstring(full_marc_xml))
        assert controlfield_value(record, "001") == "PPN98765"

    def test_controlfield_value_not_found(self, full_marc_xml):
        """An unknown controlfield tag returns ``None``."""
        record = parse_marc_record(ET.fromstring(full_marc_xml))
        assert controlfield_value(record, "999") is None

    def test_controlfield_value_with_default(self, full_marc_xml):
        """An unknown controlfield tag returns the supplied default."""
        record = parse_marc_record(ET.fromstring(full_marc_xml))
        assert controlfield_value(record, "999", default="unknown") == "unknown"
class TestFindDatafieldsWithSubfields:
    """Checks ``find_datafields_with_subfields`` with ``where_all`` and ``casefold``."""

    def test_find_with_where_all(self, full_marc_xml):
        """A matching subfield value selects the 924 field."""
        record = parse_marc_record(ET.fromstring(full_marc_xml))
        criteria = {"9": "Frei 129"}
        matches = find_datafields_with_subfields(record, "924", where_all=criteria)
        assert len(matches) == 1
        assert matches[0].tag == "924"

    def test_find_with_where_all_not_found(self, full_marc_xml):
        """A non-matching subfield value selects nothing."""
        record = parse_marc_record(ET.fromstring(full_marc_xml))
        criteria = {"9": "NonExistent"}
        matches = find_datafields_with_subfields(record, "924", where_all=criteria)
        assert len(matches) == 0

    def test_find_with_casefold(self, full_marc_xml):
        """With ``casefold=True`` a lowercase needle still matches."""
        record = parse_marc_record(ET.fromstring(full_marc_xml))
        lowercase_criteria = {"9": "frei 129"}  # lowercase
        matches = find_datafields_with_subfields(
            record,
            "924",
            where_all=lowercase_criteria,
            casefold=True,
        )
        assert len(matches) == 1
class TestDatafieldValue:
    """Checks ``datafield_value`` on hand-built ``DataField`` objects."""

    def test_datafield_value_found(self):
        """Each present subfield code returns its value."""
        title = SubField(code="a", value="Title")
        subtitle = SubField(code="b", value="Subtitle")
        field_245 = DataField(tag="245", subfields=[title, subtitle])
        assert datafield_value(field_245, "a") == "Title"
        assert datafield_value(field_245, "b") == "Subtitle"

    def test_datafield_value_not_found(self):
        """An absent subfield code returns ``None``."""
        only_title = [SubField(code="a", value="Title")]
        field_245 = DataField(tag="245", subfields=only_title)
        assert datafield_value(field_245, "z") is None

    def test_datafield_value_with_default(self):
        """A field without subfields returns the supplied default."""
        empty_field = DataField(tag="245", subfields=[])
        assert datafield_value(empty_field, "a", default="N/A") == "N/A"
class TestDatafieldsValue:
    """Checks ``datafields_value`` on a list of fields and on an empty list."""

    def test_datafields_value_found(self):
        """The value from the first field in the list is returned."""
        author_fields = [
            DataField(tag="700", subfields=[SubField(code="a", value=name)])
            for name in ("Author One", "Author Two")
        ]
        assert datafields_value(author_fields, "a") == "Author One"

    def test_datafields_value_empty_list(self):
        """An empty field list returns ``None``."""
        no_fields = []
        assert datafields_value(no_fields, "a") is None
class TestSubfieldValuesFromFields:
    """Checks ``subfield_values_from_fields`` across several fields."""

    def test_values_from_multiple_fields(self):
        """Values are collected from every field, in field order."""
        first_author = DataField(tag="700", subfields=[SubField(code="a", value="Author One")])
        second_author = DataField(tag="700", subfields=[SubField(code="a", value="Author Two")])
        collected = subfield_values_from_fields([first_author, second_author], "a")
        assert collected == ["Author One", "Author Two"]
class TestFirstSubfieldValueFromFields:
    """Checks ``first_subfield_value_from_fields`` on two candidate fields."""

    def test_first_value_from_fields(self):
        """The value from the first field wins."""
        candidates = [
            DataField(tag="700", subfields=[SubField(code="a", value=text)])
            for text in ("First", "Second")
        ]
        assert first_subfield_value_from_fields(candidates, "a") == "First"
# --- Tests for _smart_join_title ---
class TestSmartJoinTitle:
    """Checks how ``_smart_join_title`` combines a title with a subtitle."""

    def test_join_with_subtitle(self):
        """Title and subtitle are joined with " : "."""
        assert _smart_join_title("Main Title", "Subtitle") == "Main Title : Subtitle"

    def test_join_without_subtitle(self):
        """A ``None`` subtitle leaves the title unchanged."""
        assert _smart_join_title("Main Title", None) == "Main Title"

    def test_join_with_empty_subtitle(self):
        """An empty subtitle leaves the title unchanged."""
        assert _smart_join_title("Main Title", "") == "Main Title"

    def test_join_with_existing_colon(self):
        """A title already ending in a colon gets no second separator."""
        assert _smart_join_title("Main Title:", "Subtitle") == "Main Title: Subtitle"

    def test_join_with_existing_semicolon(self):
        """A title already ending in a semicolon gets no extra separator."""
        assert _smart_join_title("Main Title;", "More") == "Main Title; More"

    def test_join_strips_whitespace(self):
        """Surrounding whitespace of both parts is removed before joining."""
        joined = _smart_join_title(" Main Title ", " Subtitle ")
        assert joined == "Main Title : Subtitle"

37
tests/test_schemas.py Normal file
View File

@@ -0,0 +1,37 @@
from src.bibapi.schemas.api_types import ALMASchema, DublinCoreSchema, PicaSchema
from src.bibapi.sru import QueryTransformer
# Shared "KEY=value" search arguments passed to QueryTransformer by the
# schema tests in this module.
arguments = [
    "TITLE=Java ist auch eine Insel",
    "AUTHOR=Ullenboom, Christian",
    "YEAR=2020",
    "PPN=1693321114",
]
def test_pica_schema():
    """All four arguments are transformed and carry the Pica field prefixes."""
    transformed = QueryTransformer(PicaSchema, arguments).transform()
    assert len(transformed) == 4
    expected_prefixes = (
        PicaSchema.TITLE.value,
        PicaSchema.AUTHOR.value,
        PicaSchema.YEAR.value,
        PicaSchema.PPN.value,
    )
    for position, prefix in enumerate(expected_prefixes):
        assert transformed[position].startswith(prefix)
def test_alma_schema():
    """Three arguments survive for ALMA and carry the ALMA field prefixes."""
    transformed = QueryTransformer(ALMASchema, arguments).transform()
    assert len(transformed) == 3  # PPN is not supported
    expected_prefixes = (
        ALMASchema.TITLE.value,
        ALMASchema.AUTHOR.value,
        ALMASchema.YEAR.value,
    )
    for position, prefix in enumerate(expected_prefixes):
        assert transformed[position].startswith(prefix)
def test_dublin_core_schema():
    """Three arguments survive for Dublin Core and carry its field prefixes."""
    transformed = QueryTransformer(DublinCoreSchema, arguments).transform()
    assert len(transformed) == 3  # YEAR is supported, PPN is not
    expected_prefixes = (
        DublinCoreSchema.TITLE.value,
        DublinCoreSchema.AUTHOR.value,
        DublinCoreSchema.YEAR.value,
    )
    for position, prefix in enumerate(expected_prefixes):
        assert transformed[position].startswith(prefix)