new features #13
.coveragerc (new file, 7 lines)
@@ -0,0 +1,7 @@
[run]
branch = True
omit = */build/*,tests/*,main.py
[report]
exclude_lines =
    pragma: no cover
    raise NotImplementedError.*
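For illustration, a hedged sketch of what the two `exclude_lines` patterns do in practice (the functions are hypothetical; this assumes pytest-cov picks up the `.coveragerc` above):

    def todo() -> None:
        raise NotImplementedError("not written yet")  # excluded via the raise NotImplementedError.* pattern

    def debug_helper() -> None:  # pragma: no cover, whole function excluded
        print("never measured")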
@@ -2,18 +2,18 @@ on:
   workflow_dispatch:
     inputs:
       github_release:
-        description: 'Create Gitea Release'
+        description: "Create Gitea Release"
         default: true
         type: boolean
       bump:
-        description: 'Bump type'
+        description: "Bump type"
         required: false
-        default: 'patch'
+        default: "patch"
         type: choice
         options:
-          - 'major'
-          - 'minor'
-          - 'patch'
+          - "major"
+          - "minor"
+          - "patch"
 jobs:
   build:
     runs-on: ubuntu-latest
@@ -26,7 +26,7 @@ jobs:
         fetch-tags: true # Fetch all tags (refs/tags)

       - name: Install uv
-        uses: astral-sh/setup-uv@v5
+        uses: astral-sh/setup-uv@v7
       - name: Set up Python
         run: uv python install
         with:
@@ -66,7 +66,6 @@ jobs:
         USERNAME: ${{ github.repository_owner }}
       run: uv publish --publish-url https://git.theprivateserver.de/api/packages/$USERNAME/pypi/ -t ${{ secrets.TOKEN }}
-

       - name: Create release
         id: create_release
         if: ${{ github.event.inputs.github_release == 'true' }}
@@ -14,7 +14,7 @@ jobs:
         uses: actions/checkout@master

       - name: Install uv
-        uses: astral-sh/setup-uv@v5
+        uses: astral-sh/setup-uv@v7
         with:
           python-version-file: "pyproject.toml"

@@ -12,7 +12,7 @@ jobs:
         uses: actions/checkout@master

       - name: Install uv
-        uses: astral-sh/setup-uv@v5
+        uses: astral-sh/setup-uv@v7
         with:
           python-version-file: "pyproject.toml"

.pre-commit-config.yaml (new file, 7 lines)
@@ -0,0 +1,7 @@
repos:
  - repo: https://github.com/astral-sh/ruff-pre-commit
    rev: v0.6.9 # latest ruff version
    hooks:
      - id: ruff
        args: ["--fix"]
      - id: ruff-format
@@ -3,30 +3,24 @@ name = "bibapi"
 version = "0.1.0"
 description = "Add your description here"
 readme = "README.md"
-authors = [
-    { name = "WorldTeacher", email = "coding_contact@pm.me" }
-]
+authors = [{ name = "WorldTeacher", email = "coding_contact@pm.me" }]
 requires-python = ">=3.13"
 dependencies = [
     "regex>=2025.9.18",
+    "requests>=2.32.5",
 ]

 [project.optional-dependencies]
 # SRU API feature: for accessing library catalogs via SRU protocol
-sru = [
-    "requests>=2.32.5",
-]
+sru = ["requests>=2.32.5"]

 # Catalogue feature: web scraping local library catalog
-catalogue = [
-    "requests>=2.32.5",
-    "beautifulsoup4>=4.12.0",
-]
+catalogue = ["requests>=2.32.5", "beautifulsoup4>=4.12.0"]
+webrequest = ["bibapi[catalogue]", "ratelimit>=2.2.0"]

 # Install all features
-all = [
-    "bibapi[sru,catalogue]",
-]
+all = ["bibapi[sru,catalogue]"]

 [build-system]
 requires = ["uv_build >= 0.9.5, <0.10.0"]
@@ -55,11 +49,25 @@ pre_commit_hooks = []
 post_commit_hooks = []

 [dependency-groups]
-test = [
+dev = [
+    "pylint>=4.0.3",
+    "pytest-mock>=3.15.1",
     "types-pysocks>=1.7.1.20251001",
     "types-regex>=2025.9.18.20250921",
     "types-requests>=2.32.4.20250913",
     "mypy>=1.18.2",
     "pytest>=8.4.2",
     "pytest-cov>=7.0.0",
+    "ratelimit>=2.2.0",
+    "beautifulsoup4>=4.12.0",
 ]

+[tool.pytest.ini_options]
+testpaths = ["tests"]
+python_files = ["test_*.py"]
+python_classes = ["Test*"]
+python_functions = ["test_*"]
+markers = [
+    "integration: marks tests as integration tests (deselect with '-m \"not integration\"')",
+]
+
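The new `[tool.pytest.ini_options]` block registers an `integration` marker; a minimal sketch of a test that uses it (the test itself is hypothetical):

    import pytest

    @pytest.mark.integration  # registered in [tool.pytest.ini_options] above
    def test_live_sru_endpoint() -> None:
        ...  # deselect with: pytest -m "not integration"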
@@ -1,18 +1,26 @@
-from .schemas.api_types import *
+from .schemas.api_types import (
+    ALMASchema,
+    DNBSchema,
+    HBZSchema,
+    HebisSchema,
+    KOBVSchema,
+    OEVKSchema,
+    SWBSchema,
+)
 from .sru import Api as _Api

 __all__ = [
-    "SWB",
     "DNB",
-    "KOBV",
-    "HEBIS",
-    "OEVK",
     "HBZ",
+    "HEBIS",
+    "KOBV",
+    "OEVK",
+    "SWB",
 ]


 class SWB(_Api):
-    def __init__(self):
+    def __init__(self) -> None:
         self.site = SWBSchema.NAME.value
         self.url = SWBSchema.URL.value
         self.prefix = SWBSchema.ARGSCHEMA.value
@@ -21,7 +29,7 @@ class SWB(_Api):


 class DNB(_Api):
-    def __init__(self):
+    def __init__(self) -> None:
         self.site = DNBSchema.NAME.value
         self.url = DNBSchema.URL.value
         self.prefix = DNBSchema.ARGSCHEMA.value
@@ -29,7 +37,7 @@ class DNB(_Api):


 class KOBV(_Api):
-    def __init__(self):
+    def __init__(self) -> None:
         self.site = KOBVSchema.NAME.value
         self.url = KOBVSchema.URL.value
         self.prefix = KOBVSchema.ARGSCHEMA.value
@@ -38,7 +46,7 @@ class KOBV(_Api):


 class HEBIS(_Api):
-    def __init__(self):
+    def __init__(self) -> None:
         self.site = HebisSchema.NAME.value
         self.url = HebisSchema.URL.value
         self.prefix = HebisSchema.ARGSCHEMA.value
@@ -56,7 +64,7 @@ class HEBIS(_Api):


 class OEVK(_Api):
-    def __init__(self):
+    def __init__(self) -> None:
         self.site = OEVKSchema.NAME.value
         self.url = OEVKSchema.URL.value
         self.prefix = OEVKSchema.ARGSCHEMA.value
@@ -65,18 +73,18 @@ class OEVK(_Api):


 class HBZ(_Api):
-    """
-    Small wrapper of the SRU API used to retrieve data from the HBZ libraries
+    """Small wrapper of the SRU API used to retrieve data from the HBZ libraries.

     All fields are available [here](https://eu04.alma.exlibrisgroup.com/view/sru/49HBZ_NETWORK?version=1.2)

     Schema
     ------
-    HBZSchema: <HBZSchema>
+    HBZSchema: "HBZSchema"

     query prefix: alma.
     """

-    def __init__(self):
+    def __init__(self) -> None:
         self.site = HBZSchema.NAME.value
         self.url = HBZSchema.URL.value
         self.prefix = HBZSchema.ARGSCHEMA.value
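For orientation, a hypothetical usage sketch of the re-exported classes. The "key=value" query format is inferred from the QueryTransformer changes further down; the exact argument names are an assumption:

    from bibapi import DNB, SWB  # explicit exports per the new __all__

    records = DNB().get(["title=Faust", "author=Goethe,Johann"])  # hypothetical query
    swb_records = SWB().get("title=Faust")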
src/bibapi/_transformers.py (new file, 502 lines)
@@ -0,0 +1,502 @@
from __future__ import annotations

import json
import re
from dataclasses import dataclass
from dataclasses import field as dataclass_field
from typing import Any

from src.bibapi.schemas.bookdata import BookData


@dataclass
class Item:
    superlocation: str | None = dataclass_field(default_factory=str)
    status: str | None = dataclass_field(default_factory=str)
    availability: str | None = dataclass_field(default_factory=str)
    notes: str | None = dataclass_field(default_factory=str)
    limitation: str | None = dataclass_field(default_factory=str)
    duedate: str | None = dataclass_field(default_factory=str)
    id: str | None = dataclass_field(default_factory=str)
    item_id: str | None = dataclass_field(default_factory=str)
    ilslink: str | None = dataclass_field(default_factory=str)
    number: int | None = dataclass_field(default_factory=int)
    barcode: str | None = dataclass_field(default_factory=str)
    reserve: str | None = dataclass_field(default_factory=str)
    callnumber: str | None = dataclass_field(default_factory=str)
    department: str | None = dataclass_field(default_factory=str)
    locationhref: str | None = dataclass_field(default_factory=str)
    location: str | None = dataclass_field(default_factory=str)
    ktrl_nr: str | None = dataclass_field(default_factory=str)

    def from_dict(self, data: dict[str, Any]) -> Item:
        """Import data from dict."""
        data = data["items"]
        for entry in data:
            for key, value in entry.items():
                setattr(self, key, value)
        return self


@dataclass
class RDS_AVAIL_DATA:
    """Class to store RDS availability data"""

    library_sigil: str = dataclass_field(default_factory=str)
    items: list[Item] = dataclass_field(default_factory=list)

    def import_from_dict(self, data: str):
        """Import data from dict"""
        edata = json.loads(data)
        # library sigil is first key

        self.library_sigil = str(list(edata.keys())[0])
        # get data from first key
        edata = edata[self.library_sigil]
        for location in edata:
            item = Item(superlocation=location).from_dict(edata[location])

            self.items.append(item)
        return self


@dataclass
class RDS_DATA:
    """Class to store RDS data"""

    RDS_SIGNATURE: str = dataclass_field(default_factory=str)
    RDS_STATUS: str = dataclass_field(default_factory=str)
    RDS_LOCATION: str = dataclass_field(default_factory=str)
    RDS_URL: Any = dataclass_field(default_factory=str)
    RDS_HINT: Any = dataclass_field(default_factory=str)
    RDS_COMMENT: Any = dataclass_field(default_factory=str)
    RDS_HOLDING: Any = dataclass_field(default_factory=str)
    RDS_HOLDING_LEAK: Any = dataclass_field(default_factory=str)
    RDS_INTERN: Any = dataclass_field(default_factory=str)
    RDS_PROVENIENCE: Any = dataclass_field(default_factory=str)
    RDS_LOCAL_NOTATION: str = dataclass_field(default_factory=str)
    RDS_LEA: Any = dataclass_field(default_factory=str)

    def import_from_dict(self, data: dict) -> RDS_DATA:
        """Import data from dict"""
        for key, value in data.items():
            setattr(self, key, value)
        return self


@dataclass
class RDS_GENERIC_DATA:
    LibrarySigil: str = dataclass_field(default_factory=str)
    RDS_DATA: list[RDS_DATA] = dataclass_field(default_factory=list)

    def import_from_dict(self, data: str) -> RDS_GENERIC_DATA:
        """Import data from dict"""
        edata = json.loads(data)
        # library sigil is first key
        self.LibrarySigil = str(list(edata.keys())[0])
        # get data from first key
        edata = edata[self.LibrarySigil]
        for entry in edata:
            rds_data = RDS_DATA()  # Create a new RDS_DATA instance
            # Populate the RDS_DATA instance from the entry
            # This assumes that the entry is a dictionary that matches the structure of the RDS_DATA class
            rds_data.import_from_dict(entry)
            self.RDS_DATA.append(rds_data)  # Add the RDS_DATA instance to the list
        return self


class BaseStruct:
    def __init__(self, **kwargs):
        for key, value in kwargs.items():
            setattr(self, key, value)


class ARRAYData:
    def __init__(self, signature=None) -> None:
        # keep an optional pre-known signature; transform() falls back to
        # parsing one out of the record when none is supplied
        self.signature = signature

    def transform(self, data: str) -> BookData:
        def _get_line(source: str, search: str) -> str:
            try:
                data = (
                    source.split(search)[1]
                    .split("\n")[0]
                    .strip()
                    .replace("=>", "")
                    .strip()
                )
                return data

            except Exception:
                return ""

        def _get_list_entry(source: str, search: str, entry: str) -> str:
            try:
                source = source.replace("\t", "").replace("\r", "")
                source = source.split(search)[1].split(")")[0]
                return _get_line(source, entry).replace("=>", "").strip()
            except Exception:
                return ""

        def _get_isbn(source: str) -> list:
            try:
                isbn = source.split("[isbn]")[1].split(")")[0].strip()
                isbn = isbn.split("(")[1]
                isbns = isbn.split("=>")
                ret = []
                for _ in isbns:
                    # remove _ from list
                    isb = _.split("\n")[0].strip()
                    if isb == "":
                        continue
                    ret.append(isb) if isb not in ret else None
                return ret
            except Exception:
                isbn = []
                return isbn

        def _get_signature(data):
            try:
                sig_data = (
                    data.split("[loksatz]")[1]
                    .split("[0] => ")[1]
                    .split("\n")[0]
                    .strip()
                )
                signature_data = eval(sig_data)
                return signature_data["signatur"]
            except Exception:
                return None

        def _get_author(data):
            try:
                array = data.split("[au_display_short]")[1].split(")\n")[0].strip()
            except Exception:
                return ""
            entries = array.split("\n")
            authors = []
            hg_present = False
            verf_present = False
            lines = []
            for entry in entries:
                if "=>" in entry:
                    line = entry.split("=>")[1].strip()
                    if "[HerausgeberIn]" in line:
                        hg_present = True
                    if "[VerfasserIn]" in line:
                        verf_present = True
                    lines.append(line)
            for line in lines:
                if hg_present and verf_present:
                    if "[HerausgeberIn]" in line:
                        authors.append(line.split("[")[0].strip())
                elif verf_present:
                    if "[VerfasserIn]" in line:
                        authors.append(line.split("[")[0].strip())
                else:
                    pass
            return ";".join(authors)

        def _get_title(data):
            titledata = None
            title = ""
            if "[ti_long]" in data:
                titledata = data.split("[ti_long]")[1].split(")\n")[0].strip()
                title = titledata.split("=>")[1].strip().split("/")[0].strip()
            if "[ti_long_f]" in data:
                titledata = data.split("[ti_long_f]")[1].split(")\n")[0].strip()
                title = titledata.split("=>")[1].strip().split("/")[0].strip()
            return title

        def _get_adis_idn(data, signature):
            loksatz_match = re.search(
                r"\[loksatz\] => Array\s*\((.*?)\)",
                data,
                re.DOTALL,
            )
            if loksatz_match:
                loksatz_content = loksatz_match.group(1)

                # Step 2: Extract JSON objects within the loksatz section
                json_objects = re.findall(r"{.*?}", loksatz_content, re.DOTALL)
                # Print each JSON object
                for obj in json_objects:
                    data = eval(obj)
                    if data["signatur"] == signature:
                        return data["adis_idn"]

        def _get_in_apparat(data):
            loksatz_match = re.search(
                r"\[loksatz\] => Array\s*\((.*?)\)",
                data,
                re.DOTALL,
            )
            if loksatz_match:
                loksatz_content = loksatz_match.group(1)

                # Step 2: Extract JSON objects within the loksatz section
                json_objects = re.findall(r"{.*?}", loksatz_content, re.DOTALL)
                # Print each JSON object
                for obj in json_objects:
                    data = eval(obj)
                    if data["ausleihcode"] == "R" and data["standort"] == "40":
                        return True
            return False

        ppn = _get_line(data, "[kid]")
        title = _get_title(data).strip()
        author = _get_author(data)
        edition = _get_list_entry(data, "[ausgabe]", "[0]").replace(",", "")
        link = f"https://rds.ibs-bw.de/phfreiburg/link?kid={_get_line(data, '[kid]')}"
        isbn = _get_isbn(data)
        # [self._get_list_entry(data,"[isbn]","[0]"),self._get_list_entry(data,"[is]","[1]")],
        language = _get_list_entry(data, "[la_facet]", "[0]")
        publisher = _get_list_entry(data, "[pu]", "[0]")
        year = _get_list_entry(data, "[py_display]", "[0]")
        pages = _get_list_entry(data, "[umfang]", "[0]").split(":")[0].strip()
        signature = (
            self.signature if self.signature is not None else _get_signature(data)
        )

        place = _get_list_entry(data, "[pp]", "[0]")
        adis_idn = _get_adis_idn(data, signature=signature)
        in_apparat = _get_in_apparat(data)
        return BookData(
            ppn=ppn,
            title=title,
            author=author,
            edition=edition,
            link=link,
            isbn=isbn,
            language=language,
            publisher=publisher,
            year=year,
            pages=pages,
            signature=signature,
            place=place,
            adis_idn=adis_idn,
            in_apparat=in_apparat,
        )


class COinSData:
    def __init__(self) -> None:
        pass

    def transform(self, data: str) -> BookData:
        def _get_line(source: str, search: str) -> str:
            try:
                data = source.split(f"{search}=")[1]  # .split("")[0].strip()
                return data.split("rft")[0].strip() if "rft" in data else data
            except Exception:
                return ""

        return BookData(
            ppn=_get_line(data, "rft_id").split("=")[1],
            title=_get_line(data, "rft.btitle"),
            author=f"{_get_line(data, 'rft.aulast')}, {_get_line(data, 'rft.aufirst')}",
            edition=_get_line(data, "rft.edition"),
            link=_get_line(data, "rft_id"),
            isbn=_get_line(data, "rft.isbn"),
            publisher=_get_line(data, "rft.pub"),
            year=_get_line(data, "rft.date"),
            pages=_get_line(data, "rft.tpages").split(":")[0].strip(),
        )


class RISData:
    def __init__(self) -> None:
        pass

    def transform(self, data: str) -> BookData:
        def _get_line(source: str, search: str) -> str:
            try:
                data = source.split(f"{search} - ")[1]  # .split("")[0].strip()
                return data.split("\n")[0].strip() if "\n" in data else data
            except Exception:
                return ""

        return BookData(
            ppn=_get_line(data, "DP").split("=")[1],
            title=_get_line(data, "TI"),
            signature=_get_line(data, "CN"),
            edition=_get_line(data, "ET").replace(",", ""),
            link=_get_line(data, "DP"),
            isbn=_get_line(data, "SN").split(","),
            author=_get_line(data, "AU").split("[")[0].strip(),
            language=_get_line(data, "LA"),
            publisher=_get_line(data, "PB"),
            year=_get_line(data, "PY"),
            pages=_get_line(data, "SP"),
        )


class BibTeXData:
    def __init__(self):
        pass

    def transform(self, data: str) -> BookData:
        def _get_line(source: str, search: str) -> str:
            try:
                return (
                    data.split(search)[1]
                    .split("\n")[0]
                    .strip()
                    .split("=")[1]
                    .strip()
                    .replace("{", "")
                    .replace("}", "")
                    .replace(",", "")
                    .replace("[", "")
                    .replace("];", "")
                )
            except Exception as e:
                print(e)
                return ""

        return BookData(
            ppn=None,
            title=_get_line(data, "title"),
            signature=_get_line(data, "bestand"),
            edition=_get_line(data, "edition"),
            isbn=_get_line(data, "isbn"),
            author=";".join(_get_line(data, "author").split(" and ")),
            language=_get_line(data, "language"),
            publisher=_get_line(data, "publisher"),
            year=_get_line(data, "year"),
            pages=_get_line(data, "pages"),
        )


class RDSData:
    def __init__(self):
        self.retlist = []

    def transform(self, data: str):
        # rds_availability = RDS_AVAIL_DATA()
        # rds_data = RDS_GENERIC_DATA()
        print(data)

        def __get_raw_data(data: str) -> list:
            # create base data to be turned into pydantic classes
            data = data.split("RDS ----------------------------------")[1]
            edata = data.strip()
            edata = edata.split("\n", 9)[9]
            edata = edata.split("\n")[1:]
            entry_1 = edata[0]
            edata = edata[1:]
            entry_2 = "".join(edata)
            edata = []
            edata.append(entry_1)
            edata.append(entry_2)
            return edata

        ret_data = __get_raw_data(data)
        # assign data[1] to RDS_AVAIL_DATA
        # assign data[0] to RDS_DATA
        self.rds_data = RDS_GENERIC_DATA().import_from_dict(ret_data[1])
        self.rds_availability = RDS_AVAIL_DATA().import_from_dict(ret_data[0])
        self.retlist.append(self.rds_availability)
        self.retlist.append(self.rds_data)
        return self

    def return_data(self, option=None):
        if option == "rds_availability":
            return self.retlist[0]
        if option == "rds_data":
            return self.retlist[1]
        return {"rds_availability": self.retlist[0], "rds_data": self.retlist[1]}


class DictToTable:
    def __init__(self):
        self.work_author = None
        self.section_author = None
        self.year = None
        self.edition = None
        self.work_title = None
        self.chapter_title = None
        self.location = None
        self.publisher = None
        self.signature = None
        self.type = None
        self.pages = None
        self.issue = None
        self.isbn = None

    def makeResult(self):
        data = {
            "work_author": self.work_author,
            "section_author": self.section_author,
            "year": self.year,
            "edition": self.edition,
            "work_title": self.work_title,
            "chapter_title": self.chapter_title,
            "location": self.location,
            "publisher": self.publisher,
            "signature": self.signature,
            "issue": self.issue,
            "pages": self.pages,
            "isbn": self.isbn,
            "type": self.type,
        }
        data = {k: v for k, v in data.items() if v is not None}
        return data

    def reset(self):
        for key in self.__dict__:
            setattr(self, key, None)

    def transform(self, data: dict):
        mode = data["mode"]
        self.reset()
        if mode == "book":
            return self.book_assign(data)
        if mode == "hg":
            return self.hg_assign(data)
        if mode == "zs":
            return self.zs_assign(data)
        return None

    def book_assign(self, data):
        self.type = "book"
        self.work_author = data["book_author"]
        self.signature = data["book_signature"]
        self.location = data["book_place"]
        self.year = data["book_year"]
        self.work_title = data["book_title"]
        self.edition = data["book_edition"]
        self.pages = data["book_pages"]
        self.publisher = data["book_publisher"]
        self.isbn = data["book_isbn"]
        return self.makeResult()

    def hg_assign(self, data):
        self.type = "hg"
        self.section_author = data["hg_author"]
        self.work_author = data["hg_editor"]
        self.year = data["hg_year"]
        self.work_title = data["hg_title"]
        self.publisher = data["hg_publisher"]
        self.location = data["hg_place"]
        self.edition = data["hg_edition"]
        self.chapter_title = data["hg_chaptertitle"]
        self.pages = data["hg_pages"]
        self.signature = data["hg_signature"]
        self.isbn = data["hg_isbn"]
        return self.makeResult()

    def zs_assign(self, data):
        self.type = "zs"
        self.section_author = data["zs_author"]
        self.chapter_title = data["zs_chapter_title"]
        self.location = data["zs_place"]
        self.issue = data["zs_issue"]
        self.pages = data["zs_pages"]
        self.publisher = data["zs_publisher"]
        self.isbn = data["zs_isbn"]

        self.year = data["zs_year"]
        self.signature = data["zs_signature"]
        self.work_title = data["zs_title"]
        return self.makeResult()
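A hedged sketch of the RIS transformer on a minimal record; the tags `TI`, `AU`, `PY`, `SP`, `DP` come from the splits in `RISData.transform` above, while the sample record itself is invented:

    ris = (
        "TI - Ein Beispieltitel\n"
        "AU - Doe, John [VerfasserIn]\n"
        "PY - 2024\n"
        "SP - 250\n"
        "DP - https://rds.ibs-bw.de/link?kid=123456789\n"
    )
    book = RISData().transform(ris)
    # book.title == "Ein Beispieltitel", book.year == "2024";
    # book.ppn is taken from the text after "=" in the DP line.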
@@ -1,5 +1,3 @@
-from typing import List
-
 import regex
 import requests
 from bs4 import BeautifulSoup
@@ -33,11 +31,11 @@ class Catalogue:
         response = requests.get(link, timeout=self.timeout)
         return response.text

-    def get_book_links(self, searchterm: str) -> List[str]:
+    def get_book_links(self, searchterm: str) -> list[str]:
         response = self.search_book(searchterm)
         soup = BeautifulSoup(response, "html.parser")
         links = soup.find_all("a", class_="title getFull")
-        res: List[str] = []
+        res: list[str] = []
         for link in links:
             res.append(BASE + link["href"])  # type: ignore
         return res
@@ -186,7 +184,8 @@ class Catalogue:
             class_="col-xs-12 col-md-7 col-lg-8 rds-dl-panel",
         ).get_text(strip=True)
         book.isbn = isbn
-        # from div col-xs-12 col-md-5 col-lg-4 rds-dl-head RDS_SCOPE get pages (second div in this div)
+        # from div col-xs-12 col-md-5 col-lg-4 rds-dl-head
+        # RDS_SCOPE get pages (second div in this div)
         pages = None
         pages_el = soup.find("div", class_="RDS_SCOPE")
         if pages_el:
@@ -206,14 +205,14 @@ class Catalogue:
         # based on PPN, get title, people, edition, year, language, pages, isbn,
         link = f"https://rds.ibs-bw.de/phfreiburg/opac/RDSIndexrecord/{ppn}"
         result = self.search(link)
-        soup = BeautifulSoup(result, "html.parser")
+        BeautifulSoup(result, "html.parser")

     def get_ppn(self, searchterm: str) -> str | None:
         links = self.get_book_links(searchterm)
         ppn = None
         for link in links:
             result = self.search(link)
-            soup = BeautifulSoup(result, "html.parser")
+            BeautifulSoup(result, "html.parser")
             ppn = link.split("/")[-1]
             if ppn and regex.match(r"^\d{8,10}[X\d]?$", ppn):
                 return ppn
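The PPN check above accepts 8 to 10 digits plus an optional `X` check digit; the pattern in isolation:

    import regex

    PPN_RE = r"^\d{8,10}[X\d]?$"
    for candidate in ("123456789", "12345678X", "1234567", "abc"):
        print(candidate, bool(regex.match(PPN_RE, candidate)))
    # -> True, True, False, False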
@@ -328,3 +327,7 @@ class Catalogue:
         if link is None:
             return None
         return link.library_location
+
+    def check_book_exists(self, searchterm: str) -> bool:
+        links = self.get_book_links(searchterm)
+        return len(links) > 0
src/bibapi/schemas/__init__.py (new file, 1 line)
@@ -0,0 +1 @@
"""Schemas for the provided APIs."""
@@ -1,6 +1,6 @@
 import json
 from dataclasses import dataclass, field
-from typing import Any, Optional, Union
+from typing import Any

 import regex

@@ -12,9 +12,9 @@ class BookData:
     signature: str | None = None
     edition: str | None = None
     link: str | None = None
-    isbn: Union[str, list[str], None] = field(default_factory=list[str])
+    isbn: str | list[str] | None = field(default_factory=list[str])
     author: str | None = None
-    language: Union[str, list[str], None] = field(default_factory=list)
+    language: str | list[str] | None = field(default_factory=list)
     publisher: str | None = None
     place: str | None = None
     year: int | None = None
@@ -23,9 +23,10 @@ class BookData:
     in_apparat: bool | None = False
     adis_idn: str | None = None
     old_book: Any | None = None
-    media_type: str | None = None  #
+    media_type: str | None = None
     in_library: bool | None = None  # whether the book is in the library or not
     libraries: list[str] | None = field(default_factory=list)
+    medianr: int | None = None  # media number

     def __post_init__(self):
         self.library_location = (
@@ -72,11 +73,10 @@ class BookData:
             key: value for key, value in self.__dict__.items() if value is not None
         }
         # remove old_book from data_dict
-        if "old_book" in data_dict:
-            del data_dict["old_book"]
+        data_dict.pop("old_book", None)
         return json.dumps(data_dict, ensure_ascii=False)

-    def from_dataclass(self, dataclass: Optional[Any]) -> None:
+    def from_dataclass(self, dataclass: Any | None) -> None:
         if dataclass is None:
             return
         for key, value in dataclass.__dict__.items():
@@ -86,7 +86,6 @@ class BookData:
         if isinstance(self.media_type, str):
             if "Online" in self.pages:
                 return "eBook"
-            else:
-                return "Druckausgabe"
+            return "Druckausgabe"
         return None

@@ -114,7 +113,7 @@ class BookData:
         return self

     @property
-    def edition_number(self) -> Optional[int]:
+    def edition_number(self) -> int | None:
         if self.edition is None:
             return 0
         match = regex.search(r"(\d+)", self.edition)
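A small sketch of the `edition_number` property together with the new `medianr` field (sample values invented; this assumes the remainder of `__post_init__`, not shown here, tolerates the defaults):

    book = BookData(
        title="Beispielbuch",
        edition="3., überarbeitete Auflage",
        medianr=123456,  # new field from this diff
    )
    print(book.edition_number)  # -> 3, the first digit run found via regex.search(r"(\d+)", ...)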
src/bibapi/schemas/errors.py (new file, 10 lines)
@@ -0,0 +1,10 @@
class BibAPIError(Exception):
    """Base class for all BibAPI errors."""


class CatalogueError(BibAPIError):
    """Raised when there is an error with the library catalogue API."""


class NetworkError(BibAPIError):
    """Raised when there is a network-related error."""
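A brief sketch of the intended hierarchy: catching the base class covers both subclasses.

    try:
        raise NetworkError("SRU endpoint unreachable")  # hypothetical failure
    except BibAPIError as err:
        print(f"{type(err).__name__}: {err}")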
@@ -1,5 +1,4 @@
 from dataclasses import dataclass, field
-from typing import List, Optional


 # --- MARC XML structures ---
@@ -20,14 +19,14 @@ class DataField:
     tag: str
     ind1: str = " "
     ind2: str = " "
-    subfields: List[SubField] = field(default_factory=list)
+    subfields: list[SubField] = field(default_factory=list)


 @dataclass
 class MarcRecord:
     leader: str
-    controlfields: List[ControlField] = field(default_factory=list)
-    datafields: List[DataField] = field(default_factory=list)
+    controlfields: list[ControlField] = field(default_factory=list)
+    datafields: list[DataField] = field(default_factory=list)


 # --- SRU record wrapper ---
@@ -52,17 +51,17 @@ class EchoedSearchRequest:
 class SearchRetrieveResponse:
     version: str
     numberOfRecords: int
-    records: List[Record] = field(default_factory=list)
-    echoedSearchRetrieveRequest: Optional[EchoedSearchRequest] = None
+    records: list[Record] = field(default_factory=list)
+    echoedSearchRetrieveRequest: EchoedSearchRequest | None = None


 @dataclass
 class FormattedResponse:
     title: str
-    edition: Optional[str] = None
-    publisher: Optional[str] = None
-    year: Optional[str] = None
-    authors: List[str] = field(default_factory=list)
-    isbn: List[str] = field(default_factory=list)
-    ppn: Optional[str] = None
-    libraries: List[str] = field(default_factory=list)
+    edition: str | None = None
+    publisher: str | None = None
+    year: str | None = None
+    authors: list[str] = field(default_factory=list)
+    isbn: list[str] = field(default_factory=list)
+    ppn: str | None = None
+    libraries: list[str] = field(default_factory=list)
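For reference, a minimal record built from these dataclasses (assuming `SubField(code, value)` and `ControlField(tag, value)` as they are constructed in sru.py below):

    rec = MarcRecord(
        leader="00000nam a2200000 c 4500",
        controlfields=[ControlField(tag="001", value="123456789")],
        datafields=[
            DataField(
                tag="245", ind1="1", ind2="0",
                subfields=[SubField(code="a", value="Ein Titel")],
            ),
        ],
    )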
@@ -1,8 +1,9 @@
 import re
 import time
 import xml.etree.ElementTree as ET
+from collections.abc import Iterable
 from enum import Enum
-from typing import Any, Dict, Iterable, List, Optional, Tuple, Type, Union
+from typing import Any

 import requests
 from requests.adapters import HTTPAdapter
@@ -24,7 +25,7 @@ MARC = "http://www.loc.gov/MARC21/slim"
 NS = {"zs": ZS, "marc": MARC}


-def _text(elem: Optional[ET.Element]) -> str:
+def _text(elem: ET.Element | None) -> str:
     return (elem.text or "") if elem is not None else ""

@@ -36,32 +37,32 @@ def _req_text(parent: ET.Element, path: str) -> str:


 def parse_marc_record(record_el: ET.Element) -> MarcRecord:
-    """
-    record_el is the <marc:record> element (default ns MARC in your sample)
-    """
+    """record_el is the <marc:record> element (default ns MARC in your sample)"""
     # leader
     leader_text = _req_text(record_el, "marc:leader")

     # controlfields
-    controlfields: List[ControlField] = []
+    controlfields: list[ControlField] = []
     for cf in record_el.findall("marc:controlfield", NS):
         tag = cf.get("tag", "").strip()
         controlfields.append(ControlField(tag=tag, value=_text(cf)))

     # datafields
-    datafields: List[DataField] = []
+    datafields: list[DataField] = []
     for df in record_el.findall("marc:datafield", NS):
         tag = df.get("tag", "").strip()
         ind1 = df.get("ind1") or " "
         ind2 = df.get("ind2") or " "
-        subfields: List[SubField] = []
+        subfields: list[SubField] = []
         for sf in df.findall("marc:subfield", NS):
             code = sf.get("code", "")
             subfields.append(SubField(code=code, value=_text(sf)))
         datafields.append(DataField(tag=tag, ind1=ind1, ind2=ind2, subfields=subfields))

     return MarcRecord(
-        leader=leader_text, controlfields=controlfields, datafields=datafields
+        leader=leader_text,
+        controlfields=controlfields,
+        datafields=datafields,
     )


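A hedged round-trip sketch; the XML snippet is invented but uses the MARC21/slim namespace the parser expects:

    import xml.etree.ElementTree as ET

    xml = (
        '<record xmlns="http://www.loc.gov/MARC21/slim">'
        "<leader>00000nam a2200000 c 4500</leader>"
        '<controlfield tag="001">123456789</controlfield>'
        '<datafield tag="245" ind1="1" ind2="0">'
        '<subfield code="a">Ein Titel</subfield>'
        "</datafield></record>"
    )
    rec = parse_marc_record(ET.fromstring(xml))
    # rec.datafields[0].subfields[0].value == "Ein Titel"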
@@ -92,7 +93,7 @@ def parse_record(zs_record_el: ET.Element) -> Record:
     )


-def parse_echoed_request(root: ET.Element) -> Optional[EchoedSearchRequest]:
+def parse_echoed_request(root: ET.Element) -> EchoedSearchRequest | None:
     el = root.find("zs:echoedSearchRetrieveRequest", NS)
     if el is None:
         return None
@@ -119,7 +120,7 @@ def parse_echoed_request(root: ET.Element) -> Optional[EchoedSearchRequest]:


 def parse_search_retrieve_response(
-    xml_str: Union[str, bytes],
+    xml_str: str | bytes,
 ) -> SearchRetrieveResponse:
     root = ET.fromstring(xml_str)

@@ -128,7 +129,7 @@ def parse_search_retrieve_response(
     numberOfRecords = int(_req_text(root, "zs:numberOfRecords") or "0")

     records_parent = root.find("zs:records", NS)
-    records: List[Record] = []
+    records: list[Record] = []
     if records_parent is not None:
         for r in records_parent.findall("zs:record", NS):
             record = parse_record(r)
@@ -150,9 +151,9 @@ def parse_search_retrieve_response(

 def iter_datafields(
     rec: MarcRecord,
-    tag: Optional[str] = None,
-    ind1: Optional[str] = None,
-    ind2: Optional[str] = None,
+    tag: str | None = None,
+    ind1: str | None = None,
+    ind2: str | None = None,
 ) -> Iterable[DataField]:
     """Yield datafields, optionally filtered by tag/indicators."""
     for df in rec.datafields:
@@ -170,11 +171,11 @@ def subfield_values(
     tag: str,
     code: str,
     *,
-    ind1: Optional[str] = None,
-    ind2: Optional[str] = None,
-) -> List[str]:
+    ind1: str | None = None,
+    ind2: str | None = None,
+) -> list[str]:
     """All values for subfield `code` in every `tag` field (respecting indicators)."""
-    out: List[str] = []
+    out: list[str] = []
     for df in iter_datafields(rec, tag, ind1, ind2):
         out.extend(sf.value for sf in df.subfields if sf.code == code)
     return out
@@ -185,10 +186,10 @@ def first_subfield_value(
     tag: str,
     code: str,
     *,
-    ind1: Optional[str] = None,
-    ind2: Optional[str] = None,
-    default: Optional[str] = None,
-) -> Optional[str]:
+    ind1: str | None = None,
+    ind2: str | None = None,
+    default: str | None = None,
+) -> str | None:
     """First value for subfield `code` in `tag` (respecting indicators)."""
     for df in iter_datafields(rec, tag, ind1, ind2):
         for sf in df.subfields:
@@ -201,25 +202,24 @@ def find_datafields_with_subfields(
     rec: MarcRecord,
     tag: str,
     *,
-    where_all: Optional[Dict[str, str]] = None,
-    where_any: Optional[Dict[str, str]] = None,
+    where_all: dict[str, str] | None = None,
+    where_any: dict[str, str] | None = None,
     casefold: bool = False,
-    ind1: Optional[str] = None,
-    ind2: Optional[str] = None,
-) -> List[DataField]:
-    """
-    Return datafields of `tag` whose subfields match constraints:
+    ind1: str | None = None,
+    ind2: str | None = None,
+) -> list[DataField]:
+    """Return datafields of `tag` whose subfields match constraints:
       - where_all: every (code -> exact value) must be present
      - where_any: at least one (code -> exact value) present
     Set `casefold=True` for case-insensitive comparison.
     """
     where_all = where_all or {}
     where_any = where_any or {}
-    matched: List[DataField] = []
+    matched: list[DataField] = []

     for df in iter_datafields(rec, tag, ind1, ind2):
         # Map code -> list of values (with optional casefold applied)
-        vals: Dict[str, List[str]] = {}
+        vals: dict[str, list[str]] = {}
         for sf in df.subfields:
             v = sf.value.casefold() if casefold else sf.value
             vals.setdefault(sf.code, []).append(v)
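A sketch of the matching helpers, mirroring the pattern `book_from_marc` uses further down ("Frei 129" is the holding sigil filtered on there; `rec` is a parsed MarcRecord as in the earlier sketch):

    frei_fields = find_datafields_with_subfields(rec, "924", where_all={"9": "Frei 129"})
    signature = first_subfield_value_from_fields(frei_fields, "g")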
@@ -246,8 +246,10 @@ def find_datafields_with_subfields(


 def controlfield_value(
-    rec: MarcRecord, tag: str, default: Optional[str] = None
-) -> Optional[str]:
+    rec: MarcRecord,
+    tag: str,
+    default: str | None = None,
+) -> str | None:
     """Get the first controlfield value by tag (e.g., '001', '005')."""
     for cf in rec.controlfields:
         if cf.tag == tag:
@@ -256,8 +258,10 @@ def controlfield_value(


 def datafields_value(
-    data: List[DataField], code: str, default: Optional[str] = None
-) -> Optional[str]:
+    data: list[DataField],
+    code: str,
+    default: str | None = None,
+) -> str | None:
     """Get the first value for a specific subfield code in a list of datafields."""
     for df in data:
         for sf in df.subfields:
@@ -267,8 +271,10 @@ def datafields_value(


 def datafield_value(
-    df: DataField, code: str, default: Optional[str] = None
-) -> Optional[str]:
+    df: DataField,
+    code: str,
+    default: str | None = None,
+) -> str | None:
     """Get the first value for a specific subfield code in a datafield."""
     for sf in df.subfields:
         if sf.code == code:
@@ -276,9 +282,8 @@ def datafield_value(
     return default


-def _smart_join_title(a: str, b: Optional[str]) -> str:
-    """
-    Join 245 $a and $b with MARC-style punctuation.
+def _smart_join_title(a: str, b: str | None) -> str:
+    """Join 245 $a and $b with MARC-style punctuation.
     If $b is present, join with ' : ' unless either side already supplies punctuation.
     """
     a = a.strip()
@@ -293,7 +298,7 @@ def _smart_join_title(a: str, b: Optional[str]) -> str:
 def subfield_values_from_fields(
     fields: Iterable[DataField],
     code: str,
-) -> List[str]:
+) -> list[str]:
     """All subfield values with given `code` across a list of DataField."""
     return [sf.value for df in fields for sf in df.subfields if sf.code == code]

@@ -301,8 +306,8 @@ def subfield_values_from_fields(
 def first_subfield_value_from_fields(
     fields: Iterable[DataField],
     code: str,
-    default: Optional[str] = None,
-) -> Optional[str]:
+    default: str | None = None,
+) -> str | None:
     """First subfield value with given `code` across a list of DataField."""
     for df in fields:
         for sf in df.subfields:
@@ -314,12 +319,11 @@ def first_subfield_value_from_fields(
 def subfield_value_pairs_from_fields(
     fields: Iterable[DataField],
     code: str,
-) -> List[Tuple[DataField, str]]:
-    """
-    Return (DataField, value) pairs for all subfields with `code`.
+) -> list[tuple[DataField, str]]:
+    """Return (DataField, value) pairs for all subfields with `code`.
     Useful if you need to know which field a value came from.
     """
-    out: List[Tuple[DataField, str]] = []
+    out: list[tuple[DataField, str]] = []
     for df in fields:
         for sf in df.subfields:
             if sf.code == code:
@@ -340,13 +344,17 @@ def book_from_marc(rec: MarcRecord, library_identifier: str) -> BookData:

     # Signature = 924 where $9 == "Frei 129" → take that field's $g
     frei_fields = find_datafields_with_subfields(
-        rec, "924", where_all={"9": "Frei 129"}
+        rec,
+        "924",
+        where_all={"9": "Frei 129"},
     )
     signature = first_subfield_value_from_fields(frei_fields, "g")

     # Year = 264 $c (prefer ind2="1" publication; fallback to any 264)
     year = first_subfield_value(rec, "264", "c", ind2="1") or first_subfield_value(
-        rec, "264", "c"
+        rec,
+        "264",
+        "c",
     )
     isbn = subfield_values(rec, "020", "a")
     mediatype = first_subfield_value(rec, "338", "a")
@@ -378,10 +386,10 @@ RVK_ALLOWED = r"[A-Z0-9.\-\/]"  # conservative char set typically seen in RVK no


 def find_newer_edition(
-    swb_result: BookData, dnb_result: List[BookData]
-) -> Optional[List[BookData]]:
-    """
-    New edition if:
+    swb_result: BookData,
+    dnb_result: list[BookData],
+) -> list[BookData] | None:
+    """New edition if:
       - year > swb.year OR
       - edition_number > swb.edition_number

@@ -393,7 +401,7 @@ def find_newer_edition(
         edition_number desc, best-signature-match desc, has-signature desc).
     """

-    def norm_sig(s: Optional[str]) -> str:
+    def norm_sig(s: str | None) -> str:
         if not s:
             return ""
         # normalize: lowercase, collapse whitespace, keep alnum + a few separators
@@ -427,7 +435,7 @@ def find_newer_edition(
     swb_sig_norm = norm_sig(getattr(swb_result, "signature", None))

     # 1) Filter to same-work AND newer
-    candidates: List[BookData] = []
+    candidates: list[BookData] = []
     for b in dnb_result:
         # Skip if both signatures exist and don't match (different work)
         b_sig = getattr(b, "signature", None)
@@ -443,7 +451,7 @@ def find_newer_edition(
         return None

     # 2) Dedupe by PPN, preferring signature (and matching signature if possible)
-    by_ppn: dict[Optional[str], BookData] = {}
+    by_ppn: dict[str | None, BookData] = {}
     for b in candidates:
         key = getattr(b, "ppn", None)
         prev = by_ppn.get(key)
@@ -477,7 +485,7 @@ def find_newer_edition(


 class QueryTransformer:
-    def __init__(self, api_schema: Type[Enum], arguments: Union[Iterable[str], str]):
+    def __init__(self, api_schema: type[Enum], arguments: Iterable[str] | str):
         self.api_schema = api_schema
         if isinstance(arguments, str):
             self.arguments = [arguments]
@@ -485,8 +493,8 @@ class QueryTransformer:
             self.arguments = arguments
         self.drop_empty = True

-    def transform(self) -> Dict[str, Any]:
-        arguments: List[str] = []
+    def transform(self) -> dict[str, Any]:
+        arguments: list[str] = []
         schema = self.api_schema
         for arg in self.arguments:
             if "=" not in arg:
@@ -497,16 +505,17 @@ class QueryTransformer:
             if hasattr(schema, key.upper()):
                 api_key = getattr(schema, key.upper()).value
                 if key.upper() == "AUTHOR" and hasattr(schema, "AUTHOR_SCHEMA"):
-                    author_schema = getattr(schema, "AUTHOR_SCHEMA").value
+                    author_schema = schema.AUTHOR_SCHEMA.value
                     if author_schema == "SpaceAfterComma":
                         value = value.replace(",", ", ")
                     elif author_schema == "NoSpaceAfterComma":
                         value = value.replace(", ", ",")
                         value = value.replace("  ", " ")
                 if key.upper() == "TITLE" and hasattr(
-                    schema, "ENCLOSE_TITLE_IN_QUOTES"
+                    schema,
+                    "ENCLOSE_TITLE_IN_QUOTES",
                 ):
-                    if getattr(schema, "ENCLOSE_TITLE_IN_QUOTES"):
+                    if schema.ENCLOSE_TITLE_IN_QUOTES:
                         value = f'"{value}"'

                 arguments.append(f"{api_key}={value}")
@@ -519,10 +528,10 @@ class Api:
|
|||||||
self,
|
self,
|
||||||
site: str,
|
site: str,
|
||||||
url: str,
|
url: str,
|
||||||
prefix: Type[Enum],
|
prefix: type[Enum],
|
||||||
library_identifier: str,
|
library_identifier: str,
|
||||||
notsupported_args: Optional[List[str]] = None,
|
notsupported_args: list[str] | None = None,
|
||||||
replace: Optional[Dict[str, str]] = None,
|
replace: dict[str, str] | None = None,
|
||||||
):
|
):
|
||||||
self.site = site
|
self.site = site
|
||||||
self.url = url
|
self.url = url
|
||||||
@@ -554,7 +563,7 @@ class Api:
|
|||||||
# Best-effort cleanup
|
# Best-effort cleanup
|
||||||
self.close()
|
self.close()
|
||||||
|
|
||||||
def get(self, query_args: Union[Iterable[str], str]) -> List[Record]:
|
def get(self, query_args: Iterable[str] | str) -> list[Record]:
|
||||||
start_time = time.monotonic()
|
start_time = time.monotonic()
|
||||||
# if any query_arg ends with =, remove it
|
# if any query_arg ends with =, remove it
|
||||||
if isinstance(query_args, str):
|
if isinstance(query_args, str):
|
||||||
@@ -566,7 +575,8 @@ class Api:
|
|||||||
if not any(qa.startswith(na + "=") for na in self.notsupported_args)
|
if not any(qa.startswith(na + "=") for na in self.notsupported_args)
|
||||||
]
|
]
|
||||||
query_args = QueryTransformer(
|
query_args = QueryTransformer(
|
||||||
api_schema=self.prefix, arguments=query_args
|
api_schema=self.prefix,
|
||||||
|
arguments=query_args,
|
||||||
).transform()
|
).transform()
|
||||||
query = "+and+".join(query_args)
|
query = "+and+".join(query_args)
|
||||||
for old, new in self.replace.items():
|
for old, new in self.replace.items():
|
||||||
@@ -579,12 +589,12 @@ class Api:
|
|||||||
"Accept-Charset": "latin1,utf-8;q=0.7,*;q=0.3",
|
"Accept-Charset": "latin1,utf-8;q=0.7,*;q=0.3",
|
||||||
}
|
}
|
||||||
# Use persistent session, enforce 1 req/sec, and retry up to 5 times
|
# Use persistent session, enforce 1 req/sec, and retry up to 5 times
|
||||||
last_error: Optional[Exception] = None
|
last_error: Exception | None = None
|
||||||
for attempt in range(1, self._max_retries + 1):
|
for attempt in range(1, self._max_retries + 1):
|
||||||
# Abort if overall timeout exceeded before starting attempt
|
# Abort if overall timeout exceeded before starting attempt
|
||||||
if time.monotonic() - start_time > self._overall_timeout_seconds:
|
if time.monotonic() - start_time > self._overall_timeout_seconds:
|
||||||
last_error = requests.exceptions.Timeout(
|
last_error = requests.exceptions.Timeout(
|
||||||
f"Overall timeout {self._overall_timeout_seconds}s exceeded before attempt {attempt}"
|
f"Overall timeout {self._overall_timeout_seconds}s exceeded before attempt {attempt}",
|
||||||
)
|
)
|
||||||
break
|
break
|
||||||
# Enforce rate limit relative to last request end
|
# Enforce rate limit relative to last request end
|
||||||
@@ -596,20 +606,22 @@ class Api:
|
|||||||
try:
|
try:
|
||||||
# Per-attempt read timeout capped at remaining overall budget (but at most 30s)
|
# Per-attempt read timeout capped at remaining overall budget (but at most 30s)
|
||||||
remaining = max(
|
remaining = max(
|
||||||
0.0, self._overall_timeout_seconds - (time.monotonic() - start_time)
|
0.0,
|
||||||
|
self._overall_timeout_seconds - (time.monotonic() - start_time),
|
||||||
)
|
)
|
||||||
read_timeout = min(30.0, remaining if remaining > 0 else 0.001)
|
read_timeout = min(30.0, remaining if remaining > 0 else 0.001)
|
||||||
resp = self._session.get(
|
resp = self._session.get(
|
||||||
url, headers=headers, timeout=(3.05, read_timeout)
|
url,
|
||||||
|
headers=headers,
|
||||||
|
timeout=(3.05, read_timeout),
|
||||||
)
|
)
|
||||||
self._last_request_time = time.monotonic()
|
self._last_request_time = time.monotonic()
|
||||||
if resp.status_code == 200:
|
if resp.status_code == 200:
|
||||||
# Parse using raw bytes (original behavior) to preserve encoding edge cases
|
# Parse using raw bytes (original behavior) to preserve encoding edge cases
|
||||||
sr = parse_search_retrieve_response(resp.content)
|
sr = parse_search_retrieve_response(resp.content)
|
||||||
return sr.records
|
return sr.records
|
||||||
else:
|
|
||||||
last_error = Exception(
|
last_error = Exception(
|
||||||
f"Error fetching data from {self.site}: HTTP {resp.status_code} (attempt {attempt}/{self._max_retries})"
|
f"Error fetching data from {self.site}: HTTP {resp.status_code} (attempt {attempt}/{self._max_retries})",
|
||||||
)
|
)
|
||||||
except requests.exceptions.ReadTimeout as e:
|
except requests.exceptions.ReadTimeout as e:
|
||||||
last_error = e
|
last_error = e
|
||||||
@@ -625,9 +637,9 @@ class Api:
|
|||||||
# If we exit the loop, all attempts failed
|
# If we exit the loop, all attempts failed
|
||||||
raise last_error if last_error else Exception("Unknown request failure")
|
raise last_error if last_error else Exception("Unknown request failure")
|
||||||
|
|
||||||
def getBooks(self, query_args: Union[Iterable[str], str]) -> List[BookData]:
|
def getBooks(self, query_args: Iterable[str] | str) -> list[BookData]:
|
||||||
try:
|
try:
|
||||||
records: List[Record] = self.get(query_args)
|
records: list[Record] = self.get(query_args)
|
||||||
except requests.exceptions.ReadTimeout:
|
except requests.exceptions.ReadTimeout:
|
||||||
# Return a list with a single empty BookData object on read timeout
|
# Return a list with a single empty BookData object on read timeout
|
||||||
return [BookData()]
|
return [BookData()]
|
||||||
@@ -638,7 +650,7 @@ class Api:
|
|||||||
# Propagate other errors (could also choose to return empty list)
|
# Propagate other errors (could also choose to return empty list)
|
||||||
raise
|
raise
|
||||||
# Avoid printing on hot paths; rely on logger if needed
|
# Avoid printing on hot paths; rely on logger if needed
|
||||||
books: List[BookData] = []
|
books: list[BookData] = []
|
||||||
# extract title from query_args if present
|
# extract title from query_args if present
|
||||||
title = None
|
title = None
|
||||||
for arg in query_args:
|
for arg in query_args:
|
||||||
|
|||||||
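Note: the pattern these `Api.get` hunks settle on — one overall deadline shared across all retries, with each attempt's read timeout capped by whatever budget remains — is worth seeing in isolation. A minimal, self-contained sketch of that pattern (the function and parameter names here are illustrative, not part of the module's API):

import time

import requests


def get_with_budget(
    session: requests.Session,
    url: str,
    max_retries: int = 5,
    overall_timeout: float = 60.0,
) -> requests.Response:
    """Retry a GET, capping each attempt's read timeout by the remaining budget."""
    start = time.monotonic()
    last_error: Exception | None = None
    for attempt in range(1, max_retries + 1):
        remaining = overall_timeout - (time.monotonic() - start)
        if remaining <= 0:
            # Budget spent before this attempt could even start
            last_error = requests.exceptions.Timeout(
                f"Overall timeout {overall_timeout}s exceeded before attempt {attempt}",
            )
            break
        # Per-attempt read timeout: at most 30s, never more than what is left
        read_timeout = min(30.0, remaining)
        try:
            resp = session.get(url, timeout=(3.05, read_timeout))
            if resp.status_code == 200:
                return resp
            last_error = Exception(f"HTTP {resp.status_code} (attempt {attempt}/{max_retries})")
        except requests.exceptions.ReadTimeout as e:
            last_error = e
    raise last_error if last_error else Exception("Unknown request failure")

The key property is that a slow server cannot stretch five retries into five full 30-second waits; the last attempts get only the leftover budget.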
305	src/bibapi/webrequest.py	Normal file
@@ -0,0 +1,305 @@
from enum import Enum
from typing import Any

import requests
from bs4 import BeautifulSoup

# import sleep_and_retry decorator to retry requests
from ratelimit import limits, sleep_and_retry

from bibapi._transformers import (
    RDS_AVAIL_DATA,
    RDS_GENERIC_DATA,
    ARRAYData,
    BibTeXData,
    COinSData,
    RDSData,
    RISData,
)
from bibapi.schemas.bookdata import BookData

API_URL = "https://rds.ibs-bw.de/phfreiburg/opac/RDSIndexrecord/{}/"
PPN_URL = "https://rds.ibs-bw.de/phfreiburg/opac/RDSIndex/Search?type0%5B%5D=allfields&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=au&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=ti&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=ct&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=isn&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=ta&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=co&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=py&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=pp&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=pu&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=si&lookfor0%5B%5D={}&join=AND&bool0%5B%5D=AND&type0%5B%5D=zr&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=cc&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND"
BASE = "https://rds.ibs-bw.de"
TITLE = "RDS_TITLE"
SIGNATURE = "RDS_SIGNATURE"
EDITION = "RDS_EDITION"
ISBN = "RDS_ISBN"
AUTHOR = "RDS_PERSON"
ALLOWED_IPS = [
    "193.197.140.245",  # PHFR Internal
]
HEADERS = {
    "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 \
        (HTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36",
    "Accept-Language": "en-US, en;q=0.5",
}
RATE_LIMIT = 20
RATE_PERIOD = 30


class TransformerType(Enum):
    """Enum for possible Transformer types."""

    ARRAY = "ARRAY"
    COinS = "COinS"
    BibTeX = "BibTeX"
    RIS = "RIS"
    RDS = "RDS"


class WebRequest:
    def __init__(self) -> None:
        """Request data from the web, and format it depending on the mode."""
        self.apparat = None
        self.use_any = False  # use any book that matches the search term
        self.signature = None
        self.ppn = None
        self.data = None
        self.timeout = 5
        self.public_ip = None
        self._can_run()
        if self.public_ip not in ALLOWED_IPS:
            raise PermissionError("IP not allowed to access the requested data.")

    def _can_run(self) -> None:
        """Check if requests can be made."""
        try:
            # check public IP to see if the requested data can be accessed
            ip_response = requests.get("https://api.ipify.org", timeout=self.timeout)
            ip_response.raise_for_status()
            self.public_ip = ip_response.text
        except requests.exceptions.RequestException as e:
            raise ConnectionError("No internet connection") from e

        if self.public_ip is None:
            raise ConnectionError("No internet connection")

    @property
    def use_any_book(self):
        """Use any book that matches the search term."""
        self.use_any = True
        return self

    def set_apparat(self, apparat: int) -> "WebRequest":
        self.apparat = apparat
        if int(self.apparat) < 10:
            self.apparat = f"0{self.apparat}"
        return self

    def get_ppn(self, signature: str) -> "WebRequest":
        self.signature = signature
        if "+" in signature:
            signature = signature.replace("+", "%2B")
        if "doi.org" in signature:
            signature = signature.split("/")[-1]
        self.ppn = signature
        return self

    @sleep_and_retry
    @limits(calls=RATE_LIMIT, period=RATE_PERIOD)
    def search_book(self, searchterm: str) -> str:
        response = requests.get(PPN_URL.format(searchterm), timeout=self.timeout)
        return response.text

    @sleep_and_retry
    @limits(calls=RATE_LIMIT, period=RATE_PERIOD)
    def search_ppn(self, ppn: str) -> str:
        response = requests.get(API_URL.format(ppn), timeout=self.timeout)
        return response.text

    def get_book_links(self, searchterm: str) -> list[str]:
        response: str = self.search_book(searchterm)  # type:ignore
        soup = BeautifulSoup(response, "html.parser")
        links = soup.find_all("a", class_="title getFull")
        res: list[str] = []
        for link in links:
            res.append(BASE + link["href"])
        return res

    @sleep_and_retry
    @limits(calls=RATE_LIMIT, period=RATE_PERIOD)
    def search(self, link: str) -> str | None:
        try:
            response = requests.get(link, timeout=self.timeout)
            return response.text
        except requests.exceptions.RequestException:
            return None

    def get_data(self) -> list[str] | None:
        links = self.get_book_links(self.ppn)
        return_data: list[str] = []
        for link in links:
            result: str = self.search(link)  # type:ignore
            # in result search for class col-xs-12 rds-dl RDS_LOCATION
            # if found, return text of href
            soup = BeautifulSoup(result, "html.parser")
            locations = soup.find_all("div", class_="col-xs-12 rds-dl RDS_LOCATION")
            if locations:
                for location in locations:
                    if "1. OG Semesterapparat" in location.text:
                        pre_tag = soup.find_all("pre")
                        return_data = []
                        if pre_tag:
                            for tag in pre_tag:
                                data = tag.text.strip()
                                return_data.append(data)
                            return return_data
                        return return_data
                    item_location = location.find(
                        "div",
                        class_="col-xs-12 col-md-7 col-lg-8 rds-dl-panel",
                    ).text.strip()
                    if self.use_any:
                        pre_tag = soup.find_all("pre")
                        if pre_tag:
                            for tag in pre_tag:
                                data = tag.text.strip()
                                return_data.append(data)
                            return return_data
                        raise ValueError("No <pre> tag found")
                    if f"Semesterapparat-{self.apparat}" in item_location:
                        pre_tag = soup.find_all("pre")
                        return_data = []
                        if pre_tag:
                            for tag in pre_tag:
                                data = tag.text.strip()
                                return_data.append(data)
                            return return_data
                        return return_data

        return return_data

    def get_data_elsa(self) -> list[str] | None:
        links = self.get_book_links(self.ppn)
        for link in links:
            result = self.search(link)
            # in result search for class col-xs-12 rds-dl RDS_LOCATION
            # if found, return text of href
            soup = BeautifulSoup(result, "html.parser")
            locations = soup.find_all("div", class_="col-xs-12 rds-dl RDS_LOCATION")
            if locations:
                for _ in locations:
                    pre_tag = soup.find_all("pre")
                    return_data = []
                    if pre_tag:
                        for tag in pre_tag:
                            data = tag.text.strip()
                            return_data.append(data)
                        return return_data
        return None


class BibTextTransformer:
    """Transforms data from the web into a BibText format.

    Valid Modes are ARRAY, COinS, BibTeX, RIS, RDS
    Raises:
        ValueError: Raised if mode is not in valid_modes
    """

    valid_modes = [
        TransformerType.ARRAY,
        TransformerType.COinS,
        TransformerType.BibTeX,
        TransformerType.RIS,
        TransformerType.RDS,
    ]

    def __init__(self, mode: TransformerType = TransformerType.ARRAY) -> None:
        if mode not in self.valid_modes:
            raise ValueError(f"Mode {mode} not valid")
        self.mode = mode.value
        self.field = None
        self.signature = None
        self.data = None
        # self.bookdata = BookData(**self.data)

    def use_signature(self, signature: str) -> "BibTextTransformer":
        """Use the exact signature to search for the book."""
        self.signature = signature
        return self

    def get_data(self, data: list[str] | None = None) -> "BibTextTransformer":
        RIS_IDENT = "TY -"
        ARRAY_IDENT = "[kid]"
        COinS_IDENT = "ctx_ver"
        BIBTEX_IDENT = "@book"
        RDS_IDENT = "RDS ---------------------------------- "

        if data is None:
            self.data = None
            return self

        if self.mode == "RIS":
            for line in data:
                if RIS_IDENT in line:
                    self.data = line
        elif self.mode == "ARRAY":
            for line in data:
                if ARRAY_IDENT in line:
                    self.data = line
        elif self.mode == "COinS":
            for line in data:
                if COinS_IDENT in line:
                    self.data = line
        elif self.mode == "BibTeX":
            for line in data:
                if BIBTEX_IDENT in line:
                    self.data = line
        elif self.mode == "RDS":
            for line in data:
                if RDS_IDENT in line:
                    self.data = line
        return self

    def return_data(
        self,
        option: Any = None,
    ) -> (
        BookData
        | None
        | RDS_GENERIC_DATA
        | RDS_AVAIL_DATA
        | dict[str, RDS_AVAIL_DATA | RDS_GENERIC_DATA]
    ):
        """Return Data to caller.

        Args:
            option (str, optional): Option for RDS as there are two filetypes.
                Use rds_availability or rds_data. Anything else gives a dict of
                both responses. Defaults to None.

        Returns:
            BookData: a dataclass containing data about the book

        """
        if self.data is None:
            return None
        match self.mode:
            case "ARRAY":
                return ARRAYData(self.signature).transform(self.data)
            case "COinS":
                return COinSData().transform(self.data)
            case "BibTeX":
                return BibTeXData().transform(self.data)
            case "RIS":
                return RISData().transform(self.data)
            case "RDS":
                return RDSData().transform(self.data).return_data(option)
            case _:
                return None


def cover(isbn):
    test_url = f"https://www.buchhandel.de/cover/{isbn}/{isbn}-cover-m.jpg"
    data = requests.get(test_url, stream=True)
    return data.content


def get_content(soup, css_class):
    return soup.find("div", class_=css_class).text.strip()


if __name__ == "__main__":
    link = "CU 8500 K64"
    data = WebRequest().set_apparat(71).get_ppn(link).get_data()
    bib = BibTextTransformer(TransformerType.ARRAY).get_data(data).return_data()
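Note: webrequest.py throttles every catalogue request with the `ratelimit` decorators. `@limits(calls=20, period=30)` raises `RateLimitException` once the window is exhausted, and the outer `@sleep_and_retry` turns that into a blocking sleep until the window reopens. A small standalone sketch of the same combination (the `throttled_fetch` function is illustrative, not part of the module):

import time

from ratelimit import limits, sleep_and_retry


@sleep_and_retry              # on RateLimitException: sleep, then retry the call
@limits(calls=20, period=30)  # at most 20 calls per rolling 30-second window
def throttled_fetch(url: str) -> str:
    # Placeholder body; a real caller would do requests.get(url, ...) here
    return f"fetched {url} at {time.monotonic():.2f}"


# The 21st call inside one window blocks instead of raising.
for i in range(3):
    print(throttled_fetch(f"https://example.invalid/{i}"))

Decorator order matters: `sleep_and_retry` must sit outside `limits`, otherwise the exception propagates to the caller instead of being converted into a wait.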
1	tests/__init__.py	Normal file
@@ -0,0 +1 @@
"""Tests for the package."""
@@ -1,17 +1,55 @@
-from typing import Callable, Optional
+"""Shared pytest fixtures for BibAPI tests."""

import pytest

-from bibapi import sru
+
+@pytest.fixture
+def sample_marc_record_xml() -> str:
+    """Sample MARC record XML for testing."""
+    return """<?xml version="1.0" encoding="UTF-8"?>
+<marc:record xmlns:marc="http://www.loc.gov/MARC21/slim">
+    <marc:leader>00000nam a22000001i 4500</marc:leader>
+    <marc:controlfield tag="001">123456789</marc:controlfield>
+    <marc:controlfield tag="005">20230101120000.0</marc:controlfield>
+    <marc:datafield tag="020" ind1=" " ind2=" ">
+        <marc:subfield code="a">9783123456789</marc:subfield>
+    </marc:datafield>
+    <marc:datafield tag="041" ind1=" " ind2=" ">
+        <marc:subfield code="a">ger</marc:subfield>
+    </marc:datafield>
+    <marc:datafield tag="245" ind1="1" ind2="0">
+        <marc:subfield code="a">Test Book Title</marc:subfield>
+        <marc:subfield code="b">A Subtitle</marc:subfield>
+    </marc:datafield>
+    <marc:datafield tag="250" ind1=" " ind2=" ">
+        <marc:subfield code="a">2nd edition</marc:subfield>
+    </marc:datafield>
+    <marc:datafield tag="264" ind1=" " ind2="1">
+        <marc:subfield code="a">Berlin</marc:subfield>
+        <marc:subfield code="b">Test Publisher</marc:subfield>
+        <marc:subfield code="c">2023</marc:subfield>
+    </marc:datafield>
+    <marc:datafield tag="300" ind1=" " ind2=" ">
+        <marc:subfield code="a">456 pages</marc:subfield>
+    </marc:datafield>
+    <marc:datafield tag="338" ind1=" " ind2=" ">
+        <marc:subfield code="a">Band</marc:subfield>
+    </marc:datafield>
+    <marc:datafield tag="700" ind1="1" ind2=" ">
+        <marc:subfield code="a">Author, Test</marc:subfield>
+    </marc:datafield>
+    <marc:datafield tag="924" ind1=" " ind2=" ">
+        <marc:subfield code="9">Frei 129</marc:subfield>
+        <marc:subfield code="g">ABC 123</marc:subfield>
+        <marc:subfield code="b">DE-Frei129</marc:subfield>
+    </marc:datafield>
+</marc:record>"""


@pytest.fixture
-def sample_sru_xml() -> bytes:
-    """Return a small SRU searchRetrieveResponse (MARCXML) as bytes.
-
-    Tests can use this raw bytes payload to simulate SRU responses.
-    """
-    xml = b"""<?xml version="1.0" encoding="UTF-8"?>
+def sample_sru_response_xml() -> bytes:
+    """Sample SRU searchRetrieveResponse XML for testing."""
+    return b"""<?xml version="1.0" encoding="UTF-8"?>
<zs:searchRetrieveResponse xmlns:zs="http://www.loc.gov/zing/srw/"
    xmlns:marc="http://www.loc.gov/MARC21/slim">
    <zs:version>1.1</zs:version>
@@ -22,15 +60,35 @@ def sample_sru_xml() -> bytes:
    <zs:recordPacking>xml</zs:recordPacking>
    <zs:recordData>
        <marc:record>
-            <marc:leader>-----nam a22</marc:leader>
-            <marc:controlfield tag="001">PPN123</marc:controlfield>
+            <marc:leader>00000nam a22</marc:leader>
+            <marc:controlfield tag="001">123456789</marc:controlfield>
+            <marc:datafield tag="020" ind1=" " ind2=" ">
+                <marc:subfield code="a">9783123456789</marc:subfield>
+            </marc:datafield>
+            <marc:datafield tag="041" ind1=" " ind2=" ">
+                <marc:subfield code="a">ger</marc:subfield>
+            </marc:datafield>
            <marc:datafield tag="245" ind1=" " ind2=" ">
-                <marc:subfield code="a">Example Title</marc:subfield>
-                <marc:subfield code="b">Subtitle</marc:subfield>
+                <marc:subfield code="a">Test Book</marc:subfield>
+            </marc:datafield>
+            <marc:datafield tag="250" ind1=" " ind2=" ">
+                <marc:subfield code="a">1st edition</marc:subfield>
            </marc:datafield>
            <marc:datafield tag="264" ind1=" " ind2="1">
-                <marc:subfield code="c">2001</marc:subfield>
-                <marc:subfield code="b">Example Publisher</marc:subfield>
+                <marc:subfield code="b">Publisher</marc:subfield>
+                <marc:subfield code="c">2023</marc:subfield>
+            </marc:datafield>
+            <marc:datafield tag="300" ind1=" " ind2=" ">
+                <marc:subfield code="a">200 pages</marc:subfield>
+            </marc:datafield>
+            <marc:datafield tag="338" ind1=" " ind2=" ">
+                <marc:subfield code="a">Band</marc:subfield>
+            </marc:datafield>
+            <marc:datafield tag="700" ind1="1" ind2=" ">
+                <marc:subfield code="a">Author, Test</marc:subfield>
+            </marc:datafield>
+            <marc:datafield tag="924" ind1=" " ind2=" ">
+                <marc:subfield code="b">DE-Frei129</marc:subfield>
            </marc:datafield>
        </marc:record>
    </zs:recordData>
@@ -39,70 +97,55 @@ def sample_sru_xml() -> bytes:
    </zs:records>
    <zs:echoedSearchRetrieveRequest>
        <zs:version>1.1</zs:version>
-        <zs:query>pica.tit=Example</zs:query>
-        <zs:maximumRecords>10</zs:maximumRecords>
+        <zs:query>pica.tit=Test</zs:query>
+        <zs:maximumRecords>100</zs:maximumRecords>
        <zs:recordPacking>xml</zs:recordPacking>
        <zs:recordSchema>marcxml</zs:recordSchema>
    </zs:echoedSearchRetrieveRequest>
-</zs:searchRetrieveResponse>
-"""
-    return xml
+</zs:searchRetrieveResponse>"""


@pytest.fixture
-def sru_api_factory(monkeypatch) -> Callable[[str, Optional[bytes]], sru.Api]:
-    """Factory to create an `sru.Api` (or subclass) with network calls mocked.
-
-    Usage:
-        def test_x(sru_api_factory, sample_sru_xml):
-            api = sru_api_factory('SWB', sample_sru_xml)
-            books = api.getBooks(['pica.tit=Example'])
-
-    The fixture monkeypatches requests.Session.get on the Api instance to return
-    a fake Response with the provided bytes payload. If `response_bytes` is
-    None the real network call will be performed (not recommended in unit tests).
-    """
-
-    def _make(site: str, response_bytes: Optional[bytes] = None) -> sru.Api:
-        mapping = {"SWB": sru.SWB, "DNB": sru.Api}
-        if site == "SWB":
-            api = sru.SWB()
-        elif site == "DNB":
-            # DNB Api class is the base Api configured differently in sru module
-            api = sru.Api(
-                sru.DNBData.NAME.value,
-                sru.DNBData.URL.value,
-                sru.DNBData.ARGSCHEMA.value,
-            )
-        else:
-            # allow custom site/url/prefix via tuple passed as site: (site, url, prefix)
-            if isinstance(site, tuple) and len(site) == 3:
-                api = sru.Api(site[0], site[1], site[2])
-            else:
-                raise ValueError("Unknown site for factory: %r" % (site,))
-
-        if response_bytes is not None:
-
-            class FakeResp:
-                status_code = 200
-
-                def __init__(self, content: bytes):
-                    self.content = content
-
-            def fake_get(url, headers=None, timeout=None):
-                return FakeResp(response_bytes)
-
-            # Patch only this instance's session.get
-            monkeypatch.setattr(api._session, "get", fake_get)
-
-        return api
-
-    return _make
-
-
-import pytest
+def mock_catalogue_html() -> str:
+    """Sample HTML response from catalogue search."""
+    return """<!DOCTYPE html>
+<html>
+<body>
+    <a class="title getFull" href="/opac/record/123">Book Title</a>
+</body>
+</html>"""


@pytest.fixture
-def sru_data():
-    return {"bib_id": 20735, "sigil": "Frei129"}
+def mock_catalogue_detail_html() -> str:
+    """Sample HTML response from catalogue book detail page."""
+    return """<!DOCTYPE html>
+<html>
+<body>
+    <div class="headline text">Test Book Title</div>
+    <div class="col-xs-12 col-md-5 col-lg-4 rds-dl-head RDS_PPN"></div>
+    <div class="col-xs-12 col-md-7 col-lg-8 rds-dl-panel">123456789</div>
+    <div class="col-xs-12 col-md-5 col-lg-4 rds-dl-head RDS_EDITION"></div>
+    <div class="col-xs-12 col-md-7 col-lg-8 rds-dl-panel">2nd ed.</div>
+    <div class="col-xs-12 col-md-5 col-lg-4 rds-dl-head RDS_PERSON"></div>
+    <div class="col-xs-12 col-md-7 col-lg-8 rds-dl-panel">
+        <a href="#">Author One</a>
+        <a href="#">Author Two</a>
+    </div>
+    <div class="panel-body">
+        <div class="rds-dl RDS_SIGNATURE">
+            <div class="rds-dl-panel">ABC 123</div>
+        </div>
+        <div class="rds-dl RDS_STATUS">
+            <div class="rds-dl-panel">Available</div>
+        </div>
+        <div class="rds-dl RDS_LOCATION">
+            <div class="rds-dl-panel">Main Library</div>
+        </div>
+    </div>
+    <div class="RDS_ISBN"></div>
+    <div class="col-xs-12 col-md-7 col-lg-8 rds-dl-panel">9783123456789</div>
+    <div class="RDS_SCOPE"></div>
+    <div class="col-xs-12 col-md-7 col-lg-8 rds-dl-panel">300 pages</div>
+</body>
+</html>"""
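Note: with these fixtures in place, parser tests need no network at all — the raw bytes can go straight into `parse_search_retrieve_response`. A minimal sketch of a test built on the new `sample_sru_response_xml` fixture (the assertion values follow the fixture contents above):

from bibapi.sru import controlfield_value, parse_search_retrieve_response


def test_sample_sru_response_parses(sample_sru_response_xml: bytes) -> None:
    # The fixture is raw bytes, exactly the payload Api.get hands to the parser.
    response = parse_search_retrieve_response(sample_sru_response_xml)
    assert len(response.records) == 1
    assert controlfield_value(response.records[0].recordData, "001") == "123456789"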
309	tests/test_catalogue.py	Normal file
@@ -0,0 +1,309 @@
"""Tests for the Catalogue class, which interacts with the library catalogue."""

from unittest.mock import MagicMock

import pytest
import requests
from pytest_mock import MockerFixture

from bibapi.catalogue import Catalogue


class TestCatalogue:
    """Tests for the Catalogue class."""

    def test_catalogue_initialization(self, mocker: MockerFixture):
        """Test Catalogue initialization."""
        mocker.patch.object(Catalogue, "check_connection", return_value=True)
        catalogue = Catalogue()
        assert catalogue.timeout == 15

    def test_catalogue_custom_timeout(self, mocker: MockerFixture):
        """Test Catalogue initialization with custom timeout."""
        mocker.patch.object(Catalogue, "check_connection", return_value=True)
        catalogue = Catalogue(timeout=30)
        assert catalogue.timeout == 30

    def test_check_book_exists(self, mocker: MockerFixture):
        """Test the check_book_exists method of the Catalogue class."""
        mocker.patch.object(Catalogue, "check_connection", return_value=True)
        catalogue = Catalogue()

        # Mock the get_book_links method to control its output
        mocker.patch.object(
            catalogue,
            "get_book_links",
            return_value=["link1", "link2"],
        )

        # Test with a known existing book
        existing_book_searchterm = "1693321114"
        assert catalogue.check_book_exists(existing_book_searchterm) is True

        # Change the mock to return an empty list for non-existing book
        mocker.patch.object(
            catalogue,
            "get_book_links",
            return_value=[],
        )

        # Test with a known non-existing book
        non_existing_book_searchterm = "00000000009"
        assert catalogue.check_book_exists(non_existing_book_searchterm) is False

    def test_no_connection_raises_error(self, mocker: MockerFixture):
        """Test that a ConnectionError is raised with no internet connection."""
        # Mock the check_connection method to simulate no internet connection
        mocker.patch.object(
            Catalogue,
            "check_connection",
            return_value=False,
        )

        with pytest.raises(ConnectionError, match="No internet connection available."):
            Catalogue()

    def test_check_connection_success(self, mocker: MockerFixture):
        """Test check_connection returns True on success."""
        mock_response = MagicMock()
        mock_response.status_code = 200
        mocker.patch("requests.get", return_value=mock_response)

        catalogue = Catalogue.__new__(Catalogue)
        catalogue.timeout = 15
        assert catalogue.check_connection() is True

    def test_check_connection_failure(self, mocker: MockerFixture):
        """Test check_connection handles request exception."""
        mocker.patch(
            "requests.get",
            side_effect=requests.exceptions.RequestException("Network error"),
        )

        catalogue = Catalogue.__new__(Catalogue)
        catalogue.timeout = 15
        result = catalogue.check_connection()
        assert result is None  # Returns None on exception

    def test_search_book(self, mocker: MockerFixture):
        """Test search_book method."""
        mocker.patch.object(Catalogue, "check_connection", return_value=True)
        mock_response = MagicMock()
        mock_response.text = "<html>search results</html>"
        mocker.patch("requests.get", return_value=mock_response)

        catalogue = Catalogue()
        result = catalogue.search_book("test search")
        assert result == "<html>search results</html>"

    def test_search(self, mocker: MockerFixture):
        """Test search method."""
        mocker.patch.object(Catalogue, "check_connection", return_value=True)
        mock_response = MagicMock()
        mock_response.text = "<html>detail page</html>"
        mocker.patch("requests.get", return_value=mock_response)

        catalogue = Catalogue()
        result = catalogue.search("https://example.com/book/123")
        assert result == "<html>detail page</html>"

    def test_get_book_links(self, mocker: MockerFixture, mock_catalogue_html):
        """Test get_book_links method."""
        mocker.patch.object(Catalogue, "check_connection", return_value=True)
        mocker.patch.object(
            Catalogue,
            "search_book",
            return_value=mock_catalogue_html,
        )

        catalogue = Catalogue()
        links = catalogue.get_book_links("test search")

        assert len(links) == 1
        assert "https://rds.ibs-bw.de/opac/record/123" in links[0]

    def test_in_library_with_ppn(self, mocker: MockerFixture):
        """Test in_library method with valid PPN."""
        mocker.patch.object(Catalogue, "check_connection", return_value=True)
        mocker.patch.object(
            Catalogue,
            "get_book_links",
            return_value=["link1"],
        )

        catalogue = Catalogue()
        assert catalogue.in_library("123456789") is True

    def test_in_library_without_ppn(self, mocker: MockerFixture):
        """Test in_library method with None PPN."""
        mocker.patch.object(Catalogue, "check_connection", return_value=True)

        catalogue = Catalogue()
        assert catalogue.in_library(None) is False

    def test_in_library_not_found(self, mocker: MockerFixture):
        """Test in_library method when book not found."""
        mocker.patch.object(Catalogue, "check_connection", return_value=True)
        mocker.patch.object(
            Catalogue,
            "get_book_links",
            return_value=[],
        )

        catalogue = Catalogue()
        assert catalogue.in_library("nonexistent") is False

    def test_get_location_none_ppn(self, mocker: MockerFixture):
        """Test get_location method with None PPN."""
        mocker.patch.object(Catalogue, "check_connection", return_value=True)

        catalogue = Catalogue()
        assert catalogue.get_location(None) is None

    def test_get_location_not_found(self, mocker: MockerFixture):
        """Test get_location when book not found."""
        mocker.patch.object(Catalogue, "check_connection", return_value=True)
        mocker.patch.object(Catalogue, "get_book", return_value=None)

        catalogue = Catalogue()
        assert catalogue.get_location("123") is None

    def test_get_ppn(self, mocker: MockerFixture):
        """Test get_ppn method with valid PPN format."""
        mocker.patch.object(Catalogue, "check_connection", return_value=True)
        mocker.patch.object(
            Catalogue,
            "get_book_links",
            return_value=["https://example.com/opac/record/1234567890"],
        )
        mocker.patch.object(Catalogue, "search", return_value="<html></html>")

        catalogue = Catalogue()
        ppn = catalogue.get_ppn("test")
        assert ppn == "1234567890"

    def test_get_ppn_with_x(self, mocker: MockerFixture):
        """Test get_ppn method with PPN ending in X."""
        mocker.patch.object(Catalogue, "check_connection", return_value=True)
        mocker.patch.object(
            Catalogue,
            "get_book_links",
            return_value=["https://example.com/opac/record/123456789X"],
        )
        mocker.patch.object(Catalogue, "search", return_value="<html></html>")

        catalogue = Catalogue()
        ppn = catalogue.get_ppn("test")
        assert ppn == "123456789X"

    def test_get_semesterapparat_number(self, mocker: MockerFixture):
        """Test get_semesterapparat_number method."""
        mocker.patch.object(Catalogue, "check_connection", return_value=True)
        mocker.patch.object(
            Catalogue,
            "get_book_links",
            return_value=["https://example.com/book"],
        )

        html = """<html>
        <div class="col-xs-12 rds-dl RDS_LOCATION">
            Semesterapparat-42
        </div>
        </html>"""
        mocker.patch.object(Catalogue, "search", return_value=html)

        catalogue = Catalogue()
        result = catalogue.get_semesterapparat_number("test")
        assert result == 42

    def test_get_semesterapparat_number_handbibliothek(self, mocker: MockerFixture):
        """Test get_semesterapparat_number with Handbibliothek location."""
        mocker.patch.object(Catalogue, "check_connection", return_value=True)
        mocker.patch.object(
            Catalogue,
            "get_book_links",
            return_value=["https://example.com/book"],
        )

        html = """<html>
        <div class="col-xs-12 rds-dl RDS_LOCATION">
            Floor 1
            Handbibliothek-Reference
        </div>
        </html>"""
        mocker.patch.object(Catalogue, "search", return_value=html)

        catalogue = Catalogue()
        result = catalogue.get_semesterapparat_number("test")
        assert "Reference" in str(result) or "Handbibliothek" in str(result)

    def test_get_semesterapparat_number_not_found(self, mocker: MockerFixture):
        """Test get_semesterapparat_number when not found."""
        mocker.patch.object(Catalogue, "check_connection", return_value=True)
        mocker.patch.object(Catalogue, "get_book_links", return_value=[])

        catalogue = Catalogue()
        result = catalogue.get_semesterapparat_number("test")
        assert result == 0

    def test_get_author(self, mocker: MockerFixture):
        """Test get_author method."""
        mocker.patch.object(Catalogue, "check_connection", return_value=True)
        mocker.patch.object(
            Catalogue,
            "get_book_links",
            return_value=["https://example.com/book"],
        )

        html = """<html>
        <div class="col-xs-12 col-md-5 col-lg-4 rds-dl-head RDS_PERSON"></div>
        <div class="col-xs-12 col-md-7 col-lg-8 rds-dl-panel">
            <a href="#">Author One</a>
            <a href="#">Author Two</a>
        </div>
        </html>"""
        mocker.patch.object(Catalogue, "search", return_value=html)

        catalogue = Catalogue()
        author = catalogue.get_author("kid:123")
        assert "Author One" in author
        assert "Author Two" in author
        assert "; " in author  # Separator

    def test_get_signature(self, mocker: MockerFixture):
        """Test get_signature method."""
        mocker.patch.object(Catalogue, "check_connection", return_value=True)
        mocker.patch.object(
            Catalogue,
            "get_book_links",
            return_value=["https://example.com/book"],
        )

        html = """<html>
        <div class="panel-body">
            <div class="rds-dl RDS_SIGNATURE">
                <div class="rds-dl-panel">ABC 123</div>
            </div>
            <div class="rds-dl RDS_STATUS">
                <div class="rds-dl-panel">Available</div>
            </div>
            <div class="rds-dl RDS_LOCATION">
                <div class="rds-dl-panel">Semesterapparat-1</div>
            </div>
        </div>
        </html>"""
        mocker.patch.object(Catalogue, "search", return_value=html)

        catalogue = Catalogue()
        signature = catalogue.get_signature("9783123456789")
        assert signature == "ABC 123"

    def test_get_signature_not_found(self, mocker: MockerFixture):
        """Test get_signature when not found."""
        mocker.patch.object(Catalogue, "check_connection", return_value=True)
        mocker.patch.object(Catalogue, "get_book_links", return_value=[])

        catalogue = Catalogue()
        signature = catalogue.get_signature("nonexistent")
        assert signature is None
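Note: the two connection tests sidestep `__init__` altogether with `Catalogue.__new__(Catalogue)` and then hand-set only the attribute the method under test reads. The trick generalizes to any object whose constructor has side effects; a tiny self-contained illustration (the `Service` class here is hypothetical):

class Service:
    """Stand-in for a class whose __init__ performs a network check."""

    def __init__(self) -> None:
        raise RuntimeError("would talk to the network")

    def double_timeout(self) -> int:
        return self.timeout * 2


# __new__ allocates the instance without ever running __init__
svc = Service.__new__(Service)
svc.timeout = 15  # provide exactly the state the method under test reads
assert svc.double_timeout() == 30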
112	tests/test_init.py	Normal file
@@ -0,0 +1,112 @@
"""Tests for the __init__.py wrapper classes."""

from unittest.mock import MagicMock, patch

import pytest
import requests

from bibapi import DNB, HBZ, HEBIS, KOBV, OEVK, SWB
from bibapi.schemas.api_types import (
    ALMASchema,
    DublinCoreSchema,
    PicaSchema,
)


class TestSWBWrapper:
    """Tests for the SWB wrapper class."""

    def test_swb_initialization(self):
        """Test SWB initializes with correct config."""
        api = SWB()
        assert api.site == "SWB"
        assert "sru.k10plus.de" in api.url
        assert api.prefix == PicaSchema
        assert api.library_identifier == "924$b"
        api.close()

    @patch.object(requests.Session, "get")
    def test_swb_getbooks(self, mock_get):
        """Test SWB getBooks method."""
        mock_response = MagicMock()
        mock_response.status_code = 200
        mock_response.content = b"""<?xml version="1.0"?>
<zs:searchRetrieveResponse xmlns:zs="http://www.loc.gov/zing/srw/">
    <zs:version>1.1</zs:version>
    <zs:numberOfRecords>0</zs:numberOfRecords>
</zs:searchRetrieveResponse>"""
        mock_get.return_value = mock_response

        api = SWB()
        books = api.getBooks(["TITLE=Test"])
        assert isinstance(books, list)
        api.close()


class TestDNBWrapper:
    """Tests for the DNB wrapper class."""

    def test_dnb_initialization(self):
        """Test DNB initializes with correct config.

        Note: DNB class has a bug - it doesn't set library_identifier before
        calling super().__init__. This test documents the bug.
        """
        # DNB has a bug - library_identifier is not set
        with pytest.raises(AttributeError, match="library_identifier"):
            DNB()


class TestKOBVWrapper:
    """Tests for the KOBV wrapper class."""

    def test_kobv_initialization(self):
        """Test KOBV initializes with correct config."""
        api = KOBV()
        assert api.site == "KOBV"
        assert "sru.kobv.de" in api.url
        assert api.prefix == DublinCoreSchema
        assert api.library_identifier == "924$b"
        api.close()


class TestHEBISWrapper:
    """Tests for the HEBIS wrapper class."""

    def test_hebis_initialization(self):
        """Test HEBIS initializes with correct config."""
        api = HEBIS()
        assert api.site == "HEBIS"
        assert "sru.hebis.de" in api.url
        assert api.prefix == PicaSchema
        assert api.library_identifier == "924$b"
        # HEBIS has specific replace patterns
        assert " " in api.replace
        # HEBIS has unsupported args
        assert "YEAR" in api.notsupported_args
        api.close()


class TestOEVKWrapper:
    """Tests for the OEVK wrapper class."""

    def test_oevk_initialization(self):
        """Test OEVK initializes with correct config."""
        api = OEVK()
        assert api.site == "OEVK"
        assert api.prefix == PicaSchema
        assert api.library_identifier == "924$b"
        api.close()


class TestHBZWrapper:
    """Tests for the HBZ wrapper class."""

    def test_hbz_initialization(self):
        """Test HBZ initializes with correct config."""
        api = HBZ()
        assert api.site == "HBZ"
        assert "alma.exlibrisgroup.com" in api.url
        assert api.prefix == ALMASchema
        assert api.library_identifier == "852$a"
        api.close()
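Note: `@patch.object(requests.Session, "get")` in `test_swb_getbooks` patches the method on the class, so even sessions created after the patch is applied are intercepted. The same mechanism in isolation:

from unittest.mock import MagicMock, patch

import requests

with patch.object(requests.Session, "get") as mock_get:
    mock_response = MagicMock()
    mock_response.status_code = 200
    mock_response.content = b"<payload/>"
    mock_get.return_value = mock_response

    session = requests.Session()  # created after the patch: still intercepted
    assert session.get("https://example.invalid").content == b"<payload/>"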
486	tests/test_marcxml_parser.py	Normal file
@@ -0,0 +1,486 @@
"""Tests for MARCXML parsing functions in sru.py."""

import xml.etree.ElementTree as ET

import pytest

from bibapi.schemas.marcxml import (
    DataField,
    SubField,
)
from bibapi.sru import (
    _smart_join_title,
    _text,
    controlfield_value,
    datafield_value,
    datafields_value,
    find_datafields_with_subfields,
    first_subfield_value,
    first_subfield_value_from_fields,
    iter_datafields,
    parse_marc_record,
    parse_search_retrieve_response,
    subfield_values,
    subfield_values_from_fields,
)

# --- Fixtures for sample XML data ---


@pytest.fixture
def minimal_marc_xml() -> str:
    """Minimal MARC record XML string."""
    return """<?xml version="1.0" encoding="UTF-8"?>
<marc:record xmlns:marc="http://www.loc.gov/MARC21/slim">
    <marc:leader>00000nam a22000001i 4500</marc:leader>
    <marc:controlfield tag="001">PPN12345</marc:controlfield>
    <marc:controlfield tag="005">20230101120000.0</marc:controlfield>
    <marc:datafield tag="245" ind1="1" ind2="0">
        <marc:subfield code="a">Test Title</marc:subfield>
        <marc:subfield code="b">A Subtitle</marc:subfield>
    </marc:datafield>
</marc:record>"""


@pytest.fixture
def full_marc_xml() -> str:
    """More complete MARC record for testing."""
    return """<?xml version="1.0" encoding="UTF-8"?>
<marc:record xmlns:marc="http://www.loc.gov/MARC21/slim">
    <marc:leader>00000nam a22000001i 4500</marc:leader>
    <marc:controlfield tag="001">PPN98765</marc:controlfield>
    <marc:controlfield tag="005">20231215150000.0</marc:controlfield>
    <marc:controlfield tag="008">230101s2023 gw 000 0 ger d</marc:controlfield>
    <marc:datafield tag="020" ind1=" " ind2=" ">
        <marc:subfield code="a">9783123456789</marc:subfield>
    </marc:datafield>
    <marc:datafield tag="020" ind1=" " ind2=" ">
        <marc:subfield code="a">9783987654321</marc:subfield>
    </marc:datafield>
    <marc:datafield tag="041" ind1=" " ind2=" ">
        <marc:subfield code="a">ger</marc:subfield>
        <marc:subfield code="a">eng</marc:subfield>
    </marc:datafield>
    <marc:datafield tag="245" ind1="1" ind2="0">
        <marc:subfield code="a">Comprehensive Test Book</marc:subfield>
        <marc:subfield code="b">With Many Details</marc:subfield>
        <marc:subfield code="c">by Author Name</marc:subfield>
    </marc:datafield>
    <marc:datafield tag="250" ind1=" " ind2=" ">
        <marc:subfield code="a">3rd edition</marc:subfield>
    </marc:datafield>
    <marc:datafield tag="264" ind1=" " ind2="1">
        <marc:subfield code="a">Berlin</marc:subfield>
        <marc:subfield code="b">Test Publisher</marc:subfield>
        <marc:subfield code="c">2023</marc:subfield>
    </marc:datafield>
    <marc:datafield tag="300" ind1=" " ind2=" ">
        <marc:subfield code="a">456 pages</marc:subfield>
    </marc:datafield>
    <marc:datafield tag="338" ind1=" " ind2=" ">
        <marc:subfield code="a">Band</marc:subfield>
    </marc:datafield>
    <marc:datafield tag="700" ind1="1" ind2=" ">
        <marc:subfield code="a">Author, First</marc:subfield>
    </marc:datafield>
    <marc:datafield tag="700" ind1="1" ind2=" ">
        <marc:subfield code="a">Author, Second</marc:subfield>
    </marc:datafield>
    <marc:datafield tag="924" ind1=" " ind2=" ">
        <marc:subfield code="9">Frei 129</marc:subfield>
        <marc:subfield code="g">ABC 123</marc:subfield>
        <marc:subfield code="b">DE-Frei129</marc:subfield>
    </marc:datafield>
</marc:record>"""


@pytest.fixture
def sru_response_xml() -> bytes:
    """Complete SRU searchRetrieveResponse XML."""
    return b"""<?xml version="1.0" encoding="UTF-8"?>
<zs:searchRetrieveResponse xmlns:zs="http://www.loc.gov/zing/srw/"
    xmlns:marc="http://www.loc.gov/MARC21/slim">
    <zs:version>1.1</zs:version>
    <zs:numberOfRecords>2</zs:numberOfRecords>
    <zs:records>
        <zs:record>
            <zs:recordSchema>marcxml</zs:recordSchema>
            <zs:recordPacking>xml</zs:recordPacking>
            <zs:recordData>
                <marc:record>
                    <marc:leader>00000nam a22</marc:leader>
                    <marc:controlfield tag="001">PPN001</marc:controlfield>
                    <marc:datafield tag="245" ind1=" " ind2=" ">
                        <marc:subfield code="a">First Book</marc:subfield>
                    </marc:datafield>
                </marc:record>
            </zs:recordData>
            <zs:recordPosition>1</zs:recordPosition>
        </zs:record>
        <zs:record>
            <zs:recordSchema>marcxml</zs:recordSchema>
            <zs:recordPacking>xml</zs:recordPacking>
            <zs:recordData>
                <marc:record>
                    <marc:leader>00000nam a22</marc:leader>
                    <marc:controlfield tag="001">PPN002</marc:controlfield>
                    <marc:datafield tag="245" ind1=" " ind2=" ">
                        <marc:subfield code="a">Second Book</marc:subfield>
                    </marc:datafield>
                </marc:record>
            </zs:recordData>
            <zs:recordPosition>2</zs:recordPosition>
        </zs:record>
    </zs:records>
    <zs:echoedSearchRetrieveRequest>
        <zs:version>1.1</zs:version>
        <zs:query>pica.tit=Test</zs:query>
        <zs:maximumRecords>100</zs:maximumRecords>
        <zs:recordPacking>xml</zs:recordPacking>
        <zs:recordSchema>marcxml</zs:recordSchema>
    </zs:echoedSearchRetrieveRequest>
</zs:searchRetrieveResponse>"""


@pytest.fixture
def sru_response_no_records() -> bytes:
    """SRU response with zero records."""
    return b"""<?xml version="1.0" encoding="UTF-8"?>
<zs:searchRetrieveResponse xmlns:zs="http://www.loc.gov/zing/srw/">
    <zs:version>1.1</zs:version>
    <zs:numberOfRecords>0</zs:numberOfRecords>
</zs:searchRetrieveResponse>"""


# --- Tests for _text helper ---


class TestTextHelper:
    def test_text_with_element_and_text(self):
        elem = ET.fromstring("<tag>Hello</tag>")
        assert _text(elem) == "Hello"

    def test_text_with_element_no_text(self):
        elem = ET.fromstring("<tag></tag>")
        assert _text(elem) == ""

    def test_text_with_none(self):
        assert _text(None) == ""

    def test_text_with_whitespace(self):
        elem = ET.fromstring("<tag> spaced </tag>")
        assert _text(elem) == " spaced "


# --- Tests for parse_marc_record ---


class TestParseMarcRecord:
    def test_parse_minimal_record(self, minimal_marc_xml):
        root = ET.fromstring(minimal_marc_xml)
        record = parse_marc_record(root)

        assert record.leader == "00000nam a22000001i 4500"
        assert len(record.controlfields) == 2
        assert record.controlfields[0].tag == "001"
        assert record.controlfields[0].value == "PPN12345"

    def test_parse_datafields(self, minimal_marc_xml):
        root = ET.fromstring(minimal_marc_xml)
        record = parse_marc_record(root)

        assert len(record.datafields) == 1
        df = record.datafields[0]
        assert df.tag == "245"
        assert df.ind1 == "1"
        assert df.ind2 == "0"
        assert len(df.subfields) == 2
        assert df.subfields[0].code == "a"
        assert df.subfields[0].value == "Test Title"

    def test_parse_full_record(self, full_marc_xml):
        root = ET.fromstring(full_marc_xml)
        record = parse_marc_record(root)

        assert len(record.controlfields) == 3
        # Check multiple datafields
        tags = [df.tag for df in record.datafields]
        assert "020" in tags
        assert "245" in tags
        assert "700" in tags
        assert "924" in tags

    def test_parse_multiple_subfields_same_code(self, full_marc_xml):
        root = ET.fromstring(full_marc_xml)
        record = parse_marc_record(root)

        # Find 041 field with multiple $a subfields
        df_041 = next(df for df in record.datafields if df.tag == "041")
        a_values = [sf.value for sf in df_041.subfields if sf.code == "a"]
        assert a_values == ["ger", "eng"]


# --- Tests for parse_search_retrieve_response ---


class TestParseSearchRetrieveResponse:
    def test_parse_response_with_records(self, sru_response_xml):
        response = parse_search_retrieve_response(sru_response_xml)

        assert response.version == "1.1"
        assert response.numberOfRecords == 2
        assert len(response.records) == 2

    def test_parse_response_record_details(self, sru_response_xml):
        response = parse_search_retrieve_response(sru_response_xml)

        rec1 = response.records[0]
        assert rec1.recordSchema == "marcxml"
        assert rec1.recordPacking == "xml"
        assert rec1.recordPosition == 1
        assert controlfield_value(rec1.recordData, "001") == "PPN001"

    def test_parse_response_no_records(self, sru_response_no_records):
        response = parse_search_retrieve_response(sru_response_no_records)

        assert response.version == "1.1"
        assert response.numberOfRecords == 0
        assert len(response.records) == 0

    def test_parse_echoed_request(self, sru_response_xml):
        response = parse_search_retrieve_response(sru_response_xml)

        echoed = response.echoedSearchRetrieveRequest
        assert echoed is not None
        assert echoed.version == "1.1"
        assert echoed.query == "pica.tit=Test"
        assert echoed.maximumRecords == 100
        assert echoed.recordSchema == "marcxml"

    def test_parse_response_as_string(self, sru_response_xml):
        # Should also work with string input
        response = parse_search_retrieve_response(sru_response_xml.decode("utf-8"))
        assert response.numberOfRecords == 2


# --- Tests for query helper functions ---


class TestIterDatafields:
    def test_iter_all_datafields(self, full_marc_xml):
        root = ET.fromstring(full_marc_xml)
        record = parse_marc_record(root)

        all_fields = list(iter_datafields(record))
        assert len(all_fields) == len(record.datafields)

    def test_iter_datafields_by_tag(self, full_marc_xml):
        root = ET.fromstring(full_marc_xml)
        record = parse_marc_record(root)

        fields_020 = list(iter_datafields(record, tag="020"))
        assert len(fields_020) == 2  # Two ISBN fields

    def test_iter_datafields_by_indicator(self, full_marc_xml):
        root = ET.fromstring(full_marc_xml)
        record = parse_marc_record(root)

        fields = list(iter_datafields(record, tag="264", ind2="1"))
        assert len(fields) == 1


class TestSubfieldValues:
    def test_subfield_values_single(self, full_marc_xml):
        root = ET.fromstring(full_marc_xml)
        record = parse_marc_record(root)

        values = subfield_values(record, "245", "a")
        assert values == ["Comprehensive Test Book"]

    def test_subfield_values_multiple(self, full_marc_xml):
        root = ET.fromstring(full_marc_xml)
        record = parse_marc_record(root)

        # Multiple ISBN values
        values = subfield_values(record, "020", "a")
        assert len(values) == 2
        assert "9783123456789" in values
        assert "9783987654321" in values

    def test_subfield_values_empty(self, full_marc_xml):
        root = ET.fromstring(full_marc_xml)
        record = parse_marc_record(root)

        values = subfield_values(record, "999", "x")
        assert values == []


class TestFirstSubfieldValue:
    def test_first_subfield_value_found(self, full_marc_xml):
        root = ET.fromstring(full_marc_xml)
        record = parse_marc_record(root)

        value = first_subfield_value(record, "245", "a")
        assert value == "Comprehensive Test Book"

    def test_first_subfield_value_not_found(self, full_marc_xml):
        root = ET.fromstring(full_marc_xml)
        record = parse_marc_record(root)

        value = first_subfield_value(record, "999", "x")
        assert value is None

    def test_first_subfield_value_with_default(self, full_marc_xml):
        root = ET.fromstring(full_marc_xml)
        record = parse_marc_record(root)

        value = first_subfield_value(record, "999", "x", default="N/A")
        assert value == "N/A"

    def test_first_subfield_value_with_indicator(self, full_marc_xml):
        root = ET.fromstring(full_marc_xml)
        record = parse_marc_record(root)

        value = first_subfield_value(record, "264", "c", ind2="1")
        assert value == "2023"


class TestControlFieldValue:
    def test_controlfield_value_found(self, full_marc_xml):
        root = ET.fromstring(full_marc_xml)
        record = parse_marc_record(root)
|
||||||
|
|
||||||
|
value = controlfield_value(record, "001")
|
||||||
|
assert value == "PPN98765"
|
||||||
|
|
||||||
|
def test_controlfield_value_not_found(self, full_marc_xml):
|
||||||
|
root = ET.fromstring(full_marc_xml)
|
||||||
|
record = parse_marc_record(root)
|
||||||
|
|
||||||
|
value = controlfield_value(record, "999")
|
||||||
|
assert value is None
|
||||||
|
|
||||||
|
def test_controlfield_value_with_default(self, full_marc_xml):
|
||||||
|
root = ET.fromstring(full_marc_xml)
|
||||||
|
record = parse_marc_record(root)
|
||||||
|
|
||||||
|
value = controlfield_value(record, "999", default="unknown")
|
||||||
|
assert value == "unknown"
|
||||||
|
|
||||||
|
|
||||||
|
class TestFindDatafieldsWithSubfields:
|
||||||
|
def test_find_with_where_all(self, full_marc_xml):
|
||||||
|
root = ET.fromstring(full_marc_xml)
|
||||||
|
record = parse_marc_record(root)
|
||||||
|
|
||||||
|
fields = find_datafields_with_subfields(
|
||||||
|
record,
|
||||||
|
"924",
|
||||||
|
where_all={"9": "Frei 129"},
|
||||||
|
)
|
||||||
|
assert len(fields) == 1
|
||||||
|
assert fields[0].tag == "924"
|
||||||
|
|
||||||
|
def test_find_with_where_all_not_found(self, full_marc_xml):
|
||||||
|
root = ET.fromstring(full_marc_xml)
|
||||||
|
record = parse_marc_record(root)
|
||||||
|
|
||||||
|
fields = find_datafields_with_subfields(
|
||||||
|
record,
|
||||||
|
"924",
|
||||||
|
where_all={"9": "NonExistent"},
|
||||||
|
)
|
||||||
|
assert len(fields) == 0
|
||||||
|
|
||||||
|
def test_find_with_casefold(self, full_marc_xml):
|
||||||
|
root = ET.fromstring(full_marc_xml)
|
||||||
|
record = parse_marc_record(root)
|
||||||
|
|
||||||
|
fields = find_datafields_with_subfields(
|
||||||
|
record,
|
||||||
|
"924",
|
||||||
|
where_all={"9": "frei 129"}, # lowercase
|
||||||
|
casefold=True,
|
||||||
|
)
|
||||||
|
assert len(fields) == 1
|
||||||
|
|
||||||
|
|
||||||
|
class TestDatafieldValue:
|
||||||
|
def test_datafield_value_found(self):
|
||||||
|
df = DataField(
|
||||||
|
tag="245",
|
||||||
|
subfields=[
|
||||||
|
SubField(code="a", value="Title"),
|
||||||
|
SubField(code="b", value="Subtitle"),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
assert datafield_value(df, "a") == "Title"
|
||||||
|
assert datafield_value(df, "b") == "Subtitle"
|
||||||
|
|
||||||
|
def test_datafield_value_not_found(self):
|
||||||
|
df = DataField(tag="245", subfields=[SubField(code="a", value="Title")])
|
||||||
|
assert datafield_value(df, "z") is None
|
||||||
|
|
||||||
|
def test_datafield_value_with_default(self):
|
||||||
|
df = DataField(tag="245", subfields=[])
|
||||||
|
assert datafield_value(df, "a", default="N/A") == "N/A"
|
||||||
|
|
||||||
|
|
||||||
|
class TestDatafieldsValue:
|
||||||
|
def test_datafields_value_found(self):
|
||||||
|
fields = [
|
||||||
|
DataField(tag="700", subfields=[SubField(code="a", value="Author One")]),
|
||||||
|
DataField(tag="700", subfields=[SubField(code="a", value="Author Two")]),
|
||||||
|
]
|
||||||
|
assert datafields_value(fields, "a") == "Author One"
|
||||||
|
|
||||||
|
def test_datafields_value_empty_list(self):
|
||||||
|
assert datafields_value([], "a") is None
|
||||||
|
|
||||||
|
|
||||||
|
class TestSubfieldValuesFromFields:
|
||||||
|
def test_values_from_multiple_fields(self):
|
||||||
|
fields = [
|
||||||
|
DataField(tag="700", subfields=[SubField(code="a", value="Author One")]),
|
||||||
|
DataField(tag="700", subfields=[SubField(code="a", value="Author Two")]),
|
||||||
|
]
|
||||||
|
values = subfield_values_from_fields(fields, "a")
|
||||||
|
assert values == ["Author One", "Author Two"]
|
||||||
|
|
||||||
|
|
||||||
|
class TestFirstSubfieldValueFromFields:
|
||||||
|
def test_first_value_from_fields(self):
|
||||||
|
fields = [
|
||||||
|
DataField(tag="700", subfields=[SubField(code="a", value="First")]),
|
||||||
|
DataField(tag="700", subfields=[SubField(code="a", value="Second")]),
|
||||||
|
]
|
||||||
|
assert first_subfield_value_from_fields(fields, "a") == "First"
|
||||||
|
|
||||||
|
|
||||||
|
# --- Tests for _smart_join_title ---
|
||||||
|
|
||||||
|
|
||||||
|
class TestSmartJoinTitle:
|
||||||
|
def test_join_with_subtitle(self):
|
||||||
|
result = _smart_join_title("Main Title", "Subtitle")
|
||||||
|
assert result == "Main Title : Subtitle"
|
||||||
|
|
||||||
|
def test_join_without_subtitle(self):
|
||||||
|
result = _smart_join_title("Main Title", None)
|
||||||
|
assert result == "Main Title"
|
||||||
|
|
||||||
|
def test_join_with_empty_subtitle(self):
|
||||||
|
result = _smart_join_title("Main Title", "")
|
||||||
|
assert result == "Main Title"
|
||||||
|
|
||||||
|
def test_join_with_existing_colon(self):
|
||||||
|
result = _smart_join_title("Main Title:", "Subtitle")
|
||||||
|
assert result == "Main Title: Subtitle"
|
||||||
|
|
||||||
|
def test_join_with_existing_semicolon(self):
|
||||||
|
result = _smart_join_title("Main Title;", "More")
|
||||||
|
assert result == "Main Title; More"
|
||||||
|
|
||||||
|
def test_join_strips_whitespace(self):
|
||||||
|
result = _smart_join_title(" Main Title ", " Subtitle ")
|
||||||
|
assert result == "Main Title : Subtitle"
|
||||||
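
# For orientation only: a minimal sketch of the joining rule these tests pin
# down, inferred from the assertions above rather than taken from the real
# implementation (the name is illustrative).
def _smart_join_title_sketch(title: str, subtitle: str | None) -> str:
    title = title.strip()
    if not subtitle or not subtitle.strip():
        return title
    # An existing trailing ":" or ";" already acts as the separator.
    sep = " " if title.endswith((":", ";")) else " : "
    return f"{title}{sep}{subtitle.strip()}"
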
244 tests/test_schemas.py Normal file
@@ -0,0 +1,244 @@
"""Tests for schema modules."""

import json

import pytest

from bibapi.schemas.api_types import (
    ALMASchema,
    DNBSchema,
    DublinCoreSchema,
    HBZSchema,
    HebisSchema,
    KOBVSchema,
    OEVKSchema,
    PicaSchema,
    SWBSchema,
)
from bibapi.schemas.bookdata import BookData
from bibapi.schemas.errors import BibAPIError, CatalogueError, NetworkError
from bibapi.sru import QueryTransformer

# --- QueryTransformer tests with different schemas ---

arguments = [
    "TITLE=Java ist auch eine Insel",
    "AUTHOR=Ullenboom, Christian",
    "YEAR=2020",
    "PPN=1693321114",
]


def test_pica_schema():
    transformer = QueryTransformer(PicaSchema, arguments)
    transformed = transformer.transform()
    assert len(transformed) == 4
    assert transformed[0].startswith(PicaSchema.TITLE.value)
    assert transformed[1].startswith(PicaSchema.AUTHOR.value)
    assert transformed[2].startswith(PicaSchema.YEAR.value)
    assert transformed[3].startswith(PicaSchema.PPN.value)


def test_alma_schema():
    transformer = QueryTransformer(ALMASchema, arguments)
    transformed = transformer.transform()
    assert len(transformed) == 3  # PPN is not supported
    assert transformed[0].startswith(ALMASchema.TITLE.value)
    assert transformed[1].startswith(ALMASchema.AUTHOR.value)
    assert transformed[2].startswith(ALMASchema.YEAR.value)


def test_dublin_core_schema():
    transformer = QueryTransformer(DublinCoreSchema, arguments)
    transformed = transformer.transform()
    assert len(transformed) == 3  # YEAR is supported, PPN is not
    assert transformed[0].startswith(DublinCoreSchema.TITLE.value)
    assert transformed[1].startswith(DublinCoreSchema.AUTHOR.value)
    assert transformed[2].startswith(DublinCoreSchema.YEAR.value)
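
# Illustration (assumed prefixes, not verified against the enum definitions):
# with PicaSchema the arguments above would come out roughly as
#   ["pica.tit=Java ist auch eine Insel", "pica.per=Ullenboom,Christian", ...]
# i.e. each KEY=value pair is rewritten to the catalogue-specific search key,
# and keys listed in a schema's NOTSUPPORTEDARGS are dropped entirely.
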
# --- API Schema configuration tests ---


class TestApiSchemas:
    """Tests for API schema configurations."""

    def test_swb_schema_config(self):
        """Test SWB schema configuration."""
        assert SWBSchema.NAME.value == "SWB"
        assert "sru.k10plus.de" in SWBSchema.URL.value
        assert SWBSchema.ARGSCHEMA.value == PicaSchema
        assert SWBSchema.LIBRARY_NAME_LOCATION_FIELD.value == "924$b"

    def test_dnb_schema_config(self):
        """Test DNB schema configuration."""
        assert DNBSchema.NAME.value == "DNB"
        assert "services.dnb.de" in DNBSchema.URL.value
        assert DNBSchema.ARGSCHEMA.value == DublinCoreSchema

    def test_kobv_schema_config(self):
        """Test KOBV schema configuration."""
        assert KOBVSchema.NAME.value == "KOBV"
        assert "sru.kobv.de" in KOBVSchema.URL.value
        assert KOBVSchema.ARGSCHEMA.value == DublinCoreSchema

    def test_hebis_schema_config(self):
        """Test HEBIS schema configuration."""
        assert HebisSchema.NAME.value == "HEBIS"
        assert "sru.hebis.de" in HebisSchema.URL.value
        assert HebisSchema.ARGSCHEMA.value == PicaSchema
        # HEBIS has specific character replacements
        assert " " in HebisSchema.REPLACE.value

    def test_oevk_schema_config(self):
        """Test OEVK schema configuration."""
        assert OEVKSchema.NAME.value == "OEVK"
        assert OEVKSchema.ARGSCHEMA.value == PicaSchema

    def test_hbz_schema_config(self):
        """Test HBZ schema configuration."""
        assert HBZSchema.NAME.value == "HBZ"
        assert HBZSchema.ARGSCHEMA.value == ALMASchema
        assert HBZSchema.LIBRARY_NAME_LOCATION_FIELD.value == "852$a"
        # HBZ doesn't support PPN
        assert "PPN" in HBZSchema.NOTSUPPORTEDARGS.value


# --- BookData tests ---


class TestBookData:
    """Tests for the BookData class."""

    def test_bookdata_creation_defaults(self):
        """Test BookData creation with defaults."""
        book = BookData()
        assert book.ppn is None
        assert book.title is None
        assert book.in_apparat is False
        assert book.in_library is False

    def test_bookdata_creation_with_values(self):
        """Test BookData creation with values."""
        book = BookData(
            ppn="123456",
            title="Test Book",
            signature="ABC 123",
            year=2023,
            isbn=["9783123456789"],
        )
        assert book.ppn == "123456"
        assert book.title == "Test Book"
        assert book.signature == "ABC 123"
        assert book.year == "2023"  # Normalized to a string with non-digits stripped
        assert book.in_library is True  # Because a signature exists

    def test_bookdata_post_init_year_cleaning(self):
        """Test that year is cleaned of non-digits."""
        book = BookData(year="2023 [erschienen]")
        assert book.year == "2023"

    def test_bookdata_post_init_language_normalization(self):
        """Test language list normalization."""
        book = BookData(language=["ger", "eng", " fra "])
        assert book.language == "ger,eng,fra"

    def test_bookdata_post_init_library_location(self):
        """Test library_location is converted to string."""
        book = BookData(library_location=123)
        assert book.library_location == "123"

    def test_bookdata_from_dict(self):
        """Test BookData.from_dict method."""
        book = BookData()
        data = {"ppn": "123", "title": "Test", "year": "2023"}
        book.from_dict(data)
        assert book.ppn == "123"
        assert book.title == "Test"

    def test_bookdata_merge(self):
        """Test BookData.merge method."""
        book1 = BookData(ppn="123", title="Book 1")
        book2 = BookData(title="Book 2", author="Author", isbn=["978123"])

        book1.merge(book2)
        assert book1.ppn == "123"  # Original value preserved
        assert book1.title == "Book 1"  # Original value preserved (not None)
        assert book1.author == "Author"  # Merged from book2
        assert "978123" in book1.isbn  # Merged list

    def test_bookdata_merge_lists(self):
        """Test BookData.merge with list merging."""
        book1 = BookData(isbn=["978123"])
        book2 = BookData(isbn=["978456", "978123"])  # Has a duplicate

        book1.merge(book2)
        # Should have both ISBNs but no duplicates
        assert len(book1.isbn) == 2
        assert "978123" in book1.isbn
        assert "978456" in book1.isbn
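
# A rough sketch of the merge semantics exercised above (field names are from
# the tests, the mechanics are assumed): per field, keep the existing truthy
# value, fill in missing scalars from the other book, and union list fields
# while dropping duplicates, e.g.:
#
#     merged_isbn = list(dict.fromkeys([*book1.isbn, *book2.isbn]))
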
    def test_bookdata_to_dict(self):
        """Test BookData.to_dict property."""
        book = BookData(ppn="123", title="Test Book")
        json_str = book.to_dict
        data = json.loads(json_str)
        assert data["ppn"] == "123"
        assert data["title"] == "Test Book"
        assert "old_book" not in data  # Should be removed

    def test_bookdata_from_string(self):
        """Test BookData.from_string method."""
        json_str = '{"ppn": "123", "title": "Test"}'
        book = BookData().from_string(json_str)
        assert book.ppn == "123"
        assert book.title == "Test"

    def test_bookdata_edition_number(self):
        """Test BookData.edition_number property."""
        book = BookData(edition="3rd edition")
        assert book.edition_number == 3

        book2 = BookData(edition="First edition")
        assert book2.edition_number == 0  # No digit found

        book3 = BookData(edition=None)
        assert book3.edition_number == 0
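
# The edition_number behaviour above is consistent with grabbing the first
# digit run and defaulting to 0 -- a hedged sketch, not the actual property:
#
#     import re
#     def edition_number(edition: str | None) -> int:
#         match = re.search(r"\d+", edition or "")
#         return int(match.group()) if match else 0
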
    def test_bookdata_get_book_type(self):
        """Test BookData.get_book_type method."""
        book = BookData(media_type="print", pages="Online Resource")
        assert book.get_book_type() == "eBook"

        book2 = BookData(media_type="print", pages="300 pages")
        assert book2.get_book_type() == "Druckausgabe"


# --- Error classes tests ---


class TestErrors:
    """Tests for error classes."""

    def test_bibapi_error(self):
        """Test BibAPIError exception."""
        with pytest.raises(BibAPIError):
            raise BibAPIError("Test error")

    def test_catalogue_error(self):
        """Test CatalogueError exception."""
        with pytest.raises(CatalogueError):
            raise CatalogueError("Catalogue error")

        # Should also be a BibAPIError
        with pytest.raises(BibAPIError):
            raise CatalogueError("Catalogue error")

    def test_network_error(self):
        """Test NetworkError exception."""
        with pytest.raises(NetworkError):
            raise NetworkError("Network error")

        # Should also be a BibAPIError
        with pytest.raises(BibAPIError):
            raise NetworkError("Network error")
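
# The assertions above imply a hierarchy along these lines (a sketch of the
# assumed class layout, not copied from bibapi.schemas.errors):
#
#     class BibAPIError(Exception): ...
#     class CatalogueError(BibAPIError): ...
#     class NetworkError(BibAPIError): ...
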
@@ -1,8 +1,389 @@
from src.bibapi.sru import SWB
"""Comprehensive tests for the SRU module."""

import xml.etree.ElementTree as ET
from unittest.mock import MagicMock, patch

import pytest
import requests

from bibapi.schemas.api_types import ALMASchema, DublinCoreSchema, PicaSchema
from bibapi.schemas.bookdata import BookData
from bibapi.sru import (
    Api,
    QueryTransformer,
    book_from_marc,
    find_newer_edition,
    parse_marc_record,
)
from src.bibapi import SWB

# --- Integration test (requires network) ---


def test_swb_schema():
@pytest.mark.integration
def test_swb_schema() -> None:
    """Integration test that requires network access."""
    result = SWB().getBooks(["pica.tit=Java ist auch eine Insel", "pica.bib=20735"])
    assert len(result) == 1
    assert result[0].title == "Java ist auch eine Insel"
# --- Api class tests ---


class TestApiClass:
    """Tests for the Api class."""

    def test_api_initialization(self):
        """Test Api class initialization."""
        api = Api(
            site="TestSite",
            url="https://example.com/sru?query={}",
            prefix=PicaSchema,
            library_identifier="924$b",
        )
        assert api.site == "TestSite"
        assert api.url == "https://example.com/sru?query={}"
        assert api.prefix == PicaSchema
        assert api.library_identifier == "924$b"
        assert api._rate_limit_seconds == 1.0
        assert api._max_retries == 5
        assert api._overall_timeout_seconds == 30.0
        api.close()

    def test_api_with_notsupported_args(self):
        """Test Api initialization with unsupported arguments."""
        api = Api(
            site="TestSite",
            url="https://example.com/sru?query={}",
            prefix=PicaSchema,
            library_identifier="924$b",
            notsupported_args=["YEAR", "PPN"],
        )
        assert "YEAR" in api.notsupported_args
        assert "PPN" in api.notsupported_args
        api.close()

    def test_api_with_replace_dict(self):
        """Test Api initialization with replace dictionary."""
        api = Api(
            site="TestSite",
            url="https://example.com/sru?query={}",
            prefix=PicaSchema,
            library_identifier="924$b",
            replace={" ": "+", "&": "%26"},
        )
        assert api.replace == {" ": "+", "&": "%26"}
        api.close()

    @patch.object(requests.Session, "get")
    def test_api_get_success(self, mock_get, sample_sru_response_xml):
        """Test successful API get request."""
        mock_response = MagicMock()
        mock_response.status_code = 200
        mock_response.content = sample_sru_response_xml
        mock_get.return_value = mock_response

        api = Api(
            site="TestSite",
            url="https://example.com/sru?query={}",
            prefix=PicaSchema,
            library_identifier="924$b",
        )
        records = api.get(["title=Test"])
        assert len(records) == 1
        api.close()

    @patch.object(requests.Session, "get")
    def test_api_get_with_string_query(self, mock_get, sample_sru_response_xml):
        """Test API get with string query (not list)."""
        mock_response = MagicMock()
        mock_response.status_code = 200
        mock_response.content = sample_sru_response_xml
        mock_get.return_value = mock_response

        api = Api(
            site="TestSite",
            url="https://example.com/sru?query={}",
            prefix=PicaSchema,
            library_identifier="924$b",
        )
        records = api.get("title=Test")
        assert len(records) == 1
        api.close()

    @patch.object(requests.Session, "get")
    def test_api_get_filters_notsupported_args(self, mock_get, sample_sru_response_xml):
        """Test that unsupported args are filtered out."""
        mock_response = MagicMock()
        mock_response.status_code = 200
        mock_response.content = sample_sru_response_xml
        mock_get.return_value = mock_response

        api = Api(
            site="TestSite",
            url="https://example.com/sru?query={}",
            prefix=PicaSchema,
            library_identifier="924$b",
            notsupported_args=["YEAR"],
        )
        # YEAR should be filtered out
        records = api.get(["title=Test", "YEAR=2023"])
        assert len(records) == 1
        api.close()

    @patch.object(requests.Session, "get")
    def test_api_get_http_error_retries(self, mock_get):
        """Test that API retries on HTTP errors."""
        mock_response = MagicMock()
        mock_response.status_code = 500
        mock_get.return_value = mock_response

        api = Api(
            site="TestSite",
            url="https://example.com/sru?query={}",
            prefix=PicaSchema,
            library_identifier="924$b",
        )
        api._max_retries = 2
        api._rate_limit_seconds = 0.01  # Speed up test
        api._overall_timeout_seconds = 5.0

        with pytest.raises(Exception, match="HTTP 500"):
            api.get(["title=Test"])
        api.close()

    @patch.object(requests.Session, "get")
    def test_api_get_timeout_returns_empty_bookdata(self, mock_get):
        """Test that timeout returns empty BookData list."""
        mock_get.side_effect = requests.exceptions.ReadTimeout("Timeout")

        api = Api(
            site="TestSite",
            url="https://example.com/sru?query={}",
            prefix=PicaSchema,
            library_identifier="924$b",
        )
        api._max_retries = 1
        api._rate_limit_seconds = 0.01

        books = api.getBooks(["title=Test"])
        assert len(books) == 1
        assert books[0].ppn is None  # Empty BookData
        api.close()

    @patch.object(requests.Session, "get")
    def test_api_getbooks_filters_by_title(self, mock_get, sample_sru_response_xml):
        """Test that getBooks filters results by title prefix."""
        mock_response = MagicMock()
        mock_response.status_code = 200
        mock_response.content = sample_sru_response_xml
        mock_get.return_value = mock_response

        api = Api(
            site="TestSite",
            url="https://example.com/sru?query={}",
            prefix=PicaSchema,
            library_identifier="924$b",
        )
        # Title in sample is "Test Book" - filtering for "Test" should match
        books = api.getBooks(["pica.tit=Test"])
        assert len(books) == 1

        # Filtering for "NonExistent" should not match
        books = api.getBooks(["pica.tit=NonExistent"])
        assert len(books) == 0
        api.close()

    def test_api_close(self):
        """Test Api close method."""
        api = Api(
            site="TestSite",
            url="https://example.com/sru?query={}",
            prefix=PicaSchema,
            library_identifier="924$b",
        )
        # Should not raise
        api.close()
        api.close()  # Double close should be safe
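
# Since every test has to call api.close() by hand, a caller could lean on
# contextlib.closing instead -- a usage sketch under the assumption that
# close() is the only teardown Api needs:
#
#     from contextlib import closing
#
#     with closing(Api(site="TestSite", url="https://example.com/sru?query={}",
#                      prefix=PicaSchema, library_identifier="924$b")) as api:
#         books = api.getBooks(["pica.tit=Test"])
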
# --- QueryTransformer tests ---


class TestQueryTransformer:
    """Tests for the QueryTransformer class."""

    def test_transform_pica_schema(self):
        """Test transformation with PicaSchema."""
        args = ["TITLE=Test Book", "AUTHOR=Smith, John"]
        transformer = QueryTransformer(PicaSchema, args)
        result = transformer.transform()

        assert len(result) == 2
        # Check that pica.tit is in the result
        assert any(r.startswith("pica.tit=") for r in result)
        # Author should have comma without space
        assert any(r.startswith("pica.per=") for r in result)

    def test_transform_alma_schema(self):
        """Test transformation with ALMASchema."""
        args = ["TITLE=Test Book", "AUTHOR=Smith, John"]
        transformer = QueryTransformer(ALMASchema, args)
        result = transformer.transform()

        assert len(result) == 2
        # Title should be enclosed in quotes
        assert any('alma.title="Test Book"' in r for r in result)

    def test_transform_dublin_core_schema(self):
        """Test transformation with DublinCoreSchema."""
        args = ["TITLE=Test Book", "AUTHOR=Smith,John"]
        transformer = QueryTransformer(DublinCoreSchema, args)
        result = transformer.transform()

        assert len(result) == 2
        # Check that dc.title is in the result
        assert any(r.startswith("dc.title=") for r in result)
        # Author should have space after comma
        assert any(r.startswith("dc.creator=") for r in result)

    def test_transform_string_input(self):
        """Test transformation with string input instead of list."""
        transformer = QueryTransformer(PicaSchema, "TITLE=Test Book")
        result = transformer.transform()
        assert len(result) == 1

    def test_transform_drops_empty_values(self):
        """Test that empty values are dropped when drop_empty is True."""
        args = ["TITLE=Test Book", "AUTHOR="]
        transformer = QueryTransformer(PicaSchema, args)
        result = transformer.transform()
        assert len(result) == 1

    def test_transform_invalid_format_ignored(self):
        """Test that arguments without = are ignored."""
        args = ["TITLE=Test Book", "InvalidArg", "AUTHOR=Smith"]
        transformer = QueryTransformer(PicaSchema, args)
        result = transformer.transform()
        assert len(result) == 2

    def test_transform_unknown_key_ignored(self):
        """Test that unknown keys are ignored."""
        args = ["TITLE=Test Book", "UNKNOWNKEY=value"]
        transformer = QueryTransformer(PicaSchema, args)
        result = transformer.transform()
        assert len(result) == 1
# --- book_from_marc tests ---


class TestBookFromMarc:
    """Tests for the book_from_marc function."""

    def test_book_from_marc_basic(self, sample_marc_record_xml):
        """Test basic book extraction from MARC record."""
        root = ET.fromstring(sample_marc_record_xml)
        record = parse_marc_record(root)
        book = book_from_marc(record, "924$b")

        assert book.ppn == "123456789"
        assert book.title == "Test Book Title"
        assert book.edition == "2nd edition"
        assert book.year == "2023"
        assert book.publisher == "Test Publisher"
        assert "9783123456789" in book.isbn
        assert book.pages == "456 pages"
        assert book.media_type == "Band"
        assert book.author == "Author, Test"

    def test_book_from_marc_signature(self, sample_marc_record_xml):
        """Test signature extraction from MARC record with Frei 129."""
        root = ET.fromstring(sample_marc_record_xml)
        record = parse_marc_record(root)
        book = book_from_marc(record, "924$b")

        # Signature should be from 924 where $9 == "Frei 129" -> $g
        assert book.signature == "ABC 123"

    def test_book_from_marc_libraries(self, sample_marc_record_xml):
        """Test library extraction from MARC record."""
        root = ET.fromstring(sample_marc_record_xml)
        record = parse_marc_record(root)
        book = book_from_marc(record, "924$b")

        assert "DE-Frei129" in book.libraries
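
# Read together with the fixtures, these tests pin down the mapping that
# book_from_marc is expected to perform: controlfield 001 -> ppn, 245 -> title,
# 020$a -> isbn, and 924 -> holdings (sigil in $b, signature in $g where $9
# names the own library). The edition, year, publisher and author values
# presumably come from the standard MARC fields (250, 264, 100/700), though
# the exact source subfields are not asserted here.
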
# --- find_newer_edition tests ---


class TestFindNewerEdition:
    """Tests for the find_newer_edition function."""

    def test_find_newer_edition_by_year(self):
        """Test finding newer edition by year."""
        swb = BookData(ppn="1", year=2020, edition="1st edition")
        dnb = [
            BookData(ppn="2", year=2023, edition="3rd edition"),
            BookData(ppn="3", year=2019, edition="1st edition"),
        ]
        result = find_newer_edition(swb, dnb)
        assert result is not None
        assert len(result) == 1
        # Year is stored as string after post_init
        assert result[0].year == "2023"

    def test_find_newer_edition_by_edition_number(self):
        """Test finding newer edition by edition number."""
        swb = BookData(ppn="1", year=2020, edition="1st edition")
        dnb = [
            BookData(ppn="2", year=2020, edition="3rd edition"),
        ]
        result = find_newer_edition(swb, dnb)
        assert result is not None
        assert len(result) == 1
        assert result[0].edition_number == 3

    def test_find_newer_edition_none_found(self):
        """Test when no newer edition exists."""
        swb = BookData(ppn="1", year=2023, edition="5th edition")
        dnb = [
            BookData(ppn="2", year=2020, edition="1st edition"),
            BookData(ppn="3", year=2019, edition="2nd edition"),
        ]
        result = find_newer_edition(swb, dnb)
        assert result is None

    def test_find_newer_edition_empty_list(self):
        """Test with empty DNB result list."""
        swb = BookData(ppn="1", year=2020)
        result = find_newer_edition(swb, [])
        assert result is None

    def test_find_newer_edition_prefers_matching_signature(self):
        """Test that matching signature is preferred."""
        swb = BookData(ppn="1", year=2020, signature="ABC 123")
        dnb = [
            BookData(ppn="2", year=2023, signature="ABC 123"),
            BookData(ppn="3", year=2023, signature="XYZ 789"),
        ]
        result = find_newer_edition(swb, dnb)
        assert result is not None
        assert len(result) == 1
        # The candidate whose signature matches the SWB book is kept;
        # the XYZ 789 entry is filtered out.
        assert result[0].signature == "ABC 123"

    def test_find_newer_edition_deduplicates_by_ppn(self):
        """Test that results are deduplicated by PPN."""
        swb = BookData(ppn="1", year=2020)
        dnb = [
            BookData(ppn="2", year=2023, signature="ABC"),
            BookData(ppn="2", year=2023),  # Duplicate PPN, no signature
        ]
        result = find_newer_edition(swb, dnb)
        assert result is not None
        assert len(result) == 1
        # Should prefer the one with signature
        assert result[0].signature == "ABC"
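
# A hedged sketch of the "is newer" predicate these tests describe (the real
# find_newer_edition also handles signature preference and PPN deduplication):
#
#     def _is_newer(candidate: BookData, current: BookData) -> bool:
#         return int(candidate.year or 0) > int(current.year or 0) or (
#             candidate.year == current.year
#             and candidate.edition_number > current.edition_number
#         )
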
375 tests/test_transformers.py Normal file
@@ -0,0 +1,375 @@
"""Tests for the _transformers module."""

from src.bibapi._transformers import (
    RDS_AVAIL_DATA,
    RDS_DATA,
    RDS_GENERIC_DATA,
    ARRAYData,
    BibTeXData,
    COinSData,
    DictToTable,
    Item,
    RISData,
)
from src.bibapi.schemas.bookdata import BookData

# --- Item dataclass tests ---


class TestItem:
    """Tests for the Item dataclass."""

    def test_item_creation_defaults(self):
        """Test Item creation with defaults."""
        item = Item()
        assert item.superlocation == ""
        assert item.status == ""
        assert item.availability == ""

    def test_item_creation_with_values(self):
        """Test Item creation with values."""
        item = Item(
            superlocation="Main Library",
            status="available",
            callnumber="ABC 123",
        )
        assert item.superlocation == "Main Library"
        assert item.status == "available"
        assert item.callnumber == "ABC 123"

    def test_item_from_dict(self):
        """Test Item.from_dict method."""
        item = Item()
        data = {
            "items": [
                {
                    "status": "available",
                    "callnumber": "ABC 123",
                    "location": "Floor 1",
                },
            ],
        }
        result = item.from_dict(data)
        assert result.status == "available"
        assert result.callnumber == "ABC 123"
        assert result.location == "Floor 1"


# --- RDS_DATA dataclass tests ---


class TestRDSData:
    """Tests for the RDS_DATA dataclass."""

    def test_rds_data_creation_defaults(self):
        """Test RDS_DATA creation with defaults."""
        rds = RDS_DATA()
        assert rds.RDS_SIGNATURE == ""
        assert rds.RDS_STATUS == ""
        assert rds.RDS_LOCATION == ""

    def test_rds_data_import_from_dict(self):
        """Test RDS_DATA.import_from_dict method."""
        rds = RDS_DATA()
        data = {
            "RDS_SIGNATURE": "ABC 123",
            "RDS_STATUS": "available",
            "RDS_LOCATION": "Floor 1",
        }
        result = rds.import_from_dict(data)
        assert result.RDS_SIGNATURE == "ABC 123"
        assert result.RDS_STATUS == "available"
        assert result.RDS_LOCATION == "Floor 1"


# --- RDS_AVAIL_DATA dataclass tests ---


class TestRDSAvailData:
    """Tests for the RDS_AVAIL_DATA dataclass."""

    def test_rds_avail_data_creation_defaults(self):
        """Test RDS_AVAIL_DATA creation with defaults."""
        rds = RDS_AVAIL_DATA()
        assert rds.library_sigil == ""
        assert rds.items == []

    def test_rds_avail_data_import_from_dict(self):
        """Test RDS_AVAIL_DATA.import_from_dict method."""
        rds = RDS_AVAIL_DATA()
        json_data = (
            '{"DE-Frei129": {"Location1": {"items": [{"status": "available"}]}}}'
        )
        result = rds.import_from_dict(json_data)
        assert result.library_sigil == "DE-Frei129"
        assert len(result.items) == 1


# --- RDS_GENERIC_DATA dataclass tests ---


class TestRDSGenericData:
    """Tests for the RDS_GENERIC_DATA dataclass."""

    def test_rds_generic_data_creation_defaults(self):
        """Test RDS_GENERIC_DATA creation with defaults."""
        rds = RDS_GENERIC_DATA()
        assert rds.LibrarySigil == ""
        assert rds.RDS_DATA == []

    def test_rds_generic_data_import_from_dict(self):
        """Test RDS_GENERIC_DATA.import_from_dict method."""
        rds = RDS_GENERIC_DATA()
        json_data = '{"DE-Frei129": [{"RDS_SIGNATURE": "ABC 123"}]}'
        result = rds.import_from_dict(json_data)
        assert result.LibrarySigil == "DE-Frei129"
        assert len(result.RDS_DATA) == 1


# --- ARRAYData tests ---


class TestARRAYData:
    """Tests for the ARRAYData transformer."""

    def test_array_data_transform(self):
        """Test ARRAYData transform method."""
        sample_data = """
        [kid] => 123456789
        [ti_long] => Array
            (
                [0] => Test Book Title
            )
        [isbn] => Array
            (
                [0] => 9783123456789
            )
        [la_facet] => Array
            (
                [0] => German
            )
        [pu] => Array
            (
                [0] => Test Publisher
            )
        [py_display] => Array
            (
                [0] => 2023
            )
        [umfang] => Array
            (
                [0] => 300 pages
            )
        """
        transformer = ARRAYData()
        result = transformer.transform(sample_data)

        assert isinstance(result, BookData)
        assert result.ppn == "123456789"

    def test_array_data_with_signature(self):
        """Test ARRAYData with predefined signature."""
        sample_data = "[kid] => 123456789"
        transformer = ARRAYData(signature="ABC 123")
        result = transformer.transform(sample_data)

        assert isinstance(result, BookData)
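
# The `[key] => Array ( [0] => value )` layout in the sample above is PHP
# print_r output, which the queried OPAC presumably returns; ARRAYData appears
# to scrape key/value pairs out of that dump rather than parse structured JSON.
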
# --- COinSData tests ---


class TestCOinSData:
    """Tests for the COinSData transformer."""

    def test_coins_data_transform(self):
        """Test COinSData transform method."""
        # Note: the COinS format uses & separators; the last field shouldn't
        # have a trailing &
        sample_data = (
            "ctx_ver=Z39.88-2004&"
            "rft_id=info:sid/test?kid=123456&"
            "rft.btitle=Test Bookrft&"  # btitle ends parsing at the next 'rft'
            "rft.aulast=Smithrft&"
            "rft.aufirst=Johnrft&"
            "rft.edition=2ndrft&"
            "rft.isbn=9783123456789rft&"
            "rft.pub=Publisherrft&"
            "rft.date=2023rft&"
            "rft.tpages=300"
        )
        transformer = COinSData()
        result = transformer.transform(sample_data)

        assert isinstance(result, BookData)
        # The transformer splits on 'rft' after the field value
        assert "Test Book" in result.title
        assert "Smith" in result.author
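
# A sketch of how such a COinS string can be picked apart, assuming plain
# `&`-separated key=value pairs (the 'rft' markers trailing each value in the
# sample above suggest the real transformer slices values by that token):
#
#     from urllib.parse import parse_qs
#     fields = parse_qs("rft.btitle=Test Book&rft.date=2023")
#     assert fields["rft.btitle"] == ["Test Book"]
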
# --- RISData tests ---


class TestRISData:
    """Tests for the RISData transformer."""

    def test_ris_data_transform(self):
        """Test RISData transform method."""
        sample_data = """TY - BOOK
TI - Test Book Title
AU - Smith, John
ET - 2nd edition
CN - ABC 123
SN - 9783123456789
LA - English
PB - Test Publisher
PY - 2023
SP - 300
DP - https://example.com/book?kid=123456
ER -"""
        transformer = RISData()
        result = transformer.transform(sample_data)

        assert isinstance(result, BookData)
        assert result.title == "Test Book Title"
        assert result.signature == "ABC 123"
        assert result.edition == "2nd edition"
        assert result.year == "2023"
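
# RIS is line-oriented: each line is a short tag, a separator, and a value.
# A minimal sketch of that split, assuming the "TAG - value" layout used in
# the sample above:
#
#     entry = {}
#     for line in sample_data.splitlines():
#         tag, _, value = line.partition(" - ")
#         if value:
#             entry[tag.strip()] = value.strip()
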
# --- BibTeXData tests ---


class TestBibTeXData:
    """Tests for the BibTeXData transformer."""

    def test_bibtex_data_transform(self):
        """Test BibTeXData transform method."""
        sample_data = """@book{test2023,
    title = {Test Book Title},
    author = {Smith, John and Doe, Jane},
    edition = {2nd},
    isbn = {9783123456789},
    language = {English},
    publisher = {Test Publisher},
    year = {2023},
    pages = {300},
    bestand = {ABC 123}
}"""
        transformer = BibTeXData()
        result = transformer.transform(sample_data)

        assert isinstance(result, BookData)
        assert result.title == "Test Book Title"
        # The BibTeX transformer joins authors with ; and removes commas
        assert "Smith John" in result.author
        assert "Doe Jane" in result.author
        assert result.signature == "ABC 123"


# --- DictToTable tests ---


class TestDictToTable:
    """Tests for the DictToTable transformer."""

    def test_dict_to_table_book_mode(self):
        """Test DictToTable with book mode."""
        data = {
            "mode": "book",
            "book_author": "Smith, John",
            "book_signature": "ABC 123",
            "book_place": "Berlin",
            "book_year": "2023",
            "book_title": "Test Book",
            "book_edition": "2nd",
            "book_pages": "300",
            "book_publisher": "Publisher",
            "book_isbn": "9783123456789",
        }
        transformer = DictToTable()
        result = transformer.transform(data)

        assert result["type"] == "book"
        assert result["work_author"] == "Smith, John"
        assert result["signature"] == "ABC 123"
        assert result["year"] == "2023"

    def test_dict_to_table_hg_mode(self):
        """Test DictToTable with hg (editor) mode."""
        data = {
            "mode": "hg",
            "hg_author": "Chapter Author",
            "hg_editor": "Editor Name",
            "hg_year": "2023",
            "hg_title": "Collection Title",
            "hg_publisher": "Publisher",
            "hg_place": "Berlin",
            "hg_edition": "1st",
            "hg_chaptertitle": "Chapter Title",
            "hg_pages": "50-75",
            "hg_signature": "ABC 123",
            "hg_isbn": "9783123456789",
        }
        transformer = DictToTable()
        result = transformer.transform(data)

        assert result["type"] == "hg"
        assert result["section_author"] == "Chapter Author"
        assert result["work_author"] == "Editor Name"
        assert result["chapter_title"] == "Chapter Title"

    def test_dict_to_table_zs_mode(self):
        """Test DictToTable with zs (journal) mode."""
        data = {
            "mode": "zs",
            "zs_author": "Article Author",
            "zs_chapter_title": "Article Title",
            "zs_place": "Berlin",
            "zs_issue": "Vol. 5, No. 2",
            "zs_pages": "100-120",
            "zs_publisher": "Publisher",
            "zs_isbn": "1234-5678",
            "zs_year": "2023",
            "zs_signature": "PER 123",
            "zs_title": "Journal Name",
        }
        transformer = DictToTable()
        result = transformer.transform(data)

        assert result["type"] == "zs"
        assert result["section_author"] == "Article Author"
        assert result["chapter_title"] == "Article Title"
        assert result["issue"] == "Vol. 5, No. 2"

    def test_dict_to_table_reset(self):
        """Test DictToTable reset method."""
        transformer = DictToTable()
        transformer.work_author = "Test"
        transformer.year = "2023"

        transformer.reset()

        assert transformer.work_author is None
        assert transformer.year is None

    def test_dict_to_table_make_result_excludes_none(self):
        """Test that makeResult excludes None values."""
        transformer = DictToTable()
        transformer.work_author = "Test Author"
        transformer.year = "2023"
        # Leave the others as None

        result = transformer.makeResult()

        assert "work_author" in result
        assert "year" in result
        assert "section_author" not in result  # Should be excluded
        assert "pages" not in result  # Should be excluded

    def test_dict_to_table_invalid_mode(self):
        """Test that DictToTable returns None for an invalid mode."""
        data = {"mode": "invalid"}
        transformer = DictToTable()
        result = transformer.transform(data)

        assert result is None
309 tests/test_webrequest.py Normal file
@@ -0,0 +1,309 @@
"""Tests for the webrequest module."""

from unittest.mock import MagicMock, patch

import pytest
import requests

from src.bibapi.webrequest import (
    ALLOWED_IPS,
    BibTextTransformer,
    TransformerType,
    WebRequest,
    cover,
    get_content,
)


class TestTransformerType:
    """Tests for TransformerType enum."""

    def test_transformer_type_values(self):
        """Test TransformerType enum values."""
        assert TransformerType.ARRAY.value == "ARRAY"
        assert TransformerType.COinS.value == "COinS"
        assert TransformerType.BibTeX.value == "BibTeX"
        assert TransformerType.RIS.value == "RIS"
        assert TransformerType.RDS.value == "RDS"


class TestWebRequest:
    """Tests for WebRequest class."""

    def test_webrequest_init_not_allowed_ip(self):
        """Test WebRequest raises PermissionError for non-allowed IP."""
        with patch("requests.get") as mock_get:
            mock_response = MagicMock()
            mock_response.text = "192.168.1.1"  # Not in ALLOWED_IPS
            mock_get.return_value = mock_response

            with pytest.raises(PermissionError, match="IP not allowed"):
                WebRequest()

    def test_webrequest_init_allowed_ip(self):
        """Test WebRequest initializes successfully with allowed IP."""
        with patch("requests.get") as mock_get:
            mock_response = MagicMock()
            mock_response.text = ALLOWED_IPS[0]  # Use first allowed IP
            mock_get.return_value = mock_response

            wr = WebRequest()
            assert wr.public_ip == ALLOWED_IPS[0]
            assert wr.timeout == 5
            assert wr.use_any is False

    def test_webrequest_no_connection(self):
        """Test WebRequest raises ConnectionError when no internet."""
        with patch("requests.get") as mock_get:
            mock_get.side_effect = requests.exceptions.RequestException("No connection")

            with pytest.raises(ConnectionError, match="No internet connection"):
                WebRequest()

    def test_webrequest_use_any_book(self):
        """Test use_any_book property."""
        with patch("requests.get") as mock_get:
            mock_response = MagicMock()
            mock_response.text = ALLOWED_IPS[0]
            mock_get.return_value = mock_response

            wr = WebRequest()
            result = wr.use_any_book
            assert result.use_any is True

    def test_webrequest_set_apparat(self):
        """Test set_apparat method."""
        with patch("requests.get") as mock_get:
            mock_response = MagicMock()
            mock_response.text = ALLOWED_IPS[0]
            mock_get.return_value = mock_response

            wr = WebRequest()
            result = wr.set_apparat(5)
            assert result.apparat == "05"  # Padded with 0

            result = wr.set_apparat(15)
            assert result.apparat == 15  # Not padded

    def test_webrequest_get_ppn(self):
        """Test get_ppn method."""
        with patch("requests.get") as mock_get:
            mock_response = MagicMock()
            mock_response.text = ALLOWED_IPS[0]
            mock_get.return_value = mock_response

            wr = WebRequest()

            # Normal signature
            result = wr.get_ppn("ABC 123")
            assert result.ppn == "ABC 123"
            assert result.signature == "ABC 123"

            # Signature with +
            result = wr.get_ppn("ABC+123")
            assert result.ppn == "ABC%2B123"

            # DOI
            result = wr.get_ppn("https://doi.org/10.1234/test")
            assert result.ppn == "test"
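
# Worth noting from the assertions above: get_ppn URL-encodes "+" as "%2B"
# and, for a DOI URL, keeps only the suffix after the last "/" -- so
# "https://doi.org/10.1234/test" collapses to "test".
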
    def test_webrequest_search_book(self):
        """Test search_book method."""
        with patch("requests.get") as mock_get:
            # First call for IP check
            ip_response = MagicMock()
            ip_response.text = ALLOWED_IPS[0]

            # Second call for actual search
            search_response = MagicMock()
            search_response.text = "<html>results</html>"

            mock_get.side_effect = [ip_response, search_response]

            wr = WebRequest()
            result = wr.search_book("test search")
            assert result == "<html>results</html>"

    def test_webrequest_search_ppn(self):
        """Test search_ppn method."""
        with patch("requests.get") as mock_get:
            ip_response = MagicMock()
            ip_response.text = ALLOWED_IPS[0]

            ppn_response = MagicMock()
            ppn_response.text = "<html>ppn result</html>"

            mock_get.side_effect = [ip_response, ppn_response]

            wr = WebRequest()
            result = wr.search_ppn("123456")
            assert result == "<html>ppn result</html>"

    def test_webrequest_search(self):
        """Test search method."""
        with patch("requests.get") as mock_get:
            ip_response = MagicMock()
            ip_response.text = ALLOWED_IPS[0]

            search_response = MagicMock()
            search_response.text = "<html>detail page</html>"

            mock_get.side_effect = [ip_response, search_response]

            wr = WebRequest()
            result = wr.search("https://example.com/book")
            assert result == "<html>detail page</html>"

    def test_webrequest_search_error(self):
        """Test search method handles errors."""
        with patch("requests.get") as mock_get:
            ip_response = MagicMock()
            ip_response.text = ALLOWED_IPS[0]

            mock_get.side_effect = [ip_response, requests.exceptions.RequestException()]

            wr = WebRequest()
            result = wr.search("https://example.com/book")
            assert result is None

    def test_webrequest_get_book_links(self):
        """Test get_book_links method."""
        html = """<html>
        <a class="title getFull" href="/opac/book/123">Book 1</a>
        <a class="title getFull" href="/opac/book/456">Book 2</a>
        </html>"""

        with patch("requests.get") as mock_get:
            ip_response = MagicMock()
            ip_response.text = ALLOWED_IPS[0]

            search_response = MagicMock()
            search_response.text = html

            mock_get.side_effect = [ip_response, search_response]

            wr = WebRequest()
            wr.ppn = "test"
            links = wr.get_book_links("test")

            assert len(links) == 2
            assert "https://rds.ibs-bw.de/opac/book/123" in links[0]


class TestBibTextTransformer:
    """Tests for BibTextTransformer class."""

    def test_bibtexttransformer_init_valid_mode(self):
        """Test BibTextTransformer initialization with valid mode."""
        bt = BibTextTransformer(TransformerType.ARRAY)
        assert bt.mode == "ARRAY"

    def test_bibtexttransformer_init_default_mode(self):
        """Test BibTextTransformer uses ARRAY as default mode."""
        bt = BibTextTransformer()
        assert bt.mode == "ARRAY"

    def test_bibtexttransformer_invalid_mode(self):
        """Test BibTextTransformer raises error for invalid mode."""

        # Create a fake invalid mode
        class FakeMode:
            value = "INVALID"

        with pytest.raises(ValueError, match="not valid"):
            BibTextTransformer(FakeMode())

    def test_bibtexttransformer_use_signature(self):
        """Test use_signature method."""
        bt = BibTextTransformer()
        result = bt.use_signature("ABC 123")
        assert result.signature == "ABC 123"

    def test_bibtexttransformer_get_data_none(self):
        """Test get_data with None input."""
        bt = BibTextTransformer()
        result = bt.get_data(None)
        assert result.data is None

    def test_bibtexttransformer_get_data_ris(self):
        """Test get_data with RIS format."""
        bt = BibTextTransformer(TransformerType.RIS)
        data = ["Some data", "TY - BOOK\nTI - Test"]
        result = bt.get_data(data)
        assert "TY -" in result.data

    def test_bibtexttransformer_get_data_array(self):
        """Test get_data with ARRAY format."""
        bt = BibTextTransformer(TransformerType.ARRAY)
        data = ["Some data", "[kid] => 123456"]
        result = bt.get_data(data)
        assert "[kid]" in result.data

    def test_bibtexttransformer_get_data_coins(self):
        """Test get_data with COinS format."""
        bt = BibTextTransformer(TransformerType.COinS)
        data = ["Some data", "ctx_ver=Z39.88"]
        result = bt.get_data(data)
        assert "ctx_ver" in result.data

    def test_bibtexttransformer_get_data_bibtex(self):
        """Test get_data with BibTeX format."""
        bt = BibTextTransformer(TransformerType.BibTeX)
        data = ["Some data", "@book{test2023,"]
        result = bt.get_data(data)
        assert "@book" in result.data

    def test_bibtexttransformer_get_data_rds(self):
        """Test get_data with RDS format."""
        bt = BibTextTransformer(TransformerType.RDS)
        data = ["Some data", "RDS ---------------------------------- test"]
        result = bt.get_data(data)
        assert "RDS" in result.data

    def test_bibtexttransformer_return_data_none(self):
        """Test return_data when data is None."""
        bt = BibTextTransformer()
        bt.get_data(None)
        result = bt.return_data()
        assert result is None


class TestCoverFunction:
    """Tests for the cover function."""

    def test_cover_returns_content(self):
        """Test cover function returns image content."""
        with patch("requests.get") as mock_get:
            mock_response = MagicMock()
            mock_response.content = b"fake_image_content"
            mock_get.return_value = mock_response

            result = cover("9783123456789")
            assert result == b"fake_image_content"

    def test_cover_url_format(self):
        """Test cover function calls correct URL."""
        with patch("requests.get") as mock_get:
            mock_response = MagicMock()
            mock_response.content = b""
            mock_get.return_value = mock_response

            cover("9783123456789")

            called_url = mock_get.call_args[0][0]
            assert "9783123456789" in called_url
            assert "buchhandel.de/cover" in called_url
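
# Usage sketch: since cover() returns raw image bytes, a caller can dump them
# straight to disk (the file name is illustrative):
#
#     with open("cover.jpg", "wb") as fh:
#         fh.write(cover("9783123456789"))
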
class TestGetContentFunction:
    """Tests for the get_content function."""

    def test_get_content(self):
        """Test get_content extracts text from div."""
        from bs4 import BeautifulSoup

        html = '<html><div class="test-class"> Content Here </div></html>'
        soup = BeautifulSoup(html, "html.parser")

        result = get_content(soup, "test-class")
        assert result == "Content Here"