Merge pull request 'new features' (#13) from dev into main
Reviewed-on: #13
.coveragerc (new file, 7 lines)
@@ -0,0 +1,7 @@
[run]
branch = True
omit = */build/*,tests/*,main.py
[report]
exclude_lines =
    pragma: no cover
    raise NotImplementedError.*
@@ -2,18 +2,18 @@ on:
  workflow_dispatch:
    inputs:
      github_release:
        description: 'Create Gitea Release'
        description: "Create Gitea Release"
        default: true
        type: boolean
      bump:
        description: 'Bump type'
        description: "Bump type"
        required: false
        default: 'patch'
        default: "patch"
        type: choice
        options:
          - 'major'
          - 'minor'
          - 'patch'
          - "major"
          - "minor"
          - "patch"
jobs:
  build:
    runs-on: ubuntu-latest
@@ -26,7 +26,7 @@ jobs:
          fetch-tags: true # Fetch all tags (refs/tags)

      - name: Install uv
        uses: astral-sh/setup-uv@v5
        uses: astral-sh/setup-uv@v7
      - name: Set up Python
        run: uv python install
        with:
@@ -66,7 +66,6 @@ jobs:
          USERNAME: ${{ github.repository_owner }}
        run: uv publish --publish-url https://git.theprivateserver.de/api/packages/$USERNAME/pypi/ -t ${{ secrets.TOKEN }}

      - name: Create release
        id: create_release
        if: ${{ github.event.inputs.github_release == 'true' }}

@@ -14,7 +14,7 @@ jobs:
        uses: actions/checkout@master

      - name: Install uv
        uses: astral-sh/setup-uv@v5
        uses: astral-sh/setup-uv@v7
        with:
          python-version-file: "pyproject.toml"

@@ -12,7 +12,7 @@ jobs:
        uses: actions/checkout@master

      - name: Install uv
        uses: astral-sh/setup-uv@v5
        uses: astral-sh/setup-uv@v7
        with:
          python-version-file: "pyproject.toml"
.pre-commit-config.yaml (new file, 7 lines)
@@ -0,0 +1,7 @@
repos:
  - repo: https://github.com/astral-sh/ruff-pre-commit
    rev: v0.6.9 # latest ruff version
    hooks:
      - id: ruff
        args: ["--fix"]
      - id: ruff-format
@@ -3,30 +3,24 @@ name = "bibapi"
version = "0.0.6"
description = "Add your description here"
readme = "README.md"
authors = [
    { name = "WorldTeacher", email = "coding_contact@pm.me" }
]
authors = [{ name = "WorldTeacher", email = "coding_contact@pm.me" }]
requires-python = ">=3.13"
dependencies = [
    "regex>=2025.9.18",
    "requests>=2.32.5",
]

[project.optional-dependencies]
# SRU API feature: for accessing library catalogs via SRU protocol
sru = [
    "requests>=2.32.5",
]
sru = ["requests>=2.32.5"]

# Catalogue feature: web scraping local library catalog
catalogue = [
    "requests>=2.32.5",
    "beautifulsoup4>=4.12.0",
]
catalogue = ["requests>=2.32.5", "beautifulsoup4>=4.12.0"]

webrequest = ["bibapi[catalogue]", "ratelimit>=2.2.0"]

# Install all features
all = [
    "bibapi[sru,catalogue]",
]
all = ["bibapi[sru,catalogue]"]

[build-system]
requires = ["uv_build >= 0.9.5, <0.10.0"]
@@ -55,11 +49,25 @@ pre_commit_hooks = []
post_commit_hooks = []

[dependency-groups]
test = [
dev = [
    "pylint>=4.0.3",
    "pytest-mock>=3.15.1",
    "types-pysocks>=1.7.1.20251001",
    "types-regex>=2025.9.18.20250921",
    "types-requests>=2.32.4.20250913",
    "mypy>=1.18.2",
    "pytest>=8.4.2",
    "pytest-cov>=7.0.0",
    "ratelimit>=2.2.0",
    "beautifulsoup4>=4.12.0",
]

[tool.pytest.ini_options]
testpaths = ["tests"]
python_files = ["test_*.py"]
python_classes = ["Test*"]
python_functions = ["test_*"]
markers = [
    "integration: marks tests as integration tests (deselect with '-m \"not integration\"')",
]
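Since the extras above only pull their dependencies in when requested, downstream code typically guards the optional import; a minimal sketch (the error-message wording is an assumption, not taken from this repo):

try:
    from bs4 import BeautifulSoup  # provided by the "catalogue" extra
except ImportError as exc:  # extra not installed
    raise ImportError('catalogue support requires: pip install "bibapi[catalogue]"') from exc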
@@ -1,18 +1,26 @@
from .schemas.api_types import *
from .schemas.api_types import (
    ALMASchema,
    DNBSchema,
    HBZSchema,
    HebisSchema,
    KOBVSchema,
    OEVKSchema,
    SWBSchema,
)
from .sru import Api as _Api

__all__ = [
    "SWB",
    "DNB",
    "KOBV",
    "HEBIS",
    "OEVK",
    "HBZ",
    "HEBIS",
    "KOBV",
    "OEVK",
    "SWB",
]


class SWB(_Api):
    def __init__(self):
    def __init__(self) -> None:
        self.site = SWBSchema.NAME.value
        self.url = SWBSchema.URL.value
        self.prefix = SWBSchema.ARGSCHEMA.value
@@ -21,7 +29,7 @@ class SWB(_Api):


class DNB(_Api):
    def __init__(self):
    def __init__(self) -> None:
        self.site = DNBSchema.NAME.value
        self.url = DNBSchema.URL.value
        self.prefix = DNBSchema.ARGSCHEMA.value
@@ -29,7 +37,7 @@ class DNB(_Api):


class KOBV(_Api):
    def __init__(self):
    def __init__(self) -> None:
        self.site = KOBVSchema.NAME.value
        self.url = KOBVSchema.URL.value
        self.prefix = KOBVSchema.ARGSCHEMA.value
@@ -38,7 +46,7 @@ class KOBV(_Api):


class HEBIS(_Api):
    def __init__(self):
    def __init__(self) -> None:
        self.site = HebisSchema.NAME.value
        self.url = HebisSchema.URL.value
        self.prefix = HebisSchema.ARGSCHEMA.value
@@ -56,7 +64,7 @@ class HEBIS(_Api):


class OEVK(_Api):
    def __init__(self):
    def __init__(self) -> None:
        self.site = OEVKSchema.NAME.value
        self.url = OEVKSchema.URL.value
        self.prefix = OEVKSchema.ARGSCHEMA.value
@@ -65,18 +73,18 @@ class OEVK(_Api):


class HBZ(_Api):
    """
    Small wrapper of the SRU API used to retrieve data from the HBZ libraries
    """Small wrapper of the SRU API used to retrieve data from the HBZ libraries.

    All fields are available [here](https://eu04.alma.exlibrisgroup.com/view/sru/49HBZ_NETWORK?version=1.2)

    Schema
    ------
    HBZSchema: <HBZSchema>
    HBZSchema: "HBZSchema"

    query prefix: alma.
    """

    def __init__(self):
    def __init__(self) -> None:
        self.site = HBZSchema.NAME.value
        self.url = HBZSchema.URL.value
        self.prefix = HBZSchema.ARGSCHEMA.value
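A hedged usage sketch of the wrapper classes exported above (assumes an installed package and a reachable SRU endpoint; the "key=value" query format follows QueryTransformer's convention in sru.py, and the query term is invented):

from bibapi import DNB

api = DNB()                            # SRU wrapper configured from DNBSchema
books = api.getBooks(["title=Faust"])  # returns list[BookData]
for book in books:
    print(book.title, book.year, book.isbn)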
src/bibapi/_transformers.py (new file, 502 lines)
@@ -0,0 +1,502 @@
from __future__ import annotations

import json
import re
from dataclasses import dataclass
from dataclasses import field as dataclass_field
from typing import Any

from src.bibapi.schemas.bookdata import BookData


@dataclass
class Item:
    superlocation: str | None = dataclass_field(default_factory=str)
    status: str | None = dataclass_field(default_factory=str)
    availability: str | None = dataclass_field(default_factory=str)
    notes: str | None = dataclass_field(default_factory=str)
    limitation: str | None = dataclass_field(default_factory=str)
    duedate: str | None = dataclass_field(default_factory=str)
    id: str | None = dataclass_field(default_factory=str)
    item_id: str | None = dataclass_field(default_factory=str)
    ilslink: str | None = dataclass_field(default_factory=str)
    number: int | None = dataclass_field(default_factory=int)
    barcode: str | None = dataclass_field(default_factory=str)
    reserve: str | None = dataclass_field(default_factory=str)
    callnumber: str | None = dataclass_field(default_factory=str)
    department: str | None = dataclass_field(default_factory=str)
    locationhref: str | None = dataclass_field(default_factory=str)
    location: str | None = dataclass_field(default_factory=str)
    ktrl_nr: str | None = dataclass_field(default_factory=str)

    def from_dict(self, data: dict[str, Any]) -> Item:
        """Import data from dict."""
        data = data["items"]
        for entry in data:
            for key, value in entry.items():
                setattr(self, key, value)
        return self


@dataclass
class RDS_AVAIL_DATA:
    """Class to store RDS availability data"""

    library_sigil: str = dataclass_field(default_factory=str)
    items: list[Item] = dataclass_field(default_factory=list)

    def import_from_dict(self, data: str):
        """Import data from dict"""
        edata = json.loads(data)
        # library sigil is first key

        self.library_sigil = str(list(edata.keys())[0])
        # get data from first key
        edata = edata[self.library_sigil]
        for location in edata:
            item = Item(superlocation=location).from_dict(edata[location])

            self.items.append(item)
        return self


@dataclass
class RDS_DATA:
    """Class to store RDS data"""

    RDS_SIGNATURE: str = dataclass_field(default_factory=str)
    RDS_STATUS: str = dataclass_field(default_factory=str)
    RDS_LOCATION: str = dataclass_field(default_factory=str)
    RDS_URL: Any = dataclass_field(default_factory=str)
    RDS_HINT: Any = dataclass_field(default_factory=str)
    RDS_COMMENT: Any = dataclass_field(default_factory=str)
    RDS_HOLDING: Any = dataclass_field(default_factory=str)
    RDS_HOLDING_LEAK: Any = dataclass_field(default_factory=str)
    RDS_INTERN: Any = dataclass_field(default_factory=str)
    RDS_PROVENIENCE: Any = dataclass_field(default_factory=str)
    RDS_LOCAL_NOTATION: str = dataclass_field(default_factory=str)
    RDS_LEA: Any = dataclass_field(default_factory=str)

    def import_from_dict(self, data: dict) -> RDS_DATA:
        """Import data from dict"""
        for key, value in data.items():
            setattr(self, key, value)
        return self


@dataclass
class RDS_GENERIC_DATA:
    LibrarySigil: str = dataclass_field(default_factory=str)
    RDS_DATA: list[RDS_DATA] = dataclass_field(default_factory=list)

    def import_from_dict(self, data: str) -> RDS_GENERIC_DATA:
        """Import data from dict"""
        edata = json.loads(data)
        # library sigil is first key
        self.LibrarySigil = str(list(edata.keys())[0])
        # get data from first key
        edata = edata[self.LibrarySigil]
        for entry in edata:
            rds_data = RDS_DATA()  # Create a new RDS_DATA instance
            # Populate the RDS_DATA instance from the entry
            # This assumes that the entry is a dictionary that matches the structure of the RDS_DATA class
            rds_data.import_from_dict(entry)
            self.RDS_DATA.append(rds_data)  # Add the RDS_DATA instance to the list
        return self


class BaseStruct:
    def __init__(self, **kwargs):
        for key, value in kwargs.items():
            setattr(self, key, value)

class ARRAYData:
    def __init__(self, signature=None) -> None:
        self.signature = signature

    def transform(self, data: str) -> BookData:
        def _get_line(source: str, search: str) -> str:
            try:
                data = (
                    source.split(search)[1]
                    .split("\n")[0]
                    .strip()
                    .replace("=>", "")
                    .strip()
                )
                return data

            except Exception:
                return ""

        def _get_list_entry(source: str, search: str, entry: str) -> str:
            try:
                source = source.replace("\t", "").replace("\r", "")
                source = source.split(search)[1].split(")")[0]
                return _get_line(source, entry).replace("=>", "").strip()
            except Exception:
                return ""

        def _get_isbn(source: str) -> list:
            try:
                isbn = source.split("[isbn]")[1].split(")")[0].strip()
                isbn = isbn.split("(")[1]
                isbns = isbn.split("=>")
                ret = []
                for _ in isbns:
                    # remove _ from list
                    isb = _.split("\n")[0].strip()
                    if isb == "":
                        continue
                    if isb not in ret:
                        ret.append(isb)
                return ret
            except Exception:
                isbn = []
                return isbn

        def _get_signature(data):
            try:
                sig_data = (
                    data.split("[loksatz]")[1]
                    .split("[0] => ")[1]
                    .split("\n")[0]
                    .strip()
                )
                signature_data = eval(sig_data)
                return signature_data["signatur"]
            except Exception:
                return None

        def _get_author(data):
            try:
                array = data.split("[au_display_short]")[1].split(")\n")[0].strip()
            except Exception:
                return ""
            entries = array.split("\n")
            authors = []
            hg_present = False
            verf_present = False
            lines = []
            for entry in entries:
                if "=>" in entry:
                    line = entry.split("=>")[1].strip()
                    if "[HerausgeberIn]" in line:
                        hg_present = True
                    if "[VerfasserIn]" in line:
                        verf_present = True
                    lines.append(line)
            for line in lines:
                if hg_present and verf_present:
                    if "[HerausgeberIn]" in line:
                        authors.append(line.split("[")[0].strip())
                elif verf_present:
                    if "[VerfasserIn]" in line:
                        authors.append(line.split("[")[0].strip())
                else:
                    pass
            return ";".join(authors)

        def _get_title(data):
            titledata = None
            title = ""
            if "[ti_long]" in data:
                titledata = data.split("[ti_long]")[1].split(")\n")[0].strip()
                title = titledata.split("=>")[1].strip().split("/")[0].strip()
            if "[ti_long_f]" in data:
                titledata = data.split("[ti_long_f]")[1].split(")\n")[0].strip()
                title = titledata.split("=>")[1].strip().split("/")[0].strip()
            return title

        def _get_adis_idn(data, signature):
            loksatz_match = re.search(
                r"\[loksatz\] => Array\s*\((.*?)\)",
                data,
                re.DOTALL,
            )
            if loksatz_match:
                loksatz_content = loksatz_match.group(1)

                # Step 2: Extract JSON objects within the loksatz section
                json_objects = re.findall(r"{.*?}", loksatz_content, re.DOTALL)
                # Print each JSON object
                for obj in json_objects:
                    data = eval(obj)
                    if data["signatur"] == signature:
                        return data["adis_idn"]

        def _get_in_apparat(data):
            loksatz_match = re.search(
                r"\[loksatz\] => Array\s*\((.*?)\)",
                data,
                re.DOTALL,
            )
            if loksatz_match:
                loksatz_content = loksatz_match.group(1)

                # Step 2: Extract JSON objects within the loksatz section
                json_objects = re.findall(r"{.*?}", loksatz_content, re.DOTALL)
                # Print each JSON object
                for obj in json_objects:
                    data = eval(obj)
                    if data["ausleihcode"] == "R" and data["standort"] == "40":
                        return True
            return False

        ppn = _get_line(data, "[kid]")
        title = _get_title(data).strip()
        author = _get_author(data)
        edition = _get_list_entry(data, "[ausgabe]", "[0]").replace(",", "")
        link = f"https://rds.ibs-bw.de/phfreiburg/link?kid={_get_line(data, '[kid]')}"
        isbn = _get_isbn(data)
        # [self._get_list_entry(data,"[isbn]","[0]"),self._get_list_entry(data,"[is]","[1]")],
        language = _get_list_entry(data, "[la_facet]", "[0]")
        publisher = _get_list_entry(data, "[pu]", "[0]")
        year = _get_list_entry(data, "[py_display]", "[0]")
        pages = _get_list_entry(data, "[umfang]", "[0]").split(":")[0].strip()
        signature = (
            self.signature if self.signature is not None else _get_signature(data)
        )

        place = _get_list_entry(data, "[pp]", "[0]")
        adis_idn = _get_adis_idn(data, signature=signature)
        in_apparat = _get_in_apparat(data)
        return BookData(
            ppn=ppn,
            title=title,
            author=author,
            edition=edition,
            link=link,
            isbn=isbn,
            language=language,
            publisher=publisher,
            year=year,
            pages=pages,
            signature=signature,
            place=place,
            adis_idn=adis_idn,
            in_apparat=in_apparat,
        )


class COinSData:
    def __init__(self) -> None:
        pass

    def transform(self, data: str) -> BookData:
        def _get_line(source: str, search: str) -> str:
            try:
                data = source.split(f"{search}=")[1]  # .split("")[0].strip()
                return data.split("rft")[0].strip() if "rft" in data else data
            except Exception:
                return ""

        return BookData(
            ppn=_get_line(data, "rft_id").split("=")[1],
            title=_get_line(data, "rft.btitle"),
            author=f"{_get_line(data, 'rft.aulast')}, {_get_line(data, 'rft.aufirst')}",
            edition=_get_line(data, "rft.edition"),
            link=_get_line(data, "rft_id"),
            isbn=_get_line(data, "rft.isbn"),
            publisher=_get_line(data, "rft.pub"),
            year=_get_line(data, "rft.date"),
            pages=_get_line(data, "rft.tpages").split(":")[0].strip(),
        )


class RISData:
    def __init__(self) -> None:
        pass

    def transform(self, data: str) -> BookData:
        def _get_line(source: str, search: str) -> str:
            try:
                data = source.split(f"{search} - ")[1]  # .split("")[0].strip()
                return data.split("\n")[0].strip() if "\n" in data else data
            except Exception:
                return ""

        return BookData(
            ppn=_get_line(data, "DP").split("=")[1],
            title=_get_line(data, "TI"),
            signature=_get_line(data, "CN"),
            edition=_get_line(data, "ET").replace(",", ""),
            link=_get_line(data, "DP"),
            isbn=_get_line(data, "SN").split(","),
            author=_get_line(data, "AU").split("[")[0].strip(),
            language=_get_line(data, "LA"),
            publisher=_get_line(data, "PB"),
            year=_get_line(data, "PY"),
            pages=_get_line(data, "SP"),
        )


class BibTeXData:
    def __init__(self):
        pass

    def transform(self, data: str) -> BookData:
        def _get_line(source: str, search: str) -> str:
            try:
                return (
                    data.split(search)[1]
                    .split("\n")[0]
                    .strip()
                    .split("=")[1]
                    .strip()
                    .replace("{", "")
                    .replace("}", "")
                    .replace(",", "")
                    .replace("[", "")
                    .replace("];", "")
                )
            except Exception as e:
                print(e)
                return ""

        return BookData(
            ppn=None,
            title=_get_line(data, "title"),
            signature=_get_line(data, "bestand"),
            edition=_get_line(data, "edition"),
            isbn=_get_line(data, "isbn"),
            author=";".join(_get_line(data, "author").split(" and ")),
            language=_get_line(data, "language"),
            publisher=_get_line(data, "publisher"),
            year=_get_line(data, "year"),
            pages=_get_line(data, "pages"),
        )


class RDSData:
    def __init__(self):
        self.retlist = []

    def transform(self, data: str):
        # rds_availability = RDS_AVAIL_DATA()
        # rds_data = RDS_GENERIC_DATA()
        print(data)

        def __get_raw_data(data: str) -> list:
            # create base data to be turned into pydantic classes
            data = data.split("RDS ----------------------------------")[1]
            edata = data.strip()
            edata = edata.split("\n", 9)[9]
            edata = edata.split("\n")[1:]
            entry_1 = edata[0]
            edata = edata[1:]
            entry_2 = "".join(edata)
            edata = []
            edata.append(entry_1)
            edata.append(entry_2)
            return edata

        ret_data = __get_raw_data(data)
        # assign data[1] to RDS_AVAIL_DATA
        # assign data[0] to RDS_DATA
        self.rds_data = RDS_GENERIC_DATA().import_from_dict(ret_data[1])
        self.rds_availability = RDS_AVAIL_DATA().import_from_dict(ret_data[0])
        self.retlist.append(self.rds_availability)
        self.retlist.append(self.rds_data)
        return self

    def return_data(self, option=None):
        if option == "rds_availability":
            return self.retlist[0]
        if option == "rds_data":
            return self.retlist[1]
        return {"rds_availability": self.retlist[0], "rds_data": self.retlist[1]}


class DictToTable:
    def __init__(self):
        self.work_author = None
        self.section_author = None
        self.year = None
        self.edition = None
        self.work_title = None
        self.chapter_title = None
        self.location = None
        self.publisher = None
        self.signature = None
        self.type = None
        self.pages = None
        self.issue = None
        self.isbn = None

    def makeResult(self):
        data = {
            "work_author": self.work_author,
            "section_author": self.section_author,
            "year": self.year,
            "edition": self.edition,
            "work_title": self.work_title,
            "chapter_title": self.chapter_title,
            "location": self.location,
            "publisher": self.publisher,
            "signature": self.signature,
            "issue": self.issue,
            "pages": self.pages,
            "isbn": self.isbn,
            "type": self.type,
        }
        data = {k: v for k, v in data.items() if v is not None}
        return data

    def reset(self):
        for key in self.__dict__:
            setattr(self, key, None)

    def transform(self, data: dict):
        mode = data["mode"]
        self.reset()
        if mode == "book":
            return self.book_assign(data)
        if mode == "hg":
            return self.hg_assign(data)
        if mode == "zs":
            return self.zs_assign(data)
        return None

    def book_assign(self, data):
        self.type = "book"
        self.work_author = data["book_author"]
        self.signature = data["book_signature"]
        self.location = data["book_place"]
        self.year = data["book_year"]
        self.work_title = data["book_title"]
        self.edition = data["book_edition"]
        self.pages = data["book_pages"]
        self.publisher = data["book_publisher"]
        self.isbn = data["book_isbn"]
        return self.makeResult()

    def hg_assign(self, data):
        self.type = "hg"
        self.section_author = data["hg_author"]
        self.work_author = data["hg_editor"]
        self.year = data["hg_year"]
        self.work_title = data["hg_title"]
        self.publisher = data["hg_publisher"]
        self.location = data["hg_place"]
        self.edition = data["hg_edition"]
        self.chapter_title = data["hg_chaptertitle"]
        self.pages = data["hg_pages"]
        self.signature = data["hg_signature"]
        self.isbn = data["hg_isbn"]
        return self.makeResult()

    def zs_assign(self, data):
        self.type = "zs"
        self.section_author = data["zs_author"]
        self.chapter_title = data["zs_chapter_title"]
        self.location = data["zs_place"]
        self.issue = data["zs_issue"]
        self.pages = data["zs_pages"]
        self.publisher = data["zs_publisher"]
        self.isbn = data["zs_isbn"]

        self.year = data["zs_year"]
        self.signature = data["zs_signature"]
        self.work_title = data["zs_title"]
        return self.makeResult()
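As a quick illustration of DictToTable's dict-in/dict-out contract (the key names mirror book_assign() above; the values are invented):

row = DictToTable().transform({
    "mode": "book",
    "book_author": "Kafka, Franz",
    "book_signature": "CU 8500 K64",
    "book_place": "Berlin",
    "book_year": "1925",
    "book_title": "Der Process",
    "book_edition": "1. Aufl.",
    "book_pages": "412",
    "book_publisher": "Die Schmiede",
    "book_isbn": "9783000000000",
})
# -> dict with the None-valued fields dropped, plus "type": "book"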
@@ -1,5 +1,3 @@
from typing import List

import regex
import requests
from bs4 import BeautifulSoup
@@ -33,11 +31,11 @@ class Catalogue:
        response = requests.get(link, timeout=self.timeout)
        return response.text

    def get_book_links(self, searchterm: str) -> List[str]:
    def get_book_links(self, searchterm: str) -> list[str]:
        response = self.search_book(searchterm)
        soup = BeautifulSoup(response, "html.parser")
        links = soup.find_all("a", class_="title getFull")
        res: List[str] = []
        res: list[str] = []
        for link in links:
            res.append(BASE + link["href"])  # type: ignore
        return res
@@ -186,7 +184,8 @@ class Catalogue:
                class_="col-xs-12 col-md-7 col-lg-8 rds-dl-panel",
            ).get_text(strip=True)
            book.isbn = isbn
        # from div col-xs-12 col-md-5 col-lg-4 rds-dl-head RDS_SCOPE get pages (second div in this div)
        # from div col-xs-12 col-md-5 col-lg-4 rds-dl-head
        # RDS_SCOPE get pages (second div in this div)
        pages = None
        pages_el = soup.find("div", class_="RDS_SCOPE")
        if pages_el:
@@ -206,14 +205,14 @@ class Catalogue:
        # based on PPN, get title, people, edition, year, language, pages, isbn,
        link = f"https://rds.ibs-bw.de/phfreiburg/opac/RDSIndexrecord/{ppn}"
        result = self.search(link)
        soup = BeautifulSoup(result, "html.parser")
        BeautifulSoup(result, "html.parser")

    def get_ppn(self, searchterm: str) -> str | None:
        links = self.get_book_links(searchterm)
        ppn = None
        for link in links:
            result = self.search(link)
            soup = BeautifulSoup(result, "html.parser")
            BeautifulSoup(result, "html.parser")
            ppn = link.split("/")[-1]
            if ppn and regex.match(r"^\d{8,10}[X\d]?$", ppn):
                return ppn
@@ -328,3 +327,7 @@ class Catalogue:
        if link is None:
            return None
        return link.library_location

    def check_book_exists(self, searchterm: str) -> bool:
        links = self.get_book_links(searchterm)
        return len(links) > 0
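A hypothetical call sequence for the Catalogue methods changed above (the constructor is not shown in this diff, so the no-argument construction is an assumption):

cat = Catalogue()
if cat.check_book_exists("Der Process"):
    ppn = cat.get_ppn("Der Process")  # first link whose tail matches the PPN pattern
    print(ppn, cat.get_book_links("Der Process")[:3])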
src/bibapi/schemas/__init__.py (new file, 1 line)
@@ -0,0 +1 @@
"""Schemas for the provided APIs."""
@@ -1,6 +1,6 @@
import json
from dataclasses import dataclass, field
from typing import Any, Optional, Union
from typing import Any

import regex

@@ -12,9 +12,9 @@ class BookData:
    signature: str | None = None
    edition: str | None = None
    link: str | None = None
    isbn: Union[str, list[str], None] = field(default_factory=list[str])
    isbn: str | list[str] | None = field(default_factory=list[str])
    author: str | None = None
    language: Union[str, list[str], None] = field(default_factory=list)
    language: str | list[str] | None = field(default_factory=list)
    publisher: str | None = None
    place: str | None = None
    year: int | None = None
@@ -23,9 +23,10 @@ class BookData:
    in_apparat: bool | None = False
    adis_idn: str | None = None
    old_book: Any | None = None
    media_type: str | None = None  #
    media_type: str | None = None
    in_library: bool | None = None  # whether the book is in the library or not
    libraries: list[str] | None = field(default_factory=list)
    medianr: int | None = None  # media number

    def __post_init__(self):
        self.library_location = (
@@ -72,11 +73,10 @@ class BookData:
            key: value for key, value in self.__dict__.items() if value is not None
        }
        # remove old_book from data_dict
        if "old_book" in data_dict:
            del data_dict["old_book"]
        data_dict.pop("old_book", None)
        return json.dumps(data_dict, ensure_ascii=False)

    def from_dataclass(self, dataclass: Optional[Any]) -> None:
    def from_dataclass(self, dataclass: Any | None) -> None:
        if dataclass is None:
            return
        for key, value in dataclass.__dict__.items():
@@ -86,7 +86,6 @@ class BookData:
        if isinstance(self.media_type, str):
            if "Online" in self.pages:
                return "eBook"
            else:
                return "Druckausgabe"
        return None

@@ -114,7 +113,7 @@ class BookData:
        return self

    @property
    def edition_number(self) -> Optional[int]:
    def edition_number(self) -> int | None:
        if self.edition is None:
            return 0
        match = regex.search(r"(\d+)", self.edition)
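A small sketch of the dataclass in use (the title field is grounded in the BookData(...) calls in _transformers.py; the body of edition_number below this hunk is cut off, so the returned value is presumed to be the matched integer):

book = BookData(title="Test Book Title", edition="2. Auflage")
print(book.edition_number)  # presumably 2, via regex.search(r"(\d+)", ...) shown above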
src/bibapi/schemas/errors.py (new file, 10 lines)
@@ -0,0 +1,10 @@
class BibAPIError(Exception):
    """Base class for all BibAPI errors."""


class CatalogueError(BibAPIError):
    """Raised when there is an error with the library catalogue API."""


class NetworkError(BibAPIError):
    """Raised when there is a network-related error."""
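Because both concrete errors subclass BibAPIError, callers can catch the whole family at once; a minimal sketch (the message text is invented):

try:
    raise CatalogueError("catalogue lookup returned no usable records")
except BibAPIError as err:
    print(f"bibapi error: {err}")  # catches CatalogueError and NetworkError alike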
@@ -1,5 +1,4 @@
from dataclasses import dataclass, field
from typing import List, Optional


# --- MARC XML structures ---
@@ -20,14 +19,14 @@ class DataField:
    tag: str
    ind1: str = " "
    ind2: str = " "
    subfields: List[SubField] = field(default_factory=list)
    subfields: list[SubField] = field(default_factory=list)


@dataclass
class MarcRecord:
    leader: str
    controlfields: List[ControlField] = field(default_factory=list)
    datafields: List[DataField] = field(default_factory=list)
    controlfields: list[ControlField] = field(default_factory=list)
    datafields: list[DataField] = field(default_factory=list)


# --- SRU record wrapper ---
@@ -52,17 +51,17 @@ class EchoedSearchRequest:
class SearchRetrieveResponse:
    version: str
    numberOfRecords: int
    records: List[Record] = field(default_factory=list)
    echoedSearchRetrieveRequest: Optional[EchoedSearchRequest] = None
    records: list[Record] = field(default_factory=list)
    echoedSearchRetrieveRequest: EchoedSearchRequest | None = None


@dataclass
class FormattedResponse:
    title: str
    edition: Optional[str] = None
    publisher: Optional[str] = None
    year: Optional[str] = None
    authors: List[str] = field(default_factory=list)
    isbn: List[str] = field(default_factory=list)
    ppn: Optional[str] = None
    libraries: List[str] = field(default_factory=list)
    edition: str | None = None
    publisher: str | None = None
    year: str | None = None
    authors: list[str] = field(default_factory=list)
    isbn: list[str] = field(default_factory=list)
    ppn: str | None = None
    libraries: list[str] = field(default_factory=list)
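For orientation, a hand-built record using these dataclasses (SubField and ControlField take the same code/value and tag/value keywords used in sru.py's parse_marc_record; the field values are invented):

rec = MarcRecord(
    leader="00000nam a22000001i 4500",
    controlfields=[ControlField(tag="001", value="123456789")],
    datafields=[
        DataField(tag="245", subfields=[SubField(code="a", value="Test Book Title")]),
    ],
)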
@@ -1,8 +1,9 @@
import re
import time
import xml.etree.ElementTree as ET
from collections.abc import Iterable
from enum import Enum
from typing import Any, Dict, Iterable, List, Optional, Tuple, Type, Union
from typing import Any

import requests
from requests.adapters import HTTPAdapter
@@ -24,7 +25,7 @@ MARC = "http://www.loc.gov/MARC21/slim"
NS = {"zs": ZS, "marc": MARC}


def _text(elem: Optional[ET.Element]) -> str:
def _text(elem: ET.Element | None) -> str:
    return (elem.text or "") if elem is not None else ""


@@ -36,32 +37,32 @@ def _req_text(parent: ET.Element, path: str) -> str:


def parse_marc_record(record_el: ET.Element) -> MarcRecord:
    """
    record_el is the <marc:record> element (default ns MARC in your sample)
    """
    """record_el is the <marc:record> element (default ns MARC in your sample)"""
    # leader
    leader_text = _req_text(record_el, "marc:leader")

    # controlfields
    controlfields: List[ControlField] = []
    controlfields: list[ControlField] = []
    for cf in record_el.findall("marc:controlfield", NS):
        tag = cf.get("tag", "").strip()
        controlfields.append(ControlField(tag=tag, value=_text(cf)))

    # datafields
    datafields: List[DataField] = []
    datafields: list[DataField] = []
    for df in record_el.findall("marc:datafield", NS):
        tag = df.get("tag", "").strip()
        ind1 = df.get("ind1") or " "
        ind2 = df.get("ind2") or " "
        subfields: List[SubField] = []
        subfields: list[SubField] = []
        for sf in df.findall("marc:subfield", NS):
            code = sf.get("code", "")
            subfields.append(SubField(code=code, value=_text(sf)))
        datafields.append(DataField(tag=tag, ind1=ind1, ind2=ind2, subfields=subfields))

    return MarcRecord(
        leader=leader_text, controlfields=controlfields, datafields=datafields
        leader=leader_text,
        controlfields=controlfields,
        datafields=datafields,
    )


@@ -92,7 +93,7 @@ def parse_record(zs_record_el: ET.Element) -> Record:
    )


def parse_echoed_request(root: ET.Element) -> Optional[EchoedSearchRequest]:
def parse_echoed_request(root: ET.Element) -> EchoedSearchRequest | None:
    el = root.find("zs:echoedSearchRetrieveRequest", NS)
    if el is None:
        return None
@@ -119,7 +120,7 @@ def parse_echoed_request(root: ET.Element) -> Optional[EchoedSearchRequest]:


def parse_search_retrieve_response(
    xml_str: Union[str, bytes],
    xml_str: str | bytes,
) -> SearchRetrieveResponse:
    root = ET.fromstring(xml_str)

@@ -128,7 +129,7 @@ def parse_search_retrieve_response(
    numberOfRecords = int(_req_text(root, "zs:numberOfRecords") or "0")

    records_parent = root.find("zs:records", NS)
    records: List[Record] = []
    records: list[Record] = []
    if records_parent is not None:
        for r in records_parent.findall("zs:record", NS):
            record = parse_record(r)
@@ -150,9 +151,9 @@ def parse_search_retrieve_response(

def iter_datafields(
    rec: MarcRecord,
    tag: Optional[str] = None,
    ind1: Optional[str] = None,
    ind2: Optional[str] = None,
    tag: str | None = None,
    ind1: str | None = None,
    ind2: str | None = None,
) -> Iterable[DataField]:
    """Yield datafields, optionally filtered by tag/indicators."""
    for df in rec.datafields:
@@ -170,11 +171,11 @@ def subfield_values(
    tag: str,
    code: str,
    *,
    ind1: Optional[str] = None,
    ind2: Optional[str] = None,
) -> List[str]:
    ind1: str | None = None,
    ind2: str | None = None,
) -> list[str]:
    """All values for subfield `code` in every `tag` field (respecting indicators)."""
    out: List[str] = []
    out: list[str] = []
    for df in iter_datafields(rec, tag, ind1, ind2):
        out.extend(sf.value for sf in df.subfields if sf.code == code)
    return out
@@ -185,10 +186,10 @@ def first_subfield_value(
    tag: str,
    code: str,
    *,
    ind1: Optional[str] = None,
    ind2: Optional[str] = None,
    default: Optional[str] = None,
) -> Optional[str]:
    ind1: str | None = None,
    ind2: str | None = None,
    default: str | None = None,
) -> str | None:
    """First value for subfield `code` in `tag` (respecting indicators)."""
    for df in iter_datafields(rec, tag, ind1, ind2):
        for sf in df.subfields:
@@ -201,25 +202,24 @@ def find_datafields_with_subfields(
    rec: MarcRecord,
    tag: str,
    *,
    where_all: Optional[Dict[str, str]] = None,
    where_any: Optional[Dict[str, str]] = None,
    where_all: dict[str, str] | None = None,
    where_any: dict[str, str] | None = None,
    casefold: bool = False,
    ind1: Optional[str] = None,
    ind2: Optional[str] = None,
) -> List[DataField]:
    """
    Return datafields of `tag` whose subfields match constraints:
    ind1: str | None = None,
    ind2: str | None = None,
) -> list[DataField]:
    """Return datafields of `tag` whose subfields match constraints:
    - where_all: every (code -> exact value) must be present
    - where_any: at least one (code -> exact value) present
    Set `casefold=True` for case-insensitive comparison.
    """
    where_all = where_all or {}
    where_any = where_any or {}
    matched: List[DataField] = []
    matched: list[DataField] = []

    for df in iter_datafields(rec, tag, ind1, ind2):
        # Map code -> list of values (with optional casefold applied)
        vals: Dict[str, List[str]] = {}
        vals: dict[str, list[str]] = {}
        for sf in df.subfields:
            v = sf.value.casefold() if casefold else sf.value
            vals.setdefault(sf.code, []).append(v)
@@ -246,8 +246,10 @@ def find_datafields_with_subfields(


def controlfield_value(
    rec: MarcRecord, tag: str, default: Optional[str] = None
) -> Optional[str]:
    rec: MarcRecord,
    tag: str,
    default: str | None = None,
) -> str | None:
    """Get the first controlfield value by tag (e.g., '001', '005')."""
    for cf in rec.controlfields:
        if cf.tag == tag:
@@ -256,8 +258,10 @@ def controlfield_value(


def datafields_value(
    data: List[DataField], code: str, default: Optional[str] = None
) -> Optional[str]:
    data: list[DataField],
    code: str,
    default: str | None = None,
) -> str | None:
    """Get the first value for a specific subfield code in a list of datafields."""
    for df in data:
        for sf in df.subfields:
@@ -267,8 +271,10 @@ def datafields_value(


def datafield_value(
    df: DataField, code: str, default: Optional[str] = None
) -> Optional[str]:
    df: DataField,
    code: str,
    default: str | None = None,
) -> str | None:
    """Get the first value for a specific subfield code in a datafield."""
    for sf in df.subfields:
        if sf.code == code:
@@ -276,9 +282,8 @@ def datafield_value(
    return default


def _smart_join_title(a: str, b: Optional[str]) -> str:
    """
    Join 245 $a and $b with MARC-style punctuation.
def _smart_join_title(a: str, b: str | None) -> str:
    """Join 245 $a and $b with MARC-style punctuation.
    If $b is present, join with ' : ' unless either side already supplies punctuation.
    """
    a = a.strip()
@@ -293,7 +298,7 @@ def _smart_join_title(a: str, b: Optional[str]) -> str:
def subfield_values_from_fields(
    fields: Iterable[DataField],
    code: str,
) -> List[str]:
) -> list[str]:
    """All subfield values with given `code` across a list of DataField."""
    return [sf.value for df in fields for sf in df.subfields if sf.code == code]

@@ -301,8 +306,8 @@ def subfield_values_from_fields(
def first_subfield_value_from_fields(
    fields: Iterable[DataField],
    code: str,
    default: Optional[str] = None,
) -> Optional[str]:
    default: str | None = None,
) -> str | None:
    """First subfield value with given `code` across a list of DataField."""
    for df in fields:
        for sf in df.subfields:
@@ -314,12 +319,11 @@ def first_subfield_value_from_fields(
def subfield_value_pairs_from_fields(
    fields: Iterable[DataField],
    code: str,
) -> List[Tuple[DataField, str]]:
    """
    Return (DataField, value) pairs for all subfields with `code`.
) -> list[tuple[DataField, str]]:
    """Return (DataField, value) pairs for all subfields with `code`.
    Useful if you need to know which field a value came from.
    """
    out: List[Tuple[DataField, str]] = []
    out: list[tuple[DataField, str]] = []
    for df in fields:
        for sf in df.subfields:
            if sf.code == code:
@@ -340,13 +344,17 @@ def book_from_marc(rec: MarcRecord, library_identifier: str) -> BookData:

    # Signature = 924 where $9 == "Frei 129" → take that field's $g
    frei_fields = find_datafields_with_subfields(
        rec, "924", where_all={"9": "Frei 129"}
        rec,
        "924",
        where_all={"9": "Frei 129"},
    )
    signature = first_subfield_value_from_fields(frei_fields, "g")

    # Year = 264 $c (prefer ind2="1" publication; fallback to any 264)
    year = first_subfield_value(rec, "264", "c", ind2="1") or first_subfield_value(
        rec, "264", "c"
        rec,
        "264",
        "c",
    )
    isbn = subfield_values(rec, "020", "a")
    mediatype = first_subfield_value(rec, "338", "a")
@@ -378,10 +386,10 @@ RVK_ALLOWED = r"[A-Z0-9.\-\/]"  # conservative char set typically seen in RVK no


def find_newer_edition(
    swb_result: BookData, dnb_result: List[BookData]
) -> Optional[List[BookData]]:
    """
    New edition if:
    swb_result: BookData,
    dnb_result: list[BookData],
) -> list[BookData] | None:
    """New edition if:
    - year > swb.year OR
    - edition_number > swb.edition_number

@@ -393,7 +401,7 @@ def find_newer_edition(
    edition_number desc, best-signature-match desc, has-signature desc).
    """

    def norm_sig(s: Optional[str]) -> str:
    def norm_sig(s: str | None) -> str:
        if not s:
            return ""
        # normalize: lowercase, collapse whitespace, keep alnum + a few separators
@@ -427,7 +435,7 @@ def find_newer_edition(
    swb_sig_norm = norm_sig(getattr(swb_result, "signature", None))

    # 1) Filter to same-work AND newer
    candidates: List[BookData] = []
    candidates: list[BookData] = []
    for b in dnb_result:
        # Skip if both signatures exist and don't match (different work)
        b_sig = getattr(b, "signature", None)
@@ -443,7 +451,7 @@ def find_newer_edition(
        return None

    # 2) Dedupe by PPN, preferring signature (and matching signature if possible)
    by_ppn: dict[Optional[str], BookData] = {}
    by_ppn: dict[str | None, BookData] = {}
    for b in candidates:
        key = getattr(b, "ppn", None)
        prev = by_ppn.get(key)
@@ -477,7 +485,7 @@ def find_newer_edition(


class QueryTransformer:
    def __init__(self, api_schema: Type[Enum], arguments: Union[Iterable[str], str]):
    def __init__(self, api_schema: type[Enum], arguments: Iterable[str] | str):
        self.api_schema = api_schema
        if isinstance(arguments, str):
            self.arguments = [arguments]
@@ -485,8 +493,8 @@ class QueryTransformer:
            self.arguments = arguments
        self.drop_empty = True

    def transform(self) -> Dict[str, Any]:
        arguments: List[str] = []
    def transform(self) -> dict[str, Any]:
        arguments: list[str] = []
        schema = self.api_schema
        for arg in self.arguments:
            if "=" not in arg:
@@ -497,16 +505,17 @@ class QueryTransformer:
            if hasattr(schema, key.upper()):
                api_key = getattr(schema, key.upper()).value
                if key.upper() == "AUTHOR" and hasattr(schema, "AUTHOR_SCHEMA"):
                    author_schema = getattr(schema, "AUTHOR_SCHEMA").value
                    author_schema = schema.AUTHOR_SCHEMA.value
                    if author_schema == "SpaceAfterComma":
                        value = value.replace(",", ", ")
                    elif author_schema == "NoSpaceAfterComma":
                        value = value.replace(", ", ",")
                        value = value.replace("  ", " ")
if key.upper() == "TITLE" and hasattr(
|
||||
schema, "ENCLOSE_TITLE_IN_QUOTES"
|
||||
schema,
|
||||
"ENCLOSE_TITLE_IN_QUOTES",
|
||||
):
|
||||
if getattr(schema, "ENCLOSE_TITLE_IN_QUOTES"):
|
||||
if schema.ENCLOSE_TITLE_IN_QUOTES:
|
||||
value = f'"{value}"'
|
||||
|
||||
arguments.append(f"{api_key}={value}")
|
||||
@@ -519,10 +528,10 @@ class Api:
|
||||
self,
|
||||
site: str,
|
||||
url: str,
|
||||
prefix: Type[Enum],
|
||||
prefix: type[Enum],
|
||||
library_identifier: str,
|
||||
notsupported_args: Optional[List[str]] = None,
|
||||
replace: Optional[Dict[str, str]] = None,
|
||||
notsupported_args: list[str] | None = None,
|
||||
replace: dict[str, str] | None = None,
|
||||
):
|
||||
self.site = site
|
||||
self.url = url
|
||||
@@ -554,7 +563,7 @@ class Api:
|
||||
# Best-effort cleanup
|
||||
self.close()
|
||||
|
||||
def get(self, query_args: Union[Iterable[str], str]) -> List[Record]:
|
||||
def get(self, query_args: Iterable[str] | str) -> list[Record]:
|
||||
start_time = time.monotonic()
|
||||
# if any query_arg ends with =, remove it
|
||||
if isinstance(query_args, str):
|
||||
@@ -566,7 +575,8 @@ class Api:
|
||||
if not any(qa.startswith(na + "=") for na in self.notsupported_args)
|
||||
]
|
||||
query_args = QueryTransformer(
|
||||
api_schema=self.prefix, arguments=query_args
|
||||
api_schema=self.prefix,
|
||||
arguments=query_args,
|
||||
).transform()
|
||||
query = "+and+".join(query_args)
|
||||
for old, new in self.replace.items():
|
||||
@@ -579,12 +589,12 @@ class Api:
|
||||
"Accept-Charset": "latin1,utf-8;q=0.7,*;q=0.3",
|
||||
}
|
||||
# Use persistent session, enforce 1 req/sec, and retry up to 5 times
|
||||
last_error: Optional[Exception] = None
|
||||
last_error: Exception | None = None
|
||||
for attempt in range(1, self._max_retries + 1):
|
||||
# Abort if overall timeout exceeded before starting attempt
|
||||
if time.monotonic() - start_time > self._overall_timeout_seconds:
|
||||
last_error = requests.exceptions.Timeout(
|
||||
f"Overall timeout {self._overall_timeout_seconds}s exceeded before attempt {attempt}"
|
||||
f"Overall timeout {self._overall_timeout_seconds}s exceeded before attempt {attempt}",
|
||||
)
|
||||
break
|
||||
# Enforce rate limit relative to last request end
|
||||
@@ -596,20 +606,22 @@ class Api:
|
||||
try:
|
||||
# Per-attempt read timeout capped at remaining overall budget (but at most 30s)
|
||||
remaining = max(
|
||||
0.0, self._overall_timeout_seconds - (time.monotonic() - start_time)
|
||||
0.0,
|
||||
self._overall_timeout_seconds - (time.monotonic() - start_time),
|
||||
)
|
||||
read_timeout = min(30.0, remaining if remaining > 0 else 0.001)
|
||||
resp = self._session.get(
|
||||
url, headers=headers, timeout=(3.05, read_timeout)
|
||||
url,
|
||||
headers=headers,
|
||||
timeout=(3.05, read_timeout),
|
||||
)
|
||||
self._last_request_time = time.monotonic()
|
||||
if resp.status_code == 200:
|
||||
# Parse using raw bytes (original behavior) to preserve encoding edge cases
|
||||
sr = parse_search_retrieve_response(resp.content)
|
||||
return sr.records
|
||||
else:
|
||||
last_error = Exception(
|
||||
f"Error fetching data from {self.site}: HTTP {resp.status_code} (attempt {attempt}/{self._max_retries})"
|
||||
f"Error fetching data from {self.site}: HTTP {resp.status_code} (attempt {attempt}/{self._max_retries})",
|
||||
)
|
||||
except requests.exceptions.ReadTimeout as e:
|
||||
last_error = e
|
||||
@@ -625,9 +637,9 @@ class Api:
|
||||
# If we exit the loop, all attempts failed
|
||||
raise last_error if last_error else Exception("Unknown request failure")
|
||||
|
||||
def getBooks(self, query_args: Union[Iterable[str], str]) -> List[BookData]:
|
||||
def getBooks(self, query_args: Iterable[str] | str) -> list[BookData]:
|
||||
try:
|
||||
records: List[Record] = self.get(query_args)
|
||||
records: list[Record] = self.get(query_args)
|
||||
except requests.exceptions.ReadTimeout:
|
||||
# Return a list with a single empty BookData object on read timeout
|
||||
return [BookData()]
|
||||
@@ -638,7 +650,7 @@ class Api:
|
||||
# Propagate other errors (could also choose to return empty list)
|
||||
raise
|
||||
# Avoid printing on hot paths; rely on logger if needed
|
||||
books: List[BookData] = []
|
||||
books: list[BookData] = []
|
||||
# extract title from query_args if present
|
||||
title = None
|
||||
for arg in query_args:
|
||||
|
||||
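To make the subfield helpers above concrete, a hedged sketch against a hand-built record (record contents invented; function and dataclass signatures as defined in this file and the schema diff above):

rec = MarcRecord(
    leader="00000nam a22000001i 4500",
    controlfields=[ControlField(tag="001", value="123456789")],
    datafields=[
        DataField(tag="245", subfields=[SubField(code="a", value="Test Book Title")]),
        DataField(tag="020", subfields=[SubField(code="a", value="9783123456789")]),
    ],
)
print(first_subfield_value(rec, "245", "a"))  # "Test Book Title"
print(subfield_values(rec, "020", "a"))       # ["9783123456789"]
print(controlfield_value(rec, "001"))         # "123456789"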
src/bibapi/webrequest.py (new file, 305 lines)
@@ -0,0 +1,305 @@
|
||||
from enum import Enum
|
||||
from typing import Any
|
||||
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
# import sleep_and_retry decorator to retry requests
|
||||
from ratelimit import limits, sleep_and_retry
|
||||
|
||||
from src.bibapi._transformers import (
|
||||
RDS_AVAIL_DATA,
|
||||
RDS_GENERIC_DATA,
|
||||
ARRAYData,
|
||||
BibTeXData,
|
||||
COinSData,
|
||||
RDSData,
|
||||
RISData,
|
||||
)
|
||||
from src.bibapi.schemas.bookdata import BookData
|
||||
|
||||
API_URL = "https://rds.ibs-bw.de/phfreiburg/opac/RDSIndexrecord/{}/"
|
||||
PPN_URL = "https://rds.ibs-bw.de/phfreiburg/opac/RDSIndex/Search?type0%5B%5D=allfields&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=au&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=ti&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=ct&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=isn&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=ta&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=co&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=py&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=pp&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=pu&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=si&lookfor0%5B%5D={}&join=AND&bool0%5B%5D=AND&type0%5B%5D=zr&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND&type0%5B%5D=cc&lookfor0%5B%5D=&join=AND&bool0%5B%5D=AND"
|
||||
BASE = "https://rds.ibs-bw.de"
|
||||
TITLE = "RDS_TITLE"
|
||||
SIGNATURE = "RDS_SIGNATURE"
|
||||
EDITION = "RDS_EDITION"
|
||||
ISBN = "RDS_ISBN"
|
||||
AUTHOR = "RDS_PERSON"
|
||||
ALLOWED_IPS = [
|
||||
"193.197.140.245", # PHFR Internal
|
||||
]
|
||||
HEADERS = {
|
||||
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 \
|
||||
(HTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36",
|
||||
"Accept-Language": "en-US, en;q=0.5",
|
||||
}
|
||||
RATE_LIMIT = 20
|
||||
RATE_PERIOD = 30
|
||||
|
||||
|
||||
class TransformerType(Enum):
|
||||
"""Enum for possible Transformer types."""
|
||||
|
||||
ARRAY = "ARRAY"
|
||||
COinS = "COinS"
|
||||
BibTeX = "BibTeX"
|
||||
RIS = "RIS"
|
||||
RDS = "RDS"
|
||||
|
||||
|
||||
class WebRequest:
|
||||
def __init__(self) -> None:
|
||||
"""Request data from the web, and format it depending on the mode."""
|
||||
self.apparat = None
|
||||
self.use_any = False # use any book that matches the search term
|
||||
self.signature = None
|
||||
self.ppn = None
|
||||
self.data = None
|
||||
self.timeout = 5
|
||||
self.public_ip = None
|
||||
self._can_run()
|
||||
if self.public_ip not in ALLOWED_IPS:
|
||||
raise PermissionError("IP not allowed to access the requested data.")
|
||||
|
||||
def _can_run(self) -> None:
|
||||
"""Check if requests can be made."""
|
||||
try:
|
||||
# check public IP to see if the requested data can be accessed
|
||||
ip_response = requests.get("https://api.ipify.org", timeout=self.timeout)
|
||||
ip_response.raise_for_status()
|
||||
self.public_ip = ip_response.text
|
||||
except requests.exceptions.RequestException as e:
|
||||
raise ConnectionError("No internet connection") from e
|
||||
|
||||
if self.public_ip is None:
|
||||
raise ConnectionError("No internet connection")
|
||||
|
||||
@property
|
||||
def use_any_book(self):
|
||||
"""Use any book that matches the search term"""
|
||||
self.use_any = True
|
||||
return self
|
||||
|
||||
def set_apparat(self, apparat: int) -> "WebRequest":
|
||||
self.apparat = apparat
|
||||
if int(self.apparat) < 10:
|
||||
self.apparat = f"0{self.apparat}"
|
||||
return self
|
||||
|
||||
def get_ppn(self, signature: str) -> "WebRequest":
|
||||
self.signature = signature
|
||||
if "+" in signature:
|
||||
signature = signature.replace("+", "%2B")
|
||||
if "doi.org" in signature:
|
||||
signature = signature.split("/")[-1]
|
||||
self.ppn = signature
|
||||
return self
|
||||
|
||||
@sleep_and_retry
|
||||
@limits(calls=RATE_LIMIT, period=RATE_PERIOD)
|
||||
def search_book(self, searchterm: str) -> str:
|
||||
response = requests.get(PPN_URL.format(searchterm), timeout=self.timeout)
|
||||
return response.text
|
||||
|
||||
@sleep_and_retry
|
||||
@limits(calls=RATE_LIMIT, period=RATE_PERIOD)
|
||||
def search_ppn(self, ppn: str) -> str:
|
||||
response = requests.get(API_URL.format(ppn), timeout=self.timeout)
|
||||
return response.text
|
||||
|
||||
def get_book_links(self, searchterm: str) -> list[str]:
|
||||
response: str = self.search_book(searchterm) # type:ignore
|
||||
soup = BeautifulSoup(response, "html.parser")
|
||||
links = soup.find_all("a", class_="title getFull")
|
||||
res: list[str] = []
|
||||
for link in links:
|
||||
res.append(BASE + link["href"])
|
||||
return res
|
||||
|
||||
@sleep_and_retry
|
||||
@limits(calls=RATE_LIMIT, period=RATE_PERIOD)
|
||||
def search(self, link: str) -> str | None:
|
||||
try:
|
||||
response = requests.get(link, timeout=self.timeout)
|
||||
return response.text
|
||||
except requests.exceptions.RequestException:
|
||||
return None
|
||||
|
||||
def get_data(self) -> list[str] | None:
|
||||
links = self.get_book_links(self.ppn)
|
||||
return_data: list[str] = []
|
||||
for link in links:
|
||||
result: str = self.search(link) # type:ignore
|
||||
# in result search for class col-xs-12 rds-dl RDS_LOCATION
|
||||
# if found, return text of href
|
||||
soup = BeautifulSoup(result, "html.parser")
|
||||
locations = soup.find_all("div", class_="col-xs-12 rds-dl RDS_LOCATION")
|
||||
if locations:
|
||||
for location in locations:
|
||||
if "1. OG Semesterapparat" in location.text:
|
||||
pre_tag = soup.find_all("pre")
|
||||
return_data = []
|
||||
if pre_tag:
|
||||
for tag in pre_tag:
|
||||
data = tag.text.strip()
|
||||
return_data.append(data)
|
||||
return return_data
|
||||
return return_data
|
||||
item_location = location.find(
|
||||
"div",
|
||||
class_="col-xs-12 col-md-7 col-lg-8 rds-dl-panel",
|
||||
).text.strip()
|
||||
if self.use_any:
|
||||
pre_tag = soup.find_all("pre")
|
||||
if pre_tag:
|
||||
for tag in pre_tag:
|
||||
data = tag.text.strip()
|
||||
return_data.append(data)
|
||||
return return_data
|
||||
raise ValueError("No <pre> tag found")
|
||||
if f"Semesterapparat-{self.apparat}" in item_location:
|
||||
pre_tag = soup.find_all("pre")
|
||||
return_data = []
|
||||
if pre_tag:
|
||||
for tag in pre_tag:
|
||||
data = tag.text.strip()
|
||||
return_data.append(data)
|
||||
return return_data
|
||||
return return_data
|
||||
|
||||
return return_data
|
||||
|
||||
def get_data_elsa(self) -> list[str] | None:
|
||||
links = self.get_book_links(self.ppn)
|
||||
for link in links:
|
||||
result = self.search(link)
|
||||
# in result search for class col-xs-12 rds-dl RDS_LOCATION
|
||||
# if found, return text of href
|
||||
soup = BeautifulSoup(result, "html.parser")
|
||||
locations = soup.find_all("div", class_="col-xs-12 rds-dl RDS_LOCATION")
|
||||
if locations:
|
||||
for _ in locations:
|
||||
pre_tag = soup.find_all("pre")
|
||||
return_data = []
|
||||
if pre_tag:
|
||||
for tag in pre_tag:
|
||||
data = tag.text.strip()
|
||||
return_data.append(data)
|
||||
return return_data
|
||||
return None
|
||||
|
||||
|
||||
class BibTextTransformer:
|
||||
"""Transforms data from the web into a BibText format.
|
||||
|
||||
Valid Modes are ARRAY, COinS, BibTeX, RIS, RDS
|
||||
Raises:
|
||||
ValueError: Raised if mode is not in valid_modes
|
||||
"""
|
||||
|
||||
valid_modes = [
|
||||
TransformerType.ARRAY,
|
||||
TransformerType.COinS,
|
||||
TransformerType.BibTeX,
|
||||
TransformerType.RIS,
|
||||
TransformerType.RDS,
|
||||
]
|
||||
|
||||
def __init__(self, mode: TransformerType = TransformerType.ARRAY) -> None:
|
||||
self.mode = mode.value
|
||||
self.field = None
|
||||
self.signature = None
|
||||
if mode not in self.valid_modes:
|
||||
raise ValueError(f"Mode {mode} not valid")
|
||||
self.data = None
|
||||
# self.bookdata = BookData(**self.data)
|
||||
|
||||
def use_signature(self, signature: str) -> "BibTextTransformer":
|
||||
"""Use the exact signature to search for the book"""
|
||||
self.signature = signature
|
||||
return self
|
||||
|
||||
    def get_data(self, data: list[str] | None = None) -> "BibTextTransformer":
        # Marker substrings that identify each record format in the raw lines.
        idents = {
            "RIS": "TY -",
            "ARRAY": "[kid]",
            "COinS": "ctx_ver",
            "BibTeX": "@book",
            "RDS": "RDS ---------------------------------- ",
        }

        if data is None:
            self.data = None
            return self

        ident = idents.get(self.mode)
        if ident is None:
            return self
        for line in data:
            if ident in line:
                self.data = line
        return self

    def return_data(
        self,
        option: Any = None,
    ) -> (
        BookData
        | None
        | RDS_GENERIC_DATA
        | RDS_AVAIL_DATA
        | dict[str, RDS_AVAIL_DATA | RDS_GENERIC_DATA]
    ):
        """Return the transformed data to the caller.

        Args:
            option (str, optional): Option for RDS, as there are two record
                types. Use "rds_availability" or "rds_data"; anything else
                returns a dict with both responses. Defaults to None.

        Returns:
            BookData: a dataclass containing data about the book

        """
        if self.data is None:
            return None
        match self.mode:
            case "ARRAY":
                return ARRAYData(self.signature).transform(self.data)
            case "COinS":
                return COinSData().transform(self.data)
            case "BibTeX":
                return BibTeXData().transform(self.data)
            case "RIS":
                return RISData().transform(self.data)
            case "RDS":
                return RDSData().transform(self.data).return_data(option)
            case _:
                return None

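
# Usage sketch for return_data's RDS option (illustrative only; the variable
# names below are hypothetical, based on the docstring above):
#
#   transformer = BibTextTransformer(TransformerType.RDS).get_data(raw_lines)
#   availability = transformer.return_data("rds_availability")  # RDS_AVAIL_DATA
#   record = transformer.return_data("rds_data")                # RDS_GENERIC_DATA
#   both = transformer.return_data(None)                        # dict of both
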
def cover(isbn):
    """Fetch a cover image for the given ISBN from buchhandel.de."""
    test_url = f"https://www.buchhandel.de/cover/{isbn}/{isbn}-cover-m.jpg"
    response = requests.get(test_url, stream=True)
    return response.content


def get_content(soup, css_class):
    """Return the stripped text of the first div with the given CSS class."""
    return soup.find("div", class_=css_class).text.strip()


if __name__ == "__main__":
    link = "CU 8500 K64"
    data = WebRequest(71).get_ppn(link).get_data()
    bib = BibTextTransformer(TransformerType.ARRAY).get_data(data).return_data()
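
# End-to-end sketch of the lookup pipeline above (illustrative; 71 is a
# Semesterapparat number and "CU 8500 K64" a call signature, both taken from
# the demo, while the None check and to_dict usage are assumptions):
#
#   raw = WebRequest(71).get_ppn("CU 8500 K64").get_data()
#   book = BibTextTransformer(TransformerType.ARRAY).get_data(raw).return_data()
#   if book is not None:
#       print(book.to_dict)
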
1
tests/__init__.py
Normal file
@@ -0,0 +1 @@
"""Tests for the package."""
@@ -1,17 +1,55 @@
from typing import Callable, Optional
"""Shared pytest fixtures for BibAPI tests."""

import pytest

from bibapi import sru


@pytest.fixture
def sample_marc_record_xml() -> str:
    """Sample MARC record XML for testing."""
    return """<?xml version="1.0" encoding="UTF-8"?>
<marc:record xmlns:marc="http://www.loc.gov/MARC21/slim">
    <marc:leader>00000nam a22000001i 4500</marc:leader>
    <marc:controlfield tag="001">123456789</marc:controlfield>
    <marc:controlfield tag="005">20230101120000.0</marc:controlfield>
    <marc:datafield tag="020" ind1=" " ind2=" ">
        <marc:subfield code="a">9783123456789</marc:subfield>
    </marc:datafield>
    <marc:datafield tag="041" ind1=" " ind2=" ">
        <marc:subfield code="a">ger</marc:subfield>
    </marc:datafield>
    <marc:datafield tag="245" ind1="1" ind2="0">
        <marc:subfield code="a">Test Book Title</marc:subfield>
        <marc:subfield code="b">A Subtitle</marc:subfield>
    </marc:datafield>
    <marc:datafield tag="250" ind1=" " ind2=" ">
        <marc:subfield code="a">2nd edition</marc:subfield>
    </marc:datafield>
    <marc:datafield tag="264" ind1=" " ind2="1">
        <marc:subfield code="a">Berlin</marc:subfield>
        <marc:subfield code="b">Test Publisher</marc:subfield>
        <marc:subfield code="c">2023</marc:subfield>
    </marc:datafield>
    <marc:datafield tag="300" ind1=" " ind2=" ">
        <marc:subfield code="a">456 pages</marc:subfield>
    </marc:datafield>
    <marc:datafield tag="338" ind1=" " ind2=" ">
        <marc:subfield code="a">Band</marc:subfield>
    </marc:datafield>
    <marc:datafield tag="700" ind1="1" ind2=" ">
        <marc:subfield code="a">Author, Test</marc:subfield>
    </marc:datafield>
    <marc:datafield tag="924" ind1=" " ind2=" ">
        <marc:subfield code="9">Frei 129</marc:subfield>
        <marc:subfield code="g">ABC 123</marc:subfield>
        <marc:subfield code="b">DE-Frei129</marc:subfield>
    </marc:datafield>
</marc:record>"""


@pytest.fixture
def sample_sru_xml() -> bytes:
    """Return a small SRU searchRetrieveResponse (MARCXML) as bytes.

    Tests can use this raw bytes payload to simulate SRU responses.
    """
    xml = b"""<?xml version="1.0" encoding="UTF-8"?>
def sample_sru_response_xml() -> bytes:
    """Sample SRU searchRetrieveResponse XML for testing."""
    return b"""<?xml version="1.0" encoding="UTF-8"?>
<zs:searchRetrieveResponse xmlns:zs="http://www.loc.gov/zing/srw/"
    xmlns:marc="http://www.loc.gov/MARC21/slim">
    <zs:version>1.1</zs:version>
@@ -22,15 +60,35 @@ def sample_sru_xml() -> bytes:
    <zs:recordPacking>xml</zs:recordPacking>
    <zs:recordData>
        <marc:record>
            <marc:leader>-----nam a22</marc:leader>
            <marc:controlfield tag="001">PPN123</marc:controlfield>
            <marc:leader>00000nam a22</marc:leader>
            <marc:controlfield tag="001">123456789</marc:controlfield>
            <marc:datafield tag="020" ind1=" " ind2=" ">
                <marc:subfield code="a">9783123456789</marc:subfield>
            </marc:datafield>
            <marc:datafield tag="041" ind1=" " ind2=" ">
                <marc:subfield code="a">ger</marc:subfield>
            </marc:datafield>
            <marc:datafield tag="245" ind1=" " ind2=" ">
                <marc:subfield code="a">Example Title</marc:subfield>
                <marc:subfield code="b">Subtitle</marc:subfield>
                <marc:subfield code="a">Test Book</marc:subfield>
            </marc:datafield>
            <marc:datafield tag="250" ind1=" " ind2=" ">
                <marc:subfield code="a">1st edition</marc:subfield>
            </marc:datafield>
            <marc:datafield tag="264" ind1=" " ind2="1">
                <marc:subfield code="c">2001</marc:subfield>
                <marc:subfield code="b">Example Publisher</marc:subfield>
                <marc:subfield code="b">Publisher</marc:subfield>
                <marc:subfield code="c">2023</marc:subfield>
            </marc:datafield>
            <marc:datafield tag="300" ind1=" " ind2=" ">
                <marc:subfield code="a">200 pages</marc:subfield>
            </marc:datafield>
            <marc:datafield tag="338" ind1=" " ind2=" ">
                <marc:subfield code="a">Band</marc:subfield>
            </marc:datafield>
            <marc:datafield tag="700" ind1="1" ind2=" ">
                <marc:subfield code="a">Author, Test</marc:subfield>
            </marc:datafield>
            <marc:datafield tag="924" ind1=" " ind2=" ">
                <marc:subfield code="b">DE-Frei129</marc:subfield>
            </marc:datafield>
        </marc:record>
    </zs:recordData>
@@ -39,70 +97,55 @@ def sample_sru_xml() -> bytes:
    </zs:records>
    <zs:echoedSearchRetrieveRequest>
        <zs:version>1.1</zs:version>
        <zs:query>pica.tit=Example</zs:query>
        <zs:maximumRecords>10</zs:maximumRecords>
        <zs:query>pica.tit=Test</zs:query>
        <zs:maximumRecords>100</zs:maximumRecords>
        <zs:recordPacking>xml</zs:recordPacking>
        <zs:recordSchema>marcxml</zs:recordSchema>
    </zs:echoedSearchRetrieveRequest>
</zs:searchRetrieveResponse>
"""
    return xml
</zs:searchRetrieveResponse>"""


@pytest.fixture
def sru_api_factory(monkeypatch) -> Callable[[str, Optional[bytes]], sru.Api]:
    """Factory to create an `sru.Api` (or subclass) with network calls mocked.

    Usage:
        def test_x(sru_api_factory, sample_sru_xml):
            api = sru_api_factory('SWB', sample_sru_xml)
            books = api.getBooks(['pica.tit=Example'])

    The fixture monkeypatches requests.Session.get on the Api instance to return
    a fake Response with the provided bytes payload. If `response_bytes` is
    None the real network call will be performed (not recommended in unit tests).
    """

    def _make(site: str, response_bytes: Optional[bytes] = None) -> sru.Api:
        mapping = {"SWB": sru.SWB, "DNB": sru.Api}
        if site == "SWB":
            api = sru.SWB()
        elif site == "DNB":
            # DNB Api class is the base Api configured differently in sru module
            api = sru.Api(
                sru.DNBData.NAME.value,
                sru.DNBData.URL.value,
                sru.DNBData.ARGSCHEMA.value,
            )
        else:
            # allow custom site/url/prefix via tuple passed as site: (site, url, prefix)
            if isinstance(site, tuple) and len(site) == 3:
                api = sru.Api(site[0], site[1], site[2])
            else:
                raise ValueError("Unknown site for factory: %r" % (site,))

        if response_bytes is not None:

            class FakeResp:
                status_code = 200

                def __init__(self, content: bytes):
                    self.content = content

            def fake_get(url, headers=None, timeout=None):
                return FakeResp(response_bytes)

            # Patch only this instance's session.get
            monkeypatch.setattr(api._session, "get", fake_get)

        return api

    return _make
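
# A minimal test built on this factory (sketch; it assumes the sample_sru_xml
# fixture defined above, and that getBooks returns a list as the other tests
# expect):
#
#   def test_swb_parses_mocked_response(sru_api_factory, sample_sru_xml):
#       api = sru_api_factory("SWB", sample_sru_xml)
#       books = api.getBooks(["pica.tit=Example"])
#       assert isinstance(books, list)
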

import pytest
def mock_catalogue_html() -> str:
    """Sample HTML response from catalogue search."""
    return """<!DOCTYPE html>
<html>
<body>
    <a class="title getFull" href="/opac/record/123">Book Title</a>
</body>
</html>"""


@pytest.fixture
def sru_data():
    return {"bib_id": 20735, "sigil": "Frei129"}
def mock_catalogue_detail_html() -> str:
    """Sample HTML response from catalogue book detail page."""
    return """<!DOCTYPE html>
<html>
<body>
    <div class="headline text">Test Book Title</div>
    <div class="col-xs-12 col-md-5 col-lg-4 rds-dl-head RDS_PPN"></div>
    <div class="col-xs-12 col-md-7 col-lg-8 rds-dl-panel">123456789</div>
    <div class="col-xs-12 col-md-5 col-lg-4 rds-dl-head RDS_EDITION"></div>
    <div class="col-xs-12 col-md-7 col-lg-8 rds-dl-panel">2nd ed.</div>
    <div class="col-xs-12 col-md-5 col-lg-4 rds-dl-head RDS_PERSON"></div>
    <div class="col-xs-12 col-md-7 col-lg-8 rds-dl-panel">
        <a href="#">Author One</a>
        <a href="#">Author Two</a>
    </div>
    <div class="panel-body">
        <div class="rds-dl RDS_SIGNATURE">
            <div class="rds-dl-panel">ABC 123</div>
        </div>
        <div class="rds-dl RDS_STATUS">
            <div class="rds-dl-panel">Available</div>
        </div>
        <div class="rds-dl RDS_LOCATION">
            <div class="rds-dl-panel">Main Library</div>
        </div>
    </div>
    <div class="RDS_ISBN"></div>
    <div class="col-xs-12 col-md-7 col-lg-8 rds-dl-panel">9783123456789</div>
    <div class="RDS_SCOPE"></div>
    <div class="col-xs-12 col-md-7 col-lg-8 rds-dl-panel">300 pages</div>
</body>
</html>"""
309
tests/test_catalogue.py
Normal file
@@ -0,0 +1,309 @@
"""Tests for the Catalogue class, which interacts with the library catalogue."""
|
||||
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import pytest
|
||||
import requests
|
||||
from pytest_mock import MockerFixture
|
||||
|
||||
from bibapi.catalogue import Catalogue
|
||||
|
||||
|
||||
class TestCatalogue:
|
||||
"""Tests for the Catalogue class."""
|
||||
|
||||
def test_catalogue_initialization(self, mocker: MockerFixture):
|
||||
"""Test Catalogue initialization."""
|
||||
mocker.patch.object(Catalogue, "check_connection", return_value=True)
|
||||
catalogue = Catalogue()
|
||||
assert catalogue.timeout == 15
|
||||
|
||||
def test_catalogue_custom_timeout(self, mocker: MockerFixture):
|
||||
"""Test Catalogue initialization with custom timeout."""
|
||||
mocker.patch.object(Catalogue, "check_connection", return_value=True)
|
||||
catalogue = Catalogue(timeout=30)
|
||||
assert catalogue.timeout == 30
|
||||
|
||||
def test_check_book_exists(self, mocker: MockerFixture):
|
||||
"""Test the check_book_exists method of the Catalogue class."""
|
||||
mocker.patch.object(Catalogue, "check_connection", return_value=True)
|
||||
catalogue = Catalogue()
|
||||
|
||||
# Mock the get_book_links method to control its output
|
||||
mocker.patch.object(
|
||||
catalogue,
|
||||
"get_book_links",
|
||||
return_value=["link1", "link2"],
|
||||
)
|
||||
|
||||
# Test with a known existing book
|
||||
existing_book_searchterm = "1693321114"
|
||||
assert catalogue.check_book_exists(existing_book_searchterm) is True
|
||||
|
||||
# Change the mock to return an empty list for non-existing book
|
||||
mocker.patch.object(
|
||||
catalogue,
|
||||
"get_book_links",
|
||||
return_value=[],
|
||||
)
|
||||
|
||||
# Test with a known non-existing book
|
||||
non_existing_book_searchterm = "00000000009"
|
||||
assert catalogue.check_book_exists(non_existing_book_searchterm) is False
|
||||
|
||||
def test_no_connection_raises_error(self, mocker: MockerFixture):
|
||||
"""Test that a ConnectionError is raised with no internet connection."""
|
||||
# Mock the check_connection method to simulate no internet connection
|
||||
mocker.patch.object(
|
||||
Catalogue,
|
||||
"check_connection",
|
||||
return_value=False,
|
||||
)
|
||||
|
||||
with pytest.raises(ConnectionError, match="No internet connection available."):
|
||||
Catalogue()
|
||||
|
||||
def test_check_connection_success(self, mocker: MockerFixture):
|
||||
"""Test check_connection returns True on success."""
|
||||
mock_response = MagicMock()
|
||||
mock_response.status_code = 200
|
||||
mocker.patch("requests.get", return_value=mock_response)
|
||||
|
||||
catalogue = Catalogue.__new__(Catalogue)
|
||||
catalogue.timeout = 15
|
||||
assert catalogue.check_connection() is True
|
||||
|
||||
def test_check_connection_failure(self, mocker: MockerFixture):
|
||||
"""Test check_connection handles request exception."""
|
||||
mocker.patch(
|
||||
"requests.get",
|
||||
side_effect=requests.exceptions.RequestException("Network error"),
|
||||
)
|
||||
|
||||
catalogue = Catalogue.__new__(Catalogue)
|
||||
catalogue.timeout = 15
|
||||
result = catalogue.check_connection()
|
||||
assert result is None # Returns None on exception
|
||||
|
||||
def test_search_book(self, mocker: MockerFixture):
|
||||
"""Test search_book method."""
|
||||
mocker.patch.object(Catalogue, "check_connection", return_value=True)
|
||||
mock_response = MagicMock()
|
||||
mock_response.text = "<html>search results</html>"
|
||||
mocker.patch("requests.get", return_value=mock_response)
|
||||
|
||||
catalogue = Catalogue()
|
||||
result = catalogue.search_book("test search")
|
||||
assert result == "<html>search results</html>"
|
||||
|
||||
def test_search(self, mocker: MockerFixture):
|
||||
"""Test search method."""
|
||||
mocker.patch.object(Catalogue, "check_connection", return_value=True)
|
||||
mock_response = MagicMock()
|
||||
mock_response.text = "<html>detail page</html>"
|
||||
mocker.patch("requests.get", return_value=mock_response)
|
||||
|
||||
catalogue = Catalogue()
|
||||
result = catalogue.search("https://example.com/book/123")
|
||||
assert result == "<html>detail page</html>"
|
||||
|
||||
def test_get_book_links(self, mocker: MockerFixture, mock_catalogue_html):
|
||||
"""Test get_book_links method."""
|
||||
mocker.patch.object(Catalogue, "check_connection", return_value=True)
|
||||
mocker.patch.object(
|
||||
Catalogue,
|
||||
"search_book",
|
||||
return_value=mock_catalogue_html,
|
||||
)
|
||||
|
||||
catalogue = Catalogue()
|
||||
links = catalogue.get_book_links("test search")
|
||||
|
||||
assert len(links) == 1
|
||||
assert "https://rds.ibs-bw.de/opac/record/123" in links[0]
|
||||
|
||||
def test_in_library_with_ppn(self, mocker: MockerFixture):
|
||||
"""Test in_library method with valid PPN."""
|
||||
mocker.patch.object(Catalogue, "check_connection", return_value=True)
|
||||
mocker.patch.object(
|
||||
Catalogue,
|
||||
"get_book_links",
|
||||
return_value=["link1"],
|
||||
)
|
||||
|
||||
catalogue = Catalogue()
|
||||
assert catalogue.in_library("123456789") is True
|
||||
|
||||
def test_in_library_without_ppn(self, mocker: MockerFixture):
|
||||
"""Test in_library method with None PPN."""
|
||||
mocker.patch.object(Catalogue, "check_connection", return_value=True)
|
||||
|
||||
catalogue = Catalogue()
|
||||
assert catalogue.in_library(None) is False
|
||||
|
||||
def test_in_library_not_found(self, mocker: MockerFixture):
|
||||
"""Test in_library method when book not found."""
|
||||
mocker.patch.object(Catalogue, "check_connection", return_value=True)
|
||||
mocker.patch.object(
|
||||
Catalogue,
|
||||
"get_book_links",
|
||||
return_value=[],
|
||||
)
|
||||
|
||||
catalogue = Catalogue()
|
||||
assert catalogue.in_library("nonexistent") is False
|
||||
|
||||
def test_get_location_none_ppn(self, mocker: MockerFixture):
|
||||
"""Test get_location method with None PPN."""
|
||||
mocker.patch.object(Catalogue, "check_connection", return_value=True)
|
||||
|
||||
catalogue = Catalogue()
|
||||
assert catalogue.get_location(None) is None
|
||||
|
||||
def test_get_location_not_found(self, mocker: MockerFixture):
|
||||
"""Test get_location when book not found."""
|
||||
mocker.patch.object(Catalogue, "check_connection", return_value=True)
|
||||
mocker.patch.object(Catalogue, "get_book", return_value=None)
|
||||
|
||||
catalogue = Catalogue()
|
||||
assert catalogue.get_location("123") is None
|
||||
|
||||
def test_get_ppn(self, mocker: MockerFixture):
|
||||
"""Test get_ppn method with valid PPN format."""
|
||||
mocker.patch.object(Catalogue, "check_connection", return_value=True)
|
||||
mocker.patch.object(
|
||||
Catalogue,
|
||||
"get_book_links",
|
||||
return_value=["https://example.com/opac/record/1234567890"],
|
||||
)
|
||||
mocker.patch.object(Catalogue, "search", return_value="<html></html>")
|
||||
|
||||
catalogue = Catalogue()
|
||||
ppn = catalogue.get_ppn("test")
|
||||
assert ppn == "1234567890"
|
||||
|
||||
def test_get_ppn_with_x(self, mocker: MockerFixture):
|
||||
"""Test get_ppn method with PPN ending in X."""
|
||||
mocker.patch.object(Catalogue, "check_connection", return_value=True)
|
||||
mocker.patch.object(
|
||||
Catalogue,
|
||||
"get_book_links",
|
||||
return_value=["https://example.com/opac/record/123456789X"],
|
||||
)
|
||||
mocker.patch.object(Catalogue, "search", return_value="<html></html>")
|
||||
|
||||
catalogue = Catalogue()
|
||||
ppn = catalogue.get_ppn("test")
|
||||
assert ppn == "123456789X"
|
||||
|
||||
def test_get_semesterapparat_number(self, mocker: MockerFixture):
|
||||
"""Test get_semesterapparat_number method."""
|
||||
mocker.patch.object(Catalogue, "check_connection", return_value=True)
|
||||
mocker.patch.object(
|
||||
Catalogue,
|
||||
"get_book_links",
|
||||
return_value=["https://example.com/book"],
|
||||
)
|
||||
|
||||
html = """<html>
|
||||
<div class="col-xs-12 rds-dl RDS_LOCATION">
|
||||
Semesterapparat-42
|
||||
</div>
|
||||
</html>"""
|
||||
mocker.patch.object(Catalogue, "search", return_value=html)
|
||||
|
||||
catalogue = Catalogue()
|
||||
result = catalogue.get_semesterapparat_number("test")
|
||||
assert result == 42
|
||||
|
||||
def test_get_semesterapparat_number_handbibliothek(self, mocker: MockerFixture):
|
||||
"""Test get_semesterapparat_number with Handbibliothek location."""
|
||||
mocker.patch.object(Catalogue, "check_connection", return_value=True)
|
||||
mocker.patch.object(
|
||||
Catalogue,
|
||||
"get_book_links",
|
||||
return_value=["https://example.com/book"],
|
||||
)
|
||||
|
||||
html = """<html>
|
||||
<div class="col-xs-12 rds-dl RDS_LOCATION">
|
||||
Floor 1
|
||||
|
||||
Handbibliothek-Reference
|
||||
</div>
|
||||
</html>"""
|
||||
mocker.patch.object(Catalogue, "search", return_value=html)
|
||||
|
||||
catalogue = Catalogue()
|
||||
result = catalogue.get_semesterapparat_number("test")
|
||||
assert "Reference" in str(result) or "Handbibliothek" in str(result)
|
||||
|
||||
def test_get_semesterapparat_number_not_found(self, mocker: MockerFixture):
|
||||
"""Test get_semesterapparat_number when not found."""
|
||||
mocker.patch.object(Catalogue, "check_connection", return_value=True)
|
||||
mocker.patch.object(Catalogue, "get_book_links", return_value=[])
|
||||
|
||||
catalogue = Catalogue()
|
||||
result = catalogue.get_semesterapparat_number("test")
|
||||
assert result == 0
|
||||
|
||||
def test_get_author(self, mocker: MockerFixture):
|
||||
"""Test get_author method."""
|
||||
mocker.patch.object(Catalogue, "check_connection", return_value=True)
|
||||
mocker.patch.object(
|
||||
Catalogue,
|
||||
"get_book_links",
|
||||
return_value=["https://example.com/book"],
|
||||
)
|
||||
|
||||
html = """<html>
|
||||
<div class="col-xs-12 col-md-5 col-lg-4 rds-dl-head RDS_PERSON"></div>
|
||||
<div class="col-xs-12 col-md-7 col-lg-8 rds-dl-panel">
|
||||
<a href="#">Author One</a>
|
||||
<a href="#">Author Two</a>
|
||||
</div>
|
||||
</html>"""
|
||||
mocker.patch.object(Catalogue, "search", return_value=html)
|
||||
|
||||
catalogue = Catalogue()
|
||||
author = catalogue.get_author("kid:123")
|
||||
assert "Author One" in author
|
||||
assert "Author Two" in author
|
||||
assert "; " in author # Separator
|
||||
|
||||
def test_get_signature(self, mocker: MockerFixture):
|
||||
"""Test get_signature method."""
|
||||
mocker.patch.object(Catalogue, "check_connection", return_value=True)
|
||||
mocker.patch.object(
|
||||
Catalogue,
|
||||
"get_book_links",
|
||||
return_value=["https://example.com/book"],
|
||||
)
|
||||
|
||||
html = """<html>
|
||||
<div class="panel-body">
|
||||
<div class="rds-dl RDS_SIGNATURE">
|
||||
<div class="rds-dl-panel">ABC 123</div>
|
||||
</div>
|
||||
<div class="rds-dl RDS_STATUS">
|
||||
<div class="rds-dl-panel">Available</div>
|
||||
</div>
|
||||
<div class="rds-dl RDS_LOCATION">
|
||||
<div class="rds-dl-panel">Semesterapparat-1</div>
|
||||
</div>
|
||||
</div>
|
||||
</html>"""
|
||||
mocker.patch.object(Catalogue, "search", return_value=html)
|
||||
|
||||
catalogue = Catalogue()
|
||||
signature = catalogue.get_signature("9783123456789")
|
||||
assert signature == "ABC 123"
|
||||
|
||||
def test_get_signature_not_found(self, mocker: MockerFixture):
|
||||
"""Test get_signature when not found."""
|
||||
mocker.patch.object(Catalogue, "check_connection", return_value=True)
|
||||
mocker.patch.object(Catalogue, "get_book_links", return_value=[])
|
||||
|
||||
catalogue = Catalogue()
|
||||
signature = catalogue.get_signature("nonexistent")
|
||||
assert signature is None
|
||||
112
tests/test_init.py
Normal file
@@ -0,0 +1,112 @@
"""Tests for the __init__.py wrapper classes."""
|
||||
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
import requests
|
||||
|
||||
from bibapi import DNB, HBZ, HEBIS, KOBV, OEVK, SWB
|
||||
from bibapi.schemas.api_types import (
|
||||
ALMASchema,
|
||||
DublinCoreSchema,
|
||||
PicaSchema,
|
||||
)
|
||||
|
||||
|
||||
class TestSWBWrapper:
|
||||
"""Tests for the SWB wrapper class."""
|
||||
|
||||
def test_swb_initialization(self):
|
||||
"""Test SWB initializes with correct config."""
|
||||
api = SWB()
|
||||
assert api.site == "SWB"
|
||||
assert "sru.k10plus.de" in api.url
|
||||
assert api.prefix == PicaSchema
|
||||
assert api.library_identifier == "924$b"
|
||||
api.close()
|
||||
|
||||
@patch.object(requests.Session, "get")
|
||||
def test_swb_getbooks(self, mock_get):
|
||||
"""Test SWB getBooks method."""
|
||||
mock_response = MagicMock()
|
||||
mock_response.status_code = 200
|
||||
mock_response.content = b"""<?xml version="1.0"?>
|
||||
<zs:searchRetrieveResponse xmlns:zs="http://www.loc.gov/zing/srw/">
|
||||
<zs:version>1.1</zs:version>
|
||||
<zs:numberOfRecords>0</zs:numberOfRecords>
|
||||
</zs:searchRetrieveResponse>"""
|
||||
mock_get.return_value = mock_response
|
||||
|
||||
api = SWB()
|
||||
books = api.getBooks(["TITLE=Test"])
|
||||
assert isinstance(books, list)
|
||||
api.close()
|
||||
|
||||
|
||||
class TestDNBWrapper:
|
||||
"""Tests for the DNB wrapper class."""
|
||||
|
||||
def test_dnb_initialization(self):
|
||||
"""Test DNB initializes with correct config.
|
||||
|
||||
Note: DNB class has a bug - it doesn't set library_identifier before
|
||||
calling super().__init__. This test documents the bug.
|
||||
"""
|
||||
# DNB has a bug - library_identifier is not set
|
||||
with pytest.raises(AttributeError, match="library_identifier"):
|
||||
api = DNB()
|
||||
|
||||
|
||||
class TestKOBVWrapper:
|
||||
"""Tests for the KOBV wrapper class."""
|
||||
|
||||
def test_kobv_initialization(self):
|
||||
"""Test KOBV initializes with correct config."""
|
||||
api = KOBV()
|
||||
assert api.site == "KOBV"
|
||||
assert "sru.kobv.de" in api.url
|
||||
assert api.prefix == DublinCoreSchema
|
||||
assert api.library_identifier == "924$b"
|
||||
api.close()
|
||||
|
||||
|
||||
class TestHEBISWrapper:
|
||||
"""Tests for the HEBIS wrapper class."""
|
||||
|
||||
def test_hebis_initialization(self):
|
||||
"""Test HEBIS initializes with correct config."""
|
||||
api = HEBIS()
|
||||
assert api.site == "HEBIS"
|
||||
assert "sru.hebis.de" in api.url
|
||||
assert api.prefix == PicaSchema
|
||||
assert api.library_identifier == "924$b"
|
||||
# HEBIS has specific replace patterns
|
||||
assert " " in api.replace
|
||||
# HEBIS has unsupported args
|
||||
assert "YEAR" in api.notsupported_args
|
||||
api.close()
|
||||
|
||||
|
||||
class TestOEVKWrapper:
|
||||
"""Tests for the OEVK wrapper class."""
|
||||
|
||||
def test_oevk_initialization(self):
|
||||
"""Test OEVK initializes with correct config."""
|
||||
api = OEVK()
|
||||
assert api.site == "OEVK"
|
||||
assert api.prefix == PicaSchema
|
||||
assert api.library_identifier == "924$b"
|
||||
api.close()
|
||||
|
||||
|
||||
class TestHBZWrapper:
|
||||
"""Tests for the HBZ wrapper class."""
|
||||
|
||||
def test_hbz_initialization(self):
|
||||
"""Test HBZ initializes with correct config."""
|
||||
api = HBZ()
|
||||
assert api.site == "HBZ"
|
||||
assert "alma.exlibrisgroup.com" in api.url
|
||||
assert api.prefix == ALMASchema
|
||||
assert api.library_identifier == "852$a"
|
||||
api.close()
|
||||
486
tests/test_marcxml_parser.py
Normal file
@@ -0,0 +1,486 @@
"""Tests for MARCXML parsing functions in sru.py."""
|
||||
|
||||
import xml.etree.ElementTree as ET
|
||||
|
||||
import pytest
|
||||
|
||||
from bibapi.schemas.marcxml import (
|
||||
DataField,
|
||||
SubField,
|
||||
)
|
||||
from bibapi.sru import (
|
||||
_smart_join_title,
|
||||
_text,
|
||||
controlfield_value,
|
||||
datafield_value,
|
||||
datafields_value,
|
||||
find_datafields_with_subfields,
|
||||
first_subfield_value,
|
||||
first_subfield_value_from_fields,
|
||||
iter_datafields,
|
||||
parse_marc_record,
|
||||
parse_search_retrieve_response,
|
||||
subfield_values,
|
||||
subfield_values_from_fields,
|
||||
)
|
||||
|
||||
# --- Fixtures for sample XML data ---
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def minimal_marc_xml() -> str:
|
||||
"""Minimal MARC record XML string."""
|
||||
return """<?xml version="1.0" encoding="UTF-8"?>
|
||||
<marc:record xmlns:marc="http://www.loc.gov/MARC21/slim">
|
||||
<marc:leader>00000nam a22000001i 4500</marc:leader>
|
||||
<marc:controlfield tag="001">PPN12345</marc:controlfield>
|
||||
<marc:controlfield tag="005">20230101120000.0</marc:controlfield>
|
||||
<marc:datafield tag="245" ind1="1" ind2="0">
|
||||
<marc:subfield code="a">Test Title</marc:subfield>
|
||||
<marc:subfield code="b">A Subtitle</marc:subfield>
|
||||
</marc:datafield>
|
||||
</marc:record>"""
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def full_marc_xml() -> str:
|
||||
"""More complete MARC record for testing."""
|
||||
return """<?xml version="1.0" encoding="UTF-8"?>
|
||||
<marc:record xmlns:marc="http://www.loc.gov/MARC21/slim">
|
||||
<marc:leader>00000nam a22000001i 4500</marc:leader>
|
||||
<marc:controlfield tag="001">PPN98765</marc:controlfield>
|
||||
<marc:controlfield tag="005">20231215150000.0</marc:controlfield>
|
||||
<marc:controlfield tag="008">230101s2023 gw 000 0 ger d</marc:controlfield>
|
||||
<marc:datafield tag="020" ind1=" " ind2=" ">
|
||||
<marc:subfield code="a">9783123456789</marc:subfield>
|
||||
</marc:datafield>
|
||||
<marc:datafield tag="020" ind1=" " ind2=" ">
|
||||
<marc:subfield code="a">9783987654321</marc:subfield>
|
||||
</marc:datafield>
|
||||
<marc:datafield tag="041" ind1=" " ind2=" ">
|
||||
<marc:subfield code="a">ger</marc:subfield>
|
||||
<marc:subfield code="a">eng</marc:subfield>
|
||||
</marc:datafield>
|
||||
<marc:datafield tag="245" ind1="1" ind2="0">
|
||||
<marc:subfield code="a">Comprehensive Test Book</marc:subfield>
|
||||
<marc:subfield code="b">With Many Details</marc:subfield>
|
||||
<marc:subfield code="c">by Author Name</marc:subfield>
|
||||
</marc:datafield>
|
||||
<marc:datafield tag="250" ind1=" " ind2=" ">
|
||||
<marc:subfield code="a">3rd edition</marc:subfield>
|
||||
</marc:datafield>
|
||||
<marc:datafield tag="264" ind1=" " ind2="1">
|
||||
<marc:subfield code="a">Berlin</marc:subfield>
|
||||
<marc:subfield code="b">Test Publisher</marc:subfield>
|
||||
<marc:subfield code="c">2023</marc:subfield>
|
||||
</marc:datafield>
|
||||
<marc:datafield tag="300" ind1=" " ind2=" ">
|
||||
<marc:subfield code="a">456 pages</marc:subfield>
|
||||
</marc:datafield>
|
||||
<marc:datafield tag="338" ind1=" " ind2=" ">
|
||||
<marc:subfield code="a">Band</marc:subfield>
|
||||
</marc:datafield>
|
||||
<marc:datafield tag="700" ind1="1" ind2=" ">
|
||||
<marc:subfield code="a">Author, First</marc:subfield>
|
||||
</marc:datafield>
|
||||
<marc:datafield tag="700" ind1="1" ind2=" ">
|
||||
<marc:subfield code="a">Author, Second</marc:subfield>
|
||||
</marc:datafield>
|
||||
<marc:datafield tag="924" ind1=" " ind2=" ">
|
||||
<marc:subfield code="9">Frei 129</marc:subfield>
|
||||
<marc:subfield code="g">ABC 123</marc:subfield>
|
||||
<marc:subfield code="b">DE-Frei129</marc:subfield>
|
||||
</marc:datafield>
|
||||
</marc:record>"""
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def sru_response_xml() -> bytes:
|
||||
"""Complete SRU searchRetrieveResponse XML."""
|
||||
return b"""<?xml version="1.0" encoding="UTF-8"?>
|
||||
<zs:searchRetrieveResponse xmlns:zs="http://www.loc.gov/zing/srw/"
|
||||
xmlns:marc="http://www.loc.gov/MARC21/slim">
|
||||
<zs:version>1.1</zs:version>
|
||||
<zs:numberOfRecords>2</zs:numberOfRecords>
|
||||
<zs:records>
|
||||
<zs:record>
|
||||
<zs:recordSchema>marcxml</zs:recordSchema>
|
||||
<zs:recordPacking>xml</zs:recordPacking>
|
||||
<zs:recordData>
|
||||
<marc:record>
|
||||
<marc:leader>00000nam a22</marc:leader>
|
||||
<marc:controlfield tag="001">PPN001</marc:controlfield>
|
||||
<marc:datafield tag="245" ind1=" " ind2=" ">
|
||||
<marc:subfield code="a">First Book</marc:subfield>
|
||||
</marc:datafield>
|
||||
</marc:record>
|
||||
</zs:recordData>
|
||||
<zs:recordPosition>1</zs:recordPosition>
|
||||
</zs:record>
|
||||
<zs:record>
|
||||
<zs:recordSchema>marcxml</zs:recordSchema>
|
||||
<zs:recordPacking>xml</zs:recordPacking>
|
||||
<zs:recordData>
|
||||
<marc:record>
|
||||
<marc:leader>00000nam a22</marc:leader>
|
||||
<marc:controlfield tag="001">PPN002</marc:controlfield>
|
||||
<marc:datafield tag="245" ind1=" " ind2=" ">
|
||||
<marc:subfield code="a">Second Book</marc:subfield>
|
||||
</marc:datafield>
|
||||
</marc:record>
|
||||
</zs:recordData>
|
||||
<zs:recordPosition>2</zs:recordPosition>
|
||||
</zs:record>
|
||||
</zs:records>
|
||||
<zs:echoedSearchRetrieveRequest>
|
||||
<zs:version>1.1</zs:version>
|
||||
<zs:query>pica.tit=Test</zs:query>
|
||||
<zs:maximumRecords>100</zs:maximumRecords>
|
||||
<zs:recordPacking>xml</zs:recordPacking>
|
||||
<zs:recordSchema>marcxml</zs:recordSchema>
|
||||
</zs:echoedSearchRetrieveRequest>
|
||||
</zs:searchRetrieveResponse>"""
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def sru_response_no_records() -> bytes:
|
||||
"""SRU response with zero records."""
|
||||
return b"""<?xml version="1.0" encoding="UTF-8"?>
|
||||
<zs:searchRetrieveResponse xmlns:zs="http://www.loc.gov/zing/srw/">
|
||||
<zs:version>1.1</zs:version>
|
||||
<zs:numberOfRecords>0</zs:numberOfRecords>
|
||||
</zs:searchRetrieveResponse>"""
|
||||
|
||||
|
||||
# --- Tests for _text helper ---
|
||||
|
||||
|
||||
class TestTextHelper:
|
||||
def test_text_with_element_and_text(self):
|
||||
elem = ET.fromstring("<tag>Hello</tag>")
|
||||
assert _text(elem) == "Hello"
|
||||
|
||||
def test_text_with_element_no_text(self):
|
||||
elem = ET.fromstring("<tag></tag>")
|
||||
assert _text(elem) == ""
|
||||
|
||||
def test_text_with_none(self):
|
||||
assert _text(None) == ""
|
||||
|
||||
def test_text_with_whitespace(self):
|
||||
elem = ET.fromstring("<tag> spaced </tag>")
|
||||
assert _text(elem) == " spaced "
|
||||
|
||||
|
||||
# --- Tests for parse_marc_record ---
|
||||
|
||||
|
||||
class TestParseMarcRecord:
|
||||
def test_parse_minimal_record(self, minimal_marc_xml):
|
||||
root = ET.fromstring(minimal_marc_xml)
|
||||
record = parse_marc_record(root)
|
||||
|
||||
assert record.leader == "00000nam a22000001i 4500"
|
||||
assert len(record.controlfields) == 2
|
||||
assert record.controlfields[0].tag == "001"
|
||||
assert record.controlfields[0].value == "PPN12345"
|
||||
|
||||
def test_parse_datafields(self, minimal_marc_xml):
|
||||
root = ET.fromstring(minimal_marc_xml)
|
||||
record = parse_marc_record(root)
|
||||
|
||||
assert len(record.datafields) == 1
|
||||
df = record.datafields[0]
|
||||
assert df.tag == "245"
|
||||
assert df.ind1 == "1"
|
||||
assert df.ind2 == "0"
|
||||
assert len(df.subfields) == 2
|
||||
assert df.subfields[0].code == "a"
|
||||
assert df.subfields[0].value == "Test Title"
|
||||
|
||||
def test_parse_full_record(self, full_marc_xml):
|
||||
root = ET.fromstring(full_marc_xml)
|
||||
record = parse_marc_record(root)
|
||||
|
||||
assert len(record.controlfields) == 3
|
||||
# Check multiple datafields
|
||||
tags = [df.tag for df in record.datafields]
|
||||
assert "020" in tags
|
||||
assert "245" in tags
|
||||
assert "700" in tags
|
||||
assert "924" in tags
|
||||
|
||||
def test_parse_multiple_subfields_same_code(self, full_marc_xml):
|
||||
root = ET.fromstring(full_marc_xml)
|
||||
record = parse_marc_record(root)
|
||||
|
||||
# Find 041 field with multiple $a subfields
|
||||
df_041 = next(df for df in record.datafields if df.tag == "041")
|
||||
a_values = [sf.value for sf in df_041.subfields if sf.code == "a"]
|
||||
assert a_values == ["ger", "eng"]
|
||||
|
||||
|
||||
# --- Tests for parse_search_retrieve_response ---
|
||||
|
||||
|
||||
class TestParseSearchRetrieveResponse:
|
||||
def test_parse_response_with_records(self, sru_response_xml):
|
||||
response = parse_search_retrieve_response(sru_response_xml)
|
||||
|
||||
assert response.version == "1.1"
|
||||
assert response.numberOfRecords == 2
|
||||
assert len(response.records) == 2
|
||||
|
||||
def test_parse_response_record_details(self, sru_response_xml):
|
||||
response = parse_search_retrieve_response(sru_response_xml)
|
||||
|
||||
rec1 = response.records[0]
|
||||
assert rec1.recordSchema == "marcxml"
|
||||
assert rec1.recordPacking == "xml"
|
||||
assert rec1.recordPosition == 1
|
||||
assert controlfield_value(rec1.recordData, "001") == "PPN001"
|
||||
|
||||
def test_parse_response_no_records(self, sru_response_no_records):
|
||||
response = parse_search_retrieve_response(sru_response_no_records)
|
||||
|
||||
assert response.version == "1.1"
|
||||
assert response.numberOfRecords == 0
|
||||
assert len(response.records) == 0
|
||||
|
||||
def test_parse_echoed_request(self, sru_response_xml):
|
||||
response = parse_search_retrieve_response(sru_response_xml)
|
||||
|
||||
echoed = response.echoedSearchRetrieveRequest
|
||||
assert echoed is not None
|
||||
assert echoed.version == "1.1"
|
||||
assert echoed.query == "pica.tit=Test"
|
||||
assert echoed.maximumRecords == 100
|
||||
assert echoed.recordSchema == "marcxml"
|
||||
|
||||
def test_parse_response_as_string(self, sru_response_xml):
|
||||
# Should also work with string input
|
||||
response = parse_search_retrieve_response(sru_response_xml.decode("utf-8"))
|
||||
assert response.numberOfRecords == 2
|
||||
|
||||
|
||||
# --- Tests for query helper functions ---
|
||||
|
||||
|
||||
class TestIterDatafields:
|
||||
def test_iter_all_datafields(self, full_marc_xml):
|
||||
root = ET.fromstring(full_marc_xml)
|
||||
record = parse_marc_record(root)
|
||||
|
||||
all_fields = list(iter_datafields(record))
|
||||
assert len(all_fields) == len(record.datafields)
|
||||
|
||||
def test_iter_datafields_by_tag(self, full_marc_xml):
|
||||
root = ET.fromstring(full_marc_xml)
|
||||
record = parse_marc_record(root)
|
||||
|
||||
fields_020 = list(iter_datafields(record, tag="020"))
|
||||
assert len(fields_020) == 2 # Two ISBN fields
|
||||
|
||||
def test_iter_datafields_by_indicator(self, full_marc_xml):
|
||||
root = ET.fromstring(full_marc_xml)
|
||||
record = parse_marc_record(root)
|
||||
|
||||
fields = list(iter_datafields(record, tag="264", ind2="1"))
|
||||
assert len(fields) == 1
|
||||
|
||||
|
||||
class TestSubfieldValues:
|
||||
def test_subfield_values_single(self, full_marc_xml):
|
||||
root = ET.fromstring(full_marc_xml)
|
||||
record = parse_marc_record(root)
|
||||
|
||||
values = subfield_values(record, "245", "a")
|
||||
assert values == ["Comprehensive Test Book"]
|
||||
|
||||
def test_subfield_values_multiple(self, full_marc_xml):
|
||||
root = ET.fromstring(full_marc_xml)
|
||||
record = parse_marc_record(root)
|
||||
|
||||
# Multiple ISBN values
|
||||
values = subfield_values(record, "020", "a")
|
||||
assert len(values) == 2
|
||||
assert "9783123456789" in values
|
||||
assert "9783987654321" in values
|
||||
|
||||
def test_subfield_values_empty(self, full_marc_xml):
|
||||
root = ET.fromstring(full_marc_xml)
|
||||
record = parse_marc_record(root)
|
||||
|
||||
values = subfield_values(record, "999", "x")
|
||||
assert values == []
|
||||
|
||||
|
||||
class TestFirstSubfieldValue:
|
||||
def test_first_subfield_value_found(self, full_marc_xml):
|
||||
root = ET.fromstring(full_marc_xml)
|
||||
record = parse_marc_record(root)
|
||||
|
||||
value = first_subfield_value(record, "245", "a")
|
||||
assert value == "Comprehensive Test Book"
|
||||
|
||||
def test_first_subfield_value_not_found(self, full_marc_xml):
|
||||
root = ET.fromstring(full_marc_xml)
|
||||
record = parse_marc_record(root)
|
||||
|
||||
value = first_subfield_value(record, "999", "x")
|
||||
assert value is None
|
||||
|
||||
def test_first_subfield_value_with_default(self, full_marc_xml):
|
||||
root = ET.fromstring(full_marc_xml)
|
||||
record = parse_marc_record(root)
|
||||
|
||||
value = first_subfield_value(record, "999", "x", default="N/A")
|
||||
assert value == "N/A"
|
||||
|
||||
def test_first_subfield_value_with_indicator(self, full_marc_xml):
|
||||
root = ET.fromstring(full_marc_xml)
|
||||
record = parse_marc_record(root)
|
||||
|
||||
value = first_subfield_value(record, "264", "c", ind2="1")
|
||||
assert value == "2023"
|
||||
|
||||
|
||||
class TestControlFieldValue:
|
||||
def test_controlfield_value_found(self, full_marc_xml):
|
||||
root = ET.fromstring(full_marc_xml)
|
||||
record = parse_marc_record(root)
|
||||
|
||||
value = controlfield_value(record, "001")
|
||||
assert value == "PPN98765"
|
||||
|
||||
def test_controlfield_value_not_found(self, full_marc_xml):
|
||||
root = ET.fromstring(full_marc_xml)
|
||||
record = parse_marc_record(root)
|
||||
|
||||
value = controlfield_value(record, "999")
|
||||
assert value is None
|
||||
|
||||
def test_controlfield_value_with_default(self, full_marc_xml):
|
||||
root = ET.fromstring(full_marc_xml)
|
||||
record = parse_marc_record(root)
|
||||
|
||||
value = controlfield_value(record, "999", default="unknown")
|
||||
assert value == "unknown"
|
||||
|
||||
|
||||
class TestFindDatafieldsWithSubfields:
|
||||
def test_find_with_where_all(self, full_marc_xml):
|
||||
root = ET.fromstring(full_marc_xml)
|
||||
record = parse_marc_record(root)
|
||||
|
||||
fields = find_datafields_with_subfields(
|
||||
record,
|
||||
"924",
|
||||
where_all={"9": "Frei 129"},
|
||||
)
|
||||
assert len(fields) == 1
|
||||
assert fields[0].tag == "924"
|
||||
|
||||
def test_find_with_where_all_not_found(self, full_marc_xml):
|
||||
root = ET.fromstring(full_marc_xml)
|
||||
record = parse_marc_record(root)
|
||||
|
||||
fields = find_datafields_with_subfields(
|
||||
record,
|
||||
"924",
|
||||
where_all={"9": "NonExistent"},
|
||||
)
|
||||
assert len(fields) == 0
|
||||
|
||||
def test_find_with_casefold(self, full_marc_xml):
|
||||
root = ET.fromstring(full_marc_xml)
|
||||
record = parse_marc_record(root)
|
||||
|
||||
fields = find_datafields_with_subfields(
|
||||
record,
|
||||
"924",
|
||||
where_all={"9": "frei 129"}, # lowercase
|
||||
casefold=True,
|
||||
)
|
||||
assert len(fields) == 1
|
||||
|
||||
|
||||
class TestDatafieldValue:
|
||||
def test_datafield_value_found(self):
|
||||
df = DataField(
|
||||
tag="245",
|
||||
subfields=[
|
||||
SubField(code="a", value="Title"),
|
||||
SubField(code="b", value="Subtitle"),
|
||||
],
|
||||
)
|
||||
assert datafield_value(df, "a") == "Title"
|
||||
assert datafield_value(df, "b") == "Subtitle"
|
||||
|
||||
def test_datafield_value_not_found(self):
|
||||
df = DataField(tag="245", subfields=[SubField(code="a", value="Title")])
|
||||
assert datafield_value(df, "z") is None
|
||||
|
||||
def test_datafield_value_with_default(self):
|
||||
df = DataField(tag="245", subfields=[])
|
||||
assert datafield_value(df, "a", default="N/A") == "N/A"
|
||||
|
||||
|
||||
class TestDatafieldsValue:
|
||||
def test_datafields_value_found(self):
|
||||
fields = [
|
||||
DataField(tag="700", subfields=[SubField(code="a", value="Author One")]),
|
||||
DataField(tag="700", subfields=[SubField(code="a", value="Author Two")]),
|
||||
]
|
||||
assert datafields_value(fields, "a") == "Author One"
|
||||
|
||||
def test_datafields_value_empty_list(self):
|
||||
assert datafields_value([], "a") is None
|
||||
|
||||
|
||||
class TestSubfieldValuesFromFields:
|
||||
def test_values_from_multiple_fields(self):
|
||||
fields = [
|
||||
DataField(tag="700", subfields=[SubField(code="a", value="Author One")]),
|
||||
DataField(tag="700", subfields=[SubField(code="a", value="Author Two")]),
|
||||
]
|
||||
values = subfield_values_from_fields(fields, "a")
|
||||
assert values == ["Author One", "Author Two"]
|
||||
|
||||
|
||||
class TestFirstSubfieldValueFromFields:
|
||||
def test_first_value_from_fields(self):
|
||||
fields = [
|
||||
DataField(tag="700", subfields=[SubField(code="a", value="First")]),
|
||||
DataField(tag="700", subfields=[SubField(code="a", value="Second")]),
|
||||
]
|
||||
assert first_subfield_value_from_fields(fields, "a") == "First"
|
||||
|
||||
|
||||
# --- Tests for _smart_join_title ---
|
||||
|
||||
|
||||
class TestSmartJoinTitle:
|
||||
def test_join_with_subtitle(self):
|
||||
result = _smart_join_title("Main Title", "Subtitle")
|
||||
assert result == "Main Title : Subtitle"
|
||||
|
||||
def test_join_without_subtitle(self):
|
||||
result = _smart_join_title("Main Title", None)
|
||||
assert result == "Main Title"
|
||||
|
||||
def test_join_with_empty_subtitle(self):
|
||||
result = _smart_join_title("Main Title", "")
|
||||
assert result == "Main Title"
|
||||
|
||||
def test_join_with_existing_colon(self):
|
||||
result = _smart_join_title("Main Title:", "Subtitle")
|
||||
assert result == "Main Title: Subtitle"
|
||||
|
||||
def test_join_with_existing_semicolon(self):
|
||||
result = _smart_join_title("Main Title;", "More")
|
||||
assert result == "Main Title; More"
|
||||
|
||||
def test_join_strips_whitespace(self):
|
||||
result = _smart_join_title(" Main Title ", " Subtitle ")
|
||||
assert result == "Main Title : Subtitle"
|
||||
244
tests/test_schemas.py
Normal file
@@ -0,0 +1,244 @@
"""Tests for schema modules."""
|
||||
|
||||
import json
|
||||
|
||||
import pytest
|
||||
|
||||
from bibapi.schemas.api_types import (
|
||||
ALMASchema,
|
||||
DNBSchema,
|
||||
DublinCoreSchema,
|
||||
HBZSchema,
|
||||
HebisSchema,
|
||||
KOBVSchema,
|
||||
OEVKSchema,
|
||||
PicaSchema,
|
||||
SWBSchema,
|
||||
)
|
||||
from bibapi.schemas.bookdata import BookData
|
||||
from bibapi.schemas.errors import BibAPIError, CatalogueError, NetworkError
|
||||
from bibapi.sru import QueryTransformer
|
||||
|
||||
# --- QueryTransformer tests with different schemas ---
|
||||
|
||||
arguments = [
|
||||
"TITLE=Java ist auch eine Insel",
|
||||
"AUTHOR=Ullenboom, Christian",
|
||||
"YEAR=2020",
|
||||
"PPN=1693321114",
|
||||
]
|
||||
|
||||
|
||||
def test_pica_schema():
|
||||
transformer = QueryTransformer(PicaSchema, arguments)
|
||||
transformed = transformer.transform()
|
||||
assert len(transformed) == 4
|
||||
assert transformed[0].startswith(PicaSchema.TITLE.value)
|
||||
assert transformed[1].startswith(PicaSchema.AUTHOR.value)
|
||||
assert transformed[2].startswith(PicaSchema.YEAR.value)
|
||||
assert transformed[3].startswith(PicaSchema.PPN.value)
|
||||
|
||||
|
||||
def test_alma_schema():
|
||||
transformer = QueryTransformer(ALMASchema, arguments)
|
||||
transformed = transformer.transform()
|
||||
assert len(transformed) == 3 # PPN is not supported
|
||||
assert transformed[0].startswith(ALMASchema.TITLE.value)
|
||||
assert transformed[1].startswith(ALMASchema.AUTHOR.value)
|
||||
assert transformed[2].startswith(ALMASchema.YEAR.value)
|
||||
|
||||
|
||||
def test_dublin_core_schema():
|
||||
transformer = QueryTransformer(DublinCoreSchema, arguments)
|
||||
transformed = transformer.transform()
|
||||
assert len(transformed) == 3 # YEAR is supported, PPN is not
|
||||
assert transformed[0].startswith(DublinCoreSchema.TITLE.value)
|
||||
assert transformed[1].startswith(DublinCoreSchema.AUTHOR.value)
|
||||
assert transformed[2].startswith(DublinCoreSchema.YEAR.value)
|
||||
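
# Worked example of what the transformer produces (illustrative; the exact
# prefix strings live in bibapi.schemas.api_types): with PicaSchema, each
# transformed entry is the schema prefix joined with the search value, so the
# first entry starts with PicaSchema.TITLE.value, roughly
# "pica.tit=Java ist auch eine Insel", matching the pica.tit queries used in
# the SRU tests.
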

# --- API Schema configuration tests ---


class TestApiSchemas:
    """Tests for API schema configurations."""

    def test_swb_schema_config(self):
        """Test SWB schema configuration."""
        assert SWBSchema.NAME.value == "SWB"
        assert "sru.k10plus.de" in SWBSchema.URL.value
        assert SWBSchema.ARGSCHEMA.value == PicaSchema
        assert SWBSchema.LIBRARY_NAME_LOCATION_FIELD.value == "924$b"

    def test_dnb_schema_config(self):
        """Test DNB schema configuration."""
        assert DNBSchema.NAME.value == "DNB"
        assert "services.dnb.de" in DNBSchema.URL.value
        assert DNBSchema.ARGSCHEMA.value == DublinCoreSchema

    def test_kobv_schema_config(self):
        """Test KOBV schema configuration."""
        assert KOBVSchema.NAME.value == "KOBV"
        assert "sru.kobv.de" in KOBVSchema.URL.value
        assert KOBVSchema.ARGSCHEMA.value == DublinCoreSchema

    def test_hebis_schema_config(self):
        """Test HEBIS schema configuration."""
        assert HebisSchema.NAME.value == "HEBIS"
        assert "sru.hebis.de" in HebisSchema.URL.value
        assert HebisSchema.ARGSCHEMA.value == PicaSchema
        # HEBIS has specific character replacements
        assert " " in HebisSchema.REPLACE.value

    def test_oevk_schema_config(self):
        """Test OEVK schema configuration."""
        assert OEVKSchema.NAME.value == "OEVK"
        assert OEVKSchema.ARGSCHEMA.value == PicaSchema

    def test_hbz_schema_config(self):
        """Test HBZ schema configuration."""
        assert HBZSchema.NAME.value == "HBZ"
        assert HBZSchema.ARGSCHEMA.value == ALMASchema
        assert HBZSchema.LIBRARY_NAME_LOCATION_FIELD.value == "852$a"
        # HBZ doesn't support PPN
        assert "PPN" in HBZSchema.NOTSUPPORTEDARGS.value


# --- BookData tests ---


class TestBookData:
    """Tests for the BookData class."""

    def test_bookdata_creation_defaults(self):
        """Test BookData creation with defaults."""
        book = BookData()
        assert book.ppn is None
        assert book.title is None
        assert book.in_apparat is False
        assert book.in_library is False

    def test_bookdata_creation_with_values(self):
        """Test BookData creation with values."""
        book = BookData(
            ppn="123456",
            title="Test Book",
            signature="ABC 123",
            year=2023,
            isbn=["9783123456789"],
        )
        assert book.ppn == "123456"
        assert book.title == "Test Book"
        assert book.signature == "ABC 123"
        assert book.year == "2023"  # Converted to string without non-digits
        assert book.in_library is True  # Because signature exists

    def test_bookdata_post_init_year_cleaning(self):
        """Test that year is cleaned of non-digits."""
        book = BookData(year="2023 [erschienen]")
        assert book.year == "2023"

    def test_bookdata_post_init_language_normalization(self):
        """Test language list normalization."""
        book = BookData(language=["ger", "eng", " fra "])
        assert book.language == "ger,eng,fra"

    def test_bookdata_post_init_library_location(self):
        """Test library_location is converted to string."""
        book = BookData(library_location=123)
        assert book.library_location == "123"

    def test_bookdata_from_dict(self):
        """Test BookData.from_dict method."""
        book = BookData()
        data = {"ppn": "123", "title": "Test", "year": "2023"}
        book.from_dict(data)
        assert book.ppn == "123"
        assert book.title == "Test"

    def test_bookdata_merge(self):
        """Test BookData.merge method."""
        book1 = BookData(ppn="123", title="Book 1")
        book2 = BookData(title="Book 2", author="Author", isbn=["978123"])

        book1.merge(book2)
        assert book1.ppn == "123"  # Original value preserved
        assert book1.title == "Book 1"  # Original value preserved (not None)
        assert book1.author == "Author"  # Merged from book2
        assert "978123" in book1.isbn  # Merged list

    def test_bookdata_merge_lists(self):
        """Test BookData.merge with list merging."""
        book1 = BookData(isbn=["978123"])
        book2 = BookData(isbn=["978456", "978123"])  # Has duplicate

        book1.merge(book2)
        # Should have both ISBNs but no duplicates
        assert len(book1.isbn) == 2
        assert "978123" in book1.isbn
        assert "978456" in book1.isbn

    def test_bookdata_to_dict(self):
        """Test BookData.to_dict property."""
        book = BookData(ppn="123", title="Test Book")
        json_str = book.to_dict
        data = json.loads(json_str)
        assert data["ppn"] == "123"
        assert data["title"] == "Test Book"
        assert "old_book" not in data  # Should be removed

    def test_bookdata_from_string(self):
        """Test BookData.from_string method."""
        json_str = '{"ppn": "123", "title": "Test"}'
        book = BookData().from_string(json_str)
        assert book.ppn == "123"
        assert book.title == "Test"

    def test_bookdata_edition_number(self):
        """Test BookData.edition_number property."""
        book = BookData(edition="3rd edition")
        assert book.edition_number == 3

        book2 = BookData(edition="First edition")
        assert book2.edition_number == 0  # No digit found

        book3 = BookData(edition=None)
        assert book3.edition_number == 0

    def test_bookdata_get_book_type(self):
        """Test BookData.get_book_type method."""
        book = BookData(media_type="print", pages="Online Resource")
        assert book.get_book_type() == "eBook"

        book2 = BookData(media_type="print", pages="300 pages")
        assert book2.get_book_type() == "Druckausgabe"


# --- Error classes tests ---


class TestErrors:
    """Tests for error classes."""

    def test_bibapi_error(self):
        """Test BibAPIError exception."""
        with pytest.raises(BibAPIError):
            raise BibAPIError("Test error")

    def test_catalogue_error(self):
        """Test CatalogueError exception."""
        with pytest.raises(CatalogueError):
            raise CatalogueError("Catalogue error")

        # Should also be a BibAPIError
        with pytest.raises(BibAPIError):
            raise CatalogueError("Catalogue error")

    def test_network_error(self):
        """Test NetworkError exception."""
        with pytest.raises(NetworkError):
            raise NetworkError("Network error")

        # Should also be a BibAPIError
        with pytest.raises(BibAPIError):
            raise NetworkError("Network error")
@@ -1,8 +1,389 @@
|
||||
from src.bibapi.sru import SWB
|
||||
"""Comprehensive tests for the SRU module."""
|
||||
|
||||
import xml.etree.ElementTree as ET
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
import requests
|
||||
|
||||
from bibapi.schemas.api_types import ALMASchema, DublinCoreSchema, PicaSchema
|
||||
from bibapi.schemas.bookdata import BookData
|
||||
from bibapi.sru import (
|
||||
Api,
|
||||
QueryTransformer,
|
||||
book_from_marc,
|
||||
find_newer_edition,
|
||||
parse_marc_record,
|
||||
)
|
||||
from src.bibapi import SWB
|
||||
|
||||
# --- Integration test (requires network) ---
|
||||
|
||||
|
||||
def test_swb_schema():
|
||||
@pytest.mark.integration
|
||||
def test_swb_schema() -> None:
|
||||
"""Integration test that requires network access."""
|
||||
result = SWB().getBooks(["pica.tit=Java ist auch eine Insel", "pica.bib=20735"])
|
||||
assert len(result) == 1
|
||||
assert result[0].title == "Java ist auch eine Insel"
|
||||
assert
|
||||
|
||||
|
||||
# --- Api class tests ---


class TestApiClass:
    """Tests for the Api class."""

    def test_api_initialization(self):
        """Test Api class initialization."""
        api = Api(
            site="TestSite",
            url="https://example.com/sru?query={}",
            prefix=PicaSchema,
            library_identifier="924$b",
        )
        assert api.site == "TestSite"
        assert api.url == "https://example.com/sru?query={}"
        assert api.prefix == PicaSchema
        assert api.library_identifier == "924$b"
        assert api._rate_limit_seconds == 1.0
        assert api._max_retries == 5
        assert api._overall_timeout_seconds == 30.0
        api.close()

    def test_api_with_notsupported_args(self):
        """Test Api initialization with unsupported arguments."""
        api = Api(
            site="TestSite",
            url="https://example.com/sru?query={}",
            prefix=PicaSchema,
            library_identifier="924$b",
            notsupported_args=["YEAR", "PPN"],
        )
        assert "YEAR" in api.notsupported_args
        assert "PPN" in api.notsupported_args
        api.close()

    def test_api_with_replace_dict(self):
        """Test Api initialization with replace dictionary."""
        api = Api(
            site="TestSite",
            url="https://example.com/sru?query={}",
            prefix=PicaSchema,
            library_identifier="924$b",
            replace={" ": "+", "&": "%26"},
        )
        assert api.replace == {" ": "+", "&": "%26"}
        api.close()

    @patch.object(requests.Session, "get")
    def test_api_get_success(self, mock_get, sample_sru_response_xml):
        """Test successful API get request."""
        mock_response = MagicMock()
        mock_response.status_code = 200
        mock_response.content = sample_sru_response_xml
        mock_get.return_value = mock_response

        api = Api(
            site="TestSite",
            url="https://example.com/sru?query={}",
            prefix=PicaSchema,
            library_identifier="924$b",
        )
        records = api.get(["title=Test"])
        assert len(records) == 1
        api.close()

    @patch.object(requests.Session, "get")
    def test_api_get_with_string_query(self, mock_get, sample_sru_response_xml):
        """Test API get with string query (not list)."""
        mock_response = MagicMock()
        mock_response.status_code = 200
        mock_response.content = sample_sru_response_xml
        mock_get.return_value = mock_response

        api = Api(
            site="TestSite",
            url="https://example.com/sru?query={}",
            prefix=PicaSchema,
            library_identifier="924$b",
        )
        records = api.get("title=Test")
        assert len(records) == 1
        api.close()

    @patch.object(requests.Session, "get")
    def test_api_get_filters_notsupported_args(self, mock_get, sample_sru_response_xml):
        """Test that unsupported args are filtered out."""
        mock_response = MagicMock()
        mock_response.status_code = 200
        mock_response.content = sample_sru_response_xml
        mock_get.return_value = mock_response

        api = Api(
            site="TestSite",
            url="https://example.com/sru?query={}",
            prefix=PicaSchema,
            library_identifier="924$b",
            notsupported_args=["YEAR"],
        )
        # YEAR should be filtered out
        records = api.get(["title=Test", "YEAR=2023"])
        assert len(records) == 1
        api.close()

    @patch.object(requests.Session, "get")
    def test_api_get_http_error_retries(self, mock_get):
        """Test that API retries on HTTP errors."""
        mock_response = MagicMock()
        mock_response.status_code = 500
        mock_get.return_value = mock_response

        api = Api(
            site="TestSite",
            url="https://example.com/sru?query={}",
            prefix=PicaSchema,
            library_identifier="924$b",
        )
        api._max_retries = 2
        api._rate_limit_seconds = 0.01  # Speed up test
        api._overall_timeout_seconds = 5.0

        with pytest.raises(Exception, match="HTTP 500"):
            api.get(["title=Test"])
        api.close()

    @patch.object(requests.Session, "get")
    def test_api_get_timeout_returns_empty_bookdata(self, mock_get):
        """Test that timeout returns empty BookData list."""
        mock_get.side_effect = requests.exceptions.ReadTimeout("Timeout")

        api = Api(
            site="TestSite",
            url="https://example.com/sru?query={}",
            prefix=PicaSchema,
            library_identifier="924$b",
        )
        api._max_retries = 1
        api._rate_limit_seconds = 0.01

        books = api.getBooks(["title=Test"])
        assert len(books) == 1
        assert books[0].ppn is None  # Empty BookData
        api.close()

    @patch.object(requests.Session, "get")
    def test_api_getbooks_filters_by_title(self, mock_get, sample_sru_response_xml):
        """Test that getBooks filters results by title prefix."""
        mock_response = MagicMock()
        mock_response.status_code = 200
        mock_response.content = sample_sru_response_xml
        mock_get.return_value = mock_response

        api = Api(
            site="TestSite",
            url="https://example.com/sru?query={}",
            prefix=PicaSchema,
            library_identifier="924$b",
        )
        # Title in sample is "Test Book" - filtering for "Test" should match
        books = api.getBooks(["pica.tit=Test"])
        assert len(books) == 1

        # Filtering for "NonExistent" should not match
        books = api.getBooks(["pica.tit=NonExistent"])
        assert len(books) == 0
        api.close()

    def test_api_close(self):
        """Test Api close method."""
        api = Api(
            site="TestSite",
            url="https://example.com/sru?query={}",
            prefix=PicaSchema,
            library_identifier="924$b",
        )
        # Should not raise
        api.close()
        api.close()  # Double close should be safe

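# Putting the pieces together, a typical (illustrative) use of the Api class;
# the endpoint URL and query below are placeholders, not values from this
# repository:
#
#   api = Api(
#       site="ExampleSite",
#       url="https://example.com/sru?query={}",
#       prefix=PicaSchema,
#       library_identifier="924$b",
#   )
#   try:
#       books = api.getBooks(["pica.tit=Some Title"])
#   finally:
#       api.close()  # safe to call more than once (see test_api_close)
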
# --- QueryTransformer tests ---


class TestQueryTransformer:
    """Tests for the QueryTransformer class."""

    def test_transform_pica_schema(self):
        """Test transformation with PicaSchema."""
        args = ["TITLE=Test Book", "AUTHOR=Smith, John"]
        transformer = QueryTransformer(PicaSchema, args)
        result = transformer.transform()

        assert len(result) == 2
        # Check that pica.tit is in the result
        assert any(r.startswith("pica.tit=") for r in result)
        # Author should have comma without space
        assert any(r.startswith("pica.per=") for r in result)

    def test_transform_alma_schema(self):
        """Test transformation with ALMASchema."""
        args = ["TITLE=Test Book", "AUTHOR=Smith, John"]
        transformer = QueryTransformer(ALMASchema, args)
        result = transformer.transform()

        assert len(result) == 2
        # Title should be enclosed in quotes
        assert any('alma.title="Test Book"' in r for r in result)

    def test_transform_dublin_core_schema(self):
        """Test transformation with DublinCoreSchema."""
        args = ["TITLE=Test Book", "AUTHOR=Smith,John"]
        transformer = QueryTransformer(DublinCoreSchema, args)
        result = transformer.transform()

        assert len(result) == 2
        # Check that dc.title is in the result
        assert any(r.startswith("dc.title=") for r in result)
        # Author should have space after comma
        assert any(r.startswith("dc.creator=") for r in result)

    def test_transform_string_input(self):
        """Test transformation with string input instead of list."""
        transformer = QueryTransformer(PicaSchema, "TITLE=Test Book")
        result = transformer.transform()
        assert len(result) == 1

    def test_transform_drops_empty_values(self):
        """Test that empty values are dropped when drop_empty is True."""
        args = ["TITLE=Test Book", "AUTHOR="]
        transformer = QueryTransformer(PicaSchema, args)
        result = transformer.transform()
        assert len(result) == 1

    def test_transform_invalid_format_ignored(self):
        """Test that arguments without = are ignored."""
        args = ["TITLE=Test Book", "InvalidArg", "AUTHOR=Smith"]
        transformer = QueryTransformer(PicaSchema, args)
        result = transformer.transform()
        assert len(result) == 2

    def test_transform_unknown_key_ignored(self):
        """Test that unknown keys are ignored."""
        args = ["TITLE=Test Book", "UNKNOWNKEY=value"]
        transformer = QueryTransformer(PicaSchema, args)
        result = transformer.transform()
        assert len(result) == 1

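# Condensed view of the mappings pinned down above (right-hand values are
# illustrative; the tests only assert the prefixes and quoting):
#
#   QueryTransformer(PicaSchema, ["TITLE=Test Book"]).transform()
#       -> ["pica.tit=Test Book"]
#   QueryTransformer(ALMASchema, ["TITLE=Test Book"]).transform()
#       -> ['alma.title="Test Book"']
#   QueryTransformer(DublinCoreSchema, ["TITLE=Test Book"]).transform()
#       -> ["dc.title=Test Book"]
#
# Arguments without "=", with empty values, or with unknown keys are dropped.
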
# --- book_from_marc tests ---


class TestBookFromMarc:
    """Tests for the book_from_marc function."""

    def test_book_from_marc_basic(self, sample_marc_record_xml):
        """Test basic book extraction from MARC record."""
        root = ET.fromstring(sample_marc_record_xml)
        record = parse_marc_record(root)
        book = book_from_marc(record, "924$b")

        assert book.ppn == "123456789"
        assert book.title == "Test Book Title"
        assert book.edition == "2nd edition"
        assert book.year == "2023"
        assert book.publisher == "Test Publisher"
        assert "9783123456789" in book.isbn
        assert book.pages == "456 pages"
        assert book.media_type == "Band"
        assert book.author == "Author, Test"

    def test_book_from_marc_signature(self, sample_marc_record_xml):
        """Test signature extraction from MARC record with Frei 129."""
        root = ET.fromstring(sample_marc_record_xml)
        record = parse_marc_record(root)
        book = book_from_marc(record, "924$b")

        # Signature should be from 924 where $9 == "Frei 129" -> $g
        assert book.signature == "ABC 123"

    def test_book_from_marc_libraries(self, sample_marc_record_xml):
        """Test library extraction from MARC record."""
        root = ET.fromstring(sample_marc_record_xml)
        record = parse_marc_record(root)
        book = book_from_marc(record, "924$b")

        assert "DE-Frei129" in book.libraries

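# For orientation, the 924 holdings field these assertions rely on looks
# roughly like this in MARCXML (a sketch matching the asserted values; the
# real sample lives in the sample_marc_record_xml fixture):
#
#   <datafield tag="924" ind1=" " ind2=" ">
#     <subfield code="b">DE-Frei129</subfield>  <!-- library sigil (924$b) -->
#     <subfield code="9">Frei 129</subfield>    <!-- owner marker checked above -->
#     <subfield code="g">ABC 123</subfield>     <!-- signature / call number -->
#   </datafield>
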
# --- find_newer_edition tests ---


class TestFindNewerEdition:
    """Tests for the find_newer_edition function."""

    def test_find_newer_edition_by_year(self):
        """Test finding newer edition by year."""
        swb = BookData(ppn="1", year=2020, edition="1st edition")
        dnb = [
            BookData(ppn="2", year=2023, edition="3rd edition"),
            BookData(ppn="3", year=2019, edition="1st edition"),
        ]
        result = find_newer_edition(swb, dnb)
        assert result is not None
        assert len(result) == 1
        # Year is stored as string after post_init
        assert result[0].year == "2023"

    def test_find_newer_edition_by_edition_number(self):
        """Test finding newer edition by edition number."""
        swb = BookData(ppn="1", year=2020, edition="1st edition")
        dnb = [
            BookData(ppn="2", year=2020, edition="3rd edition"),
        ]
        result = find_newer_edition(swb, dnb)
        assert result is not None
        assert len(result) == 1
        assert result[0].edition_number == 3

    def test_find_newer_edition_none_found(self):
        """Test when no newer edition exists."""
        swb = BookData(ppn="1", year=2023, edition="5th edition")
        dnb = [
            BookData(ppn="2", year=2020, edition="1st edition"),
            BookData(ppn="3", year=2019, edition="2nd edition"),
        ]
        result = find_newer_edition(swb, dnb)
        assert result is None

    def test_find_newer_edition_empty_list(self):
        """Test with empty DNB result list."""
        swb = BookData(ppn="1", year=2020)
        result = find_newer_edition(swb, [])
        assert result is None

    def test_find_newer_edition_prefers_matching_signature(self):
        """Test that matching signature is preferred."""
        swb = BookData(ppn="1", year=2020, signature="ABC 123")
        dnb = [
            BookData(ppn="2", year=2023, signature="ABC 123"),
            BookData(ppn="3", year=2023, signature="XYZ 789"),
        ]
        result = find_newer_edition(swb, dnb)
        assert result is not None
        assert len(result) == 1
        # The matching signature is preferred; XYZ 789 differs, so it is
        # filtered out and the result is the matching candidate.
        assert result[0].signature == "ABC 123"

    def test_find_newer_edition_deduplicates_by_ppn(self):
        """Test that results are deduplicated by PPN."""
        swb = BookData(ppn="1", year=2020)
        dnb = [
            BookData(ppn="2", year=2023, signature="ABC"),
            BookData(ppn="2", year=2023),  # Duplicate PPN, no signature
        ]
        result = find_newer_edition(swb, dnb)
        assert result is not None
        assert len(result) == 1
        # Should prefer the one with signature
        assert result[0].signature == "ABC"

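# Taken together, these tests spell out the contract of find_newer_edition:
# a DNB candidate is "newer" when its year is greater than the SWB record's,
# or when the years match and its parsed edition number is higher; results
# are deduplicated by PPN (preferring entries that carry a signature), and
# None is returned when nothing qualifies. For example:
#
#   find_newer_edition(
#       BookData(ppn="1", year=2020, edition="1st edition"),
#       [BookData(ppn="2", year=2020, edition="2nd edition")],
#   )
#   # -> one hit: same year, but edition number 2 > 1
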
375
tests/test_transformers.py
Normal file
@@ -0,0 +1,375 @@
"""Tests for the _transformers module."""

from src.bibapi._transformers import (
    RDS_AVAIL_DATA,
    RDS_DATA,
    RDS_GENERIC_DATA,
    ARRAYData,
    BibTeXData,
    COinSData,
    DictToTable,
    Item,
    RISData,
)
from src.bibapi.schemas.bookdata import BookData

# --- Item dataclass tests ---


class TestItem:
    """Tests for the Item dataclass."""

    def test_item_creation_defaults(self):
        """Test Item creation with defaults."""
        item = Item()
        assert item.superlocation == ""
        assert item.status == ""
        assert item.availability == ""

    def test_item_creation_with_values(self):
        """Test Item creation with values."""
        item = Item(
            superlocation="Main Library",
            status="available",
            callnumber="ABC 123",
        )
        assert item.superlocation == "Main Library"
        assert item.status == "available"
        assert item.callnumber == "ABC 123"

    def test_item_from_dict(self):
        """Test Item.from_dict method."""
        item = Item()
        data = {
            "items": [
                {
                    "status": "available",
                    "callnumber": "ABC 123",
                    "location": "Floor 1",
                },
            ],
        }
        result = item.from_dict(data)
        assert result.status == "available"
        assert result.callnumber == "ABC 123"
        assert result.location == "Floor 1"


# --- RDS_DATA dataclass tests ---


class TestRDSData:
    """Tests for the RDS_DATA dataclass."""

    def test_rds_data_creation_defaults(self):
        """Test RDS_DATA creation with defaults."""
        rds = RDS_DATA()
        assert rds.RDS_SIGNATURE == ""
        assert rds.RDS_STATUS == ""
        assert rds.RDS_LOCATION == ""

    def test_rds_data_import_from_dict(self):
        """Test RDS_DATA.import_from_dict method."""
        rds = RDS_DATA()
        data = {
            "RDS_SIGNATURE": "ABC 123",
            "RDS_STATUS": "available",
            "RDS_LOCATION": "Floor 1",
        }
        result = rds.import_from_dict(data)
        assert result.RDS_SIGNATURE == "ABC 123"
        assert result.RDS_STATUS == "available"
        assert result.RDS_LOCATION == "Floor 1"


# --- RDS_AVAIL_DATA dataclass tests ---


class TestRDSAvailData:
    """Tests for the RDS_AVAIL_DATA dataclass."""

    def test_rds_avail_data_creation_defaults(self):
        """Test RDS_AVAIL_DATA creation with defaults."""
        rds = RDS_AVAIL_DATA()
        assert rds.library_sigil == ""
        assert rds.items == []

    def test_rds_avail_data_import_from_dict(self):
        """Test RDS_AVAIL_DATA.import_from_dict method."""
        rds = RDS_AVAIL_DATA()
        json_data = (
            '{"DE-Frei129": {"Location1": {"items": [{"status": "available"}]}}}'
        )
        result = rds.import_from_dict(json_data)
        assert result.library_sigil == "DE-Frei129"
        assert len(result.items) == 1


# --- RDS_GENERIC_DATA dataclass tests ---


class TestRDSGenericData:
    """Tests for the RDS_GENERIC_DATA dataclass."""

    def test_rds_generic_data_creation_defaults(self):
        """Test RDS_GENERIC_DATA creation with defaults."""
        rds = RDS_GENERIC_DATA()
        assert rds.LibrarySigil == ""
        assert rds.RDS_DATA == []

    def test_rds_generic_data_import_from_dict(self):
        """Test RDS_GENERIC_DATA.import_from_dict method."""
        rds = RDS_GENERIC_DATA()
        json_data = '{"DE-Frei129": [{"RDS_SIGNATURE": "ABC 123"}]}'
        result = rds.import_from_dict(json_data)
        assert result.LibrarySigil == "DE-Frei129"
        assert len(result.RDS_DATA) == 1


# --- ARRAYData tests ---


class TestARRAYData:
    """Tests for the ARRAYData transformer."""

    def test_array_data_transform(self):
        """Test ARRAYData transform method."""
        sample_data = """
[kid] => 123456789
[ti_long] => Array
(
[0] => Test Book Title
)
[isbn] => Array
(
[0] => 9783123456789
)
[la_facet] => Array
(
[0] => German
)
[pu] => Array
(
[0] => Test Publisher
)
[py_display] => Array
(
[0] => 2023
)
[umfang] => Array
(
[0] => 300 pages
)
"""
        transformer = ARRAYData()
        result = transformer.transform(sample_data)

        assert isinstance(result, BookData)
        assert result.ppn == "123456789"

    def test_array_data_with_signature(self):
        """Test ARRAYData with predefined signature."""
        sample_data = "[kid] => 123456789"
        transformer = ARRAYData(signature="ABC 123")
        result = transformer.transform(sample_data)

        assert isinstance(result, BookData)

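# The "[key] => value" blocks above mimic PHP print_r() output, which is the
# raw export format ARRAYData parses. Roughly (field mapping partly assumed;
# only kid -> ppn is asserted above):
#
#   [kid] => 123456789          # scalar -> BookData.ppn
#   [ti_long] => Array          # array field ...
#   (
#       [0] => Test Book Title  # ... first element, presumably the title
#   )
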
# --- COinSData tests ---


class TestCOinSData:
    """Tests for the COinSData transformer."""

    def test_coins_data_transform(self):
        """Test COinSData transform method."""
        # Note: COinS format uses & separators, last field shouldn't have trailing &
        sample_data = (
            "ctx_ver=Z39.88-2004&"
            "rft_id=info:sid/test?kid=123456&"
            "rft.btitle=Test Bookrft&"  # btitle ends parsing at next 'rft'
            "rft.aulast=Smithrft&"
            "rft.aufirst=Johnrft&"
            "rft.edition=2ndrft&"
            "rft.isbn=9783123456789rft&"
            "rft.pub=Publisherrft&"
            "rft.date=2023rft&"
            "rft.tpages=300"
        )
        transformer = COinSData()
        result = transformer.transform(sample_data)

        assert isinstance(result, BookData)
        # The transformer splits on 'rft' after the field value
        assert "Test Book" in result.title
        assert "Smith" in result.author

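# For context: COinS embeds an OpenURL ContextObject (ctx_ver=Z39.88-2004) as
# key=value pairs, and rft.btitle, rft.aulast, rft.date etc. are standard
# OpenURL book metadata keys. The trailing "rft" glued onto each value above
# is a quirk of how this transformer tokenizes the string, not part of the
# COinS format itself.
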
# --- RISData tests ---


class TestRISData:
    """Tests for the RISData transformer."""

    def test_ris_data_transform(self):
        """Test RISData transform method."""
        sample_data = """TY - BOOK
TI - Test Book Title
AU - Smith, John
ET - 2nd edition
CN - ABC 123
SN - 9783123456789
LA - English
PB - Test Publisher
PY - 2023
SP - 300
DP - https://example.com/book?kid=123456
ER -"""
        transformer = RISData()
        result = transformer.transform(sample_data)

        assert isinstance(result, BookData)
        assert result.title == "Test Book Title"
        assert result.signature == "ABC 123"
        assert result.edition == "2nd edition"
        assert result.year == "2023"

# --- BibTeXData tests ---


class TestBibTeXData:
    """Tests for the BibTeXData transformer."""

    def test_bibtex_data_transform(self):
        """Test BibTeXData transform method."""
        sample_data = """@book{test2023,
title = {Test Book Title},
author = {Smith, John and Doe, Jane},
edition = {2nd},
isbn = {9783123456789},
language = {English},
publisher = {Test Publisher},
year = {2023},
pages = {300},
bestand = {ABC 123}
}"""
        transformer = BibTeXData()
        result = transformer.transform(sample_data)

        assert isinstance(result, BookData)
        assert result.title == "Test Book Title"
        # BibTeX transformer joins with ; and removes commas
        assert "Smith John" in result.author
        assert "Doe Jane" in result.author
        assert result.signature == "ABC 123"

# --- DictToTable tests ---


class TestDictToTable:
    """Tests for the DictToTable transformer."""

    def test_dict_to_table_book_mode(self):
        """Test DictToTable with book mode."""
        data = {
            "mode": "book",
            "book_author": "Smith, John",
            "book_signature": "ABC 123",
            "book_place": "Berlin",
            "book_year": "2023",
            "book_title": "Test Book",
            "book_edition": "2nd",
            "book_pages": "300",
            "book_publisher": "Publisher",
            "book_isbn": "9783123456789",
        }
        transformer = DictToTable()
        result = transformer.transform(data)

        assert result["type"] == "book"
        assert result["work_author"] == "Smith, John"
        assert result["signature"] == "ABC 123"
        assert result["year"] == "2023"

    def test_dict_to_table_hg_mode(self):
        """Test DictToTable with hg (editor) mode."""
        data = {
            "mode": "hg",
            "hg_author": "Chapter Author",
            "hg_editor": "Editor Name",
            "hg_year": "2023",
            "hg_title": "Collection Title",
            "hg_publisher": "Publisher",
            "hg_place": "Berlin",
            "hg_edition": "1st",
            "hg_chaptertitle": "Chapter Title",
            "hg_pages": "50-75",
            "hg_signature": "ABC 123",
            "hg_isbn": "9783123456789",
        }
        transformer = DictToTable()
        result = transformer.transform(data)

        assert result["type"] == "hg"
        assert result["section_author"] == "Chapter Author"
        assert result["work_author"] == "Editor Name"
        assert result["chapter_title"] == "Chapter Title"

    def test_dict_to_table_zs_mode(self):
        """Test DictToTable with zs (journal) mode."""
        data = {
            "mode": "zs",
            "zs_author": "Article Author",
            "zs_chapter_title": "Article Title",
            "zs_place": "Berlin",
            "zs_issue": "Vol. 5, No. 2",
            "zs_pages": "100-120",
            "zs_publisher": "Publisher",
            "zs_isbn": "1234-5678",
            "zs_year": "2023",
            "zs_signature": "PER 123",
            "zs_title": "Journal Name",
        }
        transformer = DictToTable()
        result = transformer.transform(data)

        assert result["type"] == "zs"
        assert result["section_author"] == "Article Author"
        assert result["chapter_title"] == "Article Title"
        assert result["issue"] == "Vol. 5, No. 2"

    def test_dict_to_table_reset(self):
        """Test DictToTable reset method."""
        transformer = DictToTable()
        transformer.work_author = "Test"
        transformer.year = "2023"

        transformer.reset()

        assert transformer.work_author is None
        assert transformer.year is None

    def test_dict_to_table_make_result_excludes_none(self):
        """Test that makeResult excludes None values."""
        transformer = DictToTable()
        transformer.work_author = "Test Author"
        transformer.year = "2023"
        # Leave others as None

        result = transformer.makeResult()

        assert "work_author" in result
        assert "year" in result
        assert "section_author" not in result  # Should be excluded
        assert "pages" not in result  # Should be excluded

    def test_dict_to_table_invalid_mode(self):
        """Test DictToTable with invalid mode returns None."""
        data = {"mode": "invalid"}
        transformer = DictToTable()
        result = transformer.transform(data)

        assert result is None

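# Summary of the three modes exercised above: each appears to map one source
# prefix onto the same normalized table keys.
#
#   "book" -> monograph: book_author -> work_author
#   "hg"   -> edited volume: hg_author -> section_author,
#             hg_editor -> work_author, hg_chaptertitle -> chapter_title
#   "zs"   -> journal article: zs_author -> section_author,
#             zs_chapter_title -> chapter_title, zs_issue -> issue
#
# An unknown mode yields None, and makeResult() drops keys that are still None.
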
309
tests/test_webrequest.py
Normal file
@@ -0,0 +1,309 @@
"""Tests for the webrequest module."""

from unittest.mock import MagicMock, patch

import pytest
import requests

from src.bibapi.webrequest import (
    ALLOWED_IPS,
    BibTextTransformer,
    TransformerType,
    WebRequest,
    cover,
    get_content,
)


class TestTransformerType:
    """Tests for TransformerType enum."""

    def test_transformer_type_values(self):
        """Test TransformerType enum values."""
        assert TransformerType.ARRAY.value == "ARRAY"
        assert TransformerType.COinS.value == "COinS"
        assert TransformerType.BibTeX.value == "BibTeX"
        assert TransformerType.RIS.value == "RIS"
        assert TransformerType.RDS.value == "RDS"


class TestWebRequest:
    """Tests for WebRequest class."""

    def test_webrequest_init_not_allowed_ip(self):
        """Test WebRequest raises PermissionError for non-allowed IP."""
        with patch("requests.get") as mock_get:
            mock_response = MagicMock()
            mock_response.text = "192.168.1.1"  # Not in ALLOWED_IPS
            mock_get.return_value = mock_response

            with pytest.raises(PermissionError, match="IP not allowed"):
                WebRequest()

    def test_webrequest_init_allowed_ip(self):
        """Test WebRequest initializes successfully with allowed IP."""
        with patch("requests.get") as mock_get:
            mock_response = MagicMock()
            mock_response.text = ALLOWED_IPS[0]  # Use first allowed IP
            mock_get.return_value = mock_response

            wr = WebRequest()
            assert wr.public_ip == ALLOWED_IPS[0]
            assert wr.timeout == 5
            assert wr.use_any is False

    def test_webrequest_no_connection(self):
        """Test WebRequest raises ConnectionError when no internet."""
        with patch("requests.get") as mock_get:
            mock_get.side_effect = requests.exceptions.RequestException("No connection")

            with pytest.raises(ConnectionError, match="No internet connection"):
                WebRequest()

    def test_webrequest_use_any_book(self):
        """Test use_any_book property."""
        with patch("requests.get") as mock_get:
            mock_response = MagicMock()
            mock_response.text = ALLOWED_IPS[0]
            mock_get.return_value = mock_response

            wr = WebRequest()
            result = wr.use_any_book
            assert result.use_any is True

    def test_webrequest_set_apparat(self):
        """Test set_apparat method."""
        with patch("requests.get") as mock_get:
            mock_response = MagicMock()
            mock_response.text = ALLOWED_IPS[0]
            mock_get.return_value = mock_response

            wr = WebRequest()
            result = wr.set_apparat(5)
            assert result.apparat == "05"  # Padded with 0

            result = wr.set_apparat(15)
            assert result.apparat == 15  # Not padded

    def test_webrequest_get_ppn(self):
        """Test get_ppn method."""
        with patch("requests.get") as mock_get:
            mock_response = MagicMock()
            mock_response.text = ALLOWED_IPS[0]
            mock_get.return_value = mock_response

            wr = WebRequest()

            # Normal signature
            result = wr.get_ppn("ABC 123")
            assert result.ppn == "ABC 123"
            assert result.signature == "ABC 123"

            # Signature with +
            result = wr.get_ppn("ABC+123")
            assert result.ppn == "ABC%2B123"

            # DOI
            result = wr.get_ppn("https://doi.org/10.1234/test")
            assert result.ppn == "test"

    def test_webrequest_search_book(self):
        """Test search_book method."""
        with patch("requests.get") as mock_get:
            # First call for IP check
            ip_response = MagicMock()
            ip_response.text = ALLOWED_IPS[0]

            # Second call for actual search
            search_response = MagicMock()
            search_response.text = "<html>results</html>"

            mock_get.side_effect = [ip_response, search_response]

            wr = WebRequest()
            result = wr.search_book("test search")
            assert result == "<html>results</html>"

    def test_webrequest_search_ppn(self):
        """Test search_ppn method."""
        with patch("requests.get") as mock_get:
            ip_response = MagicMock()
            ip_response.text = ALLOWED_IPS[0]

            ppn_response = MagicMock()
            ppn_response.text = "<html>ppn result</html>"

            mock_get.side_effect = [ip_response, ppn_response]

            wr = WebRequest()
            result = wr.search_ppn("123456")
            assert result == "<html>ppn result</html>"

    def test_webrequest_search(self):
        """Test search method."""
        with patch("requests.get") as mock_get:
            ip_response = MagicMock()
            ip_response.text = ALLOWED_IPS[0]

            search_response = MagicMock()
            search_response.text = "<html>detail page</html>"

            mock_get.side_effect = [ip_response, search_response]

            wr = WebRequest()
            result = wr.search("https://example.com/book")
            assert result == "<html>detail page</html>"

    def test_webrequest_search_error(self):
        """Test search method handles errors."""
        with patch("requests.get") as mock_get:
            ip_response = MagicMock()
            ip_response.text = ALLOWED_IPS[0]

            mock_get.side_effect = [ip_response, requests.exceptions.RequestException()]

            wr = WebRequest()
            result = wr.search("https://example.com/book")
            assert result is None

    def test_webrequest_get_book_links(self):
        """Test get_book_links method."""
        html = """<html>
        <a class="title getFull" href="/opac/book/123">Book 1</a>
        <a class="title getFull" href="/opac/book/456">Book 2</a>
        </html>"""

        with patch("requests.get") as mock_get:
            ip_response = MagicMock()
            ip_response.text = ALLOWED_IPS[0]

            search_response = MagicMock()
            search_response.text = html

            mock_get.side_effect = [ip_response, search_response]

            wr = WebRequest()
            wr.ppn = "test"
            links = wr.get_book_links("test")

            assert len(links) == 2
            assert "https://rds.ibs-bw.de/opac/book/123" in links[0]

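# The pattern throughout TestWebRequest: the constructor's first requests.get
# is an IP self-check against ALLOWED_IPS (raising PermissionError or
# ConnectionError before any scraping happens), so every test queues that
# response first:
#
#   mock_get.side_effect = [ip_response, actual_response]
#
# Only the second queued response reaches the method under test.
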
class TestBibTextTransformer:
    """Tests for BibTextTransformer class."""

    def test_bibtexttransformer_init_valid_mode(self):
        """Test BibTextTransformer initialization with valid mode."""
        bt = BibTextTransformer(TransformerType.ARRAY)
        assert bt.mode == "ARRAY"

    def test_bibtexttransformer_init_default_mode(self):
        """Test BibTextTransformer uses ARRAY as default mode."""
        bt = BibTextTransformer()
        assert bt.mode == "ARRAY"

    def test_bibtexttransformer_invalid_mode(self):
        """Test BibTextTransformer raises error for invalid mode."""

        # Create a fake invalid mode
        class FakeMode:
            value = "INVALID"

        with pytest.raises(ValueError, match="not valid"):
            BibTextTransformer(FakeMode())

    def test_bibtexttransformer_use_signature(self):
        """Test use_signature method."""
        bt = BibTextTransformer()
        result = bt.use_signature("ABC 123")
        assert result.signature == "ABC 123"

    def test_bibtexttransformer_get_data_none(self):
        """Test get_data with None input."""
        bt = BibTextTransformer()
        result = bt.get_data(None)
        assert result.data is None

    def test_bibtexttransformer_get_data_ris(self):
        """Test get_data with RIS format."""
        bt = BibTextTransformer(TransformerType.RIS)
        data = ["Some data", "TY - BOOK\nTI - Test"]
        result = bt.get_data(data)
        assert "TY -" in result.data

    def test_bibtexttransformer_get_data_array(self):
        """Test get_data with ARRAY format."""
        bt = BibTextTransformer(TransformerType.ARRAY)
        data = ["Some data", "[kid] => 123456"]
        result = bt.get_data(data)
        assert "[kid]" in result.data

    def test_bibtexttransformer_get_data_coins(self):
        """Test get_data with COinS format."""
        bt = BibTextTransformer(TransformerType.COinS)
        data = ["Some data", "ctx_ver=Z39.88"]
        result = bt.get_data(data)
        assert "ctx_ver" in result.data

    def test_bibtexttransformer_get_data_bibtex(self):
        """Test get_data with BibTeX format."""
        bt = BibTextTransformer(TransformerType.BibTeX)
        data = ["Some data", "@book{test2023,"]
        result = bt.get_data(data)
        assert "@book" in result.data

    def test_bibtexttransformer_get_data_rds(self):
        """Test get_data with RDS format."""
        bt = BibTextTransformer(TransformerType.RDS)
        data = ["Some data", "RDS ---------------------------------- test"]
        result = bt.get_data(data)
        assert "RDS" in result.data

    def test_bibtexttransformer_return_data_none(self):
        """Test return_data when data is None."""
        bt = BibTextTransformer()
        bt.get_data(None)
        result = bt.return_data()
        assert result is None

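# Illustrative end-to-end flow these classes support (method chaining as in
# the tests above; the signature and the raw_export variable are placeholders):
#
#   wr = WebRequest().set_apparat(5).get_ppn("ABC 123")
#   links = wr.get_book_links(wr.ppn)
#   bt = BibTextTransformer(TransformerType.RIS).use_signature("ABC 123")
#   record = bt.get_data(raw_export).return_data()
#
# get_data() picks the chunk matching its mode out of the scraped export:
# "TY -" for RIS, "[kid]" for ARRAY, "ctx_ver" for COinS, "@book" for BibTeX.
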
class TestCoverFunction:
    """Tests for the cover function."""

    def test_cover_returns_content(self):
        """Test cover function returns image content."""
        with patch("requests.get") as mock_get:
            mock_response = MagicMock()
            mock_response.content = b"fake_image_content"
            mock_get.return_value = mock_response

            result = cover("9783123456789")
            assert result == b"fake_image_content"

    def test_cover_url_format(self):
        """Test cover function calls correct URL."""
        with patch("requests.get") as mock_get:
            mock_response = MagicMock()
            mock_response.content = b""
            mock_get.return_value = mock_response

            cover("9783123456789")

            called_url = mock_get.call_args[0][0]
            assert "9783123456789" in called_url
            assert "buchhandel.de/cover" in called_url


class TestGetContentFunction:
    """Tests for the get_content function."""

    def test_get_content(self):
        """Test get_content extracts text from div."""
        from bs4 import BeautifulSoup

        html = '<html><div class="test-class"> Content Here </div></html>'
        soup = BeautifulSoup(html, "html.parser")

        result = get_content(soup, "test-class")
        assert result == "Content Here"