chore: extract marcxml dataclasses from SRU classes and move to schemas folder

This commit is contained in:
2025-10-22 10:25:40 +02:00
parent de07c698af
commit 301525121e
2 changed files with 76 additions and 61 deletions

View File

@@ -0,0 +1,56 @@
from dataclasses import dataclass, field
from typing import List, Optional
# --- MARC XML structures ---
@dataclass
class ControlField:
    """A MARC XML control field: a tag paired with a single text value.

    Unlike ``DataField``, this structure carries no indicators or subfields.
    """

    tag: str  # field tag as it appears in the record
    value: str  # text content of the field
@dataclass
class SubField:
    """A single subfield of a MARC XML data field: a code and its value."""

    code: str  # subfield code as it appears in the record
    value: str  # text content of the subfield
@dataclass
class DataField:
    """A MARC XML data field: a tag, two indicators and a list of subfields.

    Both indicators default to a single space. ``subfields`` uses a
    ``default_factory`` so every instance gets its own empty list rather
    than sharing one mutable default.
    """

    tag: str  # field tag as it appears in the record
    ind1: str = " "  # first indicator; a single space when not supplied
    ind2: str = " "  # second indicator; a single space when not supplied
    subfields: List[SubField] = field(default_factory=list)
@dataclass
class MarcRecord:
    """A MARC XML record: a leader plus its control fields and data fields.

    Both field lists default to a fresh empty list per instance.
    """

    leader: str  # the record's leader string
    controlfields: List[ControlField] = field(default_factory=list)
    datafields: List[DataField] = field(default_factory=list)
# --- SRU record wrapper ---
@dataclass
class Record:
    """One record entry of an SRU response, wrapping a ``MarcRecord``.

    All four fields are required; none has a default.
    """

    recordSchema: str  # schema identifier reported for this record
    recordPacking: str  # packing mode reported for this record
    recordData: MarcRecord  # the parsed MARC payload
    recordPosition: int  # position of the record in the result set
@dataclass
class EchoedSearchRequest:
    """The request parameters echoed back inside an SRU response.

    All five fields are required; none has a default.
    """

    version: str  # SRU version string from the echoed request
    query: str  # the query string that was sent
    maximumRecords: int  # requested upper bound on returned records
    recordPacking: str  # requested record packing
    recordSchema: str  # requested record schema
@dataclass
class SearchRetrieveResponse:
    """Top-level container for an SRU searchRetrieve response.

    ``records`` defaults to a fresh empty list per instance, and
    ``echoedSearchRetrieveRequest`` defaults to ``None`` when the response
    did not include an echoed request.
    """

    version: str  # SRU version string of the response
    numberOfRecords: int  # record count reported by the server
    records: List[Record] = field(default_factory=list)
    echoedSearchRetrieveRequest: Optional[EchoedSearchRequest] = None

View File

@@ -1,6 +1,5 @@
import re
import xml.etree.ElementTree as ET
from dataclasses import dataclass, field
from enum import Enum
from typing import Dict, Iterable, List, Optional, Tuple, Union
@@ -9,66 +8,21 @@ from requests.adapters import HTTPAdapter
# centralized logging used via src.shared.logging
from .schemas.bookdata import BookData
from .schemas.marcxml import (
ControlField,
DataField,
EchoedSearchRequest,
MarcRecord,
Record,
SearchRetrieveResponse,
SubField,
)
# -----------------------
# Dataclasses
# -----------------------
# --- MARC XML structures ---
# MARC XML control field: a tag and a single value.
# NOTE(review): same name as the ControlField imported from .schemas.marcxml
# above; the commit message and hunk header suggest this copy is the removed
# side of the diff — confirm before relying on it.
@dataclass
class ControlField:
tag: str
value: str
# MARC XML subfield: a code and its value.
# NOTE(review): same name as the SubField imported from .schemas.marcxml
# above; the commit message and hunk header suggest this copy is the removed
# side of the diff — confirm before relying on it.
@dataclass
class SubField:
code: str
value: str
# MARC XML data field: a tag, two indicators (each defaulting to a single
# space) and a per-instance list of subfields.
# NOTE(review): same name as the DataField imported from .schemas.marcxml
# above; the commit message and hunk header suggest this copy is the removed
# side of the diff — confirm before relying on it.
@dataclass
class DataField:
tag: str
ind1: str = " "
ind2: str = " "
subfields: List[SubField] = field(default_factory=list)
# MARC XML record: a leader plus per-instance lists of control fields and
# data fields.
# NOTE(review): same name as the MarcRecord imported from .schemas.marcxml
# above; the commit message and hunk header suggest this copy is the removed
# side of the diff — confirm before relying on it.
@dataclass
class MarcRecord:
leader: str
controlfields: List[ControlField] = field(default_factory=list)
datafields: List[DataField] = field(default_factory=list)
# --- SRU record wrapper ---
# One record entry of an SRU response: schema, packing, the MarcRecord
# payload and the record's position. All fields are required.
# NOTE(review): same name as the Record imported from .schemas.marcxml
# above; the commit message and hunk header suggest this copy is the removed
# side of the diff — confirm before relying on it.
@dataclass
class Record:
recordSchema: str
recordPacking: str
recordData: MarcRecord
recordPosition: int
# Request parameters echoed back inside an SRU response. All fields are
# required.
# NOTE(review): same name as the EchoedSearchRequest imported from
# .schemas.marcxml above; the commit message and hunk header suggest this
# copy is the removed side of the diff — confirm before relying on it.
@dataclass
class EchoedSearchRequest:
version: str
query: str
maximumRecords: int
recordPacking: str
recordSchema: str
# Top-level SRU response container: version, record count, a per-instance
# list of records, and an optional echoed request that defaults to None.
# NOTE(review): same name as the SearchRetrieveResponse imported from
# .schemas.marcxml above; the commit message and hunk header suggest this
# copy is the removed side of the diff — confirm before relying on it.
@dataclass
class SearchRetrieveResponse:
version: str
numberOfRecords: int
records: List[Record] = field(default_factory=list)
echoedSearchRetrieveRequest: Optional[EchoedSearchRequest] = None
# -----------------------
# Parser
# -----------------------
@@ -433,11 +387,6 @@ class DNBData(Enum):
NAME = "DNB"
# Enum with two members, SWB and DNB, whose values are the SWBData and
# DNBData objects respectively.
# NOTE(review): the hunk header (-433,11 +387,6) suggests these lines are
# deleted by this commit — confirm before relying on SRUSite.
class SRUSite(Enum):
SWB = SWBData
DNB = DNBData
# Regex character class matching exactly one character: an uppercase letter
# A-Z, a digit, '.', '-' or '/'. The backslash before '/' is redundant inside
# a character class but harmless.
# NOTE(review): presumably interpolated into larger patterns elsewhere in
# this module — the usage is not visible here.
RVK_ALLOWED = r"[A-Z0-9.\-\/]" # conservative char set typically seen in RVK notations
@@ -562,8 +511,10 @@ class Api:
# Best-effort cleanup
self.close()
def get(self, query_args: Iterable[str]) -> List[Record]:
def get(self, query_args: Union[Iterable[str], str]) -> List[Record]:
# if any query_arg ends with =, remove it
if isinstance(query_args, str):
query_args = [query_args]
if self.site == "DNB":
args = [arg for arg in query_args if not arg.startswith("pica.")]
if args == []:
@@ -623,3 +574,11 @@ class SWB(Api):
self.url = SWBData.URL.value
self.prefix = SWBData.ARGSCHEMA.value
super().__init__(self.site, self.url, self.prefix)
class DNB(Api):
    """``Api`` subclass preconfigured with the DNB settings.

    Mirrors the ``SWB`` subclass: the site name, URL and argument schema are
    read from ``DNBData`` and passed on to ``Api.__init__``.
    """

    def __init__(self):
        # Set the attributes before delegating, in the same order as SWB, so
        # they exist on the instance even before Api.__init__ runs.
        self.site = DNBData.NAME.value
        self.url = DNBData.URL.value
        self.prefix = DNBData.ARGSCHEMA.value
        super().__init__(self.site, self.url, self.prefix)