diff --git a/src/bibapi/schemas/marcxml.py b/src/bibapi/schemas/marcxml.py new file mode 100644 index 0000000..526e6f7 --- /dev/null +++ b/src/bibapi/schemas/marcxml.py @@ -0,0 +1,56 @@ +from dataclasses import dataclass, field +from typing import List, Optional + + +# --- MARC XML structures --- +@dataclass +class ControlField: + tag: str + value: str + + +@dataclass +class SubField: + code: str + value: str + + +@dataclass +class DataField: + tag: str + ind1: str = " " + ind2: str = " " + subfields: List[SubField] = field(default_factory=list) + + +@dataclass +class MarcRecord: + leader: str + controlfields: List[ControlField] = field(default_factory=list) + datafields: List[DataField] = field(default_factory=list) + + +# --- SRU record wrapper --- +@dataclass +class Record: + recordSchema: str + recordPacking: str + recordData: MarcRecord + recordPosition: int + + +@dataclass +class EchoedSearchRequest: + version: str + query: str + maximumRecords: int + recordPacking: str + recordSchema: str + + +@dataclass +class SearchRetrieveResponse: + version: str + numberOfRecords: int + records: List[Record] = field(default_factory=list) + echoedSearchRetrieveRequest: Optional[EchoedSearchRequest] = None diff --git a/src/bibapi/sru.py b/src/bibapi/sru.py index 407435c..8cdda6a 100644 --- a/src/bibapi/sru.py +++ b/src/bibapi/sru.py @@ -1,6 +1,5 @@ import re import xml.etree.ElementTree as ET -from dataclasses import dataclass, field from enum import Enum from typing import Dict, Iterable, List, Optional, Tuple, Union @@ -9,66 +8,21 @@ from requests.adapters import HTTPAdapter # centralized logging used via src.shared.logging from .schemas.bookdata import BookData +from .schemas.marcxml import ( + ControlField, + DataField, + EchoedSearchRequest, + MarcRecord, + Record, + SearchRetrieveResponse, + SubField, +) # ----------------------- # Dataclasses # ----------------------- -# --- MARC XML structures --- -@dataclass -class ControlField: - tag: str - value: str - - -@dataclass -class SubField: - code: str - value: str - - -@dataclass -class DataField: - tag: str - ind1: str = " " - ind2: str = " " - subfields: List[SubField] = field(default_factory=list) - - -@dataclass -class MarcRecord: - leader: str - controlfields: List[ControlField] = field(default_factory=list) - datafields: List[DataField] = field(default_factory=list) - - -# --- SRU record wrapper --- -@dataclass -class Record: - recordSchema: str - recordPacking: str - recordData: MarcRecord - recordPosition: int - - -@dataclass -class EchoedSearchRequest: - version: str - query: str - maximumRecords: int - recordPacking: str - recordSchema: str - - -@dataclass -class SearchRetrieveResponse: - version: str - numberOfRecords: int - records: List[Record] = field(default_factory=list) - echoedSearchRetrieveRequest: Optional[EchoedSearchRequest] = None - - # ----------------------- # Parser # ----------------------- @@ -433,11 +387,6 @@ class DNBData(Enum): NAME = "DNB" -class SRUSite(Enum): - SWB = SWBData - DNB = DNBData - - RVK_ALLOWED = r"[A-Z0-9.\-\/]" # conservative char set typically seen in RVK notations @@ -562,8 +511,10 @@ class Api: # Best-effort cleanup self.close() - def get(self, query_args: Iterable[str]) -> List[Record]: + def get(self, query_args: Union[Iterable[str], str]) -> List[Record]: # if any query_arg ends with =, remove it + if isinstance(query_args, str): + query_args = [query_args] if self.site == "DNB": args = [arg for arg in query_args if not arg.startswith("pica.")] if args == []: @@ -623,3 +574,11 @@ class SWB(Api): self.url = SWBData.URL.value self.prefix = SWBData.ARGSCHEMA.value super().__init__(self.site, self.url, self.prefix) + + +class DNB(Api): + def __init__(self): + self.site = DNBData.NAME.value + self.url = DNBData.URL.value + self.prefix = DNBData.ARGSCHEMA.value + super().__init__(self.site, self.url, self.prefix)