rework arraydata transformer

new functions, fixes
add DictToTable -> generates a uniform template dict based on the type of
entry
This commit is contained in:
WorldTeacher
2024-06-26 16:36:25 +02:00
parent 070847a097
commit ed0acb7863

View File

@@ -34,7 +34,7 @@ class Item:
locationhref: str | None = dataclass_field(default_factory=str)
location: str | None = dataclass_field(default_factory=str)
def from_dict(self, data: dict) -> self:
def from_dict(self, data: dict):
"""Import data from dict"""
data = data["items"]
for entry in data:
@@ -50,7 +50,7 @@ class RDS_AVAIL_DATA:
library_sigil: str = dataclass_field(default_factory=str)
items: List[Item] = dataclass_field(default_factory=list)
def import_from_dict(self, data: str) -> self:
def import_from_dict(self, data: str):
"""Import data from dict"""
edata = json.loads(data)
# library sigil is first key
@@ -123,16 +123,18 @@ class ARRAYData:
def transform(self, data: str) -> BookData:
def _get_line(source: str, search: str) -> str:
try:
return (
data = (
source.split(search)[1]
.split("\n")[0]
.strip()
.replace("=>", "")
.strip()
)
return data
except Exception:
logger.log_exception("ARRAYData.transform failed")
print(f"ARRAYData.transform failed, {source}, {search}")
logger.log_exception(f"ARRAYData.transform failed, {source}, {search}")
return ""
def _get_list_entry(source: str, search: str, entry: str) -> str:
@@ -160,18 +162,61 @@ class ARRAYData:
isbn = []
return isbn
def _get_signature(data):
    """Return the ``"signatur"`` value found in the ``[loksatz]`` section.

    The text following the first ``[loksatz]`` marker is searched for the
    first ``[0] => `` entry. The remainder of that line is parsed as a
    Python literal and its ``"signatur"`` item is returned.

    Args:
        data: The raw record text to search.

    Returns:
        The value stored under ``"signatur"``, or ``None`` when a marker is
        missing, the entry is not a plain literal, or the key is absent.
    """
    # Local import keeps this helper self-contained.
    import ast

    try:
        raw_entry = (
            data.split("[loksatz]")[1]
            .split("[0] => ")[1]
            .split("\n")[0]
            .strip()
        )
        # SECURITY: this line used to be ``eval(raw_entry)``. The text comes
        # from the record being parsed, so evaluating it would execute any
        # expression embedded in the data. ``ast.literal_eval`` accepts only
        # literals (dicts, lists, strings, numbers, ...) and rejects the rest.
        record = ast.literal_eval(raw_entry)
        return record["signatur"]
    except Exception:
        # Deliberate best-effort: any parsing problem means "no signature".
        return None
def _get_author(data):
    """Build a ``;``-separated name list from the ``[au_display_short]`` section.

    The section runs from the first ``[au_display_short]`` marker up to the
    next ``")\\n"``. Every line in it that contains ``=>`` contributes the
    text after the arrow. Which names are kept depends on the role tags seen:

    * both ``[HerausgeberIn]`` and ``[VerfasserIn]`` occur: only the
      ``[HerausgeberIn]`` entries are kept;
    * only ``[VerfasserIn]`` occurs: those entries are kept;
    * otherwise (including "only ``[HerausgeberIn]``"): nothing is kept.

    Args:
        data: The raw record text to search.

    Returns:
        The kept names, each cut at its first ``[`` and stripped, joined
        with ``;``. An empty string when the section cannot be located or
        no entry qualifies.
    """
    try:
        section = data.split("[au_display_short]")[1].split(")\n")[0].strip()
    except Exception:
        return ""

    # Right-hand side of every "key => value" row in the section.
    values = [
        row.split("=>")[1].strip()
        for row in section.split("\n")
        if "=>" in row
    ]

    has_editor = any("[HerausgeberIn]" in value for value in values)
    has_writer = any("[VerfasserIn]" in value for value in values)

    if has_editor and has_writer:
        wanted_role = "[HerausgeberIn]"
    elif has_writer:
        wanted_role = "[VerfasserIn]"
    else:
        # NOTE(review): records with editors only yield no names here;
        # confirm that this is intended.
        return ""

    return ";".join(
        value.split("[")[0].strip() for value in values if wanted_role in value
    )
return BookData(
ppn=_get_line(data, "[kid]"),
title=_get_line(data, "[ti_long]").split("/")[0].strip(),
author=_get_list_entry(data, "[au]", "[0]"),
author=_get_author(data),
edition=_get_list_entry(data, "[ausgabe]", "[0]").replace(",", ""),
link=f"https://rds.ibs-bw.de/phfreiburg/link?kid={_get_line(data,'[kid]')}",
isbn=_get_isbn(data),
# [self._get_list_entry(data,"[isbn]","[0]"),self._get_list_entry(data,"[is]","[1]")],
language=_get_list_entry(data, "[la_facet]", "[0]"),
publisher=_get_list_entry(data, "[hg]", "[0]"),
year=_get_line(data, "[py]"),
publisher=_get_list_entry(data, "[pu]", "[0]"),
year=_get_list_entry(data, "[py_display]", "[0]"),
pages=_get_list_entry(data, "[umfang]", "[0]").split(":")[0].strip(),
signature=_get_signature(data),
place=_get_list_entry(data, "[pp]", "[0]"),
)
@@ -301,6 +346,101 @@ class RDSData:
return {"rds_availability": self.retlist[0], "rds_data": self.retlist[1]}
class DictToTable:
    """Map a mode-specific input dict onto one uniform set of field names.

    ``transform`` reads ``data["mode"]`` and copies the ``book_*``, ``hg_*``
    or ``zs_*`` entries of the input onto the instance attributes. It then
    returns those attributes as a dict with every ``None`` value left out.
    """

    # Instance attributes, in the order they are created.
    _ATTRIBUTES = (
        "work_author",
        "section_author",
        "year",
        "edition",
        "work_title",
        "chapter_title",
        "location",
        "publisher",
        "signature",
        "type",
        "pages",
        "issue",
        "isbn",
    )

    # Key order of the dict produced by ``makeResult``.
    _RESULT_KEYS = (
        "work_author",
        "section_author",
        "year",
        "edition",
        "work_title",
        "chapter_title",
        "location",
        "publisher",
        "signature",
        "issue",
        "pages",
        "isbn",
        "type",
    )

    # (attribute, input key) pairs, listed in the order they are looked up.
    _BOOK_FIELDS = (
        ("work_author", "book_author"),
        ("signature", "book_signature"),
        ("location", "book_place"),
        ("year", "book_year"),
        ("work_title", "book_title"),
        ("edition", "book_edition"),
        ("pages", "book_pages"),
        ("publisher", "book_publisher"),
        ("isbn", "book_isbn"),
    )
    _HG_FIELDS = (
        ("section_author", "hg_author"),
        ("work_author", "hg_editor"),
        ("year", "hg_year"),
        ("work_title", "hg_title"),
        ("publisher", "hg_publisher"),
        ("location", "hg_place"),
        ("edition", "hg_edition"),
        ("chapter_title", "hg_chaptertitle"),
        ("pages", "hg_pages"),
        ("signature", "hg_signature"),
        ("isbn", "hg_isbn"),
    )
    _ZS_FIELDS = (
        ("section_author", "zs_author"),
        ("chapter_title", "zs_chapter_title"),
        ("location", "zs_place"),
        ("issue", "zs_issue"),
        ("pages", "zs_pages"),
        ("publisher", "zs_publisher"),
        ("isbn", "zs_isbn"),
        ("year", "zs_year"),
        ("signature", "zs_signature"),
        ("work_title", "zs_title"),
    )

    def __init__(self):
        """Create every field attribute with the value ``None``."""
        for name in self._ATTRIBUTES:
            setattr(self, name, None)

    def makeResult(self):
        """Return the current fields as a dict, omitting ``None`` values."""
        result = {}
        for key in self._RESULT_KEYS:
            value = getattr(self, key)
            if value is not None:
                result[key] = value
        return result

    def reset(self):
        """Set every attribute currently stored on the instance to ``None``."""
        for name in tuple(vars(self)):
            setattr(self, name, None)

    def transform(self, data: dict):
        """Convert *data* according to its ``"mode"`` entry.

        The instance is reset before dispatching. Modes ``"book"``, ``"hg"``
        and ``"zs"`` are handled; any other mode returns ``None``.

        Raises:
            KeyError: if ``"mode"`` or a key required by the mode is missing.
        """
        mode = data["mode"]
        self.reset()
        if mode == "book":
            return self.book_assign(data)
        if mode == "hg":
            return self.hg_assign(data)
        if mode == "zs":
            return self.zs_assign(data)
        return None

    def _fill(self, kind, pairs, data):
        """Set ``type`` to *kind*, copy *pairs* from *data*, return the result."""
        self.type = kind
        for attribute, source_key in pairs:
            setattr(self, attribute, data[source_key])
        return self.makeResult()

    def book_assign(self, data):
        """Fill the fields from the ``book_*`` keys and return the result dict."""
        return self._fill("book", self._BOOK_FIELDS, data)

    def hg_assign(self, data):
        """Fill the fields from the ``hg_*`` keys and return the result dict."""
        return self._fill("hg", self._HG_FIELDS, data)

    def zs_assign(self, data):
        """Fill the fields from the ``zs_*`` keys and return the result dict."""
        return self._fill("zs", self._ZS_FIELDS, data)
if __name__ == "__main__":
with open("daiadata", "r") as f:
data = f.read()