rework arraydata transformer

new functions, fixes
add DictToTable -> generates a uniform template dict based on the type of
entry
This commit is contained in:
WorldTeacher
2024-06-26 16:36:25 +02:00
parent 070847a097
commit ed0acb7863

View File

@@ -34,7 +34,7 @@ class Item:
locationhref: str | None = dataclass_field(default_factory=str) locationhref: str | None = dataclass_field(default_factory=str)
location: str | None = dataclass_field(default_factory=str) location: str | None = dataclass_field(default_factory=str)
def from_dict(self, data: dict) -> self: def from_dict(self, data: dict):
"""Import data from dict""" """Import data from dict"""
data = data["items"] data = data["items"]
for entry in data: for entry in data:
@@ -50,7 +50,7 @@ class RDS_AVAIL_DATA:
library_sigil: str = dataclass_field(default_factory=str) library_sigil: str = dataclass_field(default_factory=str)
items: List[Item] = dataclass_field(default_factory=list) items: List[Item] = dataclass_field(default_factory=list)
def import_from_dict(self, data: str) -> self: def import_from_dict(self, data: str):
"""Import data from dict""" """Import data from dict"""
edata = json.loads(data) edata = json.loads(data)
# library sigil is first key # library sigil is first key
@@ -123,16 +123,18 @@ class ARRAYData:
def transform(self, data: str) -> BookData: def transform(self, data: str) -> BookData:
def _get_line(source: str, search: str) -> str: def _get_line(source: str, search: str) -> str:
try: try:
return ( data = (
source.split(search)[1] source.split(search)[1]
.split("\n")[0] .split("\n")[0]
.strip() .strip()
.replace("=>", "") .replace("=>", "")
.strip() .strip()
) )
return data
except Exception: except Exception:
logger.log_exception("ARRAYData.transform failed") print(f"ARRAYData.transform failed, {source}, {search}")
logger.log_exception(f"ARRAYData.transform failed, {source}, {search}")
return "" return ""
def _get_list_entry(source: str, search: str, entry: str) -> str: def _get_list_entry(source: str, search: str, entry: str) -> str:
@@ -160,18 +162,61 @@ class ARRAYData:
isbn = [] isbn = []
return isbn return isbn
def _get_signature(data):
    """Return the ``signatur`` value of the first ``[loksatz]`` entry.

    The text following ``[loksatz]`` is searched for its ``[0] => `` entry;
    the remainder of that line is parsed as a Python literal and its
    ``"signatur"`` key is returned.

    Args:
        data: Raw record text to search.

    Returns:
        The value stored under ``"signatur"``, or ``None`` when the section
        or entry is missing, the line is not a plain literal, or the key is
        absent.
    """
    # Local import keeps this helper self-contained.
    import ast

    try:
        sig_data = (
            data.split("[loksatz]")[1]
            .split("[0] => ")[1]
            .split("\n")[0]
            .strip()
        )
        # SECURITY: this line comes from an external record. It used to be
        # passed to eval(), which would execute arbitrary expressions embedded
        # in the record. literal_eval only accepts literals (dict, list, str,
        # numbers, ...), so anything else is rejected and yields None.
        signature_data = ast.literal_eval(sig_data)
        return signature_data["signatur"]
    except Exception:
        # Best-effort lookup: any parsing problem means "no signature".
        return None
def _get_author(data):
    """Return the names listed under ``[au_display_short]``, joined by ``;``.

    Every ``key => value`` line of the section is inspected for the role
    markers ``[VerfasserIn]`` and ``[HerausgeberIn]``:

    * both roles occur  -> only the ``[HerausgeberIn]`` names are returned
    * only ``[VerfasserIn]`` occurs -> the ``[VerfasserIn]`` names are returned
    * no ``[VerfasserIn]`` at all -> an empty string is returned

    A name is the text before the first ``[`` of the value, stripped.

    Args:
        data: Raw record text to search.

    Returns:
        The selected names joined with ``;``, or ``""`` when the section is
        missing or no name qualifies.
    """
    try:
        array = data.split("[au_display_short]")[1].split(")\n")[0].strip()
    except Exception:
        # Section not present (or data is not splittable): no author.
        return ""

    # Right-hand side of every "key => value" line in the section.
    lines = [
        entry.split("=>")[1].strip()
        for entry in array.split("\n")
        if "=>" in entry
    ]
    hg_present = any("[HerausgeberIn]" in line for line in lines)
    verf_present = any("[VerfasserIn]" in line for line in lines)

    # Without any [VerfasserIn] entry nothing is selected.
    if not verf_present:
        return ""

    # The role to keep is the same for every line, so decide it once.
    role = "[HerausgeberIn]" if hg_present else "[VerfasserIn]"
    return ";".join(
        line.split("[")[0].strip() for line in lines if role in line
    )
return BookData( return BookData(
ppn=_get_line(data, "[kid]"), ppn=_get_line(data, "[kid]"),
title=_get_line(data, "[ti_long]").split("/")[0].strip(), title=_get_line(data, "[ti_long]").split("/")[0].strip(),
author=_get_list_entry(data, "[au]", "[0]"), author=_get_author(data),
edition=_get_list_entry(data, "[ausgabe]", "[0]").replace(",", ""), edition=_get_list_entry(data, "[ausgabe]", "[0]").replace(",", ""),
link=f"https://rds.ibs-bw.de/phfreiburg/link?kid={_get_line(data,'[kid]')}", link=f"https://rds.ibs-bw.de/phfreiburg/link?kid={_get_line(data,'[kid]')}",
isbn=_get_isbn(data), isbn=_get_isbn(data),
# [self._get_list_entry(data,"[isbn]","[0]"),self._get_list_entry(data,"[is]","[1]")], # [self._get_list_entry(data,"[isbn]","[0]"),self._get_list_entry(data,"[is]","[1]")],
language=_get_list_entry(data, "[la_facet]", "[0]"), language=_get_list_entry(data, "[la_facet]", "[0]"),
publisher=_get_list_entry(data, "[hg]", "[0]"), publisher=_get_list_entry(data, "[pu]", "[0]"),
year=_get_line(data, "[py]"), year=_get_list_entry(data, "[py_display]", "[0]"),
pages=_get_list_entry(data, "[umfang]", "[0]").split(":")[0].strip(), pages=_get_list_entry(data, "[umfang]", "[0]").split(":")[0].strip(),
signature=_get_signature(data),
place=_get_list_entry(data, "[pp]", "[0]"),
) )
@@ -301,6 +346,101 @@ class RDSData:
return {"rds_availability": self.retlist[0], "rds_data": self.retlist[1]} return {"rds_availability": self.retlist[0], "rds_data": self.retlist[1]}
class DictToTable:
    """Map a mode-specific input dict onto one uniform template dict.

    The input dict carries a ``"mode"`` key (``"book"``, ``"hg"`` or ``"zs"``)
    and fields prefixed with that mode (``book_title``, ``hg_title``,
    ``zs_title``, ...). The result always uses the same key names
    (``work_author``, ``section_author``, ``year``, ...); keys whose value is
    ``None`` are left out.
    """

    def __init__(self):
        self.work_author = None
        self.section_author = None
        self.year = None
        self.edition = None
        self.work_title = None
        self.chapter_title = None
        self.location = None
        self.publisher = None
        self.signature = None
        self.type = None
        self.pages = None
        self.issue = None
        self.isbn = None

    def makeResult(self):
        """Return the currently stored fields as a dict, omitting ``None`` values."""
        data = {
            "work_author": self.work_author,
            "section_author": self.section_author,
            "year": self.year,
            "edition": self.edition,
            "work_title": self.work_title,
            "chapter_title": self.chapter_title,
            "location": self.location,
            "publisher": self.publisher,
            "signature": self.signature,
            "issue": self.issue,
            "pages": self.pages,
            "isbn": self.isbn,
            "type": self.type,
        }
        return {key: value for key, value in data.items() if value is not None}

    def reset(self):
        """Set every instance attribute back to ``None``."""
        for key in self.__dict__:
            setattr(self, key, None)

    def transform(self, data: dict):
        """Dispatch ``data`` to the assigner matching ``data["mode"]``.

        Args:
            data: Input dict; must contain ``"mode"`` plus the keys read by
                the matching ``*_assign`` method.

        Returns:
            The uniform dict for modes ``"book"``, ``"hg"`` and ``"zs"``;
            ``None`` for any other mode (stored fields are still cleared).

        Raises:
            KeyError: If ``"mode"`` or a field required by the mode is missing.
        """
        mode = data["mode"]
        self.reset()
        if mode == "book":
            return self.book_assign(data)
        if mode == "hg":
            return self.hg_assign(data)
        if mode == "zs":
            return self.zs_assign(data)
        return None

    def book_assign(self, data) -> dict:
        """Fill the template from the ``book_*`` keys of ``data`` and return it.

        Raises:
            KeyError: If one of the ``book_*`` keys is missing.
        """
        # Clear leftovers so a direct call after another mode cannot leak
        # fields this mode never sets (e.g. chapter_title).
        self.reset()
        self.type = "book"
        self.work_author = data["book_author"]
        self.signature = data["book_signature"]
        self.location = data["book_place"]
        self.year = data["book_year"]
        self.work_title = data["book_title"]
        self.edition = data["book_edition"]
        self.pages = data["book_pages"]
        self.publisher = data["book_publisher"]
        self.isbn = data["book_isbn"]
        return self.makeResult()

    def hg_assign(self, data) -> dict:
        """Fill the template from the ``hg_*`` keys of ``data`` and return it.

        ``hg_author`` becomes ``section_author`` and ``hg_editor`` becomes
        ``work_author``.

        Raises:
            KeyError: If one of the ``hg_*`` keys is missing.
        """
        # Clear leftovers from any previous assignment (see book_assign).
        self.reset()
        self.type = "hg"
        self.section_author = data["hg_author"]
        self.work_author = data["hg_editor"]
        self.year = data["hg_year"]
        self.work_title = data["hg_title"]
        self.publisher = data["hg_publisher"]
        self.location = data["hg_place"]
        self.edition = data["hg_edition"]
        self.chapter_title = data["hg_chaptertitle"]
        self.pages = data["hg_pages"]
        self.signature = data["hg_signature"]
        self.isbn = data["hg_isbn"]
        return self.makeResult()

    def zs_assign(self, data) -> dict:
        """Fill the template from the ``zs_*`` keys of ``data`` and return it.

        ``zs_author`` becomes ``section_author`` and ``zs_chapter_title``
        becomes ``chapter_title``.

        Raises:
            KeyError: If one of the ``zs_*`` keys is missing.
        """
        # Clear leftovers from any previous assignment (see book_assign).
        self.reset()
        self.type = "zs"
        self.section_author = data["zs_author"]
        self.chapter_title = data["zs_chapter_title"]
        self.location = data["zs_place"]
        self.issue = data["zs_issue"]
        self.pages = data["zs_pages"]
        self.publisher = data["zs_publisher"]
        self.isbn = data["zs_isbn"]
        self.year = data["zs_year"]
        self.signature = data["zs_signature"]
        self.work_title = data["zs_title"]
        return self.makeResult()
if __name__ == "__main__": if __name__ == "__main__":
with open("daiadata", "r") as f: with open("daiadata", "r") as f:
data = f.read() data = f.read()