From ed0acb7863e0d32293b190b07e28efaeba3acc2c Mon Sep 17 00:00:00 2001 From: WorldTeacher <41587052+WorldTeacher@users.noreply.github.com> Date: Wed, 26 Jun 2024 16:36:25 +0200 Subject: [PATCH] rework arraydata transformer new functions, fixes add DicttoTable -> generates uniform template dict based on type of entry --- src/transformers/transformers.py | 154 +++++++++++++++++++++++++++++-- 1 file changed, 147 insertions(+), 7 deletions(-) diff --git a/src/transformers/transformers.py b/src/transformers/transformers.py index c79f748..463e695 100644 --- a/src/transformers/transformers.py +++ b/src/transformers/transformers.py @@ -34,7 +34,7 @@ class Item: locationhref: str | None = dataclass_field(default_factory=str) location: str | None = dataclass_field(default_factory=str) - def from_dict(self, data: dict) -> self: + def from_dict(self, data: dict): """Import data from dict""" data = data["items"] for entry in data: @@ -50,7 +50,7 @@ class RDS_AVAIL_DATA: library_sigil: str = dataclass_field(default_factory=str) items: List[Item] = dataclass_field(default_factory=list) - def import_from_dict(self, data: str) -> self: + def import_from_dict(self, data: str): """Import data from dict""" edata = json.loads(data) # library sigil is first key @@ -123,16 +123,18 @@ class ARRAYData: def transform(self, data: str) -> BookData: def _get_line(source: str, search: str) -> str: try: - return ( + data = ( source.split(search)[1] .split("\n")[0] .strip() .replace("=>", "") .strip() ) + return data except Exception: - logger.log_exception("ARRAYData.transform failed") + print(f"ARRAYData.transform failed, {source}, {search}") + logger.log_exception(f"ARRAYData.transform failed, {source}, {search}") return "" def _get_list_entry(source: str, search: str, entry: str) -> str: @@ -160,18 +162,61 @@ class ARRAYData: isbn = [] return isbn + def _get_signature(data): + try: + sig_data = ( + data.split("[loksatz]")[1] + .split("[0] => ")[1] + .split("\n")[0] + .strip() + ) + signature_data = eval(sig_data) + return signature_data["signatur"] + except Exception as e: + return None + + def _get_author(data): + try: + array = data.split("[au_display_short]")[1].split(")\n")[0].strip() + except Exception as e: + return "" + entries = array.split("\n") + authors = [] + hg_present = False + verf_present = False + lines = [] + for entry in entries: + if "=>" in entry: + line = entry.split("=>")[1].strip() + if "[HerausgeberIn]" in line: + hg_present = True + if "[VerfasserIn]" in line: + verf_present = True + lines.append(line) + for line in lines: + if hg_present and verf_present: + if "[HerausgeberIn]" in line: + authors.append(line.split("[")[0].strip()) + elif verf_present: + if "[VerfasserIn]" in line: + authors.append(line.split("[")[0].strip()) + else: + pass + return ";".join(authors) return BookData( ppn=_get_line(data, "[kid]"), title=_get_line(data, "[ti_long]").split("/")[0].strip(), - author=_get_list_entry(data, "[au]", "[0]"), + author=_get_author(data), edition=_get_list_entry(data, "[ausgabe]", "[0]").replace(",", ""), link=f"https://rds.ibs-bw.de/phfreiburg/link?kid={_get_line(data,'[kid]')}", isbn=_get_isbn(data), # [self._get_list_entry(data,"[isbn]","[0]"),self._get_list_entry(data,"[is]","[1]")], language=_get_list_entry(data, "[la_facet]", "[0]"), - publisher=_get_list_entry(data, "[hg]", "[0]"), - year=_get_line(data, "[py]"), + publisher=_get_list_entry(data, "[pu]", "[0]"), + year=_get_list_entry(data, "[py_display]", "[0]"), pages=_get_list_entry(data, "[umfang]", "[0]").split(":")[0].strip(), + signature=_get_signature(data), + place=_get_list_entry(data, "[pp]", "[0]"), ) @@ -301,6 +346,101 @@ class RDSData: return {"rds_availability": self.retlist[0], "rds_data": self.retlist[1]} +class DictToTable: + def __init__(self): + self.work_author = None + self.section_author = None + self.year = None + self.edition = None + self.work_title = None + self.chapter_title = None + self.location = None + self.publisher = None + self.signature = None + self.type = None + self.pages = None + self.issue = None + self.isbn = None + + def makeResult(self): + data = { + "work_author": self.work_author, + "section_author": self.section_author, + "year": self.year, + "edition": self.edition, + "work_title": self.work_title, + "chapter_title": self.chapter_title, + "location": self.location, + "publisher": self.publisher, + "signature": self.signature, + "issue": self.issue, + "pages": self.pages, + "isbn": self.isbn, + "type": self.type, + } + data = {k: v for k, v in data.items() if v is not None} + return data + + def reset(self): + for key in self.__dict__: + setattr(self, key, None) + + def transform(self, data: dict): + mode = data["mode"] + self.reset() + if mode == "book": + return self.book_assign(data) + elif mode == "hg": + return self.hg_assign(data) + elif mode == "zs": + return self.zs_assign(data) + else: + return None + + def book_assign(self, data): + self.type = "book" + self.work_author = data["book_author"] + self.signature = data["book_signature"] + self.location = data["book_place"] + self.year = data["book_year"] + self.work_title = data["book_title"] + self.edition = data["book_edition"] + self.pages = data["book_pages"] + self.publisher = data["book_publisher"] + self.isbn = data["book_isbn"] + return self.makeResult() + + def hg_assign(self, data): + self.type = "hg" + self.section_author = data["hg_author"] + self.work_author = data["hg_editor"] + self.year = data["hg_year"] + self.work_title = data["hg_title"] + self.publisher = data["hg_publisher"] + self.location = data["hg_place"] + self.edition = data["hg_edition"] + self.chapter_title = data["hg_chaptertitle"] + self.pages = data["hg_pages"] + self.signature = data["hg_signature"] + self.isbn = data["hg_isbn"] + return self.makeResult() + + def zs_assign(self, data): + self.type = "zs" + self.section_author = data["zs_author"] + self.chapter_title = data["zs_chapter_title"] + self.location = data["zs_place"] + self.issue = data["zs_issue"] + self.pages = data["zs_pages"] + self.publisher = data["zs_publisher"] + self.isbn = data["zs_isbn"] + + self.year = data["zs_year"] + self.signature = data["zs_signature"] + self.work_title = data["zs_title"] + return self.makeResult() + + if __name__ == "__main__": with open("daiadata", "r") as f: data = f.read()