Rework ARRAYData transformer

New functions and fixes; add DictToTable, which generates a uniform template dict based on the type of entry
2024-06-26 16:36:25 +02:00
parent 070847a097
commit ed0acb7863
1 changed file with 147 additions and 7 deletions
--- a/src/transformers/transformers.py
+++ b/src/transformers/transformers.py
@@ -34,7 +34,7 @@ class Item:
    locationhref: str | None = dataclass_field(default_factory=str)
    location: str | None = dataclass_field(default_factory=str)

-    def from_dict(self, data: dict) -> self:
+    def from_dict(self, data: dict):
        """Import data from dict"""
        data = data["items"]
        for entry in data:
@@ -50,7 +50,7 @@ class RDS_AVAIL_DATA:
    library_sigil: str = dataclass_field(default_factory=str)
    items: List[Item] = dataclass_field(default_factory=list)

-    def import_from_dict(self, data: str) -> self:
+    def import_from_dict(self, data: str):
        """Import data from dict"""
        edata = json.loads(data)
        # library sigil is first key
@@ -123,16 +123,18 @@ class ARRAYData:
    def transform(self, data: str) -> BookData:
        def _get_line(source: str, search: str) -> str:
            try:
-                return (
+                data = (
                    source.split(search)[1]
                    .split("\n")[0]
                    .strip()
                    .replace("=>", "")
                    .strip()
                )
+                return data

            except Exception:
-                logger.log_exception("ARRAYData.transform failed")
+                print(f"ARRAYData.transform failed, {source}, {search}")
+                logger.log_exception(f"ARRAYData.transform failed, {source}, {search}")
                return ""

        def _get_list_entry(source: str, search: str, entry: str) -> str:
@@ -160,18 +162,61 @@ class ARRAYData:
                isbn = []
                return isbn

+        def _get_signature(data):
+            try:
+                sig_data = (
+                    data.split("[loksatz]")[1]
+                    .split("[0] => ")[1]
+                    .split("\n")[0]
+                    .strip()
+                )
+                signature_data = eval(sig_data)
+                return signature_data["signatur"]
+            except Exception as e:
+                return None
+
+        def _get_author(data):
+            try:
+                array = data.split("[au_display_short]")[1].split(")\n")[0].strip()
+            except Exception as e:
+                return ""
+            entries = array.split("\n")
+            authors = []
+            hg_present = False
+            verf_present = False
+            lines = []
+            for entry in entries:
+                if "=>" in entry:
+                    line = entry.split("=>")[1].strip()
+                    if "[HerausgeberIn]" in line:
+                        hg_present = True
+                    if "[VerfasserIn]" in line:
+                        verf_present = True
+                    lines.append(line)
+            for line in lines:
+                if hg_present and verf_present:
+                    if "[HerausgeberIn]" in line:
+                        authors.append(line.split("[")[0].strip())
+                elif verf_present:
+                    if "[VerfasserIn]" in line:
+                        authors.append(line.split("[")[0].strip())
+                else:
+                    pass
+            return ";".join(authors)
        return BookData(
            ppn=_get_line(data, "[kid]"),
            title=_get_line(data, "[ti_long]").split("/")[0].strip(),
-            author=_get_list_entry(data, "[au]", "[0]"),
+            author=_get_author(data),
            edition=_get_list_entry(data, "[ausgabe]", "[0]").replace(",", ""),
            link=f"https://rds.ibs-bw.de/phfreiburg/link?kid={_get_line(data,'[kid]')}",
            isbn=_get_isbn(data),
            # [self._get_list_entry(data,"[isbn]","[0]"),self._get_list_entry(data,"[is]","[1]")],
            language=_get_list_entry(data, "[la_facet]", "[0]"),
-            publisher=_get_list_entry(data, "[hg]", "[0]"),
-            year=_get_line(data, "[py]"),
+            publisher=_get_list_entry(data, "[pu]", "[0]"),
+            year=_get_list_entry(data, "[py_display]", "[0]"),
            pages=_get_list_entry(data, "[umfang]", "[0]").split(":")[0].strip(),
+            signature=_get_signature(data),
+            place=_get_list_entry(data, "[pp]", "[0]"),
        )


@@ -301,6 +346,101 @@ class RDSData:
            return {"rds_availability": self.retlist[0], "rds_data": self.retlist[1]}


+class DictToTable:
+    def __init__(self):
+        self.work_author = None
+        self.section_author = None
+        self.year = None
+        self.edition = None
+        self.work_title = None
+        self.chapter_title = None
+        self.location = None
+        self.publisher = None
+        self.signature = None
+        self.type = None
+        self.pages = None
+        self.issue = None
+        self.isbn = None
+
+    def makeResult(self):
+        data = {
+            "work_author": self.work_author,
+            "section_author": self.section_author,
+            "year": self.year,
+            "edition": self.edition,
+            "work_title": self.work_title,
+            "chapter_title": self.chapter_title,
+            "location": self.location,
+            "publisher": self.publisher,
+            "signature": self.signature,
+            "issue": self.issue,
+            "pages": self.pages,
+            "isbn": self.isbn,
+            "type": self.type,
+        }
+        data = {k: v for k, v in data.items() if v is not None}
+        return data
+
+    def reset(self):
+        for key in self.__dict__:
+            setattr(self, key, None)
+
+    def transform(self, data: dict):
+        mode = data["mode"]
+        self.reset()
+        if mode == "book":
+            return self.book_assign(data)
+        elif mode == "hg":
+            return self.hg_assign(data)
+        elif mode == "zs":
+            return self.zs_assign(data)
+        else:
+            return None
+
+    def book_assign(self, data):
+        self.type = "book"
+        self.work_author = data["book_author"]
+        self.signature = data["book_signature"]
+        self.location = data["book_place"]
+        self.year = data["book_year"]
+        self.work_title = data["book_title"]
+        self.edition = data["book_edition"]
+        self.pages = data["book_pages"]
+        self.publisher = data["book_publisher"]
+        self.isbn = data["book_isbn"]
+        return self.makeResult()
+
+    def hg_assign(self, data):
+        self.type = "hg"
+        self.section_author = data["hg_author"]
+        self.work_author = data["hg_editor"]
+        self.year = data["hg_year"]
+        self.work_title = data["hg_title"]
+        self.publisher = data["hg_publisher"]
+        self.location = data["hg_place"]
+        self.edition = data["hg_edition"]
+        self.chapter_title = data["hg_chaptertitle"]
+        self.pages = data["hg_pages"]
+        self.signature = data["hg_signature"]
+        self.isbn = data["hg_isbn"]
+        return self.makeResult()
+
+    def zs_assign(self, data):
+        self.type = "zs"
+        self.section_author = data["zs_author"]
+        self.chapter_title = data["zs_chapter_title"]
+        self.location = data["zs_place"]
+        self.issue = data["zs_issue"]
+        self.pages = data["zs_pages"]
+        self.publisher = data["zs_publisher"]
+        self.isbn = data["zs_isbn"]
+
+        self.year = data["zs_year"]
+        self.signature = data["zs_signature"]
+        self.work_title = data["zs_title"]
+        return self.makeResult()
+
+
 if __name__ == "__main__":
    with open("daiadata", "r") as f:
        data = f.read()