chore(codebase): import fixes, restructuring

This commit is contained in:
2025-12-01 14:24:24 +01:00
parent 6523ad655c
commit 05289ef244
58 changed files with 2692 additions and 6574 deletions

View File

@@ -1,23 +1,27 @@
import csv
from pathlib import Path
from charset_normalizer import detect
from src.core.models import Book, SemapDocument
def csv_to_list(path: str) -> SemapDocument:
    """Read the first column of a semicolon-delimited CSV into a SemapDocument.

    The file's encoding is guessed from its raw bytes with
    ``charset_normalizer.detect`` and the file is then parsed with
    ``csv.reader`` using ``;`` as delimiter and ``|`` as quote character.
    Only the first field of each row is kept; any double-quote characters
    in it are removed.

    Args:
        path: Location of the CSV file to read.

    Returns:
        A ``SemapDocument`` whose ``books`` list holds one ``Book`` per
        non-empty row, with ``signature`` set to the cleaned first field.
    """
    source = Path(path)
    # read_bytes() opens and closes the file itself, so no handle is left
    # dangling while the encoding is being sniffed.
    encoding = detect(source.read_bytes())["encoding"]
    # NOTE(review): no ``errors=`` handler is set, so bytes that cannot be
    # decoded with the detected encoding raise instead of mapping to "".
    with source.open(newline="", encoding=encoding) as csvfile:
        reader = csv.reader(csvfile, delimiter=";", quotechar="|")
        # csv.reader yields an empty list for blank lines; skip those rows
        # because they have no first column to index.
        signatures = [row[0].replace('"', "") for row in reader if row]
    books = [Book(signature=signature) for signature in signatures]
    return SemapDocument(books=books)
if __name__ == "__main__":
    import sys

    # Manual smoke test: parse one CSV and dump the resulting document.
    # A path given on the command line overrides the developer-local default.
    default_csv = "C:/Users/aky547/Desktop/semap/sap71.csv"
    csv_path = sys.argv[1] if len(sys.argv) > 1 else default_csv
    document = csv_to_list(csv_path)
    print(document)