import csv
from pathlib import Path

from charset_normalizer import detect

from src.core.models import Book, SemapDocument


def csv_to_list(path: str) -> SemapDocument:
    """Read the first column of a CSV file into a minimal SemapDocument.

    The file is parsed with ``;`` as the delimiter and ``|`` as the quote
    character. For every non-empty row, the first field is taken, all
    double-quote characters are stripped from it, and the result becomes
    the ``signature`` of a ``Book``.

    Args:
        path: Location of the CSV file on disk.

    Returns:
        A ``SemapDocument`` whose ``books`` hold one ``Book`` per non-empty
        row, in file order.

    Raises:
        OSError: If the file cannot be opened or read.
        UnicodeDecodeError: If the file cannot be decoded with the detected
            encoding.
    """
    csv_path = Path(path)

    # Guess the encoding from the raw bytes. read_bytes() opens and closes
    # the file itself, so no handle is left dangling.
    # NOTE(review): detect() may report no encoding (None) for empty or
    # undetectable input; open() then uses the platform default -- confirm
    # that fallback is acceptable.
    encoding = detect(csv_path.read_bytes())["encoding"]

    # newline="" lets the csv module do its own line-ending handling.
    with csv_path.open(newline="", encoding=encoding) as csvfile:
        reader = csv.reader(csvfile, delimiter=";", quotechar="|")
        # csv.reader yields an empty list for blank lines; skip those so
        # indexing the first column cannot raise IndexError.
        signatures = [row[0].replace('"', "") for row in reader if row]

    books = [Book(signature=signature) for signature in signatures]
    return SemapDocument(books=books)


if __name__ == "__main__":
    document = csv_to_list("C:/Users/aky547/Desktop/semap/sap71.csv")
    print(document)