Files
SemesterapparatsManager/src/parsers/csv_parser.py

29 lines
870 B
Python

import csv
from pathlib import Path
from charset_normalizer import detect
from src.core.models import Book, SemapDocument
def csv_to_list(path: str) -> SemapDocument:
    """Extract the data from a csv file and return it as a minimal SemapDocument.

    Only the first column of each row is used: it becomes the ``signature``
    of one ``Book``, with every double-quote character removed. The file is
    parsed with ``;`` as the field delimiter and ``|`` as the quote character.

    Args:
        path: Location of the CSV file to read.

    Returns:
        A ``SemapDocument`` whose ``books`` list holds one ``Book`` per
        non-empty row, in file order.
    """
    csv_path = Path(path)
    # read_bytes() opens and closes the file itself, so no handle is left
    # dangling while the encoding is being sniffed.
    encoding = detect(csv_path.read_bytes())["encoding"]
    # NOTE(review): if detection yields None, open() falls back to the
    # platform's default text encoding -- confirm that is acceptable.
    with csv_path.open(newline="", encoding=encoding) as csvfile:
        reader = csv.reader(csvfile, delimiter=";", quotechar="|")
        # csv.reader yields an empty list for a blank line; skip those rows
        # instead of failing on row[0].
        signatures = [row[0].replace('"', "") for row in reader if row]
    books = [Book(signature=signature) for signature in signatures]
    return SemapDocument(books=books)
if __name__ == "__main__":
    # Manual smoke run against a local sample export. The parsed document is
    # kept in `text` but intentionally not printed.
    sample_csv = "C:/Users/aky547/Desktop/semap/sap71.csv"
    text = csv_to_list(sample_csv)