28 lines
819 B
Python
28 lines
819 B
Python
import csv
|
|
from pathlib import Path
|
|
|
|
from charset_normalizer import detect
|
|
|
|
from src.core.models import Book, SemapDocument
|
|
|
|
|
|
def csv_to_list(path: str) -> SemapDocument:
    """Extract the data from a csv file and return it as a minimal SemapDocument.

    The encoding is guessed from the file's raw bytes, then the file is parsed
    as ``;``-delimited CSV with ``|`` as the quote character. Only the first
    column of each row is used: double quotes are stripped from it and the
    result becomes the ``signature`` of one ``Book``.

    Args:
        path: Location of the CSV file to read.

    Returns:
        A ``SemapDocument`` whose ``books`` contains one ``Book`` per
        non-empty CSV row, in file order.
    """
    csv_path = Path(path)
    # read_bytes() opens and closes the file itself; the previous bare
    # open(...).read() left the handle open until garbage collection.
    # NOTE(review): if detection yields None, open() falls back to the
    # platform default encoding - confirm that is acceptable.
    encoding = detect(csv_path.read_bytes())["encoding"]
    with csv_path.open(newline="", encoding=encoding) as csvfile:
        reader = csv.reader(csvfile, delimiter=";", quotechar="|")
        # csv.reader yields an empty list for blank lines; skip those so
        # row[0] cannot raise IndexError.
        signatures = [row[0].replace('"', "") for row in reader if row]

    books = [Book(signature=signature) for signature in signatures]
    return SemapDocument(books=books)
|
|
|
|
|
|
if __name__ == "__main__":
    import sys

    # Manual smoke test: take the CSV path from the first command-line
    # argument when given, otherwise fall back to the original
    # developer-local sample file.
    csv_path = sys.argv[1] if len(sys.argv) > 1 else "C:/Users/aky547/Desktop/semap/sap71.csv"
    document = csv_to_list(csv_path)
    print(document)
|