chore(codebase): import fixes, restructuring
This commit is contained in:
@@ -1,23 +1,27 @@
|
||||
import csv
|
||||
from pathlib import Path
|
||||
|
||||
from charset_normalizer import detect
|
||||
|
||||
from src.core.models import Book, SemapDocument
|
||||
|
||||
def csv_to_list(path: str) -> SemapDocument:
    """Extract the data from a csv file and return it as a minimal SemapDocument.

    The file is parsed with ``;`` as delimiter and ``|`` as quote character.
    Only the first field of every row is used: double quotes are removed from
    it and the result becomes the ``signature`` of one ``Book``.

    Args:
        path: Location of the csv file to read.

    Returns:
        A ``SemapDocument`` whose ``books`` holds one ``Book`` per non-empty
        row, in file order.
    """
    csv_path = Path(path)
    # read_bytes() opens and closes the file itself; a bare open(...).read()
    # would leave the handle open until garbage collection.
    encoding = detect(csv_path.read_bytes())["encoding"]
    with csv_path.open(newline="", encoding=encoding) as csvfile:
        reader = csv.reader(csvfile, delimiter=";", quotechar="|")
        # csv.reader yields an empty list for blank lines; skip those so that
        # row[0] cannot raise IndexError.
        signatures = [row[0].replace('"', "") for row in reader if row]

    books = [Book(signature=signature) for signature in signatures]

    return SemapDocument(books=books)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Manual smoke test: parse a local sample file and print the result.
    # The path is specific to the author's machine.
    document = csv_to_list("C:/Users/aky547/Desktop/semap/sap71.csv")
    print(document)
|
||||
|
||||
Reference in New Issue
Block a user