46 lines
1.1 KiB
Python
46 lines
1.1 KiB
Python
import csv
|
|
|
|
import pandas as pd
|
|
from docx import Document
|
|
|
|
|
|
def csv_to_list(path: str) -> list[str]:
    """
    Return the first column of a semicolon-delimited CSV file.

    The file is parsed with ``;`` as the delimiter and ``|`` as the quote
    character, so double quotes are not interpreted by the parser; they are
    removed from the returned values afterwards.

    Args:
        path: Location of the CSV file to read.

    Returns:
        One string per non-empty row: the row's first field with every
        ``"`` character removed. Blank lines in the file are skipped.
    """
    with open(path, newline="") as csvfile:
        reader = csv.reader(csvfile, delimiter=";", quotechar="|")
        # csv.reader yields an empty list for a blank line; indexing it
        # would raise IndexError, so such rows are filtered out.
        return [row[0].replace('"', "") for row in reader if row]
|
|
|
|
|
|
def word_docx_to_csv(path, table_index: int = 2) -> pd.DataFrame:
    """
    Extract one table from a Word document as a pandas DataFrame.

    Despite the function name, no CSV is produced; the table is returned
    as a DataFrame. The first row of the table supplies the column labels
    and the remaining rows become the data.

    Args:
        path: Passed unchanged to ``docx.Document``.
        table_index: Position of the table within ``Document(path).tables``.
            Defaults to 2 (the third table), the value that was previously
            hard-coded.

    Returns:
        A DataFrame holding the table's rows after the header row, with
        newline characters removed from every cell's text. An empty
        DataFrame is returned if the selected table has no rows.

    Raises:
        IndexError: If the document has no table at ``table_index``.
    """
    tables = Document(path).tables
    try:
        table = tables[table_index]
    except IndexError:
        raise IndexError(
            f"table_index {table_index} is out of range: "
            f"document has {len(tables)} table(s)"
        ) from None

    # Only the requested table is converted; the other tables are never
    # touched, so a malformed table elsewhere cannot break this call.
    rows = [
        [cell.text.replace("\n", "") for cell in row.cells]
        for row in table.rows
    ]
    if not rows:
        # No header row to promote; avoid indexing into an empty frame.
        return pd.DataFrame()

    df = pd.DataFrame(rows)
    # Promote the first row to column labels, then drop it from the data.
    # The remaining rows keep their original positional labels (1, 2, ...).
    df.columns = df.iloc[0]
    return df.iloc[1:]
|