add elsa word parser

2024-05-24 10:07:44 +02:00
parent c3ef7aedce
commit f204ed2b30
1 changed files with 56 additions and 26 deletions
--- a/src/logic/wordparser.py
+++ b/src/logic/wordparser.py
@@ -24,3 +24,33 @@ def word_docx_to_csv(path) -> pd.DataFrame:
    df = m_data[2]
    return df
 def elsa_word_to_csv(path) -> list[tuple]:
    """Return the non-empty data rows of the first table in a Word document.

    The document at ``path`` is opened with ``Document``. Only its first
    table is read: the first row of that table is treated as the header
    and is not part of the result. Newline characters and en spaces
    (U+2002) are removed from every cell text.

    Args:
        path: Location of the ``.docx`` file, passed straight to
            ``Document``.

    Returns:
        One tuple of cell strings per data row, in table order. Rows whose
        cells are all empty strings are dropped, whatever the column count.
        An empty list is returned when the table has no rows at all.

    Raises:
        IndexError: If the document contains no tables.
    """
    tables = Document(path).tables
    if not tables:
        # Same exception type the bare ``m_data[0]`` lookup used to raise,
        # but with a message that names the actual problem.
        raise IndexError("document contains no tables")

    # One C-level pass instead of chained ``replace`` calls.
    strip_chars = str.maketrans("", "", "\n\u2002")
    rows = [
        [cell.text.translate(strip_chars) for cell in row.cells]
        for row in tables[0].rows
    ]
    if not rows:
        return []

    # Build the frame once, after all rows are collected, then skip the
    # header row. Column labels are not set because ``itertuples`` with
    # ``name=None`` never exposes them.
    df = pd.DataFrame(rows).iloc[1:]
    return [
        row
        for row in df.itertuples(index=False, name=None)
        # Keep a row as soon as one cell differs from the empty string.
        if any(cell != "" for cell in row)
    ]