diff --git a/src/logic/wordparser.py b/src/logic/wordparser.py index 6f3131c..99fba0c 100644 --- a/src/logic/wordparser.py +++ b/src/logic/wordparser.py @@ -1,6 +1,6 @@ import pandas as pd from docx import Document - +letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" def word_docx_to_csv(path) -> pd.DataFrame: doc = Document(path) @@ -28,6 +28,8 @@ def word_docx_to_csv(path) -> pd.DataFrame: def elsa_word_to_csv(path) -> list[tuple]: doc = Document(path) + # use the text of the last non-empty paragraph as the document type + doctype = [para.text for para in doc.paragraphs if para.text != ""][-1] tables = doc.tables m_data = [] @@ -53,4 +55,11 @@ def elsa_word_to_csv(path) -> list[tuple]: if row != ("", "", "", "", "", "", "", "", "") ] - return data + return data, doctype + + +if __name__ == "__main__": + else_df = elsa_word_to_csv( + "c:/Users/aky547/Desktop/semap/formularsemhrsg2023_Bestellung Sahrai_Hurrelmann et al.Referenzwerk Prävention.docx" + ) + print(else_df) \ No newline at end of file