add custom data parser

This commit is contained in:
WorldTeacher
2024-06-03 12:27:19 +02:00
parent f053d5e1c4
commit 2bcfe3121f

97
src/transform.py Normal file
View File

@@ -0,0 +1,97 @@
import datetime
import json
# Today's local date as YYYY-MM-DD, computed once when the module is imported.
now = datetime.date.today().isoformat()
def createDateList(start_date, end_date):
    """Return every calendar day from ``start_date`` to ``end_date``, inclusive.

    Args:
        start_date: First day of the range, formatted as ``YYYY-MM-DD``.
        end_date: Last day of the range, formatted as ``YYYY-MM-DD``.

    Returns:
        A list of ``YYYY-MM-DD`` strings in ascending order. The list is
        empty when ``end_date`` lies before ``start_date``.

    Raises:
        ValueError: If either argument does not match ``%Y-%m-%d``.
    """
    date_format = "%Y-%m-%d"
    start = datetime.datetime.strptime(start_date, date_format)
    end = datetime.datetime.strptime(end_date, date_format)
    # range() excludes its stop value, so one extra day is needed for the end
    # date to appear. The start date sits at offset 0 and is therefore already
    # included; moving it back a day would leak the preceding day into the
    # result.
    day_count = (end - start).days + 1
    return [
        (start + datetime.timedelta(days=offset)).strftime(date_format)
        for offset in range(day_count)
    ]
class Transform:
    """Turn raw per-sensor on/off records into per-sensor x/y series.

    The raw data is read from a JSON file. ``transform_data`` indexes it as
    ``data[sensor_name][date]["on"]`` and ``data[sensor_name][date]["off"]``
    and takes ``len()`` of both, so each must be a sized collection.
    """

    def __init__(self, data_source):
        """Remember where the raw data lives; nothing is read yet.

        Args:
            data_source: Path of the JSON file that ``load_data`` opens.
        """
        self.data_source = data_source
        # Populated by load_data(); stays None until then.
        self.data = None

    def load_data(self):
        """Parse the JSON file at ``self.data_source`` into ``self.data``.

        Returns:
            This instance, so calls can be chained.
        """
        # JSON text is UTF-8 by specification, so decode it explicitly rather
        # than relying on the platform's locale default.
        with open(self.data_source, "r", encoding="utf-8") as file:
            self.data = json.load(file)
        return self

    def transform_data(
        self, addMissing=False, start_date="2024-04-05", end_date=now, split=False
    ):
        """Build one x/y series per sensor for the diagram generator.

        For every date produced by ``createDateList(start_date, end_date)``
        the number of activations is the mean of the "on" and "off" event
        counts. Values of 1 or less are reported as 0.

        Args:
            addMissing: When true, dates without a record for a sensor are
                emitted with a value of 0; otherwise they are skipped.
            start_date: First date of the range, ``YYYY-MM-DD``.
            end_date: Last date of the range, ``YYYY-MM-DD``. Defaults to
                the module-level ``now`` value.
            split: Currently unused; kept so existing callers keep working.

        Returns:
            A dict mapping each sensor name to ``{"x": [...], "y": [...]}``,
            where ``x`` holds the date strings and ``y`` the matching
            activation values.

        Raises:
            KeyError: If a dated record lacks an "on" or "off" entry.
        """
        all_dates = createDateList(start_date, end_date)
        result = {}
        for name, sensor_data in self.data.items():
            dates = []
            values = []
            for date in all_dates:
                # Dict membership is a constant-time lookup; no key list needed.
                if date not in sensor_data:
                    if addMissing:
                        dates.append(date)
                        values.append(0)
                    continue
                record = sensor_data[date]
                # The mean of both counts is used whether or not they match,
                # so an unmatched on/off event still contributes half an
                # activation.
                activations = (len(record["on"]) + len(record["off"])) / 2
                dates.append(date)
                values.append(activations if activations > 1 else 0)
            result[name] = {"x": dates, "y": values}
        return result
if __name__ == "__main__":
    # Ad-hoc run: read data.json from the working directory and print the
    # transformed series up to 2024-04-10, zero-filling missing dates.
    transform = Transform("data.json").load_data()
    result = transform.transform_data(
        addMissing=True, end_date="2024-04-10", split=True
    )
    print(result)