From 2bcfe3121fb4888f35938ecb5490544cb50d4e47 Mon Sep 17 00:00:00 2001 From: WorldTeacher <41587052+WorldTeacher@users.noreply.github.com> Date: Mon, 3 Jun 2024 12:27:19 +0200 Subject: [PATCH] add costom data parser --- src/transform.py | 97 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 src/transform.py diff --git a/src/transform.py b/src/transform.py new file mode 100644 index 0000000..d2a5843 --- /dev/null +++ b/src/transform.py @@ -0,0 +1,97 @@ +import datetime +import json + +now = datetime.datetime.now().strftime("%Y-%m-%d") + + +def createDateList(start_date, end_date): + start = datetime.datetime.strptime(start_date, "%Y-%m-%d") + end = datetime.datetime.strptime(end_date, "%Y-%m-%d") + # add a day to the end date to include it in the list + end += datetime.timedelta(days=1) + # remove a day from the start date to include it in the list + start -= datetime.timedelta(days=1) + date_generated = [ + start + datetime.timedelta(days=x) for x in range(0, (end - start).days) + ] + date_generated = [date.strftime("%Y-%m-%d") for date in date_generated] + return date_generated + + +class Transform: + def __init__(self, data_source): + self.data_source = data_source + self.data = None + + def load_data(self): + with open(self.data_source, "r") as file: + self.data = json.load(file) + return self + + def transform_data( + self, addMissing=False, start_date="2024-04-05", end_date=now, split=False + ): + """Take the raw data and transform it into a format that can be used by the diagram generator. + There should be one entry per sensor, where x is represented by date and y is represented by the number of activations. If the activations are equal or less than 1 in the on-state, set the sensor data to 0 for that day + """ + + sensors = [] + for key, value in self.data.items(): + sensors.append({"id": key, "value": value}) + temp = {} + + start_date = start_date + end_date = end_date + # create a list of all dates between the start and end date + all_dates = createDateList(start_date, end_date) + for sensor in sensors: + tmp = {} + name = sensor["id"] + tmp[name] = {"x": [], "y": []} + sensor_data = sensor["value"] + sensor_dates = list(sensor_data.keys()) + print(len(all_dates)) + + for date in all_dates: + # print(date) + if date not in sensor_dates: + # print("Date not in sensor data", name, date) + if addMissing: + tmp[name]["x"].append(date) + tmp[name]["y"].append(0) + + else: + on_state = len(sensor_data[date]["on"]) + off_state = len(sensor_data[date]["off"]) + activations = (on_state + off_state) / 2 + if on_state != off_state: + # print("Error: On and off states are not equal", name, date) + tmp[name]["x"].append(date) + tmp[name]["y"].append(activations if activations > 1 else 0) + else: + + # add the date to the tmp dictionary as x and the number of activations, divided by two as y + activations = (on_state + off_state) / 2 + tmp[name]["x"].append(date) + tmp[name]["y"].append(activations if activations > 1 else 0) + # print(tmp) + # if split: + # #remove all values that are not in the range of the start and end date + # for i in range(len(tmp[name]["x"])): + # if tmp[name]["x"][i] <= start_date or tmp[name]["x"][i] >= end_date: + # print("Removing value", tmp[name]["x"][i], tmp[name]["y"][i]) + # tmp[name]["x"][i] = None + # tmp[name]["y"][i] = None + # tmp[name]["x"] = [x for x in tmp[name]["x"] if x] + # tmp[name]["y"] = [y for y in tmp[name]["y"] if y] + temp.update(tmp) + + return temp + + +if __name__ == "__main__": + transform = Transform("data.json") + transform.load_data() + result = transform.transform_data(True, end_date="2024-04-10", split=True) + print(result) + # print(createDateList("2024-01-01", "2024-03-04"))