-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathredd_preprocessing.py
108 lines (82 loc) · 3.04 KB
/
redd_preprocessing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
# Standard library
import glob
import warnings
import os

# Third-party
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pandas as pd
from IPython.display import display
from scipy.signal import medfilt
from sklearn import preprocessing

# pd.options.display.max_columns = 20
warnings.filterwarnings("ignore")
global label
def read_label(base_path='./low_freq', houses=range(1, 7)):
    """Parse the REDD ``labels.dat`` file of each house.

    Each line of ``labels.dat`` is ``<channel> <appliance>``; the channel
    number is mapped to a unique column name of the form
    ``<appliance>_<channel>_house<house>``.

    Parameters
    ----------
    base_path : str
        Root directory holding the ``house_<n>`` folders (generalized from
        the original hard-coded ``./low_freq``).
    houses : iterable of int
        House numbers to read (default 1..6, as in the original).

    Returns
    -------
    dict
        ``{house: {channel: column_name}}``.
    """
    label = {}
    for house in houses:
        labels_file = '{}/house_{}/labels.dat'.format(base_path, house)
        label[house] = {}
        with open(labels_file) as f:
            for line in f:
                parts = line.split(' ')
                channel, appliance = parts[0], parts[1]
                # Unique, house-qualified column name, e.g. 'oven_3_house1'.
                label[house][int(channel)] = (
                    appliance.strip() + '_' + channel + '_house' + str(house)
                )
    return label
# Parse every house's channel labels once at import time and echo them
# so the notebook user can eyeball the appliance -> column mapping.
labels = read_label()
for house_number in range(1, 7):
    print('House {}: '.format(house_number), labels[house_number], '\n')
def read_merge_data(house, path_template='./low_freq/house_{}/'):
    """Read every channel file of one REDD house into a single DataFrame.

    Channel 1 seeds the frame; every other ``channel_<i>.dat`` is
    inner-merged onto it on ``unix_time``, so only timestamps present in
    ALL channels survive. Column names come from the module-level
    ``labels`` dict built by ``read_label``.

    Parameters
    ----------
    house : int
        House number (key into ``labels``).
    path_template : str
        Format template for the house directory (generalized from the
        original hard-coded ``'./low_freq/house_{}/'``).

    Returns
    -------
    pandas.DataFrame
        Indexed by timestamp, one float64 column per appliance channel.
    """
    path = path_template.format(house)
    file = path + 'channel_1.dat'
    df = pd.read_csv(file, sep=' ', names=['unix_time', labels[house][1]],
                     dtype={'unix_time': 'int64', labels[house][1]: 'float64'})
    num_apps = len(glob.glob(path + 'channel*'))
    for i in range(2, num_apps + 1):
        file = path + 'channel_{}.dat'.format(i)
        data = pd.read_csv(file, sep=' ', names=['unix_time', labels[house][i]],
                           dtype={'unix_time': 'int64', labels[house][i]: 'float64'})
        # Inner join: drop any timestamp missing from this channel.
        df = pd.merge(df, data, how='inner', on='unix_time')
    # pd.to_datetime(..., unit='s') replaces the original
    # astype("datetime64[s]") int conversion, which is deprecated/removed
    # in modern pandas.
    df['timestamp'] = pd.to_datetime(df['unix_time'], unit='s')
    df = df.set_index(df['timestamp'].values)
    df.drop(['unix_time', 'timestamp'], axis=1, inplace=True)
    return df
def write_appliance_files(df, out_dir='./data/3T'):
    """Write each column of *df* to its own single-column CSV file.

    Parameters
    ----------
    df : pandas.DataFrame
        One appliance per column; the column name becomes the file name.
    out_dir : str
        Output directory (generalized from the original hard-coded
        ``./data/3T/``). Created if missing — the original crashed with
        FileNotFoundError when the directory did not exist.
    """
    os.makedirs(out_dir, exist_ok=True)
    for col in df.columns:
        df[col].to_csv(os.path.join(out_dir, str(col) + '.csv'), index=False)
def median_filter(x):
print(x)
x = x.to_numpy()
y = medfilt(x)
return y
def standardization(df):
    """Min-max scale every column of *df* into [0, 1].

    NOTE: despite the name (kept for existing callers) this performs
    min-max scaling via ``sklearn.preprocessing.MinMaxScaler``, not
    z-score standardization — see ``standar_scaler`` for that.

    Returns a new DataFrame with the same column names and a fresh
    integer index.
    """
    scaler = preprocessing.MinMaxScaler()
    scaled_values = scaler.fit_transform(df.values)
    return pd.DataFrame(scaled_values, columns=df.columns.to_list())
def standar_scaler(df):
    """Z-score standardize every column of *df*: (x - mean) / std.

    Bug fixed: the original did ``StandardScaler(df)``, passing the
    DataFrame to the constructor (whose parameters are keyword-only in
    modern scikit-learn) instead of to ``fit_transform`` — it never
    performed a valid fit. Reimplemented with numpy using the same
    semantics as ``StandardScaler``: population std (ddof=0), and
    zero-variance columns are left centered (divided by 1).

    Returns
    -------
    pandas.DataFrame
        Scaled copy with the same column names and a fresh integer index.
    """
    values = df.values.astype(float)
    mean = values.mean(axis=0)
    std = values.std(axis=0)  # ddof=0, matching sklearn StandardScaler
    std[std == 0] = 1.0       # constant columns: avoid division by zero
    x_scaled = (values - mean) / std
    return pd.DataFrame(x_scaled, columns=df.columns.to_list())
def cyclical_encoding(frame=None, column='kitchen_outlets_house_2_x'):
    """Map a numeric column onto the unit circle (cyclical encoding).

    The value is normalized to [0, 2*pi] by its column maximum and both
    trigonometric components are stored as new columns. Bugs fixed: the
    original read the loop-leftover module globals ``df`` and ``i``,
    assigned ``cos_x`` onto the dict itself rather than a DataFrame, and
    omitted the sine component (without which the encoding is not
    invertible over the cycle).

    Parameters
    ----------
    frame : pandas.DataFrame, optional
        Frame to encode in place; defaults to house 2 from the
        module-level ``df`` dict, matching the original's intent.
    column : str
        Column to encode.

    Returns
    -------
    pandas.DataFrame
        *frame*, with ``x_norm``, ``cos_x`` and ``sin_x`` columns added.
    """
    if frame is None:
        frame = df[2]
    x_norm = 2 * np.pi * frame[column] / frame[column].max()
    frame['x_norm'] = x_norm
    frame['cos_x'] = np.cos(x_norm)
    frame['sin_x'] = np.sin(x_norm)
    return frame
# --- Driver: load, inspect, filter, scale and export the REDD houses. ---
# NOTE(review): indentation was lost in extraction; loop bodies below are
# reconstructed from the obvious control flow — confirm against the original.

# Load and inner-merge all channels of each of the six houses.
df = {}
for i in range(1, 7):
    df[i] = read_merge_data(i)

# Quick sanity check: shapes and last rows of houses 1 and 2.
for i in range(1, 3):
    print('House {} data has shape: '.format(i), df[i].shape)
    display(df[i].tail(3))

# Median-filter and min-max scale house 2, then dump it to a CSV.
house2 = df[2].transform(lambda x: median_filter(x))
house2 = standardization(house2)
house2.to_csv('/home/leonidas/PycharmProjects/GNN_based_NILM/data/House.csv')
# NOTE(review): uses the loop-leftover `i` (== 2 after the loop above),
# so only that single house is exported here — presumably intentional
# but worth confirming.
write_appliance_files(df[i])

# Downsample every house to 1-minute sums, scale, and export per appliance.
for i in df.keys():
    print(df[i].shape[0])
    df[i] = df[i].resample('1T').sum()
    print(df[i].shape[0])
    # NOTE(review): the result of this transform is discarded, so the
    # median filter has no effect on df[i] — likely a missing assignment.
    df[i].transform(lambda x: median_filter(x))
    df[i] = standardization(df[i])
    write_appliance_files(df[i])