Commit a0bca966 authored by Carlos A. Iglesias's avatar Carlos A. Iglesias

Renamed mosaik to simulator

parent 5bf79bd0
mosaik-demo @ 5f3f4d8c
Subproject commit 5f3f4d8c34ec614e3486e889b482be6e1a5977a4
securegrid-demo @ bcf7386e
Subproject commit bcf7386e7096595f761f39c3b3332d86250546e5
import mosaik_api
import random
meta = {
'models': {
'Attack': {
'public': True,
'params': ['target_attr'],
'attrs': ['P_out_val'],
},
},
}
# Value pushed to attacked entities (a percentage of the normal load).
attackPercentageValue = 0
# Simulation progress (in percent) after which the attack becomes active.
attackTime = 5
class Attack(mosaik_api.Simulator):
    def __init__(self):
        super().__init__(meta)
        self.units = {}

    def init(self, sid, step_size=15*60):  # step size in seconds (15 min)
        self.sid = sid
        self.step_size = step_size
        return self.meta
def create(self, num, model, **model_params):
n_units = len(self.units)
entities = []
for i in range(n_units, n_units + num):
eid = 'Attack-%d' % i
self.units[eid] = model_params
            entities.append({'eid': eid, 'type': model})
return entities
    def step(self, time, inputs):
        commands = {}
        # Ask mosaik how far the simulation has progressed (in percent);
        # asynchronous requests are made with "yield".
        progress = yield self.mosaik.get_progress()
        if progress > attackTime:
            for eid, attrs in inputs.items():
                for attr, vals in attrs.items():
                    if attr == 'P_out_val':
                        for src_id, val in vals.items():
                            # Attack the entity the value came from.
                            target_id = src_id
                            if eid not in commands:
                                commands[eid] = {}
                            target_attr = self.units[eid]['target_attr']
                            if target_id not in commands[eid]:
                                commands[eid][target_id] = {}
                            # Overwrite the target attribute with the
                            # constant attack value.
                            commands[eid][target_id][target_attr] = attackPercentageValue
        yield self.mosaik.set_data(commands)
        return time + self.step_size
def main():
    return mosaik_api.start_simulation(Attack(), 'example attack')
if __name__ == '__main__':
main()
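A minimal sketch of how this attack simulator might be wired into a mosaik scenario. The sim-config module paths, profile file, grid name, start date and END below are illustrative assumptions, not part of this commit:

import mosaik

SIM_CONFIG = {
    'HouseholdSim': {'python': 'householdsim.mosaik:HouseholdSim'},  # assumed path
    'AttackSim': {'python': 'attack:Attack'},                        # assumed path
}
END = 60 * 60  # one simulated hour, in seconds (illustrative)

world = mosaik.World(SIM_CONFIG)
hhsim = world.start('HouseholdSim')
atksim = world.start('AttackSim')

houses = hhsim.ResidentialLoads(sim_start='2014-01-01 00:00',
                                profile_file='profiles.data.gz',
                                grid_name='demo_lv').children
attacks = atksim.Attack.create(len(houses), target_attr='P_out')

for house, attack in zip(houses, attacks):
    # async_requests=True lets Attack.step() push values back to the
    # houses with self.mosaik.set_data().
    world.connect(house, attack, ('P_out', 'P_out_val'),
                  async_requests=True)

world.run(until=END)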
import logging
import mosaik_api
import model_house
import random
logger = logging.getLogger('householdsim')
meta = {
'models': {
'ResidentialLoads': {
'public': True,
'params': [
                'sim_start',  # The start time for the simulation:
                              # 'YYYY-MM-DD HH:mm'
'profile_file', # Name of file with household data
'grid_name', # Name of the grid to load
],
'attrs': [],
},
'House': {
'public': False,
'params': [],
'attrs': [
'P_out', # Active power [W]
'num', # House number starting at 1
'node_id', # ID of node the house has to be connected to
'num_hh', # Number of separate households within the house
'num_res', # Number of residents per household
],
},
},
}
def eid(hid):
return 'House_%s' % hid
class HouseholdSim(mosaik_api.Simulator):
def __init__(self):
super().__init__(meta)
self.model = None
self.houses_by_eid = {}
self.pos_loads = None
self._file_cache = {}
self._offset = 0
self._cache = {}
def init(self, sid, pos_loads=True):
logger.debug('Loads will be %s numbers.' %
('positive' if pos_loads else 'negative'))
self.pos_loads = 1 if pos_loads else -1
return self.meta
def create(self, num, model, sim_start, profile_file, grid_name):
if num != 1 or self.model:
raise ValueError('Can only create one set of houses.')
logger.info('Creating houses for %s from "%s"' %
(grid_name, profile_file))
if profile_file.endswith('gz'):
import gzip
pf = gzip.open(profile_file, 'rt')
else:
pf = open(profile_file, 'rt')
try:
self.model = model_house.HouseModel(pf, grid_name)
self.houses_by_eid = {
eid(i): house for i, house in enumerate(self.model.houses)
}
except KeyError:
raise ValueError('Invalid grid name "%s".' % grid_name)
# A time offset in minutes from the simulation start to the start
# of the profiles.
self._offset = self.model.get_delta(sim_start)
return [{
'eid': 'resid_0',
'type': 'ResidentialLoads',
'rel': [],
'children': [{'eid': eid(i), 'type': 'House', 'rel': []}
for i, _ in enumerate(self.model.houses)],
}]
    def step(self, time, inputs):
        # "time" is in seconds. Convert to minutes and add the offset
        # in case sim start > start date of the profiles.
        #
        # "inputs", if present, carries values pushed by the attack
        # simulator via set_data(), e.g.:
        #   {'House_3': {'P_out': {'AttackSim-0.Attack-0': 42.0}}}
        houses = [key.split('_', 1)[1] for key in inputs]
        if inputs:
            # Extract the single pushed value (the attack percentage).
            new_value = list(list(list(inputs.values())[0].values())[0].values())[0]
        minutes = time // 60 + self._offset
        cache = {}
        data = self.model.get(minutes)
        for hid, d in enumerate(data):
            d *= self.pos_loads  # Flip sign if necessary
            cache[eid(hid)] = d
        for house in houses:
            # Scale by the pushed percentage (d holds the last profile value).
            cache[eid(house)] = new_value * d / 100
        self._cache = cache
        return (minutes + self.model.resolution) * 60  # seconds
    def get_data(self, outputs):
        # "outputs" maps entity IDs to the attributes requested by mosaik,
        # e.g. {'House_0': ['P_out', 'node_id']}.
        data = {}
for eid, attrs in outputs.items():
data[eid] = {}
for attr in attrs:
if attr == 'P_out':
val = self._cache[eid]
else:
val = self.houses_by_eid[eid][attr]
data[eid][attr] = val
return data
def main():
    return mosaik_api.start_simulation(HouseholdSim(), 'Household simulation')


if __name__ == '__main__':
    main()
"""
"""
import json
import arrow
DATE_FORMAT = 'YYYY-MM-DD HH:mm'
"""Date format used to convert strings to dates."""
class HouseModel:
"""The HouseModel processes and prepares the load profiles and their
associated meta data to allow and easier access to it.
"""
def __init__(self, data, lv_grid):
# Process meta data
assert next(data).startswith('# meta')
meta = json.loads(next(data))
self.start = arrow.get(meta['start_date'], DATE_FORMAT)
"""The start date of the profile data."""
self.resolution = meta['resolution']
"""The time resolution of the data in minutes."""
self.unit = meta['unit']
"""The unit used for the load profiles (e.g., *W*)."""
self.num_profiles = meta['num_profiles']
"""The number of load profiles in the file."""
# Obtain id lists
assert next(data).startswith('# id_list')
id_list_lines = []
for line in data:
if line.startswith('# attrs'):
break
id_list_lines.append(line)
id_lists = json.loads(''.join(id_list_lines))
self.node_ids = id_lists[lv_grid]
"""List of power grid node IDs for which to create houses."""
# Enable pre-processing of the data
self._data = self._get_line(data)
# Obtain static attributes and create list of house info dicts
attrs = {}
for attr, *vals in self._data:
if attr.startswith('# profiles'):
break
attrs[attr] = [int(val) for val in vals]
#: List of house info dicts
self.houses = [
{
'num': i + 1,
'node_id': n,
'num_hh': attrs['num_hh'][i % self.num_profiles],
'num_res': attrs['num_residents'][i % self.num_profiles],
} for i, n in enumerate(self.node_ids)
]
# Helpers for get()
self._last_date = None
self._cache = None
def get(self, minutes):
"""Get the current load for all houses for *minutes* minutes since
:attr:`start`.
        If the model uses a 15 min resolution and *minutes* is not a
        multiple of 15, the next smaller multiple of 15 will be used. For
        example, if you pass ``minutes=23``, you'll get the value for ``15``.
"""
# Trim "minutes" to multiples of "self.resolution"
# Example: res=15, minutes=40 -> minutes == 30
minutes = minutes // self.resolution * self.resolution
        # arrow >= 0.15 removed replace(minutes=...); shift() applies
        # the relative offset.
        target_date = self.start.shift(minutes=minutes)
if target_date != self._last_date:
# If target date not already reached, search data until we find it:
for date, *values in self._data:
date = arrow.get(date, DATE_FORMAT)
if date == target_date:
# Found target date, cache results:
values = list(map(float, values))
self._cache = [values[i % self.num_profiles]
for i, _ in enumerate(self.houses)]
self._last_date = date
break
else:
# We've reached the end of our data file if the for loop
# normally finishes.
raise IndexError('Target date "%s" (%s minutes from start) '
'out of range.' % (target_date, minutes))
return self._cache
def get_delta(self, date):
"""Get the amount of minutes between *date* and :attr:`start`.
The date needs to be a strings formated like :data:`DATE_FORMAT`.
Raise a :exc:`ValueError` if *date* is smaller than :attr:`start`.
"""
date = arrow.get(date, DATE_FORMAT)
if date < self.start:
            raise ValueError('date must be >= "%s".' %
self.start.format(DATE_FORMAT))
dt = date - self.start
minutes = (dt.days * 1440) + (dt.seconds // 60)
return minutes
def _get_line(self, iterator):
for line in iterator:
yield [item.strip() for item in line.split(',')]
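A self-contained sketch of the profile-file layout that HouseModel expects, reconstructed from the parsing logic above; all values and the grid name "demo_lv" are invented for illustration:

if __name__ == '__main__':
    import io

    example = io.StringIO(
        '# meta\n'
        '{"start_date": "2014-01-01 00:00", "resolution": 15,'
        ' "unit": "W", "num_profiles": 2}\n'
        '# id_list\n'
        '{"demo_lv": ["node_a", "node_b", "node_c"]}\n'
        '# attrs\n'
        'num_hh,1,2\n'
        'num_residents,2,4\n'
        '# profiles\n'
        '2014-01-01 00:00,100.0,200.0\n'
        '2014-01-01 00:15,110.0,210.0\n'
    )
    model = HouseModel(example, 'demo_lv')
    print(model.houses)                         # three houses, profiles reused modulo 2
    print(model.get_delta('2014-01-01 00:30'))  # -> 30
    print(model.get(20))                        # trimmed to minute 15 -> [110.0, 210.0, 110.0]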
import pandas as pd
import numpy as np
import operator
import csv
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn import metrics
from sklearn.metrics import mean_absolute_error, mean_squared_error
from keras_anomaly_detection.library.plot_utils import visualize_reconstruction_error
from keras_anomaly_detection.library.convolutional import Conv1DAutoEncoder
DO_TRAINING = False
def main():
data_dir_path = './data'
model_dir_path = './models'
normal_data_path = '/normal/houses_concatenated.pkl'
attack_data_path = '/anomaly_20/labels_df/house_0.pkl'
    # Read normal data (no attacks)
    houses = pd.read_pickle(data_dir_path + normal_data_path)
    print(houses.head())
    houses = houses.drop("attacked", axis=1)
    houses_m = houses.to_numpy()  # DataFrame.as_matrix() was removed from pandas
    scaler = MinMaxScaler()
    houses_m_n = scaler.fit_transform(houses_m)
    train, test = train_test_split(houses_m_n, test_size=0.3)
ae = Conv1DAutoEncoder()
# fit the data and save model into model_dir_path
if DO_TRAINING:
ae.fit(train, model_dir_path=model_dir_path)
    # Load back the model saved in model_dir_path and run anomaly detection
    ae.load_model(model_dir_path)
predicted_train = ae.predict_mine(train)
mae_train = mae(train, predicted_train)
rsme_train = rsme(train, predicted_train)
print("MAE_TRAIN", mae_train, "MEAN", np.mean(mae_train))
print("RSME_TRAIN", rsme_train, "MEAN", np.mean(rsme_train))
predicted_test = ae.predict_mine(test)
mae_test = mae(test, predicted_test)
rsme_test = rsme(test, predicted_test)
print("MAE_TEST", mae_test, "MEAN", np.mean(mae_test))
print("RSME_NORMAL", rsme_test, "MEAN", np.mean(rsme_test))
print("NORMALIZED TEST MAE", np.mean(np.divide(mae_test, mae_train)))
print("NORMALIZED TEST RSME", np.mean(np.divide(rsme_test, rsme_train)))
final_fs = []
df_house_normal = pd.read_pickle(data_dir_path + normal_data_path)
labels_normal = df_house_normal["attacked"]
df_house_normal = df_house_normal.drop("attacked", axis=1)
scaled_house_normal = scaler.transform(df_house_normal)
# Read Attack Data
df_house_anomaly = pd.read_pickle(data_dir_path + attack_data_path)
labels_anomaly = df_house_anomaly["attacked"]
df_house_anomaly = df_house_anomaly.drop("attacked", axis=1)
scaled_house_anomaly = scaler.transform(df_house_anomaly)
normal_rsme = []
for i in range(len(houses_m_n)):
out_normal = ae.predict_mine(houses_m_n[i:i+1])
rsme_normal = np.sqrt(mean_squared_error(houses_m_n[i:i+1], out_normal, multioutput='raw_values'))
rsme_normal_mean = np.mean(rsme_normal)
normal_rsme.append(rsme_normal_mean)
anomaly_rsme = []
for i in range(len(scaled_house_anomaly)):
out_anomaly = ae.predict_mine(scaled_house_anomaly[i:i+1])
rsme_anomaly = np.sqrt(mean_squared_error(scaled_house_anomaly[i:i+1], out_anomaly, multioutput='raw_values'))
rsme_anomaly_mean = np.mean(rsme_anomaly)
anomaly_rsme.append(rsme_anomaly_mean)
normalized_rsme = np.divide(anomaly_rsme, np.mean(normal_rsme))
plt.plot(normalized_rsme, marker='o', linestyle='', ms=3.5)
plt.show()
final_labels = window_error(normalized_rsme, labels_anomaly)
    with open('predicted_labels.csv', 'w', newline='') as outfile:
        out = csv.writer(outfile)
        out.writerows([x] for x in final_labels)
    print("FINAL Labels", final_labels)
    print("CLASSIFICATION REPORT",
          metrics.classification_report(labels_anomaly, final_labels))
def get_th_opt(normalized_rsme, labels_anomaly, flag):
    """Sweep thresholds from 2 to 30 in steps of 0.1 and pick the one
    that maximizes the macro F1 score."""
    j = 2
    predicted_labels = {}
    labels = labels_anomaly
    f_scores_th = {}
while j < 30:
predicted_labels[j] = []
for i in normalized_rsme:
if i >= j:
predicted_labels[j].append(1)
else:
predicted_labels[j].append(0)
f_scores_th[j] = metrics.f1_score(labels, predicted_labels[j], average='macro')
j = round(j + 0.1, 2)
f1_max, th_opt = max(zip(f_scores_th.values(), f_scores_th.keys()))
if flag:
        f1_good, f_preds = filter_error(labels_anomaly, predicted_labels[th_opt])
        return f1_good, f_preds, th_opt
    return f1_max, th_opt
def window_error(normalized_rsme, labels_anomaly):
    """Smooth the per-sample error with sliding windows of increasing
    length and return the predictions that reach the best F1 score."""
    window_len = 25
    f1s = {}
    for i in range(window_len):
        window_arr = []
        for j in range(len(normalized_rsme)):
            if j < i:
                s = np.sum(normalized_rsme[0:j+1])
            else:
                s = np.sum(normalized_rsme[j-i:j])
            window_arr.append(s)
        f1_max, preds, th_opt = get_th_opt(window_arr, labels_anomaly, True)
        f1s[f1_max] = preds
    best_f1, best_preds = max(f1s.items(), key=operator.itemgetter(0))
    print("F1MAX", best_f1)
    return best_preds
def filter_error(anomaly, predicted):
    """Post-filter raw predictions: flag a sample if more than 20% of the
    preceding j samples were flagged, sweeping j over [30, 50)."""
    win = 50
    fs = {}
for j in range(30, win):
final_predictions = []
for i in range(0, len(predicted)):
if i < j:
final_predictions.append(predicted[i])
else:
addition = np.sum(predicted[i-j : i])
if addition > 0.2*j:
final_predictions.append(1)
else:
final_predictions.append(0)
f1 = metrics.f1_score(anomaly, final_predictions, average='macro')
# print("F1", f1)
fs[f1] = final_predictions
    f1_good, f_preds = max(fs.items(), key=operator.itemgetter(0))
    return f1_good, f_preds
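A small synthetic demo (all numbers invented) showing how the threshold sweep in get_th_opt() behaves on well-separated reconstruction errors:

def demo_threshold_sweep():
    """Synthetic illustration of get_th_opt(): reconstruction errors near
    1 for normal samples and near 5 for attacked ones (invented numbers)."""
    rng = np.random.RandomState(0)
    errors = np.concatenate([rng.normal(1.0, 0.2, 200),   # normal samples
                             rng.normal(5.0, 0.5, 50)])   # attacked samples
    labels = [0] * 200 + [1] * 50
    f1_max, th_opt = get_th_opt(errors, labels, False)
    print('best macro F1 %.3f at threshold %s' % (f1_max, th_opt))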
def anomaly_houses_4(data_dir_path, scaler, ae, mae_train, rsme_train):
    # Gets the graph with the reconstruction error of all 38 houses.
    print("------------NORMAL-MAE---------------------------------------")
    for i in range(38):
df_house_anomaly = pd.read_csv(data_dir_path + '/normal_f_csv/house_{}_row.csv'.format(i), header=None)
scaled_house_anomaly = scaler.transform(df_house_anomaly)
out_anomaly = ae.predict_mine(scaled_house_anomaly)
mae_anomaly = mean_absolute_error(scaled_house_anomaly, out_anomaly, multioutput='raw_values')
n_mae_anomaly = np.divide(mae_anomaly, mae_train)
print(i, np.mean(mae_anomaly), np.mean(n_mae_anomaly))
print("-------------NORMAL-RSME--------------------------------------")
    for i in range(38):
df_house_anomaly = pd.read_csv(data_dir_path + '/normal_f_csv/house_{}_row.csv'.format(i), header=None)
scaled_house_anomaly = scaler.transform(df_house_anomaly)
out_anomaly = ae.predict_mine(scaled_house_anomaly)
rsme_anomaly = np.sqrt(mean_squared_error(scaled_house_anomaly, out_anomaly, multioutput='raw_values'))
n_rsme_anomaly = np.divide(rsme_anomaly, rsme_train)
print(i, np.mean(rsme_anomaly), np.mean(n_rsme_anomaly))
print("------------ANOMALY-MAE---------------------------------------")
mae_values = []
    for i in range(38):
df_house_anomaly = pd.read_csv(data_dir_path + '/anomaly_4_f_csv/house_{}_anomaly.csv'.format(i), header=None)
scaled_house_anomaly = scaler.transform(df_house_anomaly)
out_anomaly = ae.predict_mine(scaled_house_anomaly)
mae_anomaly = mean_absolute_error(scaled_house_anomaly, out_anomaly, multioutput='raw_values')
n_mae_anomaly = np.divide(mae_anomaly, mae_train)
mae_values.append(np.mean(n_mae_anomaly))
print(i, np.mean(mae_anomaly), np.mean(n_mae_anomaly))
visualize_reconstruction_error(mae_values, 2)
print("-------------ANOMALY-RSME--------------------------------------")
rsme_values = []
    for i in range(38):
df_house_anomaly = pd.read_csv(data_dir_path + '/anomaly_4_f_csv/house_{}_anomaly.csv'.format(i), header=None)
scaled_house_anomaly = scaler.transform(df_house_anomaly)
out_anomaly = ae.predict_mine(scaled_house_anomaly)
rsme_anomaly = np.sqrt(mean_squared_error(scaled_house_anomaly, out_anomaly, multioutput='raw_values'))
n_rsme_anomaly = np.divide(rsme_anomaly, rsme_train)
rsme_values.append(np.mean(n_rsme_anomaly))
print(i, np.mean(rsme_anomaly), np.mean(n_rsme_anomaly))
visualize_reconstruction_error(rsme_values, 2)
def mae(original, predicted):
    """Per-feature mean absolute error."""
    return mean_absolute_error(original, predicted, multioutput='raw_values')

def rsme(original, predicted):
    """Per-feature root mean squared error (RMSE)."""
    return np.sqrt(mean_squared_error(original, predicted, multioutput='raw_values'))
if __name__ == '__main__':
main()