# This script reads the replay files and evaluates the performance.
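# For every algorithm under comparison, the same set of pre-recorded replay
# files is re-simulated, so all algorithms face identical EV arrivals,
# departures, and prices. Per-episode statistics are aggregated into a CSV
# file, a LaTeX summary table, and a set of comparison plots.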
import datetime
import os
import pickle
import shutil
import time
from copy import deepcopy

import gymnasium as gym
import numpy as np
import pandas as pd
import torch
import yaml
from sb3_contrib import ARS, TQC, TRPO, RecurrentPPO
from stable_baselines3 import A2C, DDPG, PPO, SAC, TD3

from ev2gym.baselines.gurobi_models.profit_max import V2GProfitMaxOracleGB
from ev2gym.baselines.gurobi_models.tracking_error import PowerTrackingErrorrMin
from ev2gym.baselines.heuristics import (ChargeAsFastAsPossible,
                                         ChargeAsFastAsPossibleToDesiredCapacity,
                                         ChargeAsLateAsPossible, RoundRobin)
from ev2gym.baselines.mpc.eMPC import eMPC_G2V, eMPC_V2G
from ev2gym.baselines.mpc.ocmf_mpc import OCMF_G2V, OCMF_V2G
from ev2gym.baselines.mpc.V2GProfitMax import (V2GProfitMaxLoadsOracle,
                                               V2GProfitMaxOracle)
from ev2gym.models import ev2gym_env
from ev2gym.rl_agent.reward import (ProfitMax_TrPenalty_UserIncentives,
                                    SquaredTrackingErrorReward,
                                    profit_maximization)
from ev2gym.rl_agent.state import PublicPST, V2G_profit_max, V2G_profit_max_loads
from ev2gym.utilities.arg_parser import arg_parser
from ev2gym.visuals.evaluator_plot import (plot_actual_power_vs_setpoint,
                                           plot_comparable_EV_SoC,
                                           plot_comparable_EV_SoC_single,
                                           plot_prices, plot_total_power,
                                           plot_total_power_V2G)
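

# The evaluation pipeline, in order:
#   1. parse the CLI arguments and the scenario config (YAML),
#   2. locate existing replay files, or generate fresh ones,
#   3. run every algorithm on every replay for a full simulation horizon,
#   4. persist raw results (CSV/pickle), a grouped summary (LaTeX), and plots.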
def evaluator():
    device = "cuda" if torch.cuda.is_available() else "cpu"

    args = arg_parser()
    with open(args.config_file, 'r') as config_file:
        config = yaml.load(config_file, Loader=yaml.FullLoader)

    number_of_charging_stations = config["number_of_charging_stations"]
    n_transformers = config["number_of_transformers"]
    timescale = config["timescale"]
    simulation_length = config["simulation_length"]

    n_test_cycles = args.n_test_cycles

    scenario = args.config_file.split("/")[-1].split(".")[0]
    eval_replay_path = f'./replay/{number_of_charging_stations}cs_{n_transformers}tr_{scenario}/'
    print(f'Looking for replay files in {eval_replay_path}')

    try:
        eval_replay_files = [f for f in os.listdir(eval_replay_path)
                             if os.path.isfile(os.path.join(eval_replay_path, f))]
        print(f'Found {len(eval_replay_files)} replay files in {eval_replay_path}')
        # Never run more test cycles than there are replay files.
        if n_test_cycles > len(eval_replay_files):
            n_test_cycles = len(eval_replay_files)
        replays_exist = True
    except FileNotFoundError:
        replays_exist = False

    print(f'Number of test cycles: {n_test_cycles}')
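
    # Each example scenario is evaluated with the reward and state functions it
    # was designed for; a mismatched pair would make the comparison meaningless.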
    if args.config_file == "ev2gym/example_config_files/V2GProfitMax.yaml":
        reward_function = profit_maximization
        state_function = V2G_profit_max
    elif args.config_file == "ev2gym/example_config_files/PublicPST.yaml":
        reward_function = SquaredTrackingErrorReward
        state_function = PublicPST
    elif args.config_file == "ev2gym/example_config_files/V2G_MPC.yaml":
        reward_function = profit_maximization
        state_function = V2G_profit_max
    elif args.config_file == "ev2gym/example_config_files/V2GProfitPlusLoads.yaml":
        reward_function = ProfitMax_TrPenalty_UserIncentives
        state_function = V2G_profit_max_loads
    else:
        raise ValueError('Unknown config file')
    def generate_replay(evaluation_name):
        # Generate a fresh random simulation and save it as a replay file,
        # driving every charging station at full power so that all EV arrivals,
        # departures, and prices get recorded.
        env = ev2gym_env.EV2Gym(
            config_file=args.config_file,
            generate_rnd_game=True,
            save_replay=True,
            replay_save_path=f"replay/{evaluation_name}/",
        )
        replay_path = f"replay/{evaluation_name}/replay_{env.sim_name}.pkl"

        for _ in range(env.simulation_length):
            actions = np.ones(env.cs)
            new_state, reward, done, truncated, _ = env.step(
                actions, visualize=False)  # takes action
            if done:
                break

        return replay_path
    # Algorithms to compare:
    algorithms = [
        ChargeAsFastAsPossible,
        ChargeAsLateAsPossible,
        # PPO, A2C, DDPG, SAC, TD3, TQC, TRPO, ARS, RecurrentPPO,
        # SAC,
        # TQC,
        # # TD3,
        # # ARS,
        # # RecurrentPPO,
        RoundRobin,
        # eMPC_V2G,
        # # V2GProfitMaxLoadsOracle,
        # V2GProfitMaxOracleGB,
        # V2GProfitMaxOracle,
        # PowerTrackingErrorrMin
    ]
    # algorithms = [
    #     # ChargeAsFastAsPossibleToDesiredCapacity,
    #     'OCMF_V2G_10',
    #     # 'OCMF_V2G_20',
    #     'OCMF_V2G_30',
    #     'OCMF_G2V_10',
    #     # # 'OCMF_G2V_20',
    #     'OCMF_G2V_30',
    #     'eMPC_V2G_10',
    #     # # 'eMPC_V2G_20',
    #     'eMPC_V2G_30',
    #     'eMPC_G2V_10',
    #     'eMPC_G2V_30',
    #     # eMPC_V2G,
    #     # eMPC_G2V,
    # ]
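
    # MPC baselines can also be listed as strings of the form
    # '<ALGO>_<DIRECTION>_<HORIZON>' (e.g. 'eMPC_V2G_30'); the trailing number
    # is parsed in the evaluation loop below and used as the control horizon.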
    evaluation_name = f'eval_{number_of_charging_stations}cs_{n_transformers}tr_{scenario}_{len(algorithms)}_algos' +\
        f'_{n_test_cycles}_exp_' +\
        f'{datetime.datetime.now().strftime("%Y_%m_%d_%f")}'

    # make a directory for the evaluation and keep a copy of the config next to the results
    save_path = f'./results/{evaluation_name}/'
    os.makedirs(save_path, exist_ok=True)
    shutil.copy(args.config_file, save_path)

    if not replays_exist:
        eval_replay_files = [generate_replay(evaluation_name)
                             for _ in range(n_test_cycles)]
    plot_results_dict = {}
    results = None  # aggregated results DataFrame, created on the first finished run
    counter = 0

    for algorithm in algorithms:
        print(' +------- Evaluating', algorithm, " -------+")
        for k in range(n_test_cycles):
            print(f' Test cycle {k+1}/{n_test_cycles} -- {algorithm}')
            counter += 1
            h = -1  # control horizon; only meaningful for the string-named MPC variants
            if replays_exist:
                replay_path = eval_replay_path + eval_replay_files[k]
            else:
                replay_path = eval_replay_files[k]
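
            # Gradient-based RL agents are loaded from ./saved_models/ and run
            # through a registered Gymnasium environment; heuristic, MPC, and
            # oracle baselines act directly on an EV2Gym instance instead.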
            if algorithm in [PPO, A2C, DDPG, SAC, TD3, TQC, TRPO, ARS, RecurrentPPO]:
                gym.envs.register(id='evs-v0', entry_point='ev2gym.ev_city:ev2gym',
                                  kwargs={'config_file': args.config_file,
                                          'generate_rnd_game': True,
                                          'state_function': state_function,
                                          'reward_function': reward_function,
                                          'load_from_replay_path': replay_path,
                                          })
                env = gym.make('evs-v0')

                if algorithm == RecurrentPPO:
                    load_path = f'./saved_models/{number_of_charging_stations}cs_{scenario}/' + \
                        f"rppo_{reward_function.__name__}_{state_function.__name__}"
                else:
                    load_path = f'./saved_models/{number_of_charging_stations}cs_{scenario}/' + \
                        f"{algorithm.__name__.lower()}_{reward_function.__name__}_{state_function.__name__}"

                # initialize the timer
                timer = time.time()
                model = algorithm.load(load_path, env, device=device)
                env = model.get_env()
                state = env.reset()
            else:
                env = ev2gym_env.EV2Gym(
                    config_file=args.config_file,
                    load_from_replay_path=replay_path,
                    generate_rnd_game=True,
                    state_function=state_function,
                    reward_function=reward_function,
                )
                # initialize the timer
                timer = time.time()
                state = env.reset()
                try:
                    if isinstance(algorithm, str):
                        # Strings encode an MPC variant plus its control horizon,
                        # e.g. 'eMPC_V2G_30' -> algorithm 'eMPC_V2G', horizon 30.
                        if algorithm.split('_')[0] in ['OCMF', 'eMPC']:
                            h = int(algorithm.split('_')[2])
                            algorithm = algorithm.split('_')[0] + '_' + algorithm.split('_')[1]
                            print(f'Algorithm: {algorithm} with control horizon {h}')

                            if algorithm == 'OCMF_V2G':
                                model = OCMF_V2G(env=env, control_horizon=h)
                                algorithm = OCMF_V2G
                            elif algorithm == 'OCMF_G2V':
                                model = OCMF_G2V(env=env, control_horizon=h)
                                algorithm = OCMF_G2V
                            elif algorithm == 'eMPC_V2G':
                                model = eMPC_V2G(env=env, control_horizon=h)
                                algorithm = eMPC_V2G
                            elif algorithm == 'eMPC_G2V':
                                model = eMPC_G2V(env=env, control_horizon=h)
                                algorithm = eMPC_G2V
                    else:
                        model = algorithm(env=env,
                                          replay_path=replay_path,
                                          verbose=False)
                except Exception as error:
                    print(error)
                    print(f'Error in {algorithm} with replay {replay_path}')
                    continue
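
            # Roll the policy out over one full simulation horizon, accumulating
            # the per-step reward and collecting the episode statistics when the
            # environment signals termination.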
            rewards = []

            for i in range(simulation_length):
                print(f' Step {i+1}/{simulation_length} -- {algorithm}')
                ################# Evaluation ##############################
                if algorithm in [PPO, A2C, DDPG, SAC, TD3, TQC, TRPO, ARS, RecurrentPPO]:
                    action, _ = model.predict(state, deterministic=True)
                    state, reward, done, stats = env.step(action)

                    # The SB3 VecEnv resets itself as soon as the episode ends,
                    # so keep a copy of the wrapped environment one step early
                    # for plotting.
                    if i == simulation_length - 2:
                        saved_env = deepcopy(env.get_attr('env')[0])

                    stats = stats[0]
                else:
                    actions = model.get_action(env=env)
                    new_state, reward, done, _, stats = env.step(
                        actions, visualize=False)  # takes action
                ############################################################
                rewards.append(reward)

                if done:
                    results_i = pd.DataFrame({'run': k,
                                              'Algorithm': algorithm.__name__,
                                              'control_horizon': h,
                                              'discharge_price_factor': config['discharge_price_factor'],
                                              'total_ev_served': stats['total_ev_served'],
                                              'total_profits': stats['total_profits'],
                                              'total_energy_charged': stats['total_energy_charged'],
                                              'total_energy_discharged': stats['total_energy_discharged'],
                                              'average_user_satisfaction': stats['average_user_satisfaction'],
                                              'power_tracker_violation': stats['power_tracker_violation'],
                                              'tracking_error': stats['tracking_error'],
                                              'energy_tracking_error': stats['energy_tracking_error'],
                                              'energy_user_satisfaction': stats['energy_user_satisfaction'],
                                              'total_transformer_overload': stats['total_transformer_overload'],
                                              'battery_degradation': stats['battery_degradation'],
                                              'battery_degradation_calendar': stats['battery_degradation_calendar'],
                                              'battery_degradation_cycling': stats['battery_degradation_cycling'],
                                              'total_reward': sum(rewards),
                                              'time': time.time() - timer,
                                              # 'time_gb': model.total_exec_time,
                                              }, index=[counter])
                    if results is None:
                        results = results_i
                    else:
                        results = pd.concat([results, results_i])

                    if algorithm in [PPO, A2C, DDPG, SAC, TD3, TQC, TRPO, ARS, RecurrentPPO]:
                        env = saved_env
                    # keep the last evaluated environment per algorithm for the plots
                    plot_results_dict[algorithm.__name__] = deepcopy(env)
                    break
    # save the plot_results_dict to a pickle file
    with open(save_path + 'plot_results_dict.pkl', 'wb') as f:
        pickle.dump(plot_results_dict, f)

    # save the results to a csv file
    results.to_csv(save_path + 'data.csv')

    # Group the results by algorithm and report the mean and standard deviation of each statistic
    results_grouped = results.groupby('Algorithm').agg(['mean', 'std'])

    # save the LaTeX-formatted summary to a txt file
    with open(save_path + 'results_grouped.txt', 'w') as f:
        f.write(results_grouped.to_latex())

    # results_grouped.to_csv('results_grouped.csv')
    # print(results_grouped[['tracking_error', 'energy_tracking_error']])
    print(results_grouped[['total_transformer_overload', 'time']])
    # input('Press Enter to continue')
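
    # Finally, rebuild the comparison figures from the pickled environments.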
    algorithm_names = []
    for algorithm in algorithms:
        # if the class defines an .algo_name attribute, use it as the display name
        if hasattr(algorithm, 'algo_name'):
            algorithm_names.append(algorithm.algo_name)
        elif isinstance(algorithm, str):
            algorithm_names.append(algorithm)
        else:
            algorithm_names.append(algorithm.__name__)

    plot_total_power(results_path=save_path + 'plot_results_dict.pkl',
                     save_path=save_path,
                     algorithm_names=algorithm_names)

    plot_comparable_EV_SoC(results_path=save_path + 'plot_results_dict.pkl',
                           save_path=save_path,
                           algorithm_names=algorithm_names)

    plot_actual_power_vs_setpoint(results_path=save_path + 'plot_results_dict.pkl',
                                  save_path=save_path,
                                  algorithm_names=algorithm_names)

    plot_total_power_V2G(results_path=save_path + 'plot_results_dict.pkl',
                         save_path=save_path,
                         algorithm_names=algorithm_names)

    plot_comparable_EV_SoC_single(results_path=save_path + 'plot_results_dict.pkl',
                                  save_path=save_path,
                                  algorithm_names=algorithm_names)

    plot_prices(results_path=save_path + 'plot_results_dict.pkl',
                save_path=save_path,
                algorithm_names=algorithm_names)


if __name__ == "__main__":
    evaluator()
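
# Example invocation (flag names inferred from how this script reads args;
# check ev2gym.utilities.arg_parser for the authoritative interface):
#   python evaluator.py --config_file ev2gym/example_config_files/PublicPST.yaml --n_test_cycles 10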