From cb223084eeefb78a35fbad4627be304eaa409e3d Mon Sep 17 00:00:00 2001 From: ltwmori Date: Fri, 3 May 2024 18:48:10 +0500 Subject: [PATCH 01/14] feat: add multi agent env --- loraenv/loraenv/envs/multiagent_env.py | 105 +++++++++++++++++++++++++ 1 file changed, 105 insertions(+) create mode 100644 loraenv/loraenv/envs/multiagent_env.py diff --git a/loraenv/loraenv/envs/multiagent_env.py b/loraenv/loraenv/envs/multiagent_env.py new file mode 100644 index 0000000..f546233 --- /dev/null +++ b/loraenv/loraenv/envs/multiagent_env.py @@ -0,0 +1,105 @@ +from pettingzoo import AECEnv +from pettingzoo.utils import agent_selector +from gymnasium import spaces +import numpy as np +import simpy +from simulator.lora_simulator import LoraSimulator +import simulator.consts as consts +import simulator.utils as utils +from ray.rllib.env import PettingZooEnv +from ray.tune.registry import register_env + +class LoRaEnvPZ(AECEnv): + """ + Multi-agent LoRa Network Simulation for PettingZoo. + """ + metadata = {"render_modes": ["human"]} + + def __init__(self, nodes_count=10, data_size=16, avg_wake_up_time=30, sim_time=3600): + super().__init__() + + self.nodes_count = nodes_count + self.data_size = data_size + self.avg_wake_up_time = avg_wake_up_time + self.sim_time = sim_time + + self.simpy_env = simpy.Environment() + self.simulator = LoraSimulator(nodes_count, data_size, avg_wake_up_time * 1000, sim_time * 1000, self.simpy_env) + + self.agents = [f"agent_{i}" for i in range(nodes_count)] + self.possible_agents = self.agents[:] + self.agent_name_mapping = dict(zip(self.agents, range(len(self.agents)))) + + self.action_spaces = {agent: spaces.Discrete(3) for agent in self.agents} + self.observation_spaces = { + agent: spaces.Dict({ + "prr": spaces.Box(low=0, high=1, shape=(), dtype=np.float64), + "rssi": spaces.Box(low=-200, high=0, shape=(), dtype=np.float64), + "sf": spaces.Box(low=7, high=12, shape=(), dtype=np.int64) + }) for agent in self.agents + } + + self._agent_selector = agent_selector(self.agents) + self.agent_selection = self._agent_selector.next() + self.rewards = {agent: 0 for agent in self.agents} + self.dones = {agent: False for agent in self.agents} + self.infos = {agent: {} for agent in self.agents} + + def observe(self, agent): + idx = self.agent_name_mapping[agent] + return { + "prr": consts.nodes[idx].prr_value, + "rssi": consts.nodes[idx].rssi_value, + "sf": consts.nodes[idx].sf_value + } + + def step(self, action): + if self.dones[self.agent_selection]: + return self._was_done_step(action) + + agent_index = self.agent_name_mapping[self.agent_selection] + self.simulator.update_node_behavior(agent_index, action) + self.simpy_env.run(until=self.current_step * 1000) + + # Update rewards and observations + self.rewards[self.agent_selection] = self._calculate_reward(agent_index) + self._accumulate_rewards() + + # Check if simulation is done + self.current_step += 1 + if self.current_step >= self.sim_time: + for agent in self.agents: + self.dones[agent] = True + + self.agent_selection = self._agent_selector.next() + + def reset(self, **kwargs): + self.simpy_env = simpy.Environment() + self.simulator = LoraSimulator(self.nodes_count, self.data_size, self.avg_wake_up_time * 1000, self.sim_time * 1000, self.simpy_env) + self.simulator.add_nodes() + self.current_step = 0 + + self.rewards = {agent: 0 for agent in self.agents} + self.dones = {agent: False for agent in self.agents} + self.infos = {agent: {} for agent in self.agents} + self._agent_selector = agent_selector(self.agents) + self.agent_selection = self._agent_selector.next() + + def render(self, mode="human"): + if mode == "human": + print({agent: self.observe(agent) for agent in self.agents}) + + def _calculate_reward(self, agent_index): + lambda_value = 0.0001 + mean_prr = consts.nodes[agent_index].calculate_prr() + retransmission_penalty = lambda_value * consts.nodes[agent_index].packets_sent_count + return mean_prr - retransmission_penalty + +# Wrap the PettingZoo environment for RLlib +def env_creator(env_config): + return LoRaEnvPZ(**env_config) + +# Register the environment with RLlib +register_env("lora_multi_agent", lambda config: PettingZooEnv(env_creator(config))) + +# This registered environment can now be used in RLlib with the environment name "lora_multi_agent" From 9c7c373247dbd082cfb0ddfb51000500ceb2d143 Mon Sep 17 00:00:00 2001 From: ltwmori Date: Thu, 9 May 2024 15:39:26 +0500 Subject: [PATCH 02/14] feat: add multiagent functions --- loraenv/loraenv/__init__.py | 15 ++-- loraenv/loraenv/envs/LoRaEnvParallel.py | 93 +++++++++++++++++++++ loraenv/loraenv/envs/__init__.py | 3 +- loraenv/loraenv/envs/multiagent_env.py | 105 ------------------------ main2.py | 58 +++++++++++++ 5 files changed, 163 insertions(+), 111 deletions(-) create mode 100644 loraenv/loraenv/envs/LoRaEnvParallel.py delete mode 100644 loraenv/loraenv/envs/multiagent_env.py create mode 100644 main2.py diff --git a/loraenv/loraenv/__init__.py b/loraenv/loraenv/__init__.py index c81cddd..3ac5ae7 100644 --- a/loraenv/loraenv/__init__.py +++ b/loraenv/loraenv/__init__.py @@ -1,6 +1,11 @@ -from gymnasium.envs.registration import register +# from gymnasium.envs.registration import register + +# register( +# id='loraenv/LoRa-v0', +# entry_point='loraenv.envs:LoRaEnv', +# ) + + +from pettingzoo.utils import from_parallel +from pettingzoo.test import api_test -register( - id='loraenv/LoRa-v0', - entry_point='loraenv.envs:LoRaEnv', -) \ No newline at end of file diff --git a/loraenv/loraenv/envs/LoRaEnvParallel.py b/loraenv/loraenv/envs/LoRaEnvParallel.py new file mode 100644 index 0000000..954452d --- /dev/null +++ b/loraenv/loraenv/envs/LoRaEnvParallel.py @@ -0,0 +1,93 @@ +import simpy +import numpy as np +from gymnasium import spaces +from pettingzoo import ParallelEnv +from pettingzoo.utils import parallel_to_aec +from simulator.lora_simulator import LoraSimulator +import simulator.consts as consts + +class LoRaEnvParallel(ParallelEnv): + metadata = {'render_modes': ['human'], 'name': 'lora_v1'} + + def __init__(self, nodes_count=10, data_size=16, avg_wake_up_time=30, sim_time=3600, render_mode=None): + self.possible_agents = [f"agent_{i}" for i in range(nodes_count)] + self.agent_name_mapping = dict(zip(self.possible_agents, range(len(self.possible_agents)))) + self.render_mode = render_mode + self.nodes_count = nodes_count + self.data_size = data_size + self.avg_wake_up_time = avg_wake_up_time + self.sim_time = sim_time + + def observation_space(self, agent): + return spaces.Dict({ + "prr": spaces.Box(low=0, high=1, shape=(), dtype=np.float32), + "rssi": spaces.Box(low=-200, high=0, shape=(), dtype=np.float32), + "sf": spaces.Box(low=7, high=12, shape=(), dtype=np.int64) + }) + + def action_space(self, agent): + return spaces.Discrete(3) + + def reset(self, **kwargs): + self.simpy_env = simpy.Environment() + self.simulator = LoraSimulator(self.nodes_count, self.data_size, self.avg_wake_up_time * 1000, self.sim_time * 1000, self.simpy_env) + self.simulator.add_nodes() + self.agents = self.possible_agents[:] + self.current_step = 0 + self.dones = {agent: False for agent in self.agents} # Initialize dones for all agents + observations = {agent: self.observe(agent) for agent in self.agents} + return observations + + + def step(self, actions): + for agent in actions: + agent_index = self.agent_name_mapping[agent] + if not self.dones[agent]: + self.simulator.update_node_behavior(agent_index, actions[agent]) + + self.simpy_env.run(until=self.current_step * 1000) + self.current_step += 1 + + observations = {agent: self.observe(agent) for agent in self.agents} + rewards = {agent: self._calculate_reward(self.agent_name_mapping[agent]) for agent in self.agents} + dones = {agent: self.current_step >= self.sim_time for agent in self.agents} + infos = {agent: {} for agent in self.agents} + + if self.render_mode == 'human': + self.render() + + return observations, rewards, dones, dones, infos + + def observe(self, agent): + idx = self.agent_name_mapping[agent] + return { + "prr": consts.nodes[idx].prr_value, + "rssi": consts.nodes[idx].rssi_value, + "sf": consts.nodes[idx].sf_value + } + + def render(self): + if self.render_mode == 'human': + print({agent: self.observe(agent) for agent in self.agents}) + + def _calculate_reward(self, agent_index): + lambda_value = 0.0001 + mean_prr = consts.nodes[agent_index].calculate_prr() + retransmission_penalty = lambda_value * consts.nodes[agent_index].packets_sent_count + return mean_prr - retransmission_penalty + +# To support the AEC API from this parallel environment +def env_creator(env_config): + env = LoRaEnvParallel(**env_config) + env = parallel_to_aec(env) + return env + +# Example usage +env = LoRaEnvParallel(render_mode="human") +observations = env.reset() +while True: + actions = {agent: env.action_space(agent).sample() for agent in env.agents} + observations, rewards, dones, _, infos = env.step(actions) + if all(dones.values()): + break +env.close() diff --git a/loraenv/loraenv/envs/__init__.py b/loraenv/loraenv/envs/__init__.py index 795fbe6..2c61fa5 100644 --- a/loraenv/loraenv/envs/__init__.py +++ b/loraenv/loraenv/envs/__init__.py @@ -1 +1,2 @@ -from loraenv.envs.environment import LoRaEnv +# from loraenv.envs.environment import LoRaEnv +from loraenv.envs.environment import LoRaEnvParallel diff --git a/loraenv/loraenv/envs/multiagent_env.py b/loraenv/loraenv/envs/multiagent_env.py deleted file mode 100644 index f546233..0000000 --- a/loraenv/loraenv/envs/multiagent_env.py +++ /dev/null @@ -1,105 +0,0 @@ -from pettingzoo import AECEnv -from pettingzoo.utils import agent_selector -from gymnasium import spaces -import numpy as np -import simpy -from simulator.lora_simulator import LoraSimulator -import simulator.consts as consts -import simulator.utils as utils -from ray.rllib.env import PettingZooEnv -from ray.tune.registry import register_env - -class LoRaEnvPZ(AECEnv): - """ - Multi-agent LoRa Network Simulation for PettingZoo. - """ - metadata = {"render_modes": ["human"]} - - def __init__(self, nodes_count=10, data_size=16, avg_wake_up_time=30, sim_time=3600): - super().__init__() - - self.nodes_count = nodes_count - self.data_size = data_size - self.avg_wake_up_time = avg_wake_up_time - self.sim_time = sim_time - - self.simpy_env = simpy.Environment() - self.simulator = LoraSimulator(nodes_count, data_size, avg_wake_up_time * 1000, sim_time * 1000, self.simpy_env) - - self.agents = [f"agent_{i}" for i in range(nodes_count)] - self.possible_agents = self.agents[:] - self.agent_name_mapping = dict(zip(self.agents, range(len(self.agents)))) - - self.action_spaces = {agent: spaces.Discrete(3) for agent in self.agents} - self.observation_spaces = { - agent: spaces.Dict({ - "prr": spaces.Box(low=0, high=1, shape=(), dtype=np.float64), - "rssi": spaces.Box(low=-200, high=0, shape=(), dtype=np.float64), - "sf": spaces.Box(low=7, high=12, shape=(), dtype=np.int64) - }) for agent in self.agents - } - - self._agent_selector = agent_selector(self.agents) - self.agent_selection = self._agent_selector.next() - self.rewards = {agent: 0 for agent in self.agents} - self.dones = {agent: False for agent in self.agents} - self.infos = {agent: {} for agent in self.agents} - - def observe(self, agent): - idx = self.agent_name_mapping[agent] - return { - "prr": consts.nodes[idx].prr_value, - "rssi": consts.nodes[idx].rssi_value, - "sf": consts.nodes[idx].sf_value - } - - def step(self, action): - if self.dones[self.agent_selection]: - return self._was_done_step(action) - - agent_index = self.agent_name_mapping[self.agent_selection] - self.simulator.update_node_behavior(agent_index, action) - self.simpy_env.run(until=self.current_step * 1000) - - # Update rewards and observations - self.rewards[self.agent_selection] = self._calculate_reward(agent_index) - self._accumulate_rewards() - - # Check if simulation is done - self.current_step += 1 - if self.current_step >= self.sim_time: - for agent in self.agents: - self.dones[agent] = True - - self.agent_selection = self._agent_selector.next() - - def reset(self, **kwargs): - self.simpy_env = simpy.Environment() - self.simulator = LoraSimulator(self.nodes_count, self.data_size, self.avg_wake_up_time * 1000, self.sim_time * 1000, self.simpy_env) - self.simulator.add_nodes() - self.current_step = 0 - - self.rewards = {agent: 0 for agent in self.agents} - self.dones = {agent: False for agent in self.agents} - self.infos = {agent: {} for agent in self.agents} - self._agent_selector = agent_selector(self.agents) - self.agent_selection = self._agent_selector.next() - - def render(self, mode="human"): - if mode == "human": - print({agent: self.observe(agent) for agent in self.agents}) - - def _calculate_reward(self, agent_index): - lambda_value = 0.0001 - mean_prr = consts.nodes[agent_index].calculate_prr() - retransmission_penalty = lambda_value * consts.nodes[agent_index].packets_sent_count - return mean_prr - retransmission_penalty - -# Wrap the PettingZoo environment for RLlib -def env_creator(env_config): - return LoRaEnvPZ(**env_config) - -# Register the environment with RLlib -register_env("lora_multi_agent", lambda config: PettingZooEnv(env_creator(config))) - -# This registered environment can now be used in RLlib with the environment name "lora_multi_agent" diff --git a/main2.py b/main2.py new file mode 100644 index 0000000..738cff2 --- /dev/null +++ b/main2.py @@ -0,0 +1,58 @@ +import gymnasium as gym +# import numpy as np +from stable_baselines3 import DQN +from stable_baselines3.common.callbacks import BaseCallback +import matplotlib.pyplot as plt + +# Assuming you have an appropriate multi-agent version of your environment registered in Gym +class RewardLoggerCallback(BaseCallback): + def __init__(self, check_freq): + super(RewardLoggerCallback, self).__init__() + self.check_freq = check_freq + self.episode_rewards = [] + + def _on_step(self) -> bool: + if self.n_calls % self.check_freq == 0: + rewards = self.training_env.get_attr('rewards') + self.episode_rewards.append(rewards) + return True + +if __name__ == "__main__": + nodes_count = 10 # Example parameter + data_size = 16 # Example parameter + avg_wake_up_time = 30 # Example parameter + sim_time = 3600 # Example parameter + + env = gym.make("loraenv/LoRaMulti-v0", config={ + "nodes_count": nodes_count, + "data_size": data_size, + "avg_wake_up_time": avg_wake_up_time, + "sim_time": sim_time, + }) + + models = {agent: DQN("MlpPolicy", env, verbose=1) for agent in env.possible_agents} + + # Train each model + for agent, model in models.items(): + callback = RewardLoggerCallback(check_freq=100) + model.learn(total_timesteps=int(sim_time * 100), callback=callback) + model.save(f"{agent}_lora_model") + plt.figure(figsize=(10, 5)) + plt.plot(callback.episode_rewards, marker="o", linestyle="-") + plt.title(f"Total Reward per Episode During Training for {agent}") + plt.xlabel("Episode") + plt.ylabel("Total Reward") + plt.grid(True) + plt.savefig(f"{agent}_training_rewards.png") + + # Example evaluation phase for one agent + test_env = env + obs = test_env.reset() + done = {agent: False for agent in test_env.possible_agents} + while not all(done.values()): + actions = {agent: models[agent].predict(obs[agent], deterministic=True)[0] for agent in test_env.possible_agents} + obs, rewards, dones, _ = test_env.step(actions) + done = dones + + # Close environment + env.close() From 776ebcf1a63d9ff81daf8a20bd183e9f1ea7e0d6 Mon Sep 17 00:00:00 2001 From: ltwmori Date: Thu, 9 May 2024 18:19:06 +0500 Subject: [PATCH 03/14] fix: fixing env for multiagent --- loraenv/loraenv.egg-info/PKG-INFO | 9 +-- loraenv/loraenv.egg-info/SOURCES.txt | 1 + loraenv/loraenv.egg-info/requires.txt | 6 +- loraenv/loraenv.egg-info/top_level.txt | 2 +- loraenv/loraenv/__init__.py | 18 +++-- loraenv/loraenv/envs/LoRaEnvParallel.py | 93 ------------------------- main2.py | 79 +++++++++++---------- main3.py | 62 +++++++++++++++++ multienv/multienv_v0.py | 7 ++ simulator/lora_simulator.py | 7 +- 10 files changed, 134 insertions(+), 150 deletions(-) delete mode 100644 loraenv/loraenv/envs/LoRaEnvParallel.py create mode 100644 main3.py create mode 100644 multienv/multienv_v0.py diff --git a/loraenv/loraenv.egg-info/PKG-INFO b/loraenv/loraenv.egg-info/PKG-INFO index 8f7a00e..90bef7d 100644 --- a/loraenv/loraenv.egg-info/PKG-INFO +++ b/loraenv/loraenv.egg-info/PKG-INFO @@ -1,6 +1,3 @@ -Metadata-Version: 2.1 -Name: loraenv -Version: 0.0.1 -Requires-Dist: gymnasium -Requires-Dist: numpy -Requires-Dist: matplotlib +Metadata-Version: 2.1 +Name: loraenv +Version: 0.0.1 diff --git a/loraenv/loraenv.egg-info/SOURCES.txt b/loraenv/loraenv.egg-info/SOURCES.txt index 85c918d..beee9af 100644 --- a/loraenv/loraenv.egg-info/SOURCES.txt +++ b/loraenv/loraenv.egg-info/SOURCES.txt @@ -5,5 +5,6 @@ loraenv.egg-info/SOURCES.txt loraenv.egg-info/dependency_links.txt loraenv.egg-info/requires.txt loraenv.egg-info/top_level.txt +loraenv/envs/LoRaEnvParallel.py loraenv/envs/__init__.py loraenv/envs/environment.py \ No newline at end of file diff --git a/loraenv/loraenv.egg-info/requires.txt b/loraenv/loraenv.egg-info/requires.txt index d39ffd8..77cff3c 100644 --- a/loraenv/loraenv.egg-info/requires.txt +++ b/loraenv/loraenv.egg-info/requires.txt @@ -1,3 +1,3 @@ -gymnasium -numpy -matplotlib +gymnasium +numpy +matplotlib diff --git a/loraenv/loraenv.egg-info/top_level.txt b/loraenv/loraenv.egg-info/top_level.txt index bca9dd9..ac96a7e 100644 --- a/loraenv/loraenv.egg-info/top_level.txt +++ b/loraenv/loraenv.egg-info/top_level.txt @@ -1 +1 @@ -loraenv +loraenv diff --git a/loraenv/loraenv/__init__.py b/loraenv/loraenv/__init__.py index 3ac5ae7..58ca7d5 100644 --- a/loraenv/loraenv/__init__.py +++ b/loraenv/loraenv/__init__.py @@ -1,11 +1,15 @@ -# from gymnasium.envs.registration import register +# # from gymnasium.envs.registration import register + +# # register( +# # id='loraenv/LoRa-v0', +# # entry_point='loraenv.envs:LoRaEnv', +# # ) -# register( -# id='loraenv/LoRa-v0', -# entry_point='loraenv.envs:LoRaEnv', -# ) +# from pettingzoo.utils import from_parallel -from pettingzoo.utils import from_parallel -from pettingzoo.test import api_test +# register( +# id='loraenv/LoRaMulti-v0', +# entry_point='loraenv.envs:LoRaEnvParallel', +# ) \ No newline at end of file diff --git a/loraenv/loraenv/envs/LoRaEnvParallel.py b/loraenv/loraenv/envs/LoRaEnvParallel.py deleted file mode 100644 index 954452d..0000000 --- a/loraenv/loraenv/envs/LoRaEnvParallel.py +++ /dev/null @@ -1,93 +0,0 @@ -import simpy -import numpy as np -from gymnasium import spaces -from pettingzoo import ParallelEnv -from pettingzoo.utils import parallel_to_aec -from simulator.lora_simulator import LoraSimulator -import simulator.consts as consts - -class LoRaEnvParallel(ParallelEnv): - metadata = {'render_modes': ['human'], 'name': 'lora_v1'} - - def __init__(self, nodes_count=10, data_size=16, avg_wake_up_time=30, sim_time=3600, render_mode=None): - self.possible_agents = [f"agent_{i}" for i in range(nodes_count)] - self.agent_name_mapping = dict(zip(self.possible_agents, range(len(self.possible_agents)))) - self.render_mode = render_mode - self.nodes_count = nodes_count - self.data_size = data_size - self.avg_wake_up_time = avg_wake_up_time - self.sim_time = sim_time - - def observation_space(self, agent): - return spaces.Dict({ - "prr": spaces.Box(low=0, high=1, shape=(), dtype=np.float32), - "rssi": spaces.Box(low=-200, high=0, shape=(), dtype=np.float32), - "sf": spaces.Box(low=7, high=12, shape=(), dtype=np.int64) - }) - - def action_space(self, agent): - return spaces.Discrete(3) - - def reset(self, **kwargs): - self.simpy_env = simpy.Environment() - self.simulator = LoraSimulator(self.nodes_count, self.data_size, self.avg_wake_up_time * 1000, self.sim_time * 1000, self.simpy_env) - self.simulator.add_nodes() - self.agents = self.possible_agents[:] - self.current_step = 0 - self.dones = {agent: False for agent in self.agents} # Initialize dones for all agents - observations = {agent: self.observe(agent) for agent in self.agents} - return observations - - - def step(self, actions): - for agent in actions: - agent_index = self.agent_name_mapping[agent] - if not self.dones[agent]: - self.simulator.update_node_behavior(agent_index, actions[agent]) - - self.simpy_env.run(until=self.current_step * 1000) - self.current_step += 1 - - observations = {agent: self.observe(agent) for agent in self.agents} - rewards = {agent: self._calculate_reward(self.agent_name_mapping[agent]) for agent in self.agents} - dones = {agent: self.current_step >= self.sim_time for agent in self.agents} - infos = {agent: {} for agent in self.agents} - - if self.render_mode == 'human': - self.render() - - return observations, rewards, dones, dones, infos - - def observe(self, agent): - idx = self.agent_name_mapping[agent] - return { - "prr": consts.nodes[idx].prr_value, - "rssi": consts.nodes[idx].rssi_value, - "sf": consts.nodes[idx].sf_value - } - - def render(self): - if self.render_mode == 'human': - print({agent: self.observe(agent) for agent in self.agents}) - - def _calculate_reward(self, agent_index): - lambda_value = 0.0001 - mean_prr = consts.nodes[agent_index].calculate_prr() - retransmission_penalty = lambda_value * consts.nodes[agent_index].packets_sent_count - return mean_prr - retransmission_penalty - -# To support the AEC API from this parallel environment -def env_creator(env_config): - env = LoRaEnvParallel(**env_config) - env = parallel_to_aec(env) - return env - -# Example usage -env = LoRaEnvParallel(render_mode="human") -observations = env.reset() -while True: - actions = {agent: env.action_space(agent).sample() for agent in env.agents} - observations, rewards, dones, _, infos = env.step(actions) - if all(dones.values()): - break -env.close() diff --git a/main2.py b/main2.py index 738cff2..4c2cce1 100644 --- a/main2.py +++ b/main2.py @@ -1,13 +1,18 @@ import gymnasium as gym -# import numpy as np from stable_baselines3 import DQN from stable_baselines3.common.callbacks import BaseCallback import matplotlib.pyplot as plt -# Assuming you have an appropriate multi-agent version of your environment registered in Gym +from multienv.multienv_v0 import LoRaEnvParallel +from pettingzoo.utils import parallel_to_aec +from pettingzoo.utils.wrappers import BaseWrapper +from stable_baselines3.common.env_util import make_vec_env +from supersuit import pad_action_space_v0, pad_observations_v0 +import supersuit as ss + class RewardLoggerCallback(BaseCallback): - def __init__(self, check_freq): - super(RewardLoggerCallback, self).__init__() + def __init__(self, check_freq, verbose=1): + super(RewardLoggerCallback, self).__init__(verbose) self.check_freq = check_freq self.episode_rewards = [] @@ -23,36 +28,36 @@ def _on_step(self) -> bool: avg_wake_up_time = 30 # Example parameter sim_time = 3600 # Example parameter - env = gym.make("loraenv/LoRaMulti-v0", config={ - "nodes_count": nodes_count, - "data_size": data_size, - "avg_wake_up_time": avg_wake_up_time, - "sim_time": sim_time, - }) - - models = {agent: DQN("MlpPolicy", env, verbose=1) for agent in env.possible_agents} - - # Train each model - for agent, model in models.items(): - callback = RewardLoggerCallback(check_freq=100) - model.learn(total_timesteps=int(sim_time * 100), callback=callback) - model.save(f"{agent}_lora_model") - plt.figure(figsize=(10, 5)) - plt.plot(callback.episode_rewards, marker="o", linestyle="-") - plt.title(f"Total Reward per Episode During Training for {agent}") - plt.xlabel("Episode") - plt.ylabel("Total Reward") - plt.grid(True) - plt.savefig(f"{agent}_training_rewards.png") - - # Example evaluation phase for one agent - test_env = env - obs = test_env.reset() - done = {agent: False for agent in test_env.possible_agents} - while not all(done.values()): - actions = {agent: models[agent].predict(obs[agent], deterministic=True)[0] for agent in test_env.possible_agents} - obs, rewards, dones, _ = test_env.step(actions) - done = dones - - # Close environment - env.close() + env = LoRaEnvParallel(nodes_count=nodes_count, data_size=data_size, avg_wake_up_time=avg_wake_up_time, sim_time=sim_time) + aec_env = env + # wrapped_env = pad_action_space(pad_observations(aec_env)) + + # vec_env = pettingzoo_env_to_vec_env_v1(wrapped_env) + wrapped_env = pad_observations_v0(pad_action_space_v0(aec_env)) + + vec_env = ss.pettingzoo_env_to_vec_env_v1(wrapped_env) + # aec_env = ss.concat_vec_envs_v1(aec_env, 8, num_cpus=1, base_class="stable_baselines3") + + model = DQN("MultiInputPolicy", vec_env, verbose=1) + reward_logger = RewardLoggerCallback(check_freq=100) + + # Training Phase + model.learn(total_timesteps=100, callback=reward_logger) + model.save("dqn_lora_model") + + # Plot the rewards collected during the training + plt.figure(figsize=(10, 5)) + plt.plot(reward_logger.episode_rewards, marker="o", linestyle="-") + plt.title("Total Reward per Episode During Training") + plt.xlabel("Episode") + plt.ylabel("Total Reward") + plt.grid(True) + plt.savefig("training_rewards.png") + + # Evaluation Phase + obs = vec_env.reset() + done = False + while not done: + action, _states = model.predict(obs, deterministic=True) + obs, reward, done, info = vec_env.step(action) + vec_env.render() diff --git a/main3.py b/main3.py new file mode 100644 index 0000000..1e957ae --- /dev/null +++ b/main3.py @@ -0,0 +1,62 @@ +import os +import ray +from ray import tune +from ray.rllib.algorithms.dqn import DQNConfig +from ray.rllib.env import PettingZooEnv +from ray.tune.registry import register_env + +from multienv.multienv_v0 import env + +if __name__ == "__main__": + ray.init() + + # Register the environment + register_env("LoRaEnvParallel", lambda config: PettingZooEnv(env(config))) + + # Create a test environment to get observation and action spaces + test_env = PettingZooEnv(env({ + "nodes_count": 10, + "data_size": 16, + "avg_wake_up_time": 30, + "sim_time": 3600 + })) + obs_space = test_env.observation_space(test_env.possible_agents[0]) + act_space = test_env.action_space(test_env.possible_agents[0]) + + config = ( + DQNConfig() + .environment(env="LoRaEnvParallel", env_config={ + "nodes_count": 10, + "data_size": 16, + "avg_wake_up_time": 30, + "sim_time": 3600 + }) + .rollouts(num_rollout_workers=1, rollout_fragment_length=30) + .training( + train_batch_size=200, + hiddens=[], + dueling=False, + ) + .multi_agent( + policies={agent: (None, obs_space, act_space, {}) for agent in test_env.possible_agents}, + policy_mapping_fn=(lambda agent_id, *args, **kwargs: agent_id), + ) + .resources(num_gpus=int(os.environ.get("RLLIB_NUM_GPUS", "0"))) + .framework(framework="torch") + .exploration( + exploration_config={ + "type": "EpsilonGreedy", + "initial_epsilon": 0.1, + "final_epsilon": 0.0, + "epsilon_timesteps": 100000, + } + ) + ) + + tune.run( + "DQN", + name="DQN_LoRaEnvParallel", + stop={"timesteps_total": 1000000}, + checkpoint_freq=10, + config=config.to_dict(), + ) diff --git a/multienv/multienv_v0.py b/multienv/multienv_v0.py new file mode 100644 index 0000000..2ad9081 --- /dev/null +++ b/multienv/multienv_v0.py @@ -0,0 +1,7 @@ +from multienv.env.multienv import LoRaEnvParallel +from pettingzoo.utils import parallel_to_aec + +def env(**kwargs): + env_cur = LoRaEnvParallel(**kwargs) + env_cur = parallel_to_aec(env_cur) + return env_cur \ No newline at end of file diff --git a/simulator/lora_simulator.py b/simulator/lora_simulator.py index cb21e37..df1a62d 100644 --- a/simulator/lora_simulator.py +++ b/simulator/lora_simulator.py @@ -19,9 +19,10 @@ def add_nodes(self): for i in range(self.nodes_count): consts.nodes.append(EndNode(i, self.env, data_gateway)) - def update_nodes_behavior(self, action): - for node in consts.nodes: - node.perform_action(action) + def update_nodes_behavior(self, index, action): + # for node in consts.nodes: + # node.perform_action(action) + consts.nodes[index].perform_action(action) def start_simulation(self): from simulator.frame import Frame From 29428de868db19bff2b2ca1c778bf0b9531224de Mon Sep 17 00:00:00 2001 From: ltwmori Date: Fri, 10 May 2024 15:57:36 +0500 Subject: [PATCH 04/14] feat: tests --- main2.py | 66 +++++++++++++++++++++++++++++--------------------------- 1 file changed, 34 insertions(+), 32 deletions(-) diff --git a/main2.py b/main2.py index 4c2cce1..3aa659c 100644 --- a/main2.py +++ b/main2.py @@ -3,6 +3,7 @@ from stable_baselines3.common.callbacks import BaseCallback import matplotlib.pyplot as plt +from pettingzoo.test import parallel_api_test from multienv.multienv_v0 import LoRaEnvParallel from pettingzoo.utils import parallel_to_aec from pettingzoo.utils.wrappers import BaseWrapper @@ -29,35 +30,36 @@ def _on_step(self) -> bool: sim_time = 3600 # Example parameter env = LoRaEnvParallel(nodes_count=nodes_count, data_size=data_size, avg_wake_up_time=avg_wake_up_time, sim_time=sim_time) - aec_env = env - # wrapped_env = pad_action_space(pad_observations(aec_env)) - - # vec_env = pettingzoo_env_to_vec_env_v1(wrapped_env) - wrapped_env = pad_observations_v0(pad_action_space_v0(aec_env)) - - vec_env = ss.pettingzoo_env_to_vec_env_v1(wrapped_env) - # aec_env = ss.concat_vec_envs_v1(aec_env, 8, num_cpus=1, base_class="stable_baselines3") - - model = DQN("MultiInputPolicy", vec_env, verbose=1) - reward_logger = RewardLoggerCallback(check_freq=100) - - # Training Phase - model.learn(total_timesteps=100, callback=reward_logger) - model.save("dqn_lora_model") - - # Plot the rewards collected during the training - plt.figure(figsize=(10, 5)) - plt.plot(reward_logger.episode_rewards, marker="o", linestyle="-") - plt.title("Total Reward per Episode During Training") - plt.xlabel("Episode") - plt.ylabel("Total Reward") - plt.grid(True) - plt.savefig("training_rewards.png") - - # Evaluation Phase - obs = vec_env.reset() - done = False - while not done: - action, _states = model.predict(obs, deterministic=True) - obs, reward, done, info = vec_env.step(action) - vec_env.render() + parallel_api_test(env, num_cycles=1000) + # aec_env = env + # # wrapped_env = pad_action_space(pad_observations(aec_env)) + + # # vec_env = pettingzoo_env_to_vec_env_v1(wrapped_env) + # wrapped_env = pad_observations_v0(pad_action_space_v0(aec_env)) + + # vec_env = ss.pettingzoo_env_to_vec_env_v1(wrapped_env) + # # aec_env = ss.concat_vec_envs_v1(aec_env, 8, num_cpus=1, base_class="stable_baselines3") + + # model = DQN("MultiInputPolicy", vec_env, verbose=1) + # reward_logger = RewardLoggerCallback(check_freq=100) + + # # Training Phase + # model.learn(total_timesteps=100, callback=reward_logger) + # model.save("dqn_lora_model") + + # # Plot the rewards collected during the training + # plt.figure(figsize=(10, 5)) + # plt.plot(reward_logger.episode_rewards, marker="o", linestyle="-") + # plt.title("Total Reward per Episode During Training") + # plt.xlabel("Episode") + # plt.ylabel("Total Reward") + # plt.grid(True) + # plt.savefig("training_rewards.png") + + # # Evaluation Phase + # obs = vec_env.reset() + # done = False + # while not done: + # action, _states = model.predict(obs, deterministic=True) + # obs, reward, done, info = vec_env.step(action) + # vec_env.render() From 74cd5eca4b36608d922ab01eeebc4bbd6ed73b20 Mon Sep 17 00:00:00 2001 From: ltwmori Date: Wed, 15 May 2024 18:22:43 +0500 Subject: [PATCH 05/14] feat: added code --- main2.py | 143 +++++++++++++++++++++++++++++++++++-------------------- main3.py | 47 +++++++++++------- 2 files changed, 122 insertions(+), 68 deletions(-) diff --git a/main2.py b/main2.py index 3aa659c..b372bf7 100644 --- a/main2.py +++ b/main2.py @@ -1,65 +1,106 @@ +import sys import gymnasium as gym -from stable_baselines3 import DQN -from stable_baselines3.common.callbacks import BaseCallback +import loraenv +import simulator.utils as utils import matplotlib.pyplot as plt -from pettingzoo.test import parallel_api_test -from multienv.multienv_v0 import LoRaEnvParallel -from pettingzoo.utils import parallel_to_aec -from pettingzoo.utils.wrappers import BaseWrapper -from stable_baselines3.common.env_util import make_vec_env -from supersuit import pad_action_space_v0, pad_observations_v0 -import supersuit as ss +import numpy as np +import simulator.consts as consts -class RewardLoggerCallback(BaseCallback): - def __init__(self, check_freq, verbose=1): - super(RewardLoggerCallback, self).__init__(verbose) - self.check_freq = check_freq - self.episode_rewards = [] +from simulator.lora_simulator import LoraSimulator +from reward_caller_callback import RewardLoggerCallback - def _on_step(self) -> bool: - if self.n_calls % self.check_freq == 0: - rewards = self.training_env.get_attr('rewards') - self.episode_rewards.append(rewards) - return True +from stable_baselines3 import PPO if __name__ == "__main__": - nodes_count = 10 # Example parameter - data_size = 16 # Example parameter - avg_wake_up_time = 30 # Example parameter - sim_time = 3600 # Example parameter + if len(sys.argv) == 5: + nodes_count = int(sys.argv[1]) + data_size = int(sys.argv[2]) + avg_wake_up_time = int(sys.argv[3]) + sim_time = int(sys.argv[4]) - env = LoRaEnvParallel(nodes_count=nodes_count, data_size=data_size, avg_wake_up_time=avg_wake_up_time, sim_time=sim_time) - parallel_api_test(env, num_cycles=1000) - # aec_env = env - # # wrapped_env = pad_action_space(pad_observations(aec_env)) + # Gymnasium environment + gym_env = gym.make( + "loraenv/LoRa-v0", + nodes_count=nodes_count, + data_size=data_size, + avg_wake_up_time=avg_wake_up_time, + sim_time=sim_time, + ) - # # vec_env = pettingzoo_env_to_vec_env_v1(wrapped_env) - # wrapped_env = pad_observations_v0(pad_action_space_v0(aec_env)) + train = True + if train: + # Create new model + model = PPO("MultiInputPolicy", gym_env, verbose=1) + reward_logger = RewardLoggerCallback() - # vec_env = ss.pettingzoo_env_to_vec_env_v1(wrapped_env) - # # aec_env = ss.concat_vec_envs_v1(aec_env, 8, num_cpus=1, base_class="stable_baselines3") + # Training Phase + # -------------- + utils.logging = False + utils.log(f"!-- TRAINING START --!") + # Calculate total timesteps for training + episodes = 10 + total_timesteps = ( + sim_time * episodes + ) # Assuming 1 timestep = 1 second in simulation + model.learn( + total_timesteps=total_timesteps, + log_interval=4, + progress_bar=True, + callback=reward_logger, + ) + model.save("lora_model") + utils.log(f"!-- TRAINING END --!") - # model = DQN("MultiInputPolicy", vec_env, verbose=1) - # reward_logger = RewardLoggerCallback(check_freq=100) + # Plot the rewards collected during the training + plt.figure(figsize=(10, 5)) + plt.plot(reward_logger.episode_rewards, marker="o", linestyle="-") + plt.title("Total Reward per Episode During Training") + plt.xlabel("Episode") + plt.ylabel("Total Reward") + plt.grid(True) + plt.savefig("training_phase.png") - # # Training Phase - # model.learn(total_timesteps=100, callback=reward_logger) - # model.save("dqn_lora_model") + # Evaluation Phase + # ---------------- + model = PPO.load("lora_model") + utils.log(f"!-- EVALUATION START --!") + obs, info = gym_env.reset() + rewards_per_evaluation = [[] for _ in range(nodes_count)] # List to hold rewards for each node + total_rewards_per_node = [0] * nodes_count # List to hold total rewards for each node - # # Plot the rewards collected during the training - # plt.figure(figsize=(10, 5)) - # plt.plot(reward_logger.episode_rewards, marker="o", linestyle="-") - # plt.title("Total Reward per Episode During Training") - # plt.xlabel("Episode") - # plt.ylabel("Total Reward") - # plt.grid(True) - # plt.savefig("training_rewards.png") + done = False + while True: + action, _states = model.predict(obs, deterministic=True) + obs, reward, done, terminated, info = gym_env.step(action) + for i in range(nodes_count): + rewards_per_evaluation[i].append(reward[i]) # Log each reward for each node + total_rewards_per_node[i] += reward[i] # Sum rewards for each node - # # Evaluation Phase - # obs = vec_env.reset() - # done = False - # while not done: - # action, _states = model.predict(obs, deterministic=True) - # obs, reward, done, info = vec_env.step(action) - # vec_env.render() + if done or terminated: + utils.show_final_statistics() + utils.log(f"!-- EVALUATION END --!") + break + + # Plot the rewards collected during the evaluation for each node + plt.figure(figsize=(10, 5)) + for i in range(nodes_count): + plt.plot( + range(1, len(rewards_per_evaluation[i]) + 1), + rewards_per_evaluation[i], + marker="o", + linestyle="-", + label=f'Node {i+1}' + ) + plt.title("Rewards per Step During Evaluation for Each Node") + plt.xlabel("Step") + plt.ylabel("Reward") + plt.legend() + plt.grid(True) + plt.savefig("evaluation_phase_per_node.png") + + else: + print( + "usage: ./main " + ) + exit(-1) \ No newline at end of file diff --git a/main3.py b/main3.py index 1e957ae..9e5ffe8 100644 --- a/main3.py +++ b/main3.py @@ -4,24 +4,33 @@ from ray.rllib.algorithms.dqn import DQNConfig from ray.rllib.env import PettingZooEnv from ray.tune.registry import register_env +import logging from multienv.multienv_v0 import env +logging.basicConfig(level=logging.INFO) + if __name__ == "__main__": ray.init() # Register the environment - register_env("LoRaEnvParallel", lambda config: PettingZooEnv(env(config))) + def create_env(config): + env_instance = env() + logging.info(f"Custom Env possible_agents: {env_instance.possible_agents}") + return PettingZooEnv(env_instance) + + register_env("LoRaEnvParallel", create_env) # Create a test environment to get observation and action spaces - test_env = PettingZooEnv(env({ - "nodes_count": 10, - "data_size": 16, - "avg_wake_up_time": 30, - "sim_time": 3600 - })) - obs_space = test_env.observation_space(test_env.possible_agents[0]) - act_space = test_env.action_space(test_env.possible_agents[0]) + test_env = create_env({}) + logging.info(f"Wrapped Env possible_agents: {test_env.env.possible_agents}") + + # Check if possible_agents exists + if hasattr(test_env.env, 'possible_agents'): + obs_space = test_env.env.observation_space(test_env.env.possible_agents[0]) + act_space = test_env.env.action_space(test_env.env.possible_agents[0]) + else: + raise AttributeError("The environment does not have 'possible_agents' attribute.") config = ( DQNConfig() @@ -38,7 +47,7 @@ dueling=False, ) .multi_agent( - policies={agent: (None, obs_space, act_space, {}) for agent in test_env.possible_agents}, + policies={agent: (None, obs_space, act_space, {}) for agent in test_env.env.possible_agents}, policy_mapping_fn=(lambda agent_id, *args, **kwargs: agent_id), ) .resources(num_gpus=int(os.environ.get("RLLIB_NUM_GPUS", "0"))) @@ -53,10 +62,14 @@ ) ) - tune.run( - "DQN", - name="DQN_LoRaEnvParallel", - stop={"timesteps_total": 1000000}, - checkpoint_freq=10, - config=config.to_dict(), - ) + try: + tune.run( + "DQN", + name="DQN_LoRaEnvParallel", + stop={"timesteps_total": 1000000}, + checkpoint_freq=10, + config=config.to_dict(), + ) + except Exception as e: + logging.error(f"An error occurred during training: {e}") + raise From b51f7412a49893f8ca5767061f29cda9993b72f6 Mon Sep 17 00:00:00 2001 From: ltwmori Date: Thu, 16 May 2024 11:48:37 +0500 Subject: [PATCH 06/14] pushing env to git --- .gitignore | 6 +- multienv/env/multienv.py | 132 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 133 insertions(+), 5 deletions(-) create mode 100644 multienv/env/multienv.py diff --git a/.gitignore b/.gitignore index 0530986..b2ac8c5 100644 --- a/.gitignore +++ b/.gitignore @@ -120,11 +120,7 @@ celerybeat.pid *.sage.py # Environments -.env -.venv -env/ -venv/ -ENV/ + env.bak/ venv.bak/ diff --git a/multienv/env/multienv.py b/multienv/env/multienv.py new file mode 100644 index 0000000..1ac9fd9 --- /dev/null +++ b/multienv/env/multienv.py @@ -0,0 +1,132 @@ +import simpy +import numpy as np +from gymnasium import spaces +from pettingzoo import ParallelEnv +from pettingzoo.utils import parallel_to_aec +from simulator.lora_simulator import LoraSimulator +import simulator.consts as consts +import simulator.utils as utils +from copy import copy +import functools +import logging + +class LoRaEnvParallel(ParallelEnv): + metadata = {'render_modes': ['human'], 'name': 'multiagent_v0'} + + def __init__(self, nodes_count=10, data_size=16, avg_wake_up_time=30, sim_time=3600, render_mode=None): + self.possible_agents = [f"agent_{i}" for i in range(nodes_count)] + self.agent_name_mapping = dict(zip(self.possible_agents, range(len(self.possible_agents)))) + self.render_mode = render_mode + self.nodes_count = nodes_count + self.data_size = data_size + self.avg_wake_up_time = avg_wake_up_time + self.sim_time = sim_time + + # Setup simulator environment + self.simpy_env = simpy.Environment() + self.simulator = LoraSimulator( + self.nodes_count, + self.data_size, + self.avg_wake_up_time * 1000, + self.sim_time * 1000, + self.simpy_env, + ) + + self.current_step = 0 + self.done = False + self.truncated = False + + @functools.lru_cache(maxsize=None) + def observation_space(self, agent): + return spaces.Dict({ + "prr": spaces.Box(low=0, high=1, shape=(), dtype=np.float32), + "rssi": spaces.Box(low=-200, high=0, shape=(), dtype=np.float32), + "sf": spaces.Box(low=7, high=12, shape=(), dtype=np.int64) + }) + + @functools.lru_cache(maxsize=None) + def action_space(self, agent): + return spaces.Discrete(3) + + def reset(self, **kwargs): + logging.info("Resetting environment.") + self.agents = copy(self.possible_agents) + self.simpy_env = simpy.Environment() + self.simulator = LoraSimulator( + self.nodes_count, + self.data_size, + self.avg_wake_up_time * 1000, + self.sim_time * 1000, + self.simpy_env, + ) + self.current_step = 0 + self.done = False + self.truncated = False + utils.reset_simulator() + self.simulator.add_nodes() + observations = {agent: self.observe(agent) for agent in self.possible_agents} + infos = {agent: {} for agent in self.possible_agents} + logging.info("Environment reset complete.") + return observations, infos + + def step(self, actions): + logging.info(f"Step {self.current_step} with actions {actions}.") + if self.current_step == 0: + self.simulator.start_simulation() + if self.current_step >= self.sim_time: + self.done = True + reward = self._calculate_reward() + observations = {agent: self.observe(agent) for agent in self.possible_agents} + infos = {agent: {} for agent in self.possible_agents} + logging.info("Simulation done.") + return observations, reward, self.done, infos + + for agent in actions: + agent_index = self.agent_name_mapping[agent] + if not self.done: + self.simulator.update_nodes_behavior(agent_index, actions[agent]) + + self.current_step += 1 + timestep = self.current_step * 1000 + for i in range(self.nodes_count): + utils.log( + f"!-- UPLINK NUMBER FOR STEP [{self.current_step}] FOR NODE {i}: {actions[f'agent_{i}'] + 1} --!", + self.simpy_env, + ) + self.simulator.env.run(until=timestep) + + observations = {agent: self.observe(agent) for agent in self.possible_agents} + rewards = {agent: self._calculate_reward(self.agent_name_mapping[agent]) for agent in self.possible_agents} + self.done = self.current_step >= self.sim_time + infos = {agent: {} for agent in self.possible_agents} + + if self.render_mode == 'human': + self.render() + + dones = {agent: self.done for agent in self.possible_agents} + truckations = {agent: self.truncated for agent in self.possible_agents} + + return observations, rewards, dones, truckations, infos + + def observe(self, agent): + idx = self.agent_name_mapping[agent] + return { + "prr": consts.nodes[idx].prr_value, + "rssi": consts.nodes[idx].rssi_value, + "sf": consts.nodes[idx].sf_value + } + + def render(self): + if self.render_mode == 'human': + print({agent: self.observe(agent) for agent in self.possible_agents}) + + def _calculate_reward(self, agent_index): + lambda_value = 0.0001 + mean_prr = consts.nodes[agent_index].calculate_prr() + retransmission_penalty = lambda_value * consts.nodes[agent_index].packets_sent_count + return mean_prr - retransmission_penalty + +def env_creator(env_config): + env_instance = LoRaEnvParallel(**env_config) + env_instance = parallel_to_aec(env_instance) + return env_instance From bb76173a514cebe913d20ec32d59aebe8af98432 Mon Sep 17 00:00:00 2001 From: ltwmori Date: Thu, 16 May 2024 14:12:35 +0500 Subject: [PATCH 07/14] fix: minor changes in the code --- main3.py | 65 ++++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 47 insertions(+), 18 deletions(-) diff --git a/main3.py b/main3.py index 9e5ffe8..ab8cdd0 100644 --- a/main3.py +++ b/main3.py @@ -5,24 +5,26 @@ from ray.rllib.env import PettingZooEnv from ray.tune.registry import register_env import logging +import sys from multienv.multienv_v0 import env logging.basicConfig(level=logging.INFO) -if __name__ == "__main__": - ray.init() - - # Register the environment - def create_env(config): - env_instance = env() - logging.info(f"Custom Env possible_agents: {env_instance.possible_agents}") - return PettingZooEnv(env_instance) +# Function to create the environment +def create_env(config): + logging.info(f"Creating environment with config: {config}") + env_instance = env(**config) + logging.info(f"Custom Env possible_agents: {env_instance.possible_agents}") + return PettingZooEnv(env_instance) +# Function to train the environment +def train_fn(config): + logging.info("Registering environment.") register_env("LoRaEnvParallel", create_env) # Create a test environment to get observation and action spaces - test_env = create_env({}) + test_env = create_env(config) logging.info(f"Wrapped Env possible_agents: {test_env.env.possible_agents}") # Check if possible_agents exists @@ -32,15 +34,10 @@ def create_env(config): else: raise AttributeError("The environment does not have 'possible_agents' attribute.") - config = ( + algo_config = ( DQNConfig() - .environment(env="LoRaEnvParallel", env_config={ - "nodes_count": 10, - "data_size": 16, - "avg_wake_up_time": 30, - "sim_time": 3600 - }) - .rollouts(num_rollout_workers=1, rollout_fragment_length=30) + .environment(env="LoRaEnvParallel", env_config=config) + .env_runners(num_env_runners=1, rollout_fragment_length=30) .training( train_batch_size=200, hiddens=[], @@ -68,8 +65,40 @@ def create_env(config): name="DQN_LoRaEnvParallel", stop={"timesteps_total": 1000000}, checkpoint_freq=10, - config=config.to_dict(), + config=algo_config.to_dict(), ) except Exception as e: logging.error(f"An error occurred during training: {e}") raise + +if __name__ == "__main__": + if len(sys.argv) != 5: + print("Usage: python3 main3.py ") + sys.exit(1) + + nodes_count = int(sys.argv[1]) + data_size = int(sys.argv[2]) + avg_wake_up_time = int(sys.argv[3]) + sim_time = int(sys.argv[4]) + + ray.init() + + try: + analysis = tune.run( + train_fn, + config={ + "nodes_count": nodes_count, + "data_size": data_size, + "avg_wake_up_time": avg_wake_up_time, + "sim_time": sim_time + }, + metric="episode_reward_mean", + mode="max" + ) + + print("Best checkpoint:", analysis.best_checkpoint) + + with analysis.best_checkpoint.as_directory() as tmpdir: + trainer = DQNConfig.load_from_checkpoint(tmpdir) + except Exception as e: + logging.error(f"An error occurred during the Ray Tune run: {e}") From 3c797dab5ebef45c00bbd4e29524786d54c4c694 Mon Sep 17 00:00:00 2001 From: DeTrix2712 Date: Thu, 16 May 2024 14:56:09 +0500 Subject: [PATCH 08/14] fix: RLlib fix --- main3.py | 80 +++++++++++++++++++++----------------------------------- 1 file changed, 30 insertions(+), 50 deletions(-) diff --git a/main3.py b/main3.py index ab8cdd0..2b41b89 100644 --- a/main3.py +++ b/main3.py @@ -2,41 +2,50 @@ import ray from ray import tune from ray.rllib.algorithms.dqn import DQNConfig +from ray.rllib.algorithms.dqn import DQN from ray.rllib.env import PettingZooEnv from ray.tune.registry import register_env import logging -import sys from multienv.multienv_v0 import env logging.basicConfig(level=logging.INFO) -# Function to create the environment -def create_env(config): - logging.info(f"Creating environment with config: {config}") - env_instance = env(**config) - logging.info(f"Custom Env possible_agents: {env_instance.possible_agents}") - return PettingZooEnv(env_instance) +if __name__ == "__main__": + ray.init() + + # Register the environment + def create_env(config): + env_instance = env() + logging.info(f"Custom Env possible_agents: {env_instance.possible_agents}") + return PettingZooEnv(env_instance) -# Function to train the environment -def train_fn(config): - logging.info("Registering environment.") register_env("LoRaEnvParallel", create_env) # Create a test environment to get observation and action spaces - test_env = create_env(config) + test_env = create_env({}) logging.info(f"Wrapped Env possible_agents: {test_env.env.possible_agents}") # Check if possible_agents exists - if hasattr(test_env.env, 'possible_agents'): + if hasattr(test_env.env, "possible_agents"): obs_space = test_env.env.observation_space(test_env.env.possible_agents[0]) act_space = test_env.env.action_space(test_env.env.possible_agents[0]) else: - raise AttributeError("The environment does not have 'possible_agents' attribute.") + raise AttributeError( + "The environment does not have 'possible_agents' attribute." + ) - algo_config = ( + config = ( DQNConfig() - .environment(env="LoRaEnvParallel", env_config=config) + .environment( + env="LoRaEnvParallel", + env_config={ + "nodes_count": 10, + "data_size": 16, + "avg_wake_up_time": 30, + "sim_time": 3600, + }, + ) .env_runners(num_env_runners=1, rollout_fragment_length=30) .training( train_batch_size=200, @@ -44,7 +53,10 @@ def train_fn(config): dueling=False, ) .multi_agent( - policies={agent: (None, obs_space, act_space, {}) for agent in test_env.env.possible_agents}, + policies={ + agent: (None, obs_space, act_space, {}) + for agent in test_env.env.possible_agents + }, policy_mapping_fn=(lambda agent_id, *args, **kwargs: agent_id), ) .resources(num_gpus=int(os.environ.get("RLLIB_NUM_GPUS", "0"))) @@ -61,44 +73,12 @@ def train_fn(config): try: tune.run( - "DQN", + DQN, name="DQN_LoRaEnvParallel", stop={"timesteps_total": 1000000}, checkpoint_freq=10, - config=algo_config.to_dict(), + config=config.to_dict(), ) except Exception as e: logging.error(f"An error occurred during training: {e}") raise - -if __name__ == "__main__": - if len(sys.argv) != 5: - print("Usage: python3 main3.py ") - sys.exit(1) - - nodes_count = int(sys.argv[1]) - data_size = int(sys.argv[2]) - avg_wake_up_time = int(sys.argv[3]) - sim_time = int(sys.argv[4]) - - ray.init() - - try: - analysis = tune.run( - train_fn, - config={ - "nodes_count": nodes_count, - "data_size": data_size, - "avg_wake_up_time": avg_wake_up_time, - "sim_time": sim_time - }, - metric="episode_reward_mean", - mode="max" - ) - - print("Best checkpoint:", analysis.best_checkpoint) - - with analysis.best_checkpoint.as_directory() as tmpdir: - trainer = DQNConfig.load_from_checkpoint(tmpdir) - except Exception as e: - logging.error(f"An error occurred during the Ray Tune run: {e}") From 32bcfbd1a64f550aef969295038913bf22694d13 Mon Sep 17 00:00:00 2001 From: DeTrix2712 Date: Thu, 16 May 2024 15:44:23 +0500 Subject: [PATCH 09/14] fix: minor fixes --- multienv/env/multienv.py | 60 ++++++++++++++++++++++++++-------------- simulator/singleton.py | 12 +++++--- 2 files changed, 48 insertions(+), 24 deletions(-) diff --git a/multienv/env/multienv.py b/multienv/env/multienv.py index 1ac9fd9..a297ac7 100644 --- a/multienv/env/multienv.py +++ b/multienv/env/multienv.py @@ -10,12 +10,22 @@ import functools import logging -class LoRaEnvParallel(ParallelEnv): - metadata = {'render_modes': ['human'], 'name': 'multiagent_v0'} - def __init__(self, nodes_count=10, data_size=16, avg_wake_up_time=30, sim_time=3600, render_mode=None): +class LoRaEnvParallel(ParallelEnv): + metadata = {"render_modes": ["human"], "name": "multiagent_v0"} + + def __init__( + self, + nodes_count=10, + data_size=16, + avg_wake_up_time=30, + sim_time=3600, + render_mode=None, + ): self.possible_agents = [f"agent_{i}" for i in range(nodes_count)] - self.agent_name_mapping = dict(zip(self.possible_agents, range(len(self.possible_agents)))) + self.agent_name_mapping = dict( + zip(self.possible_agents, range(len(self.possible_agents))) + ) self.render_mode = render_mode self.nodes_count = nodes_count self.data_size = data_size @@ -38,11 +48,13 @@ def __init__(self, nodes_count=10, data_size=16, avg_wake_up_time=30, sim_time=3 @functools.lru_cache(maxsize=None) def observation_space(self, agent): - return spaces.Dict({ - "prr": spaces.Box(low=0, high=1, shape=(), dtype=np.float32), - "rssi": spaces.Box(low=-200, high=0, shape=(), dtype=np.float32), - "sf": spaces.Box(low=7, high=12, shape=(), dtype=np.int64) - }) + return spaces.Dict( + { + "prr": spaces.Box(low=0.0, high=1.0, shape=(), dtype=np.float32), + "rssi": spaces.Box(low=-200.0, high=0.0, shape=(), dtype=np.float32), + "sf": spaces.Box(low=7, high=12, shape=(), dtype=np.int64), + } + ) @functools.lru_cache(maxsize=None) def action_space(self, agent): @@ -76,11 +88,13 @@ def step(self, actions): if self.current_step >= self.sim_time: self.done = True reward = self._calculate_reward() - observations = {agent: self.observe(agent) for agent in self.possible_agents} + observations = { + agent: self.observe(agent) for agent in self.possible_agents + } infos = {agent: {} for agent in self.possible_agents} logging.info("Simulation done.") return observations, reward, self.done, infos - + for agent in actions: agent_index = self.agent_name_mapping[agent] if not self.done: @@ -96,36 +110,42 @@ def step(self, actions): self.simulator.env.run(until=timestep) observations = {agent: self.observe(agent) for agent in self.possible_agents} - rewards = {agent: self._calculate_reward(self.agent_name_mapping[agent]) for agent in self.possible_agents} + rewards = { + agent: self._calculate_reward(self.agent_name_mapping[agent]) + for agent in self.possible_agents + } self.done = self.current_step >= self.sim_time infos = {agent: {} for agent in self.possible_agents} - if self.render_mode == 'human': + if self.render_mode == "human": self.render() dones = {agent: self.done for agent in self.possible_agents} - truckations = {agent: self.truncated for agent in self.possible_agents} + truncations = {agent: self.truncated for agent in self.possible_agents} - return observations, rewards, dones, truckations, infos + return observations, rewards, dones, truncations, infos def observe(self, agent): idx = self.agent_name_mapping[agent] return { - "prr": consts.nodes[idx].prr_value, - "rssi": consts.nodes[idx].rssi_value, - "sf": consts.nodes[idx].sf_value + "prr": np.array(consts.nodes[idx].prr_value, dtype=np.float32), + "rssi": np.array(consts.nodes[idx].rssi_value, dtype=np.float32), + "sf": np.array(consts.nodes[idx].sf_value, dtype=np.int64), } def render(self): - if self.render_mode == 'human': + if self.render_mode == "human": print({agent: self.observe(agent) for agent in self.possible_agents}) def _calculate_reward(self, agent_index): lambda_value = 0.0001 mean_prr = consts.nodes[agent_index].calculate_prr() - retransmission_penalty = lambda_value * consts.nodes[agent_index].packets_sent_count + retransmission_penalty = ( + lambda_value * consts.nodes[agent_index].packets_sent_count + ) return mean_prr - retransmission_penalty + def env_creator(env_config): env_instance = LoRaEnvParallel(**env_config) env_instance = parallel_to_aec(env_instance) diff --git a/simulator/singleton.py b/simulator/singleton.py index 58d39c3..c7481e2 100644 --- a/simulator/singleton.py +++ b/simulator/singleton.py @@ -19,10 +19,14 @@ def __init__(self): self.sim_time = int(sys.argv[4]) else: - print( - "usage: ./main " - ) - exit(-1) + self.nodes_count = 10 + self.data_size = 16 + self.avg_wake_up_time = 30 * 1000 + self.sim_time = 3600 * 1000 + # print( + # "usage: ./main " + # ) + # exit(-1) class DataGatewaySingleton: From 0588526bad424a2d2ffa3f69725e7ddeba37a1ea Mon Sep 17 00:00:00 2001 From: ltwmori Date: Thu, 16 May 2024 16:11:23 +0500 Subject: [PATCH 10/14] fix: something --- multienv/env/multienv.py | 2 +- simulator/entities.py | 17 ++++++++++------- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/multienv/env/multienv.py b/multienv/env/multienv.py index a297ac7..6ddcff4 100644 --- a/multienv/env/multienv.py +++ b/multienv/env/multienv.py @@ -128,7 +128,7 @@ def step(self, actions): def observe(self, agent): idx = self.agent_name_mapping[agent] return { - "prr": np.array(consts.nodes[idx].prr_value, dtype=np.float32), + "prr": np.array(consts.nodes[idx].calculate_prr(), dtype=np.float32), "rssi": np.array(consts.nodes[idx].rssi_value, dtype=np.float32), "sf": np.array(consts.nodes[idx].sf_value, dtype=np.int64), } diff --git a/simulator/entities.py b/simulator/entities.py index f24e8eb..8fdedbf 100644 --- a/simulator/entities.py +++ b/simulator/entities.py @@ -112,9 +112,17 @@ def __init__(self, node_id, env, gateway=None): self.packets_sent_count = 0 self.packets_received_count = 0 + self.x, self.y = EndNode.find_place_for_new_node() + self.dist = np.sqrt( + (self.x - consts.bsx) * (self.x - consts.bsx) + + (self.y - consts.bsy) * (self.y - consts.bsy) + ) + + self.sf = self.find_optimal_sf() + self.prr_value = 0 self.rssi_value = 0 - self.sf_value = 0 + self.sf_value = self.sf self.nr_lost = 0 self.nr_collisions = 0 @@ -139,13 +147,8 @@ def __init__(self, node_id, env, gateway=None): self.data_packet = None self.sack_packet_received = env.event() - self.x, self.y = EndNode.find_place_for_new_node() - self.dist = np.sqrt( - (self.x - consts.bsx) * (self.x - consts.bsx) - + (self.y - consts.bsy) * (self.y - consts.bsy) - ) - self.sf = self.find_optimal_sf() + def __str__(self): # return "EndNode: " + str(self.node_id) + " x: " + str(self.x) + " y: " + str(self.y) + " sf: " + str(self.sf) From 09da482e656a8cca34c218df7f0df851e2cfd3ac Mon Sep 17 00:00:00 2001 From: DeTrix2712 Date: Wed, 22 May 2024 13:30:48 +0500 Subject: [PATCH 11/14] fix: minor fixes --- main.py | 6 ++ main2.py | 24 ++++-- main3.py | 156 ++++++++++++++++++++++-------------- simulator/communications.py | 3 +- simulator/consts.py | 14 +++- simulator/entities.py | 10 +-- simulator/singleton.py | 27 ------- simulator/utils.py | 13 ++- 8 files changed, 136 insertions(+), 117 deletions(-) diff --git a/main.py b/main.py index 681e6bc..8f6d828 100644 --- a/main.py +++ b/main.py @@ -2,6 +2,7 @@ import gymnasium as gym import loraenv import simulator.utils as utils +import simulator.consts as consts import matplotlib.pyplot as plt from simulator.lora_simulator import LoraSimulator @@ -16,6 +17,11 @@ avg_wake_up_time = int(sys.argv[3]) sim_time = int(sys.argv[4]) + consts.nodes_count = nodes_count + consts.data_size = data_size + consts.avg_wake_up_time = avg_wake_up_time + consts.sim_time = sim_time + # Gymnasium environment gym_env = gym.make( "loraenv/LoRa-v0", diff --git a/main2.py b/main2.py index b372bf7..bb23d89 100644 --- a/main2.py +++ b/main2.py @@ -2,6 +2,7 @@ import gymnasium as gym import loraenv import simulator.utils as utils +import simulator.consts as consts import matplotlib.pyplot as plt import numpy as np @@ -19,6 +20,11 @@ avg_wake_up_time = int(sys.argv[3]) sim_time = int(sys.argv[4]) + consts.nodes_count = nodes_count + consts.data_size = data_size + consts.avg_wake_up_time = avg_wake_up_time + consts.sim_time = sim_time + # Gymnasium environment gym_env = gym.make( "loraenv/LoRa-v0", @@ -28,7 +34,7 @@ sim_time=sim_time, ) - train = True + train = False if train: # Create new model model = PPO("MultiInputPolicy", gym_env, verbose=1) @@ -66,15 +72,21 @@ model = PPO.load("lora_model") utils.log(f"!-- EVALUATION START --!") obs, info = gym_env.reset() - rewards_per_evaluation = [[] for _ in range(nodes_count)] # List to hold rewards for each node - total_rewards_per_node = [0] * nodes_count # List to hold total rewards for each node + rewards_per_evaluation = [ + [] for _ in range(nodes_count) + ] # List to hold rewards for each node + total_rewards_per_node = [ + 0 + ] * nodes_count # List to hold total rewards for each node done = False while True: action, _states = model.predict(obs, deterministic=True) obs, reward, done, terminated, info = gym_env.step(action) for i in range(nodes_count): - rewards_per_evaluation[i].append(reward[i]) # Log each reward for each node + rewards_per_evaluation[i].append( + reward[i] + ) # Log each reward for each node total_rewards_per_node[i] += reward[i] # Sum rewards for each node if done or terminated: @@ -90,7 +102,7 @@ rewards_per_evaluation[i], marker="o", linestyle="-", - label=f'Node {i+1}' + label=f"Node {i+1}", ) plt.title("Rewards per Step During Evaluation for Each Node") plt.xlabel("Step") @@ -103,4 +115,4 @@ print( "usage: ./main " ) - exit(-1) \ No newline at end of file + exit(-1) diff --git a/main3.py b/main3.py index 2b41b89..0dbdf9e 100644 --- a/main3.py +++ b/main3.py @@ -1,84 +1,116 @@ import os +import sys import ray +import logging +import simulator.consts as consts + from ray import tune from ray.rllib.algorithms.dqn import DQNConfig from ray.rllib.algorithms.dqn import DQN from ray.rllib.env import PettingZooEnv from ray.tune.registry import register_env -import logging from multienv.multienv_v0 import env logging.basicConfig(level=logging.INFO) if __name__ == "__main__": - ray.init() + if len(sys.argv) == 5: + nodes_count = int(sys.argv[1]) + data_size = int(sys.argv[2]) + avg_wake_up_time = int(sys.argv[3]) + sim_time = int(sys.argv[4]) - # Register the environment - def create_env(config): - env_instance = env() - logging.info(f"Custom Env possible_agents: {env_instance.possible_agents}") - return PettingZooEnv(env_instance) + consts.nodes_count = nodes_count + consts.data_size = data_size + consts.avg_wake_up_time = avg_wake_up_time + consts.sim_time = sim_time - register_env("LoRaEnvParallel", create_env) + ray.init() - # Create a test environment to get observation and action spaces - test_env = create_env({}) - logging.info(f"Wrapped Env possible_agents: {test_env.env.possible_agents}") + # Register the environment + def create_env(config): + env_instance = env( + nodes_count=nodes_count, + data_size=data_size, + avg_wake_up_time=avg_wake_up_time, + sim_time=sim_time, + ) + logging.info(f"Custom Env possible_agents: {env_instance.possible_agents}") + return PettingZooEnv(env_instance) - # Check if possible_agents exists - if hasattr(test_env.env, "possible_agents"): - obs_space = test_env.env.observation_space(test_env.env.possible_agents[0]) - act_space = test_env.env.action_space(test_env.env.possible_agents[0]) - else: - raise AttributeError( - "The environment does not have 'possible_agents' attribute." - ) + register_env("LoRaEnvParallel", create_env) - config = ( - DQNConfig() - .environment( - env="LoRaEnvParallel", - env_config={ - "nodes_count": 10, - "data_size": 16, - "avg_wake_up_time": 30, - "sim_time": 3600, - }, - ) - .env_runners(num_env_runners=1, rollout_fragment_length=30) - .training( - train_batch_size=200, - hiddens=[], - dueling=False, - ) - .multi_agent( - policies={ - agent: (None, obs_space, act_space, {}) - for agent in test_env.env.possible_agents - }, - policy_mapping_fn=(lambda agent_id, *args, **kwargs: agent_id), - ) - .resources(num_gpus=int(os.environ.get("RLLIB_NUM_GPUS", "0"))) - .framework(framework="torch") - .exploration( - exploration_config={ - "type": "EpsilonGreedy", - "initial_epsilon": 0.1, - "final_epsilon": 0.0, - "epsilon_timesteps": 100000, + # Create a test environment to get observation and action spaces + test_env = create_env( + { + "nodes_count": nodes_count, + "data_size": data_size, + "avg_wake_up_time": avg_wake_up_time, + "sim_time": sim_time, } ) - ) + logging.info(f"Wrapped Env possible_agents: {test_env.env.possible_agents}") + + # Check if possible_agents exists + if hasattr(test_env.env, "possible_agents"): + obs_space = test_env.env.observation_space(test_env.env.possible_agents[0]) + act_space = test_env.env.action_space(test_env.env.possible_agents[0]) + else: + raise AttributeError( + "The environment does not have 'possible_agents' attribute." + ) + + config = ( + DQNConfig() + .environment( + env="LoRaEnvParallel", + env_config={ + "nodes_count": nodes_count, + "data_size": data_size, + "avg_wake_up_time": avg_wake_up_time, + "sim_time": sim_time, + }, + ) + .env_runners( + num_env_runners=1, + rollout_fragment_length=30, + exploration_config={ + "type": "EpsilonGreedy", + "initial_epsilon": 0.1, + "final_epsilon": 0.0, + "epsilon_timesteps": 100000, + }, + ) + .training( + train_batch_size=200, + hiddens=[], + dueling=False, + ) + .multi_agent( + policies={ + agent: (None, obs_space, act_space, {}) + for agent in test_env.env.possible_agents + }, + policy_mapping_fn=(lambda agent_id, *args, **kwargs: agent_id), + ) + .resources(num_gpus=1) + .framework(framework="torch") + ) - try: - tune.run( - DQN, - name="DQN_LoRaEnvParallel", - stop={"timesteps_total": 1000000}, - checkpoint_freq=10, - config=config.to_dict(), + try: + tune.run( + DQN, + name="DQN_LoRaEnvParallel", + stop={"timesteps_total": 1000000}, + checkpoint_freq=10, + config=config.to_dict(), + ) + except Exception as e: + logging.error(f"An error occurred during training: {e}") + raise + else: + print( + "usage: python main3.py " ) - except Exception as e: - logging.error(f"An error occurred during training: {e}") - raise + exit(-1) diff --git a/simulator/communications.py b/simulator/communications.py index 41fa209..f7a6bbb 100644 --- a/simulator/communications.py +++ b/simulator/communications.py @@ -6,7 +6,6 @@ import math import random from simulator.channels import Channels -from simulator.singleton import ArgumentSingleton class Packet: @@ -174,7 +173,7 @@ def __init__(self, sf=None, node=None): self.sf = sf self.bw = 125 self.freq = Channels.get_sf_freq(sf) - self.pl = ArgumentSingleton.get_instance().data_size + self.pl = consts.data_size self.rec_time = self.airtime() def update_statistics(self): diff --git a/simulator/consts.py b/simulator/consts.py index ad2c5c0..72eaed6 100644 --- a/simulator/consts.py +++ b/simulator/consts.py @@ -1,4 +1,5 @@ import numpy as np + from collections import defaultdict # CONSTANTS @@ -24,10 +25,6 @@ pow_cons = [75, 45, 30] V = 3.3 # voltage -# global -data_gateway = None -nodes = [] - coding_rate = 1 drifting_range = [-0.2, 0.2] mean = 0 # Mean of the normal distribution @@ -75,3 +72,12 @@ total_energy = 0 erx = 0 etx = 0 + +# Default values for parameters (to be overwritten) +nodes_count = 10 +data_size = 16 +avg_wake_up_time = 30 +sim_time = 3600 + +data_gateway = None +nodes = [] diff --git a/simulator/entities.py b/simulator/entities.py index 8fdedbf..ef54196 100644 --- a/simulator/entities.py +++ b/simulator/entities.py @@ -2,7 +2,6 @@ import numpy as np from simulator.singleton import ( DataGatewaySingleton, - ArgumentSingleton, ) import random, math from simulator.utils import * @@ -10,8 +9,6 @@ from simulator.broadcast_traffic import BroadcastTraffic from simulator.frame import Frame -args = ArgumentSingleton.get_instance() - class NetworkNode: def __init__(self, node_id=None): @@ -41,7 +38,7 @@ def __init__(self, node_id): def frame(self, sf): if sf > 6: return self.frames[sf - 7] - raise ValueError("sf must be greater than 6") + raise ValueError("SF must be greater than 6") def transmit_sack(self, env, sf): from simulator.communications import SackPacket @@ -147,9 +144,6 @@ def __init__(self, node_id, env, gateway=None): self.data_packet = None self.sack_packet_received = env.event() - - - def __str__(self): # return "EndNode: " + str(self.node_id) + " x: " + str(self.x) + " y: " + str(self.y) + " sf: " + str(self.sf) return f"node {self.node_id}: \t x {self.x:3f} \t y {self.y:3f} \t dist {self.dist:4.3f} \t SF {self.sf}" @@ -226,7 +220,7 @@ def transmit(self, env): while True: # calculating round start time - yield env.timeout(random.uniform(0.0, float(2 * args.avg_wake_up_time))) + yield env.timeout(random.uniform(0.0, float(2 * consts.avg_wake_up_time))) if self.waiting_first_sack: yield self.sack_packet_received self.waiting_first_sack = False diff --git a/simulator/singleton.py b/simulator/singleton.py index c7481e2..524470e 100644 --- a/simulator/singleton.py +++ b/simulator/singleton.py @@ -2,33 +2,6 @@ import simpy -class ArgumentSingleton: - _instance = None - - @staticmethod - def get_instance(): - if ArgumentSingleton._instance is None: - ArgumentSingleton._instance = ArgumentSingleton() - return ArgumentSingleton._instance - - def __init__(self): - if len(sys.argv) == 5: - self.nodes_count = int(sys.argv[1]) - self.data_size = int(sys.argv[2]) - self.avg_wake_up_time = int(sys.argv[3]) - self.sim_time = int(sys.argv[4]) - - else: - self.nodes_count = 10 - self.data_size = 16 - self.avg_wake_up_time = 30 * 1000 - self.sim_time = 3600 * 1000 - # print( - # "usage: ./main " - # ) - # exit(-1) - - class DataGatewaySingleton: _instance = None diff --git a/simulator/utils.py b/simulator/utils.py index 388c326..a217acd 100644 --- a/simulator/utils.py +++ b/simulator/utils.py @@ -2,10 +2,7 @@ import datetime import simulator.consts as consts -from simulator.singleton import ArgumentSingleton, DataGatewaySingleton - -args = ArgumentSingleton.get_instance() -nodes_count = args.nodes_count +from simulator.singleton import DataGatewaySingleton def get_log_filename(): @@ -123,7 +120,7 @@ def show_final_statistics(): f"{node.nr_lost:{max_length}} packets lost, " f"{node.nr_collisions:{max_length}} collisions" ) - log(f"Average PRR: {(sum / nodes_count):.3f}") + log(f"Average PRR: {(sum / consts.nodes_count):.3f}") log("\n!-- NETWORK STATISTICS --!\n") log(f"Data collisions: {consts.nr_data_collisions}") @@ -133,10 +130,10 @@ def show_final_statistics(): log(f"Transmitted SACK packets: {consts.nr_sack_sent}") log(f"Missed SACK packets: {consts.nr_sack_missed_count}") log(f"Data Retransmissions: {consts.nr_data_retransmissions}") - log(f"Average energy consumption (Rx): {(consts.erx / nodes_count):.3f} J") - log(f"Average energy consumption (Tx): {(consts.etx / nodes_count):.3f} J") + log(f"Average energy consumption (Rx): {(consts.erx / consts.nodes_count):.3f} J") + log(f"Average energy consumption (Tx): {(consts.etx / consts.nodes_count):.3f} J") log( - f"Average energy consumption per node: {consts.total_energy / nodes_count:.3f} J" + f"Average energy consumption per node: {consts.total_energy / consts.nodes_count:.3f} J" ) log( f"Network PRR (version 1): {(consts.nr_data_packets_sent - consts.nr_data_retransmissions) / consts.nr_data_packets_sent:.3f}" From 46db3964e4bb690d3f1f7af3ea4474a2d9cf674f Mon Sep 17 00:00:00 2001 From: ltwmori Date: Thu, 23 May 2024 15:24:44 +0500 Subject: [PATCH 12/14] feat: added tensorboard --- main3.py | 52 +++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 47 insertions(+), 5 deletions(-) diff --git a/main3.py b/main3.py index 0dbdf9e..21e02ab 100644 --- a/main3.py +++ b/main3.py @@ -3,17 +3,47 @@ import ray import logging import simulator.consts as consts +import matplotlib.pyplot as plt from ray import tune from ray.rllib.algorithms.dqn import DQNConfig from ray.rllib.algorithms.dqn import DQN from ray.rllib.env import PettingZooEnv from ray.tune.registry import register_env +from ray.tune.logger import TBXLoggerCallback from multienv.multienv_v0 import env logging.basicConfig(level=logging.INFO) +def plot_metrics(df): + # Create a figure with 2 subplots arranged vertically + fig, ax = plt.subplots(2, 1, figsize=(10, 8)) + + # Plot mean episode reward on the first subplot + if 'episode_reward_mean' in df.columns: + df['episode_reward_mean'].plot(ax=ax[0]) + ax[0].set_title('Mean Episode Reward') + ax[0].set_xlabel('Training Iterations') + ax[0].set_ylabel('Reward') + else: + logging.warning("No 'episode_reward_mean' column found in results.") + ax[0].text(0.5, 0.5, 'No Data', horizontalalignment='center', verticalalignment='center') + + # Plot mean episode length on the second subplot + if 'episode_len_mean' in df.columns: + df['episode_len_mean'].plot(ax=ax[1]) + ax[1].set_title('Mean Episode Length') + ax[1].set_xlabel('Training Iterations') + ax[1].set_ylabel('Length') + else: + logging.warning("No 'episode_len_mean' column found in results.") + ax[1].text(0.5, 0.5, 'No Data', horizontalalignment='center', verticalalignment='center') + + # Adjust layout and display the plots + plt.tight_layout() + plt.show() + if __name__ == "__main__": if len(sys.argv) == 5: nodes_count = int(sys.argv[1]) @@ -79,7 +109,7 @@ def create_env(config): "type": "EpsilonGreedy", "initial_epsilon": 0.1, "final_epsilon": 0.0, - "epsilon_timesteps": 100000, + "epsilon_timesteps": 1000000, }, ) .training( @@ -94,18 +124,30 @@ def create_env(config): }, policy_mapping_fn=(lambda agent_id, *args, **kwargs: agent_id), ) - .resources(num_gpus=1) + .resources(num_gpus=0) .framework(framework="torch") ) try: - tune.run( + analysis = tune.run( DQN, name="DQN_LoRaEnvParallel", - stop={"timesteps_total": 1000000}, - checkpoint_freq=10, + stop={"timesteps_total": 1000}, + checkpoint_freq=100, + keep_checkpoints_num=5, + checkpoint_score_attr="training_iteration", config=config.to_dict(), + local_dir="~/ray_results", # Specify the directory for logging + callbacks=[TBXLoggerCallback()], + log_to_file=True, ) + + # Get the best trial + best_trial = analysis.get_best_trial("episode_reward_mean", mode="max") + + # Use TensorBoard to visualize results + print(f"Training completed. Use TensorBoard to visualize results: tensorboard --logdir {best_trial.local_path}") + except Exception as e: logging.error(f"An error occurred during training: {e}") raise From 0ed0f81edddddc0aa9948e21a8288e355fd47e8e Mon Sep 17 00:00:00 2001 From: ltwmori Date: Thu, 23 May 2024 17:44:24 +0500 Subject: [PATCH 13/14] feat: restored prev version to main 2 --- main2.py | 224 +++++++++++------- main3.py | 2 +- ...1716462842.MacBook-Air-Assel.local.44116.0 | Bin 0 -> 88 bytes ...1716462901.MacBook-Air-Assel.local.44462.0 | Bin 0 -> 88 bytes ...1716464109.MacBook-Air-Assel.local.50289.0 | Bin 0 -> 88 bytes ...1716464382.MacBook-Air-Assel.local.51656.0 | Bin 0 -> 88 bytes ...1716464601.MacBook-Air-Assel.local.52747.0 | Bin 0 -> 88 bytes ...1716464781.MacBook-Air-Assel.local.53639.0 | Bin 0 -> 88 bytes ...1716465092.MacBook-Air-Assel.local.55124.0 | Bin 0 -> 88 bytes ...1716465333.MacBook-Air-Assel.local.56249.0 | Bin 0 -> 88 bytes ...1716467317.MacBook-Air-Assel.local.65894.0 | Bin 0 -> 88 bytes 11 files changed, 134 insertions(+), 92 deletions(-) create mode 100644 ~/ray_results/events.out.tfevents.1716462842.MacBook-Air-Assel.local.44116.0 create mode 100644 ~/ray_results/events.out.tfevents.1716462901.MacBook-Air-Assel.local.44462.0 create mode 100644 ~/ray_results/events.out.tfevents.1716464109.MacBook-Air-Assel.local.50289.0 create mode 100644 ~/ray_results/events.out.tfevents.1716464382.MacBook-Air-Assel.local.51656.0 create mode 100644 ~/ray_results/events.out.tfevents.1716464601.MacBook-Air-Assel.local.52747.0 create mode 100644 ~/ray_results/events.out.tfevents.1716464781.MacBook-Air-Assel.local.53639.0 create mode 100644 ~/ray_results/events.out.tfevents.1716465092.MacBook-Air-Assel.local.55124.0 create mode 100644 ~/ray_results/events.out.tfevents.1716465333.MacBook-Air-Assel.local.56249.0 create mode 100644 ~/ray_results/events.out.tfevents.1716467317.MacBook-Air-Assel.local.65894.0 diff --git a/main2.py b/main2.py index bb23d89..085c9aa 100644 --- a/main2.py +++ b/main2.py @@ -1,17 +1,58 @@ +import os import sys -import gymnasium as gym -import loraenv -import simulator.utils as utils +import ray +import logging import simulator.consts as consts -import matplotlib.pyplot as plt -import numpy as np -import simulator.consts as consts +from ray import tune +from ray.rllib.algorithms.dqn import DQNConfig +from ray.rllib.algorithms.dqn import DQN +from ray.rllib.env import PettingZooEnv +from ray.tune.registry import register_env +from ray.tune.logger import TBXLoggerCallback +from torch.utils.tensorboard import SummaryWriter + +from multienv.multienv_v0 import env + +logging.basicConfig(level=logging.INFO) + +class CustomMetricsCallback(tune.Callback): + def __init__(self, logdir): + self.logdir = logdir + self.writer = SummaryWriter(log_dir=logdir) + + def on_episode_end(self, *, worker, base_env, policies, episode, **kwargs): + env = base_env.get_unwrapped()[0] + total_steps = episode.length + total_uplinks = sum(episode.custom_metrics[agent]["uplink_attempts"] for agent in env.possible_agents) + total_reward = sum(episode.reward[agent] for agent in env.possible_agents) + uplinks_per_node = {agent: episode.custom_metrics[agent]["uplink_attempts"] for agent in env.possible_agents} -from simulator.lora_simulator import LoraSimulator -from reward_caller_callback import RewardLoggerCallback + uplinks_per_step = total_uplinks / total_steps if total_steps > 0 else 0 + reward_per_uplink = total_reward / total_uplinks if total_uplinks > 0 else 0 -from stable_baselines3 import PPO + episode.custom_metrics["total_uplinks"] = total_uplinks + episode.custom_metrics["total_steps"] = total_steps + episode.custom_metrics["uplinks_per_step"] = uplinks_per_step + episode.custom_metrics["reward_per_uplink"] = reward_per_uplink + episode.custom_metrics["uplinks_per_node"] = uplinks_per_node + + logging.info(f"Episode {episode.episode_id} ended with {total_uplinks} total uplinks, " + f"{total_steps} steps, {uplinks_per_step:.4f} uplinks per step, " + f"{reward_per_uplink:.4f} reward per uplink, and {uplinks_per_node} uplink attempts.") + + # Log custom metrics to TensorBoard + self.writer.add_scalar("Metrics/Total_Uplinks", total_uplinks, episode.episode_id) + self.writer.add_scalar("Metrics/Reward_Per_Uplink", reward_per_uplink, episode.episode_id) + + for agent, uplinks in uplinks_per_node.items(): + self.writer.add_scalar(f"Metrics/Uplinks_Per_Node/{agent}", uplinks, episode.episode_id) + + def on_trial_end(self, iteration, trials, trial, **info): + self.writer.flush() + + def on_experiment_end(self, **kwargs): + self.writer.close() if __name__ == "__main__": if len(sys.argv) == 5: @@ -25,94 +66,95 @@ consts.avg_wake_up_time = avg_wake_up_time consts.sim_time = sim_time - # Gymnasium environment - gym_env = gym.make( - "loraenv/LoRa-v0", - nodes_count=nodes_count, - data_size=data_size, - avg_wake_up_time=avg_wake_up_time, - sim_time=sim_time, + ray.init() + + # Register the environment + def create_env(config): + env_instance = env( + nodes_count=nodes_count, + data_size=data_size, + avg_wake_up_time=avg_wake_up_time, + sim_time=sim_time, + ) + logging.info(f"Custom Env possible_agents: {env_instance.possible_agents}") + return PettingZooEnv(env_instance) + + register_env("LoRaEnvParallel", create_env) + + # Create a test environment to get observation and action spaces + test_env = create_env( + { + "nodes_count": nodes_count, + "data_size": data_size, + "avg_wake_up_time": avg_wake_up_time, + "sim_time": sim_time, + } ) + logging.info(f"Wrapped Env possible_agents: {test_env.env.possible_agents}") + + # Check if possible_agents exists + if hasattr(test_env.env, "possible_agents"): + obs_space = test_env.env.observation_space(test_env.env.possible_agents[0]) + act_space = test_env.env.action_space(test_env.env.possible_agents[0]) + else: + raise AttributeError( + "The environment does not have 'possible_agents' attribute." + ) - train = False - if train: - # Create new model - model = PPO("MultiInputPolicy", gym_env, verbose=1) - reward_logger = RewardLoggerCallback() - - # Training Phase - # -------------- - utils.logging = False - utils.log(f"!-- TRAINING START --!") - # Calculate total timesteps for training - episodes = 10 - total_timesteps = ( - sim_time * episodes - ) # Assuming 1 timestep = 1 second in simulation - model.learn( - total_timesteps=total_timesteps, - log_interval=4, - progress_bar=True, - callback=reward_logger, + config = ( + DQNConfig() + .environment( + env="LoRaEnvParallel", + env_config={ + "nodes_count": nodes_count, + "data_size": data_size, + "avg_wake_up_time": avg_wake_up_time, + "sim_time": sim_time, + }, ) - model.save("lora_model") - utils.log(f"!-- TRAINING END --!") - - # Plot the rewards collected during the training - plt.figure(figsize=(10, 5)) - plt.plot(reward_logger.episode_rewards, marker="o", linestyle="-") - plt.title("Total Reward per Episode During Training") - plt.xlabel("Episode") - plt.ylabel("Total Reward") - plt.grid(True) - plt.savefig("training_phase.png") - - # Evaluation Phase - # ---------------- - model = PPO.load("lora_model") - utils.log(f"!-- EVALUATION START --!") - obs, info = gym_env.reset() - rewards_per_evaluation = [ - [] for _ in range(nodes_count) - ] # List to hold rewards for each node - total_rewards_per_node = [ - 0 - ] * nodes_count # List to hold total rewards for each node - - done = False - while True: - action, _states = model.predict(obs, deterministic=True) - obs, reward, done, terminated, info = gym_env.step(action) - for i in range(nodes_count): - rewards_per_evaluation[i].append( - reward[i] - ) # Log each reward for each node - total_rewards_per_node[i] += reward[i] # Sum rewards for each node - - if done or terminated: - utils.show_final_statistics() - utils.log(f"!-- EVALUATION END --!") - break - - # Plot the rewards collected during the evaluation for each node - plt.figure(figsize=(10, 5)) - for i in range(nodes_count): - plt.plot( - range(1, len(rewards_per_evaluation[i]) + 1), - rewards_per_evaluation[i], - marker="o", - linestyle="-", - label=f"Node {i+1}", + .env_runners( + num_env_runners=1, + rollout_fragment_length=30, + exploration_config={ + "type": "EpsilonGreedy", + "initial_epsilon": 0.1, + "final_epsilon": 0.0, + "epsilon_timesteps": 100000, + }, ) - plt.title("Rewards per Step During Evaluation for Each Node") - plt.xlabel("Step") - plt.ylabel("Reward") - plt.legend() - plt.grid(True) - plt.savefig("evaluation_phase_per_node.png") + .training( + train_batch_size=200, + hiddens=[], + dueling=False, + ) + .multi_agent( + policies={ + agent: (None, obs_space, act_space, {}) + for agent in test_env.env.possible_agents + }, + policy_mapping_fn=(lambda agent_id, *args, **kwargs: agent_id), + ) + .resources(num_gpus=0) + .framework(framework="torch") + ) + try: + logdir = "~/ray_results" + tune.run( + DQN, + name="DQN_LoRaEnvParallel", + stop={"timesteps_total": 100000}, + checkpoint_freq=10, + config=config.to_dict(), + local_dir=logdir, # Specify the directory for logging + callbacks=[TBXLoggerCallback(), CustomMetricsCallback(logdir)], + log_to_file=True, + ) + except Exception as e: + logging.error(f"An error occurred during training: {e}") + raise else: print( - "usage: ./main " + "usage: python main3.py " ) exit(-1) diff --git a/main3.py b/main3.py index 21e02ab..dd377c1 100644 --- a/main3.py +++ b/main3.py @@ -132,7 +132,7 @@ def create_env(config): analysis = tune.run( DQN, name="DQN_LoRaEnvParallel", - stop={"timesteps_total": 1000}, + stop={"timesteps_total": 100000}, checkpoint_freq=100, keep_checkpoints_num=5, checkpoint_score_attr="training_iteration", diff --git a/~/ray_results/events.out.tfevents.1716462842.MacBook-Air-Assel.local.44116.0 b/~/ray_results/events.out.tfevents.1716462842.MacBook-Air-Assel.local.44116.0 new file mode 100644 index 0000000000000000000000000000000000000000..8d025e05caf28d666fc4f1ff432b947ef5e17077 GIT binary patch literal 88 zcmeZZfPjCKJmzwWs4TNPIr*le6mL>dVrHJ6YguYuiIq{19+yr@YF=@EQBrM*}QKepaQD#YMkzOiDReV}zPHH?vMEs-OCjk7{APWEh literal 0 HcmV?d00001 diff --git a/~/ray_results/events.out.tfevents.1716462901.MacBook-Air-Assel.local.44462.0 b/~/ray_results/events.out.tfevents.1716462901.MacBook-Air-Assel.local.44462.0 new file mode 100644 index 0000000000000000000000000000000000000000..465ab83c4cc619f47dc15580f4ca1ef8409e1b22 GIT binary patch literal 88 zcmeZZfPjCKJmzvH>E-&KoP5(!iZ`h!F*8rkwJbHS#L6g0k4vW{HLp0oC@DX&C`GTh hG&eV~s8X-ID6=HBNG}znDn2bUCp8`-a=EU|0ssr>AOHXW literal 0 HcmV?d00001 diff --git a/~/ray_results/events.out.tfevents.1716464109.MacBook-Air-Assel.local.50289.0 b/~/ray_results/events.out.tfevents.1716464109.MacBook-Air-Assel.local.50289.0 new file mode 100644 index 0000000000000000000000000000000000000000..2292004e70c3e33ff5781fd1fa71d0ca061b5d15 GIT binary patch literal 88 zcmeZZfPjCKJmzwSJPD{iHTkBa6mL>dVrHJ6YguYuiIq{19+yr@YF=@EQBrM*}QKepaQD#YMkzOiDReV}zPHH?vWZL1R2mnDZAkF{) literal 0 HcmV?d00001 diff --git a/~/ray_results/events.out.tfevents.1716464382.MacBook-Air-Assel.local.51656.0 b/~/ray_results/events.out.tfevents.1716464382.MacBook-Air-Assel.local.51656.0 new file mode 100644 index 0000000000000000000000000000000000000000..b2ab0df859f9acdfd440eec57106170320f886b5 GIT binary patch literal 88 zcmeZZfPjCKJmzvT^iSGdVrHJ6YguYuiIq{19+yr@YF=@EQBrM*}QKepaQD#YMkzOiDReV}zPHH?vdVrHJ6YguYuiIq{19+yr@YF=@EQBrM*}QKepaQD#YMkzOiDReV}zPHH?v#N5l*2mllwA1VL< literal 0 HcmV?d00001 diff --git a/~/ray_results/events.out.tfevents.1716465333.MacBook-Air-Assel.local.56249.0 b/~/ray_results/events.out.tfevents.1716465333.MacBook-Air-Assel.local.56249.0 new file mode 100644 index 0000000000000000000000000000000000000000..d2194fa659a2cdb643f9cbbf0ca7d1bbfcab7d98 GIT binary patch literal 88 zcmeZZfPjCKJmzwydVrHJ6YguYuiIq{19+yr@YF=@EQBrM*}QKepaQD#YMkzOiDReV}zPHH?vMB-XN6#zI8AX@+c literal 0 HcmV?d00001 diff --git a/~/ray_results/events.out.tfevents.1716467317.MacBook-Air-Assel.local.65894.0 b/~/ray_results/events.out.tfevents.1716467317.MacBook-Air-Assel.local.65894.0 new file mode 100644 index 0000000000000000000000000000000000000000..70b4a424d7dca9da0968baf4f2a41c47f7b289e6 GIT binary patch literal 88 zcmeZZfPjCKJmzxF_*p;q?Btt{QoKn;iJ5tNu4SotC00g3dR#gssd>fuMM?RIMJam4 hrMbC@MU{HxMVTe3MS7_qRq<(=IjQjwk^3A4yZ~{%Aj|*& literal 0 HcmV?d00001 From 587069351c4766c011b7dbdb43db90d21f431e20 Mon Sep 17 00:00:00 2001 From: DeTrix2712 Date: Tue, 25 Jun 2024 22:42:56 +0500 Subject: [PATCH 14/14] fix: minor fixes --- main3.py | 46 +++++++++++++++++++++++++++------------- multienv/env/multienv.py | 16 ++++++++------ 2 files changed, 41 insertions(+), 21 deletions(-) diff --git a/main3.py b/main3.py index dd377c1..97e4542 100644 --- a/main3.py +++ b/main3.py @@ -16,34 +16,48 @@ logging.basicConfig(level=logging.INFO) + def plot_metrics(df): # Create a figure with 2 subplots arranged vertically fig, ax = plt.subplots(2, 1, figsize=(10, 8)) # Plot mean episode reward on the first subplot - if 'episode_reward_mean' in df.columns: - df['episode_reward_mean'].plot(ax=ax[0]) - ax[0].set_title('Mean Episode Reward') - ax[0].set_xlabel('Training Iterations') - ax[0].set_ylabel('Reward') + if "episode_reward_mean" in df.columns: + df["episode_reward_mean"].plot(ax=ax[0]) + ax[0].set_title("Mean Episode Reward") + ax[0].set_xlabel("Training Iterations") + ax[0].set_ylabel("Reward") else: logging.warning("No 'episode_reward_mean' column found in results.") - ax[0].text(0.5, 0.5, 'No Data', horizontalalignment='center', verticalalignment='center') + ax[0].text( + 0.5, + 0.5, + "No Data", + horizontalalignment="center", + verticalalignment="center", + ) # Plot mean episode length on the second subplot - if 'episode_len_mean' in df.columns: - df['episode_len_mean'].plot(ax=ax[1]) - ax[1].set_title('Mean Episode Length') - ax[1].set_xlabel('Training Iterations') - ax[1].set_ylabel('Length') + if "episode_len_mean" in df.columns: + df["episode_len_mean"].plot(ax=ax[1]) + ax[1].set_title("Mean Episode Length") + ax[1].set_xlabel("Training Iterations") + ax[1].set_ylabel("Length") else: logging.warning("No 'episode_len_mean' column found in results.") - ax[1].text(0.5, 0.5, 'No Data', horizontalalignment='center', verticalalignment='center') + ax[1].text( + 0.5, + 0.5, + "No Data", + horizontalalignment="center", + verticalalignment="center", + ) # Adjust layout and display the plots plt.tight_layout() plt.show() + if __name__ == "__main__": if len(sys.argv) == 5: nodes_count = int(sys.argv[1]) @@ -56,7 +70,7 @@ def plot_metrics(df): consts.avg_wake_up_time = avg_wake_up_time consts.sim_time = sim_time - ray.init() + ray.init(num_cpus=12, num_gpus=0) # Register the environment def create_env(config): @@ -109,7 +123,7 @@ def create_env(config): "type": "EpsilonGreedy", "initial_epsilon": 0.1, "final_epsilon": 0.0, - "epsilon_timesteps": 1000000, + "epsilon_timesteps": 100000, }, ) .training( @@ -146,7 +160,9 @@ def create_env(config): best_trial = analysis.get_best_trial("episode_reward_mean", mode="max") # Use TensorBoard to visualize results - print(f"Training completed. Use TensorBoard to visualize results: tensorboard --logdir {best_trial.local_path}") + print( + f"Training completed. Use TensorBoard to visualize results: tensorboard --logdir {best_trial.local_path}" + ) except Exception as e: logging.error(f"An error occurred during training: {e}") diff --git a/multienv/env/multienv.py b/multienv/env/multienv.py index 6ddcff4..d27c101 100644 --- a/multienv/env/multienv.py +++ b/multienv/env/multienv.py @@ -87,13 +87,17 @@ def step(self, actions): self.simulator.start_simulation() if self.current_step >= self.sim_time: self.done = True - reward = self._calculate_reward() observations = { agent: self.observe(agent) for agent in self.possible_agents } + rewards = { + agent: self._calculate_reward(self.agent_name_mapping[agent]) + for agent in self.possible_agents + } + dones = {agent: self.done for agent in self.possible_agents} infos = {agent: {} for agent in self.possible_agents} logging.info("Simulation done.") - return observations, reward, self.done, infos + return observations, rewards, dones, dones, infos for agent in actions: agent_index = self.agent_name_mapping[agent] @@ -114,15 +118,15 @@ def step(self, actions): agent: self._calculate_reward(self.agent_name_mapping[agent]) for agent in self.possible_agents } - self.done = self.current_step >= self.sim_time + dones = { + agent: self.current_step >= self.sim_time for agent in self.possible_agents + } + truncations = {agent: self.truncated for agent in self.possible_agents} infos = {agent: {} for agent in self.possible_agents} if self.render_mode == "human": self.render() - dones = {agent: self.done for agent in self.possible_agents} - truncations = {agent: self.truncated for agent in self.possible_agents} - return observations, rewards, dones, truncations, infos def observe(self, agent):