From cb223084eeefb78a35fbad4627be304eaa409e3d Mon Sep 17 00:00:00 2001
From: ltwmori <ltwmori@gmail.com>
Date: Fri, 3 May 2024 18:48:10 +0500
Subject: [PATCH 01/14] feat: add multi agent env

---
 loraenv/loraenv/envs/multiagent_env.py | 105 +++++++++++++++++++++++++
 1 file changed, 105 insertions(+)
 create mode 100644 loraenv/loraenv/envs/multiagent_env.py

diff --git a/loraenv/loraenv/envs/multiagent_env.py b/loraenv/loraenv/envs/multiagent_env.py
new file mode 100644
index 0000000..f546233
--- /dev/null
+++ b/loraenv/loraenv/envs/multiagent_env.py
@@ -0,0 +1,105 @@
+from pettingzoo import AECEnv
+from pettingzoo.utils import agent_selector
+from gymnasium import spaces
+import numpy as np
+import simpy
+from simulator.lora_simulator import LoraSimulator
+import simulator.consts as consts
+import simulator.utils as utils
+from ray.rllib.env import PettingZooEnv
+from ray.tune.registry import register_env
+
+class LoRaEnvPZ(AECEnv):
+    """
+    Multi-agent LoRa Network Simulation for PettingZoo.
+    """
+    metadata = {"render_modes": ["human"]}
+
+    def __init__(self, nodes_count=10, data_size=16, avg_wake_up_time=30, sim_time=3600):
+        super().__init__()
+
+        self.nodes_count = nodes_count
+        self.data_size = data_size
+        self.avg_wake_up_time = avg_wake_up_time
+        self.sim_time = sim_time
+
+        self.simpy_env = simpy.Environment()
+        self.simulator = LoraSimulator(nodes_count, data_size, avg_wake_up_time * 1000, sim_time * 1000, self.simpy_env)
+        
+        self.agents = [f"agent_{i}" for i in range(nodes_count)]
+        self.possible_agents = self.agents[:]
+        self.agent_name_mapping = dict(zip(self.agents, range(len(self.agents))))
+        
+        self.action_spaces = {agent: spaces.Discrete(3) for agent in self.agents}
+        self.observation_spaces = {
+            agent: spaces.Dict({
+                "prr": spaces.Box(low=0, high=1, shape=(), dtype=np.float64),
+                "rssi": spaces.Box(low=-200, high=0, shape=(), dtype=np.float64),
+                "sf": spaces.Box(low=7, high=12, shape=(), dtype=np.int64)
+            }) for agent in self.agents
+        }
+
+        self._agent_selector = agent_selector(self.agents)
+        self.agent_selection = self._agent_selector.next()
+        self.rewards = {agent: 0 for agent in self.agents}
+        self.dones = {agent: False for agent in self.agents}
+        self.infos = {agent: {} for agent in self.agents}
+
+    def observe(self, agent):
+        idx = self.agent_name_mapping[agent]
+        return {
+            "prr": consts.nodes[idx].prr_value,
+            "rssi": consts.nodes[idx].rssi_value,
+            "sf": consts.nodes[idx].sf_value
+        }
+
+    def step(self, action):
+        if self.dones[self.agent_selection]:
+            return self._was_done_step(action)
+
+        agent_index = self.agent_name_mapping[self.agent_selection]
+        self.simulator.update_node_behavior(agent_index, action)
+        self.simpy_env.run(until=self.current_step * 1000)
+
+        # Update rewards and observations
+        self.rewards[self.agent_selection] = self._calculate_reward(agent_index)
+        self._accumulate_rewards()
+
+        # Check if simulation is done
+        self.current_step += 1
+        if self.current_step >= self.sim_time:
+            for agent in self.agents:
+                self.dones[agent] = True
+
+        self.agent_selection = self._agent_selector.next()
+
+    def reset(self, **kwargs):
+        self.simpy_env = simpy.Environment()
+        self.simulator = LoraSimulator(self.nodes_count, self.data_size, self.avg_wake_up_time * 1000, self.sim_time * 1000, self.simpy_env)
+        self.simulator.add_nodes()
+        self.current_step = 0
+
+        self.rewards = {agent: 0 for agent in self.agents}
+        self.dones = {agent: False for agent in self.agents}
+        self.infos = {agent: {} for agent in self.agents}
+        self._agent_selector = agent_selector(self.agents)
+        self.agent_selection = self._agent_selector.next()
+
+    def render(self, mode="human"):
+        if mode == "human":
+            print({agent: self.observe(agent) for agent in self.agents})
+
+    def _calculate_reward(self, agent_index):
+        lambda_value = 0.0001
+        mean_prr = consts.nodes[agent_index].calculate_prr()
+        retransmission_penalty = lambda_value * consts.nodes[agent_index].packets_sent_count
+        return mean_prr - retransmission_penalty
+
+# Wrap the PettingZoo environment for RLlib
+def env_creator(env_config):
+    return LoRaEnvPZ(**env_config)
+
+# Register the environment with RLlib
+register_env("lora_multi_agent", lambda config: PettingZooEnv(env_creator(config)))
+
+# This registered environment can now be used in RLlib with the environment name "lora_multi_agent"

From 9c7c373247dbd082cfb0ddfb51000500ceb2d143 Mon Sep 17 00:00:00 2001
From: ltwmori <ltwmori@gmail.com>
Date: Thu, 9 May 2024 15:39:26 +0500
Subject: [PATCH 02/14] feat: add multiagent functions

---
 loraenv/loraenv/__init__.py             |  15 ++--
 loraenv/loraenv/envs/LoRaEnvParallel.py |  93 +++++++++++++++++++++
 loraenv/loraenv/envs/__init__.py        |   3 +-
 loraenv/loraenv/envs/multiagent_env.py  | 105 ------------------------
 main2.py                                |  58 +++++++++++++
 5 files changed, 163 insertions(+), 111 deletions(-)
 create mode 100644 loraenv/loraenv/envs/LoRaEnvParallel.py
 delete mode 100644 loraenv/loraenv/envs/multiagent_env.py
 create mode 100644 main2.py

diff --git a/loraenv/loraenv/__init__.py b/loraenv/loraenv/__init__.py
index c81cddd..3ac5ae7 100644
--- a/loraenv/loraenv/__init__.py
+++ b/loraenv/loraenv/__init__.py
@@ -1,6 +1,11 @@
-from gymnasium.envs.registration import register
+# from gymnasium.envs.registration import register
+
+# register(
+#     id='loraenv/LoRa-v0',
+#     entry_point='loraenv.envs:LoRaEnv', 
+# )
+
+
+from pettingzoo.utils import from_parallel
+from pettingzoo.test import api_test
 
-register(
-    id='loraenv/LoRa-v0',
-    entry_point='loraenv.envs:LoRaEnv', 
-)
\ No newline at end of file
diff --git a/loraenv/loraenv/envs/LoRaEnvParallel.py b/loraenv/loraenv/envs/LoRaEnvParallel.py
new file mode 100644
index 0000000..954452d
--- /dev/null
+++ b/loraenv/loraenv/envs/LoRaEnvParallel.py
@@ -0,0 +1,93 @@
+import simpy
+import numpy as np
+from gymnasium import spaces
+from pettingzoo import ParallelEnv
+from pettingzoo.utils import parallel_to_aec
+from simulator.lora_simulator import LoraSimulator
+import simulator.consts as consts
+
+class LoRaEnvParallel(ParallelEnv):
+    metadata = {'render_modes': ['human'], 'name': 'lora_v1'}
+
+    def __init__(self, nodes_count=10, data_size=16, avg_wake_up_time=30, sim_time=3600, render_mode=None):
+        self.possible_agents = [f"agent_{i}" for i in range(nodes_count)]
+        self.agent_name_mapping = dict(zip(self.possible_agents, range(len(self.possible_agents))))
+        self.render_mode = render_mode
+        self.nodes_count = nodes_count
+        self.data_size = data_size
+        self.avg_wake_up_time = avg_wake_up_time
+        self.sim_time = sim_time
+
+    def observation_space(self, agent):
+        return spaces.Dict({
+            "prr": spaces.Box(low=0, high=1, shape=(), dtype=np.float32),
+            "rssi": spaces.Box(low=-200, high=0, shape=(), dtype=np.float32),
+            "sf": spaces.Box(low=7, high=12, shape=(), dtype=np.int64)
+        })
+
+    def action_space(self, agent):
+        return spaces.Discrete(3)
+
+    def reset(self, **kwargs):
+        self.simpy_env = simpy.Environment()
+        self.simulator = LoraSimulator(self.nodes_count, self.data_size, self.avg_wake_up_time * 1000, self.sim_time * 1000, self.simpy_env)
+        self.simulator.add_nodes()
+        self.agents = self.possible_agents[:]
+        self.current_step = 0
+        self.dones = {agent: False for agent in self.agents}  # Initialize dones for all agents
+        observations = {agent: self.observe(agent) for agent in self.agents}
+        return observations
+
+
+    def step(self, actions):
+        for agent in actions:
+            agent_index = self.agent_name_mapping[agent]
+            if not self.dones[agent]:
+                self.simulator.update_node_behavior(agent_index, actions[agent])
+
+        self.simpy_env.run(until=self.current_step * 1000)
+        self.current_step += 1
+
+        observations = {agent: self.observe(agent) for agent in self.agents}
+        rewards = {agent: self._calculate_reward(self.agent_name_mapping[agent]) for agent in self.agents}
+        dones = {agent: self.current_step >= self.sim_time for agent in self.agents}
+        infos = {agent: {} for agent in self.agents}
+
+        if self.render_mode == 'human':
+            self.render()
+
+        return observations, rewards, dones, dones, infos
+
+    def observe(self, agent):
+        idx = self.agent_name_mapping[agent]
+        return {
+            "prr": consts.nodes[idx].prr_value,
+            "rssi": consts.nodes[idx].rssi_value,
+            "sf": consts.nodes[idx].sf_value
+        }
+
+    def render(self):
+        if self.render_mode == 'human':
+            print({agent: self.observe(agent) for agent in self.agents})
+
+    def _calculate_reward(self, agent_index):
+        lambda_value = 0.0001
+        mean_prr = consts.nodes[agent_index].calculate_prr()
+        retransmission_penalty = lambda_value * consts.nodes[agent_index].packets_sent_count
+        return mean_prr - retransmission_penalty
+
+# To support the AEC API from this parallel environment
+def env_creator(env_config):
+    env = LoRaEnvParallel(**env_config)
+    env = parallel_to_aec(env)
+    return env
+
+# Example usage
+env = LoRaEnvParallel(render_mode="human")
+observations = env.reset()
+while True:
+    actions = {agent: env.action_space(agent).sample() for agent in env.agents}
+    observations, rewards, dones, _, infos = env.step(actions)
+    if all(dones.values()):
+        break
+env.close()
diff --git a/loraenv/loraenv/envs/__init__.py b/loraenv/loraenv/envs/__init__.py
index 795fbe6..2c61fa5 100644
--- a/loraenv/loraenv/envs/__init__.py
+++ b/loraenv/loraenv/envs/__init__.py
@@ -1 +1,2 @@
-from loraenv.envs.environment import LoRaEnv
+# from loraenv.envs.environment import LoRaEnv
+from loraenv.envs.environment import LoRaEnvParallel
diff --git a/loraenv/loraenv/envs/multiagent_env.py b/loraenv/loraenv/envs/multiagent_env.py
deleted file mode 100644
index f546233..0000000
--- a/loraenv/loraenv/envs/multiagent_env.py
+++ /dev/null
@@ -1,105 +0,0 @@
-from pettingzoo import AECEnv
-from pettingzoo.utils import agent_selector
-from gymnasium import spaces
-import numpy as np
-import simpy
-from simulator.lora_simulator import LoraSimulator
-import simulator.consts as consts
-import simulator.utils as utils
-from ray.rllib.env import PettingZooEnv
-from ray.tune.registry import register_env
-
-class LoRaEnvPZ(AECEnv):
-    """
-    Multi-agent LoRa Network Simulation for PettingZoo.
-    """
-    metadata = {"render_modes": ["human"]}
-
-    def __init__(self, nodes_count=10, data_size=16, avg_wake_up_time=30, sim_time=3600):
-        super().__init__()
-
-        self.nodes_count = nodes_count
-        self.data_size = data_size
-        self.avg_wake_up_time = avg_wake_up_time
-        self.sim_time = sim_time
-
-        self.simpy_env = simpy.Environment()
-        self.simulator = LoraSimulator(nodes_count, data_size, avg_wake_up_time * 1000, sim_time * 1000, self.simpy_env)
-        
-        self.agents = [f"agent_{i}" for i in range(nodes_count)]
-        self.possible_agents = self.agents[:]
-        self.agent_name_mapping = dict(zip(self.agents, range(len(self.agents))))
-        
-        self.action_spaces = {agent: spaces.Discrete(3) for agent in self.agents}
-        self.observation_spaces = {
-            agent: spaces.Dict({
-                "prr": spaces.Box(low=0, high=1, shape=(), dtype=np.float64),
-                "rssi": spaces.Box(low=-200, high=0, shape=(), dtype=np.float64),
-                "sf": spaces.Box(low=7, high=12, shape=(), dtype=np.int64)
-            }) for agent in self.agents
-        }
-
-        self._agent_selector = agent_selector(self.agents)
-        self.agent_selection = self._agent_selector.next()
-        self.rewards = {agent: 0 for agent in self.agents}
-        self.dones = {agent: False for agent in self.agents}
-        self.infos = {agent: {} for agent in self.agents}
-
-    def observe(self, agent):
-        idx = self.agent_name_mapping[agent]
-        return {
-            "prr": consts.nodes[idx].prr_value,
-            "rssi": consts.nodes[idx].rssi_value,
-            "sf": consts.nodes[idx].sf_value
-        }
-
-    def step(self, action):
-        if self.dones[self.agent_selection]:
-            return self._was_done_step(action)
-
-        agent_index = self.agent_name_mapping[self.agent_selection]
-        self.simulator.update_node_behavior(agent_index, action)
-        self.simpy_env.run(until=self.current_step * 1000)
-
-        # Update rewards and observations
-        self.rewards[self.agent_selection] = self._calculate_reward(agent_index)
-        self._accumulate_rewards()
-
-        # Check if simulation is done
-        self.current_step += 1
-        if self.current_step >= self.sim_time:
-            for agent in self.agents:
-                self.dones[agent] = True
-
-        self.agent_selection = self._agent_selector.next()
-
-    def reset(self, **kwargs):
-        self.simpy_env = simpy.Environment()
-        self.simulator = LoraSimulator(self.nodes_count, self.data_size, self.avg_wake_up_time * 1000, self.sim_time * 1000, self.simpy_env)
-        self.simulator.add_nodes()
-        self.current_step = 0
-
-        self.rewards = {agent: 0 for agent in self.agents}
-        self.dones = {agent: False for agent in self.agents}
-        self.infos = {agent: {} for agent in self.agents}
-        self._agent_selector = agent_selector(self.agents)
-        self.agent_selection = self._agent_selector.next()
-
-    def render(self, mode="human"):
-        if mode == "human":
-            print({agent: self.observe(agent) for agent in self.agents})
-
-    def _calculate_reward(self, agent_index):
-        lambda_value = 0.0001
-        mean_prr = consts.nodes[agent_index].calculate_prr()
-        retransmission_penalty = lambda_value * consts.nodes[agent_index].packets_sent_count
-        return mean_prr - retransmission_penalty
-
-# Wrap the PettingZoo environment for RLlib
-def env_creator(env_config):
-    return LoRaEnvPZ(**env_config)
-
-# Register the environment with RLlib
-register_env("lora_multi_agent", lambda config: PettingZooEnv(env_creator(config)))
-
-# This registered environment can now be used in RLlib with the environment name "lora_multi_agent"
diff --git a/main2.py b/main2.py
new file mode 100644
index 0000000..738cff2
--- /dev/null
+++ b/main2.py
@@ -0,0 +1,58 @@
+import gymnasium as gym
+# import numpy as np
+from stable_baselines3 import DQN
+from stable_baselines3.common.callbacks import BaseCallback
+import matplotlib.pyplot as plt
+
+# Assuming you have an appropriate multi-agent version of your environment registered in Gym
+class RewardLoggerCallback(BaseCallback):
+    def __init__(self, check_freq):
+        super(RewardLoggerCallback, self).__init__()
+        self.check_freq = check_freq
+        self.episode_rewards = []
+
+    def _on_step(self) -> bool:
+        if self.n_calls % self.check_freq == 0:
+            rewards = self.training_env.get_attr('rewards')
+            self.episode_rewards.append(rewards)
+        return True
+
+if __name__ == "__main__":
+    nodes_count = 10  # Example parameter
+    data_size = 16    # Example parameter
+    avg_wake_up_time = 30  # Example parameter
+    sim_time = 3600   # Example parameter
+
+    env = gym.make("loraenv/LoRaMulti-v0", config={
+        "nodes_count": nodes_count,
+        "data_size": data_size,
+        "avg_wake_up_time": avg_wake_up_time,
+        "sim_time": sim_time,
+    })
+
+    models = {agent: DQN("MlpPolicy", env, verbose=1) for agent in env.possible_agents}
+
+    # Train each model
+    for agent, model in models.items():
+        callback = RewardLoggerCallback(check_freq=100)
+        model.learn(total_timesteps=int(sim_time * 100), callback=callback)
+        model.save(f"{agent}_lora_model")
+        plt.figure(figsize=(10, 5))
+        plt.plot(callback.episode_rewards, marker="o", linestyle="-")
+        plt.title(f"Total Reward per Episode During Training for {agent}")
+        plt.xlabel("Episode")
+        plt.ylabel("Total Reward")
+        plt.grid(True)
+        plt.savefig(f"{agent}_training_rewards.png")
+
+    # Example evaluation phase for one agent
+    test_env = env
+    obs = test_env.reset()
+    done = {agent: False for agent in test_env.possible_agents}
+    while not all(done.values()):
+        actions = {agent: models[agent].predict(obs[agent], deterministic=True)[0] for agent in test_env.possible_agents}
+        obs, rewards, dones, _ = test_env.step(actions)
+        done = dones
+
+    # Close environment
+    env.close()

From 776ebcf1a63d9ff81daf8a20bd183e9f1ea7e0d6 Mon Sep 17 00:00:00 2001
From: ltwmori <ltwmori@gmail.com>
Date: Thu, 9 May 2024 18:19:06 +0500
Subject: [PATCH 03/14] fix: fixing env for multiagent

---
 loraenv/loraenv.egg-info/PKG-INFO       |  9 +--
 loraenv/loraenv.egg-info/SOURCES.txt    |  1 +
 loraenv/loraenv.egg-info/requires.txt   |  6 +-
 loraenv/loraenv.egg-info/top_level.txt  |  2 +-
 loraenv/loraenv/__init__.py             | 18 +++--
 loraenv/loraenv/envs/LoRaEnvParallel.py | 93 -------------------------
 main2.py                                | 79 +++++++++++----------
 main3.py                                | 62 +++++++++++++++++
 multienv/multienv_v0.py                 |  7 ++
 simulator/lora_simulator.py             |  7 +-
 10 files changed, 134 insertions(+), 150 deletions(-)
 delete mode 100644 loraenv/loraenv/envs/LoRaEnvParallel.py
 create mode 100644 main3.py
 create mode 100644 multienv/multienv_v0.py

diff --git a/loraenv/loraenv.egg-info/PKG-INFO b/loraenv/loraenv.egg-info/PKG-INFO
index 8f7a00e..90bef7d 100644
--- a/loraenv/loraenv.egg-info/PKG-INFO
+++ b/loraenv/loraenv.egg-info/PKG-INFO
@@ -1,6 +1,3 @@
-Metadata-Version: 2.1
-Name: loraenv
-Version: 0.0.1
-Requires-Dist: gymnasium
-Requires-Dist: numpy
-Requires-Dist: matplotlib
+Metadata-Version: 2.1
+Name: loraenv
+Version: 0.0.1
diff --git a/loraenv/loraenv.egg-info/SOURCES.txt b/loraenv/loraenv.egg-info/SOURCES.txt
index 85c918d..beee9af 100644
--- a/loraenv/loraenv.egg-info/SOURCES.txt
+++ b/loraenv/loraenv.egg-info/SOURCES.txt
@@ -5,5 +5,6 @@ loraenv.egg-info/SOURCES.txt
 loraenv.egg-info/dependency_links.txt
 loraenv.egg-info/requires.txt
 loraenv.egg-info/top_level.txt
+loraenv/envs/LoRaEnvParallel.py
 loraenv/envs/__init__.py
 loraenv/envs/environment.py
\ No newline at end of file
diff --git a/loraenv/loraenv.egg-info/requires.txt b/loraenv/loraenv.egg-info/requires.txt
index d39ffd8..77cff3c 100644
--- a/loraenv/loraenv.egg-info/requires.txt
+++ b/loraenv/loraenv.egg-info/requires.txt
@@ -1,3 +1,3 @@
-gymnasium
-numpy
-matplotlib
+gymnasium
+numpy
+matplotlib
diff --git a/loraenv/loraenv.egg-info/top_level.txt b/loraenv/loraenv.egg-info/top_level.txt
index bca9dd9..ac96a7e 100644
--- a/loraenv/loraenv.egg-info/top_level.txt
+++ b/loraenv/loraenv.egg-info/top_level.txt
@@ -1 +1 @@
-loraenv
+loraenv
diff --git a/loraenv/loraenv/__init__.py b/loraenv/loraenv/__init__.py
index 3ac5ae7..58ca7d5 100644
--- a/loraenv/loraenv/__init__.py
+++ b/loraenv/loraenv/__init__.py
@@ -1,11 +1,15 @@
-# from gymnasium.envs.registration import register
+# # from gymnasium.envs.registration import register
+
+# # register(
+# #     id='loraenv/LoRa-v0',
+# #     entry_point='loraenv.envs:LoRaEnv', 
+# # )
 
-# register(
-#     id='loraenv/LoRa-v0',
-#     entry_point='loraenv.envs:LoRaEnv', 
-# )
 
+# from pettingzoo.utils import from_parallel
 
-from pettingzoo.utils import from_parallel
-from pettingzoo.test import api_test
 
+# register(
+#     id='loraenv/LoRaMulti-v0',
+#     entry_point='loraenv.envs:LoRaEnvParallel', 
+# )
\ No newline at end of file
diff --git a/loraenv/loraenv/envs/LoRaEnvParallel.py b/loraenv/loraenv/envs/LoRaEnvParallel.py
deleted file mode 100644
index 954452d..0000000
--- a/loraenv/loraenv/envs/LoRaEnvParallel.py
+++ /dev/null
@@ -1,93 +0,0 @@
-import simpy
-import numpy as np
-from gymnasium import spaces
-from pettingzoo import ParallelEnv
-from pettingzoo.utils import parallel_to_aec
-from simulator.lora_simulator import LoraSimulator
-import simulator.consts as consts
-
-class LoRaEnvParallel(ParallelEnv):
-    metadata = {'render_modes': ['human'], 'name': 'lora_v1'}
-
-    def __init__(self, nodes_count=10, data_size=16, avg_wake_up_time=30, sim_time=3600, render_mode=None):
-        self.possible_agents = [f"agent_{i}" for i in range(nodes_count)]
-        self.agent_name_mapping = dict(zip(self.possible_agents, range(len(self.possible_agents))))
-        self.render_mode = render_mode
-        self.nodes_count = nodes_count
-        self.data_size = data_size
-        self.avg_wake_up_time = avg_wake_up_time
-        self.sim_time = sim_time
-
-    def observation_space(self, agent):
-        return spaces.Dict({
-            "prr": spaces.Box(low=0, high=1, shape=(), dtype=np.float32),
-            "rssi": spaces.Box(low=-200, high=0, shape=(), dtype=np.float32),
-            "sf": spaces.Box(low=7, high=12, shape=(), dtype=np.int64)
-        })
-
-    def action_space(self, agent):
-        return spaces.Discrete(3)
-
-    def reset(self, **kwargs):
-        self.simpy_env = simpy.Environment()
-        self.simulator = LoraSimulator(self.nodes_count, self.data_size, self.avg_wake_up_time * 1000, self.sim_time * 1000, self.simpy_env)
-        self.simulator.add_nodes()
-        self.agents = self.possible_agents[:]
-        self.current_step = 0
-        self.dones = {agent: False for agent in self.agents}  # Initialize dones for all agents
-        observations = {agent: self.observe(agent) for agent in self.agents}
-        return observations
-
-
-    def step(self, actions):
-        for agent in actions:
-            agent_index = self.agent_name_mapping[agent]
-            if not self.dones[agent]:
-                self.simulator.update_node_behavior(agent_index, actions[agent])
-
-        self.simpy_env.run(until=self.current_step * 1000)
-        self.current_step += 1
-
-        observations = {agent: self.observe(agent) for agent in self.agents}
-        rewards = {agent: self._calculate_reward(self.agent_name_mapping[agent]) for agent in self.agents}
-        dones = {agent: self.current_step >= self.sim_time for agent in self.agents}
-        infos = {agent: {} for agent in self.agents}
-
-        if self.render_mode == 'human':
-            self.render()
-
-        return observations, rewards, dones, dones, infos
-
-    def observe(self, agent):
-        idx = self.agent_name_mapping[agent]
-        return {
-            "prr": consts.nodes[idx].prr_value,
-            "rssi": consts.nodes[idx].rssi_value,
-            "sf": consts.nodes[idx].sf_value
-        }
-
-    def render(self):
-        if self.render_mode == 'human':
-            print({agent: self.observe(agent) for agent in self.agents})
-
-    def _calculate_reward(self, agent_index):
-        lambda_value = 0.0001
-        mean_prr = consts.nodes[agent_index].calculate_prr()
-        retransmission_penalty = lambda_value * consts.nodes[agent_index].packets_sent_count
-        return mean_prr - retransmission_penalty
-
-# To support the AEC API from this parallel environment
-def env_creator(env_config):
-    env = LoRaEnvParallel(**env_config)
-    env = parallel_to_aec(env)
-    return env
-
-# Example usage
-env = LoRaEnvParallel(render_mode="human")
-observations = env.reset()
-while True:
-    actions = {agent: env.action_space(agent).sample() for agent in env.agents}
-    observations, rewards, dones, _, infos = env.step(actions)
-    if all(dones.values()):
-        break
-env.close()
diff --git a/main2.py b/main2.py
index 738cff2..4c2cce1 100644
--- a/main2.py
+++ b/main2.py
@@ -1,13 +1,18 @@
 import gymnasium as gym
-# import numpy as np
 from stable_baselines3 import DQN
 from stable_baselines3.common.callbacks import BaseCallback
 import matplotlib.pyplot as plt
 
-# Assuming you have an appropriate multi-agent version of your environment registered in Gym
+from multienv.multienv_v0 import LoRaEnvParallel
+from pettingzoo.utils import parallel_to_aec
+from pettingzoo.utils.wrappers import BaseWrapper
+from stable_baselines3.common.env_util import make_vec_env
+from supersuit import pad_action_space_v0, pad_observations_v0
+import supersuit as ss
+
 class RewardLoggerCallback(BaseCallback):
-    def __init__(self, check_freq):
-        super(RewardLoggerCallback, self).__init__()
+    def __init__(self, check_freq, verbose=1):
+        super(RewardLoggerCallback, self).__init__(verbose)
         self.check_freq = check_freq
         self.episode_rewards = []
 
@@ -23,36 +28,36 @@ def _on_step(self) -> bool:
     avg_wake_up_time = 30  # Example parameter
     sim_time = 3600   # Example parameter
 
-    env = gym.make("loraenv/LoRaMulti-v0", config={
-        "nodes_count": nodes_count,
-        "data_size": data_size,
-        "avg_wake_up_time": avg_wake_up_time,
-        "sim_time": sim_time,
-    })
-
-    models = {agent: DQN("MlpPolicy", env, verbose=1) for agent in env.possible_agents}
-
-    # Train each model
-    for agent, model in models.items():
-        callback = RewardLoggerCallback(check_freq=100)
-        model.learn(total_timesteps=int(sim_time * 100), callback=callback)
-        model.save(f"{agent}_lora_model")
-        plt.figure(figsize=(10, 5))
-        plt.plot(callback.episode_rewards, marker="o", linestyle="-")
-        plt.title(f"Total Reward per Episode During Training for {agent}")
-        plt.xlabel("Episode")
-        plt.ylabel("Total Reward")
-        plt.grid(True)
-        plt.savefig(f"{agent}_training_rewards.png")
-
-    # Example evaluation phase for one agent
-    test_env = env
-    obs = test_env.reset()
-    done = {agent: False for agent in test_env.possible_agents}
-    while not all(done.values()):
-        actions = {agent: models[agent].predict(obs[agent], deterministic=True)[0] for agent in test_env.possible_agents}
-        obs, rewards, dones, _ = test_env.step(actions)
-        done = dones
-
-    # Close environment
-    env.close()
+    env = LoRaEnvParallel(nodes_count=nodes_count, data_size=data_size, avg_wake_up_time=avg_wake_up_time, sim_time=sim_time)
+    aec_env = env
+    # wrapped_env = pad_action_space(pad_observations(aec_env))
+
+    # vec_env = pettingzoo_env_to_vec_env_v1(wrapped_env)
+    wrapped_env = pad_observations_v0(pad_action_space_v0(aec_env))
+
+    vec_env = ss.pettingzoo_env_to_vec_env_v1(wrapped_env)
+    # aec_env = ss.concat_vec_envs_v1(aec_env, 8, num_cpus=1, base_class="stable_baselines3")
+
+    model = DQN("MultiInputPolicy", vec_env, verbose=1)
+    reward_logger = RewardLoggerCallback(check_freq=100)
+
+    # Training Phase
+    model.learn(total_timesteps=100, callback=reward_logger)
+    model.save("dqn_lora_model")
+
+    # Plot the rewards collected during the training
+    plt.figure(figsize=(10, 5))
+    plt.plot(reward_logger.episode_rewards, marker="o", linestyle="-")
+    plt.title("Total Reward per Episode During Training")
+    plt.xlabel("Episode")
+    plt.ylabel("Total Reward")
+    plt.grid(True)
+    plt.savefig("training_rewards.png")
+
+    # Evaluation Phase
+    obs = vec_env.reset()
+    done = False
+    while not done:
+        action, _states = model.predict(obs, deterministic=True)
+        obs, reward, done, info = vec_env.step(action)
+        vec_env.render()
diff --git a/main3.py b/main3.py
new file mode 100644
index 0000000..1e957ae
--- /dev/null
+++ b/main3.py
@@ -0,0 +1,62 @@
+import os
+import ray
+from ray import tune
+from ray.rllib.algorithms.dqn import DQNConfig
+from ray.rllib.env import PettingZooEnv
+from ray.tune.registry import register_env
+
+from multienv.multienv_v0 import env
+
+if __name__ == "__main__":
+    ray.init()
+
+    # Register the environment
+    register_env("LoRaEnvParallel", lambda config: PettingZooEnv(env(config)))
+
+    # Create a test environment to get observation and action spaces
+    test_env = PettingZooEnv(env({
+        "nodes_count": 10,
+        "data_size": 16,
+        "avg_wake_up_time": 30,
+        "sim_time": 3600
+    }))
+    obs_space = test_env.observation_space(test_env.possible_agents[0])
+    act_space = test_env.action_space(test_env.possible_agents[0])
+
+    config = (
+        DQNConfig()
+        .environment(env="LoRaEnvParallel", env_config={
+            "nodes_count": 10,
+            "data_size": 16,
+            "avg_wake_up_time": 30,
+            "sim_time": 3600
+        })
+        .rollouts(num_rollout_workers=1, rollout_fragment_length=30)
+        .training(
+            train_batch_size=200,
+            hiddens=[],
+            dueling=False,
+        )
+        .multi_agent(
+            policies={agent: (None, obs_space, act_space, {}) for agent in test_env.possible_agents},
+            policy_mapping_fn=(lambda agent_id, *args, **kwargs: agent_id),
+        )
+        .resources(num_gpus=int(os.environ.get("RLLIB_NUM_GPUS", "0")))
+        .framework(framework="torch")
+        .exploration(
+            exploration_config={
+                "type": "EpsilonGreedy",
+                "initial_epsilon": 0.1,
+                "final_epsilon": 0.0,
+                "epsilon_timesteps": 100000,
+            }
+        )
+    )
+
+    tune.run(
+        "DQN",
+        name="DQN_LoRaEnvParallel",
+        stop={"timesteps_total": 1000000},
+        checkpoint_freq=10,
+        config=config.to_dict(),
+    )
diff --git a/multienv/multienv_v0.py b/multienv/multienv_v0.py
new file mode 100644
index 0000000..2ad9081
--- /dev/null
+++ b/multienv/multienv_v0.py
@@ -0,0 +1,7 @@
+from multienv.env.multienv import LoRaEnvParallel
+from pettingzoo.utils import parallel_to_aec
+
+def env(**kwargs): 
+    env_cur = LoRaEnvParallel(**kwargs)
+    env_cur = parallel_to_aec(env_cur)
+    return env_cur
\ No newline at end of file
diff --git a/simulator/lora_simulator.py b/simulator/lora_simulator.py
index cb21e37..df1a62d 100644
--- a/simulator/lora_simulator.py
+++ b/simulator/lora_simulator.py
@@ -19,9 +19,10 @@ def add_nodes(self):
         for i in range(self.nodes_count):
             consts.nodes.append(EndNode(i, self.env, data_gateway))
 
-    def update_nodes_behavior(self, action):
-        for node in consts.nodes:
-            node.perform_action(action)
+    def update_nodes_behavior(self, index, action):
+        # for node in consts.nodes:
+        #     node.perform_action(action)
+        consts.nodes[index].perform_action(action)
 
     def start_simulation(self):
         from simulator.frame import Frame

From 29428de868db19bff2b2ca1c778bf0b9531224de Mon Sep 17 00:00:00 2001
From: ltwmori <ltwmori@gmail.com>
Date: Fri, 10 May 2024 15:57:36 +0500
Subject: [PATCH 04/14] feat: run PettingZoo parallel_api_test in main2.py

---
 main2.py | 66 +++++++++++++++++++++++++++++---------------------------
 1 file changed, 34 insertions(+), 32 deletions(-)

diff --git a/main2.py b/main2.py
index 4c2cce1..3aa659c 100644
--- a/main2.py
+++ b/main2.py
@@ -3,6 +3,7 @@
 from stable_baselines3.common.callbacks import BaseCallback
 import matplotlib.pyplot as plt
 
+from pettingzoo.test import parallel_api_test
 from multienv.multienv_v0 import LoRaEnvParallel
 from pettingzoo.utils import parallel_to_aec
 from pettingzoo.utils.wrappers import BaseWrapper
@@ -29,35 +30,36 @@ def _on_step(self) -> bool:
     sim_time = 3600   # Example parameter
 
     env = LoRaEnvParallel(nodes_count=nodes_count, data_size=data_size, avg_wake_up_time=avg_wake_up_time, sim_time=sim_time)
-    aec_env = env
-    # wrapped_env = pad_action_space(pad_observations(aec_env))
-
-    # vec_env = pettingzoo_env_to_vec_env_v1(wrapped_env)
-    wrapped_env = pad_observations_v0(pad_action_space_v0(aec_env))
-
-    vec_env = ss.pettingzoo_env_to_vec_env_v1(wrapped_env)
-    # aec_env = ss.concat_vec_envs_v1(aec_env, 8, num_cpus=1, base_class="stable_baselines3")
-
-    model = DQN("MultiInputPolicy", vec_env, verbose=1)
-    reward_logger = RewardLoggerCallback(check_freq=100)
-
-    # Training Phase
-    model.learn(total_timesteps=100, callback=reward_logger)
-    model.save("dqn_lora_model")
-
-    # Plot the rewards collected during the training
-    plt.figure(figsize=(10, 5))
-    plt.plot(reward_logger.episode_rewards, marker="o", linestyle="-")
-    plt.title("Total Reward per Episode During Training")
-    plt.xlabel("Episode")
-    plt.ylabel("Total Reward")
-    plt.grid(True)
-    plt.savefig("training_rewards.png")
-
-    # Evaluation Phase
-    obs = vec_env.reset()
-    done = False
-    while not done:
-        action, _states = model.predict(obs, deterministic=True)
-        obs, reward, done, info = vec_env.step(action)
-        vec_env.render()
+    parallel_api_test(env, num_cycles=1000)
+    # aec_env = env
+    # # wrapped_env = pad_action_space(pad_observations(aec_env))
+
+    # # vec_env = pettingzoo_env_to_vec_env_v1(wrapped_env)
+    # wrapped_env = pad_observations_v0(pad_action_space_v0(aec_env))
+
+    # vec_env = ss.pettingzoo_env_to_vec_env_v1(wrapped_env)
+    # # aec_env = ss.concat_vec_envs_v1(aec_env, 8, num_cpus=1, base_class="stable_baselines3")
+
+    # model = DQN("MultiInputPolicy", vec_env, verbose=1)
+    # reward_logger = RewardLoggerCallback(check_freq=100)
+
+    # # Training Phase
+    # model.learn(total_timesteps=100, callback=reward_logger)
+    # model.save("dqn_lora_model")
+
+    # # Plot the rewards collected during the training
+    # plt.figure(figsize=(10, 5))
+    # plt.plot(reward_logger.episode_rewards, marker="o", linestyle="-")
+    # plt.title("Total Reward per Episode During Training")
+    # plt.xlabel("Episode")
+    # plt.ylabel("Total Reward")
+    # plt.grid(True)
+    # plt.savefig("training_rewards.png")
+
+    # # Evaluation Phase
+    # obs = vec_env.reset()
+    # done = False
+    # while not done:
+    #     action, _states = model.predict(obs, deterministic=True)
+    #     obs, reward, done, info = vec_env.step(action)
+    #     vec_env.render()

From 74cd5eca4b36608d922ab01eeebc4bbd6ed73b20 Mon Sep 17 00:00:00 2001
From: ltwmori <ltwmori@gmail.com>
Date: Wed, 15 May 2024 18:22:43 +0500
Subject: [PATCH 05/14] feat: added code

---
 main2.py | 143 +++++++++++++++++++++++++++++++++++--------------------
 main3.py |  47 +++++++++++-------
 2 files changed, 122 insertions(+), 68 deletions(-)

diff --git a/main2.py b/main2.py
index 3aa659c..b372bf7 100644
--- a/main2.py
+++ b/main2.py
@@ -1,65 +1,106 @@
+import sys
 import gymnasium as gym
-from stable_baselines3 import DQN
-from stable_baselines3.common.callbacks import BaseCallback
+import loraenv
+import simulator.utils as utils
 import matplotlib.pyplot as plt
 
-from pettingzoo.test import parallel_api_test
-from multienv.multienv_v0 import LoRaEnvParallel
-from pettingzoo.utils import parallel_to_aec
-from pettingzoo.utils.wrappers import BaseWrapper
-from stable_baselines3.common.env_util import make_vec_env
-from supersuit import pad_action_space_v0, pad_observations_v0
-import supersuit as ss
+import numpy as np
+import simulator.consts as consts
 
-class RewardLoggerCallback(BaseCallback):
-    def __init__(self, check_freq, verbose=1):
-        super(RewardLoggerCallback, self).__init__(verbose)
-        self.check_freq = check_freq
-        self.episode_rewards = []
+from simulator.lora_simulator import LoraSimulator
+from reward_caller_callback import RewardLoggerCallback
 
-    def _on_step(self) -> bool:
-        if self.n_calls % self.check_freq == 0:
-            rewards = self.training_env.get_attr('rewards')
-            self.episode_rewards.append(rewards)
-        return True
+from stable_baselines3 import PPO
 
 if __name__ == "__main__":
-    nodes_count = 10  # Example parameter
-    data_size = 16    # Example parameter
-    avg_wake_up_time = 30  # Example parameter
-    sim_time = 3600   # Example parameter
+    if len(sys.argv) == 5:
+        nodes_count = int(sys.argv[1])
+        data_size = int(sys.argv[2])
+        avg_wake_up_time = int(sys.argv[3])
+        sim_time = int(sys.argv[4])
 
-    env = LoRaEnvParallel(nodes_count=nodes_count, data_size=data_size, avg_wake_up_time=avg_wake_up_time, sim_time=sim_time)
-    parallel_api_test(env, num_cycles=1000)
-    # aec_env = env
-    # # wrapped_env = pad_action_space(pad_observations(aec_env))
+        # Gymnasium environment
+        gym_env = gym.make(
+            "loraenv/LoRa-v0",
+            nodes_count=nodes_count,
+            data_size=data_size,
+            avg_wake_up_time=avg_wake_up_time,
+            sim_time=sim_time,
+        )
 
-    # # vec_env = pettingzoo_env_to_vec_env_v1(wrapped_env)
-    # wrapped_env = pad_observations_v0(pad_action_space_v0(aec_env))
+        train = True
+        if train:
+            # Create new model
+            model = PPO("MultiInputPolicy", gym_env, verbose=1)
+            reward_logger = RewardLoggerCallback()
 
-    # vec_env = ss.pettingzoo_env_to_vec_env_v1(wrapped_env)
-    # # aec_env = ss.concat_vec_envs_v1(aec_env, 8, num_cpus=1, base_class="stable_baselines3")
+            # Training Phase
+            # --------------
+            utils.logging = False
+            utils.log(f"!-- TRAINING START --!")
+            # Calculate total timesteps for training
+            episodes = 10
+            total_timesteps = (
+                sim_time * episodes
+            )  # Assuming 1 timestep = 1 second in simulation
+            model.learn(
+                total_timesteps=total_timesteps,
+                log_interval=4,
+                progress_bar=True,
+                callback=reward_logger,
+            )
+            model.save("lora_model")
+            utils.log(f"!-- TRAINING END --!")
 
-    # model = DQN("MultiInputPolicy", vec_env, verbose=1)
-    # reward_logger = RewardLoggerCallback(check_freq=100)
+            # Plot the rewards collected during the training
+            plt.figure(figsize=(10, 5))
+            plt.plot(reward_logger.episode_rewards, marker="o", linestyle="-")
+            plt.title("Total Reward per Episode During Training")
+            plt.xlabel("Episode")
+            plt.ylabel("Total Reward")
+            plt.grid(True)
+            plt.savefig("training_phase.png")
 
-    # # Training Phase
-    # model.learn(total_timesteps=100, callback=reward_logger)
-    # model.save("dqn_lora_model")
+        # Evaluation Phase
+        # ----------------
+        model = PPO.load("lora_model")
+        utils.log(f"!-- EVALUATION START --!")
+        obs, info = gym_env.reset()
+        rewards_per_evaluation = [[] for _ in range(nodes_count)]  # List to hold rewards for each node
+        total_rewards_per_node = [0] * nodes_count  # List to hold total rewards for each node
 
-    # # Plot the rewards collected during the training
-    # plt.figure(figsize=(10, 5))
-    # plt.plot(reward_logger.episode_rewards, marker="o", linestyle="-")
-    # plt.title("Total Reward per Episode During Training")
-    # plt.xlabel("Episode")
-    # plt.ylabel("Total Reward")
-    # plt.grid(True)
-    # plt.savefig("training_rewards.png")
+        done = False
+        while True:
+            action, _states = model.predict(obs, deterministic=True)
+            obs, reward, done, terminated, info = gym_env.step(action)
+            for i in range(nodes_count):
+                rewards_per_evaluation[i].append(reward[i])  # Log each reward for each node
+                total_rewards_per_node[i] += reward[i]  # Sum rewards for each node
 
-    # # Evaluation Phase
-    # obs = vec_env.reset()
-    # done = False
-    # while not done:
-    #     action, _states = model.predict(obs, deterministic=True)
-    #     obs, reward, done, info = vec_env.step(action)
-    #     vec_env.render()
+            if done or terminated:
+                utils.show_final_statistics()
+                utils.log(f"!-- EVALUATION END --!")
+                break
+
+        # Plot the rewards collected during the evaluation for each node
+        plt.figure(figsize=(10, 5))
+        for i in range(nodes_count):
+            plt.plot(
+                range(1, len(rewards_per_evaluation[i]) + 1),
+                rewards_per_evaluation[i],
+                marker="o",
+                linestyle="-",
+                label=f'Node {i+1}'
+            )
+        plt.title("Rewards per Step During Evaluation for Each Node")
+        plt.xlabel("Step")
+        plt.ylabel("Reward")
+        plt.legend()
+        plt.grid(True)
+        plt.savefig("evaluation_phase_per_node.png")
+
+    else:
+        print(
+            "usage: ./main <number_of_nodes> <data_size(bytes)> <avg_wake_up_time(secs)> <sim_time(secs)>"
+        )
+        exit(-1)
\ No newline at end of file
diff --git a/main3.py b/main3.py
index 1e957ae..9e5ffe8 100644
--- a/main3.py
+++ b/main3.py
@@ -4,24 +4,33 @@
 from ray.rllib.algorithms.dqn import DQNConfig
 from ray.rllib.env import PettingZooEnv
 from ray.tune.registry import register_env
+import logging
 
 from multienv.multienv_v0 import env
 
+logging.basicConfig(level=logging.INFO)
+
 if __name__ == "__main__":
     ray.init()
 
     # Register the environment
-    register_env("LoRaEnvParallel", lambda config: PettingZooEnv(env(config)))
+    def create_env(config):
+        env_instance = env()
+        logging.info(f"Custom Env possible_agents: {env_instance.possible_agents}")
+        return PettingZooEnv(env_instance)
+
+    register_env("LoRaEnvParallel", create_env)
 
     # Create a test environment to get observation and action spaces
-    test_env = PettingZooEnv(env({
-        "nodes_count": 10,
-        "data_size": 16,
-        "avg_wake_up_time": 30,
-        "sim_time": 3600
-    }))
-    obs_space = test_env.observation_space(test_env.possible_agents[0])
-    act_space = test_env.action_space(test_env.possible_agents[0])
+    test_env = create_env({})
+    logging.info(f"Wrapped Env possible_agents: {test_env.env.possible_agents}")
+
+    # Check if possible_agents exists
+    if hasattr(test_env.env, 'possible_agents'):
+        obs_space = test_env.env.observation_space(test_env.env.possible_agents[0])
+        act_space = test_env.env.action_space(test_env.env.possible_agents[0])
+    else:
+        raise AttributeError("The environment does not have 'possible_agents' attribute.")
 
     config = (
         DQNConfig()
@@ -38,7 +47,7 @@
             dueling=False,
         )
         .multi_agent(
-            policies={agent: (None, obs_space, act_space, {}) for agent in test_env.possible_agents},
+            policies={agent: (None, obs_space, act_space, {}) for agent in test_env.env.possible_agents},
             policy_mapping_fn=(lambda agent_id, *args, **kwargs: agent_id),
         )
         .resources(num_gpus=int(os.environ.get("RLLIB_NUM_GPUS", "0")))
@@ -53,10 +62,14 @@
         )
     )
 
-    tune.run(
-        "DQN",
-        name="DQN_LoRaEnvParallel",
-        stop={"timesteps_total": 1000000},
-        checkpoint_freq=10,
-        config=config.to_dict(),
-    )
+    try:
+        tune.run(
+            "DQN",
+            name="DQN_LoRaEnvParallel",
+            stop={"timesteps_total": 1000000},
+            checkpoint_freq=10,
+            config=config.to_dict(),
+        )
+    except Exception as e:
+        logging.error(f"An error occurred during training: {e}")
+        raise

From b51f7412a49893f8ca5767061f29cda9993b72f6 Mon Sep 17 00:00:00 2001
From: ltwmori <ltwmori@gmail.com>
Date: Thu, 16 May 2024 11:48:37 +0500
Subject: [PATCH 06/14] pushing env to git

---
 .gitignore               |   6 +-
 multienv/env/multienv.py | 132 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 133 insertions(+), 5 deletions(-)
 create mode 100644 multienv/env/multienv.py

diff --git a/.gitignore b/.gitignore
index 0530986..b2ac8c5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -120,11 +120,7 @@ celerybeat.pid
 *.sage.py
 
 # Environments
-.env
-.venv
-env/
-venv/
-ENV/
+
 env.bak/
 venv.bak/
 
diff --git a/multienv/env/multienv.py b/multienv/env/multienv.py
new file mode 100644
index 0000000..1ac9fd9
--- /dev/null
+++ b/multienv/env/multienv.py
@@ -0,0 +1,132 @@
+import simpy
+import numpy as np
+from gymnasium import spaces
+from pettingzoo import ParallelEnv
+from pettingzoo.utils import parallel_to_aec
+from simulator.lora_simulator import LoraSimulator
+import simulator.consts as consts
+import simulator.utils as utils
+from copy import copy
+import functools
+import logging
+
+class LoRaEnvParallel(ParallelEnv):
+    metadata = {'render_modes': ['human'], 'name': 'multiagent_v0'}
+
+    def __init__(self, nodes_count=10, data_size=16, avg_wake_up_time=30, sim_time=3600, render_mode=None):
+        self.possible_agents = [f"agent_{i}" for i in range(nodes_count)]
+        self.agent_name_mapping = dict(zip(self.possible_agents, range(len(self.possible_agents))))
+        self.render_mode = render_mode
+        self.nodes_count = nodes_count
+        self.data_size = data_size
+        self.avg_wake_up_time = avg_wake_up_time
+        self.sim_time = sim_time
+
+        # Setup simulator environment
+        self.simpy_env = simpy.Environment()
+        self.simulator = LoraSimulator(
+            self.nodes_count,
+            self.data_size,
+            self.avg_wake_up_time * 1000,
+            self.sim_time * 1000,
+            self.simpy_env,
+        )
+
+        self.current_step = 0
+        self.done = False
+        self.truncated = False
+
+    @functools.lru_cache(maxsize=None)
+    def observation_space(self, agent):
+        return spaces.Dict({
+            "prr": spaces.Box(low=0, high=1, shape=(), dtype=np.float32),
+            "rssi": spaces.Box(low=-200, high=0, shape=(), dtype=np.float32),
+            "sf": spaces.Box(low=7, high=12, shape=(), dtype=np.int64)
+        })
+
+    @functools.lru_cache(maxsize=None)
+    def action_space(self, agent):
+        return spaces.Discrete(3)
+
+    def reset(self, **kwargs):
+        logging.info("Resetting environment.")
+        self.agents = copy(self.possible_agents)
+        self.simpy_env = simpy.Environment()
+        self.simulator = LoraSimulator(
+            self.nodes_count,
+            self.data_size,
+            self.avg_wake_up_time * 1000,
+            self.sim_time * 1000,
+            self.simpy_env,
+        )
+        self.current_step = 0
+        self.done = False
+        self.truncated = False
+        utils.reset_simulator()
+        self.simulator.add_nodes()
+        observations = {agent: self.observe(agent) for agent in self.possible_agents}
+        infos = {agent: {} for agent in self.possible_agents}
+        logging.info("Environment reset complete.")
+        return observations, infos
+
+    def step(self, actions):
+        logging.info(f"Step {self.current_step} with actions {actions}.")
+        if self.current_step == 0:
+            self.simulator.start_simulation()
+        if self.current_step >= self.sim_time:
+            self.done = True
+            reward = self._calculate_reward()
+            observations = {agent: self.observe(agent) for agent in self.possible_agents}
+            infos = {agent: {} for agent in self.possible_agents}
+            logging.info("Simulation done.")
+            return observations, reward, self.done, infos
+        
+        for agent in actions:
+            agent_index = self.agent_name_mapping[agent]
+            if not self.done:
+                self.simulator.update_nodes_behavior(agent_index, actions[agent])
+
+        self.current_step += 1
+        timestep = self.current_step * 1000
+        for i in range(self.nodes_count):
+            utils.log(
+                f"!-- UPLINK NUMBER FOR STEP [{self.current_step}] FOR NODE {i}: {actions[f'agent_{i}'] + 1} --!",
+                self.simpy_env,
+            )
+        self.simulator.env.run(until=timestep)
+
+        observations = {agent: self.observe(agent) for agent in self.possible_agents}
+        rewards = {agent: self._calculate_reward(self.agent_name_mapping[agent]) for agent in self.possible_agents}
+        self.done = self.current_step >= self.sim_time
+        infos = {agent: {} for agent in self.possible_agents}
+
+        if self.render_mode == 'human':
+            self.render()
+
+        dones = {agent: self.done for agent in self.possible_agents}
+        truckations = {agent: self.truncated for agent in self.possible_agents}
+
+        return observations, rewards, dones, truckations, infos
+
+    def observe(self, agent):
+        idx = self.agent_name_mapping[agent]
+        return {
+            "prr": consts.nodes[idx].prr_value,
+            "rssi": consts.nodes[idx].rssi_value,
+            "sf": consts.nodes[idx].sf_value
+        }
+
+    def render(self):
+        if self.render_mode == 'human':
+            print({agent: self.observe(agent) for agent in self.possible_agents})
+
+    def _calculate_reward(self, agent_index):
+        lambda_value = 0.0001
+        mean_prr = consts.nodes[agent_index].calculate_prr()
+        retransmission_penalty = lambda_value * consts.nodes[agent_index].packets_sent_count
+        return mean_prr - retransmission_penalty
+
+def env_creator(env_config):
+    env_instance = LoRaEnvParallel(**env_config)
+    env_instance = parallel_to_aec(env_instance)
+    return env_instance

From bb76173a514cebe913d20ec32d59aebe8af98432 Mon Sep 17 00:00:00 2001
From: ltwmori <ltwmori@gmail.com>
Date: Thu, 16 May 2024 14:12:35 +0500
Subject: [PATCH 07/14] fix: minor changes in the code

---
 main3.py | 65 ++++++++++++++++++++++++++++++++++++++++----------------
 1 file changed, 47 insertions(+), 18 deletions(-)

diff --git a/main3.py b/main3.py
index 9e5ffe8..ab8cdd0 100644
--- a/main3.py
+++ b/main3.py
@@ -5,24 +5,26 @@
 from ray.rllib.env import PettingZooEnv
 from ray.tune.registry import register_env
 import logging
+import sys
 
 from multienv.multienv_v0 import env
 
 logging.basicConfig(level=logging.INFO)
 
-if __name__ == "__main__":
-    ray.init()
-
-    # Register the environment
-    def create_env(config):
-        env_instance = env()
-        logging.info(f"Custom Env possible_agents: {env_instance.possible_agents}")
-        return PettingZooEnv(env_instance)
+# Function to create the environment
+def create_env(config):
+    logging.info(f"Creating environment with config: {config}")
+    env_instance = env(**config)
+    logging.info(f"Custom Env possible_agents: {env_instance.possible_agents}")
+    return PettingZooEnv(env_instance)
 
+# Function to train the environment
+def train_fn(config):
+    logging.info("Registering environment.")
     register_env("LoRaEnvParallel", create_env)
 
     # Create a test environment to get observation and action spaces
-    test_env = create_env({})
+    test_env = create_env(config)
     logging.info(f"Wrapped Env possible_agents: {test_env.env.possible_agents}")
 
     # Check if possible_agents exists
@@ -32,15 +34,10 @@ def create_env(config):
     else:
         raise AttributeError("The environment does not have 'possible_agents' attribute.")
 
-    config = (
+    algo_config = (
         DQNConfig()
-        .environment(env="LoRaEnvParallel", env_config={
-            "nodes_count": 10,
-            "data_size": 16,
-            "avg_wake_up_time": 30,
-            "sim_time": 3600
-        })
-        .rollouts(num_rollout_workers=1, rollout_fragment_length=30)
+        .environment(env="LoRaEnvParallel", env_config=config)
+        .env_runners(num_env_runners=1, rollout_fragment_length=30)
         .training(
             train_batch_size=200,
             hiddens=[],
@@ -68,8 +65,40 @@ def create_env(config):
             name="DQN_LoRaEnvParallel",
             stop={"timesteps_total": 1000000},
             checkpoint_freq=10,
-            config=config.to_dict(),
+            config=algo_config.to_dict(),
         )
     except Exception as e:
         logging.error(f"An error occurred during training: {e}")
         raise
+
+if __name__ == "__main__":
+    if len(sys.argv) != 5:
+        print("Usage: python3 main3.py <number_of_nodes> <data_size(bytes)> <avg_wake_up_time(secs)> <sim_time(secs)>")
+        sys.exit(1)
+    
+    nodes_count = int(sys.argv[1])
+    data_size = int(sys.argv[2])
+    avg_wake_up_time = int(sys.argv[3])
+    sim_time = int(sys.argv[4])
+
+    ray.init()
+
+    try:
+        analysis = tune.run(
+            train_fn,
+            config={
+                "nodes_count": nodes_count,
+                "data_size": data_size,
+                "avg_wake_up_time": avg_wake_up_time,
+                "sim_time": sim_time
+            },
+            metric="episode_reward_mean",
+            mode="max"
+        )
+
+        print("Best checkpoint:", analysis.best_checkpoint)
+
+        with analysis.best_checkpoint.as_directory() as tmpdir:
+            trainer = DQNConfig.load_from_checkpoint(tmpdir)
+    except Exception as e:
+        logging.error(f"An error occurred during the Ray Tune run: {e}")

From 3c797dab5ebef45c00bbd4e29524786d54c4c694 Mon Sep 17 00:00:00 2001
From: DeTrix2712 <detrix27122002@gmail.com>
Date: Thu, 16 May 2024 14:56:09 +0500
Subject: [PATCH 08/14] fix: RLlib fix

---
 main3.py | 80 +++++++++++++++++++++-----------------------------------
 1 file changed, 30 insertions(+), 50 deletions(-)

diff --git a/main3.py b/main3.py
index ab8cdd0..2b41b89 100644
--- a/main3.py
+++ b/main3.py
@@ -2,41 +2,50 @@
 import ray
 from ray import tune
 from ray.rllib.algorithms.dqn import DQNConfig
+from ray.rllib.algorithms.dqn import DQN
 from ray.rllib.env import PettingZooEnv
 from ray.tune.registry import register_env
 import logging
-import sys
 
 from multienv.multienv_v0 import env
 
 logging.basicConfig(level=logging.INFO)
 
-# Function to create the environment
-def create_env(config):
-    logging.info(f"Creating environment with config: {config}")
-    env_instance = env(**config)
-    logging.info(f"Custom Env possible_agents: {env_instance.possible_agents}")
-    return PettingZooEnv(env_instance)
+if __name__ == "__main__":
+    ray.init()
+
+    # Register the environment
+    def create_env(config):
+        env_instance = env()
+        logging.info(f"Custom Env possible_agents: {env_instance.possible_agents}")
+        return PettingZooEnv(env_instance)
 
-# Function to train the environment
-def train_fn(config):
-    logging.info("Registering environment.")
     register_env("LoRaEnvParallel", create_env)
 
     # Create a test environment to get observation and action spaces
-    test_env = create_env(config)
+    test_env = create_env({})
     logging.info(f"Wrapped Env possible_agents: {test_env.env.possible_agents}")
 
     # Check if possible_agents exists
-    if hasattr(test_env.env, 'possible_agents'):
+    if hasattr(test_env.env, "possible_agents"):
         obs_space = test_env.env.observation_space(test_env.env.possible_agents[0])
         act_space = test_env.env.action_space(test_env.env.possible_agents[0])
     else:
-        raise AttributeError("The environment does not have 'possible_agents' attribute.")
+        raise AttributeError(
+            "The environment does not have 'possible_agents' attribute."
+        )
 
-    algo_config = (
+    config = (
         DQNConfig()
-        .environment(env="LoRaEnvParallel", env_config=config)
+        .environment(
+            env="LoRaEnvParallel",
+            env_config={
+                "nodes_count": 10,
+                "data_size": 16,
+                "avg_wake_up_time": 30,
+                "sim_time": 3600,
+            },
+        )
         .env_runners(num_env_runners=1, rollout_fragment_length=30)
         .training(
             train_batch_size=200,
@@ -44,7 +53,10 @@ def train_fn(config):
             dueling=False,
         )
         .multi_agent(
-            policies={agent: (None, obs_space, act_space, {}) for agent in test_env.env.possible_agents},
+            policies={
+                agent: (None, obs_space, act_space, {})
+                for agent in test_env.env.possible_agents
+            },
             policy_mapping_fn=(lambda agent_id, *args, **kwargs: agent_id),
         )
         .resources(num_gpus=int(os.environ.get("RLLIB_NUM_GPUS", "0")))
@@ -61,44 +73,12 @@ def train_fn(config):
 
     try:
         tune.run(
-            "DQN",
+            DQN,
             name="DQN_LoRaEnvParallel",
             stop={"timesteps_total": 1000000},
             checkpoint_freq=10,
-            config=algo_config.to_dict(),
+            config=config.to_dict(),
         )
     except Exception as e:
         logging.error(f"An error occurred during training: {e}")
         raise
-
-if __name__ == "__main__":
-    if len(sys.argv) != 5:
-        print("Usage: python3 main3.py <number_of_nodes> <data_size(bytes)> <avg_wake_up_time(secs)> <sim_time(secs)>")
-        sys.exit(1)
-    
-    nodes_count = int(sys.argv[1])
-    data_size = int(sys.argv[2])
-    avg_wake_up_time = int(sys.argv[3])
-    sim_time = int(sys.argv[4])
-
-    ray.init()
-
-    try:
-        analysis = tune.run(
-            train_fn,
-            config={
-                "nodes_count": nodes_count,
-                "data_size": data_size,
-                "avg_wake_up_time": avg_wake_up_time,
-                "sim_time": sim_time
-            },
-            metric="episode_reward_mean",
-            mode="max"
-        )
-
-        print("Best checkpoint:", analysis.best_checkpoint)
-
-        with analysis.best_checkpoint.as_directory() as tmpdir:
-            trainer = DQNConfig.load_from_checkpoint(tmpdir)
-    except Exception as e:
-        logging.error(f"An error occurred during the Ray Tune run: {e}")

From 32bcfbd1a64f550aef969295038913bf22694d13 Mon Sep 17 00:00:00 2001
From: DeTrix2712 <detrix27122002@gmail.com>
Date: Thu, 16 May 2024 15:44:23 +0500
Subject: [PATCH 09/14] fix: minor fixes

---
 multienv/env/multienv.py | 60 ++++++++++++++++++++++++++--------------
 simulator/singleton.py   | 12 +++++---
 2 files changed, 48 insertions(+), 24 deletions(-)

diff --git a/multienv/env/multienv.py b/multienv/env/multienv.py
index 1ac9fd9..a297ac7 100644
--- a/multienv/env/multienv.py
+++ b/multienv/env/multienv.py
@@ -10,12 +10,22 @@
 import functools
 import logging
 
-class LoRaEnvParallel(ParallelEnv):
-    metadata = {'render_modes': ['human'], 'name': 'multiagent_v0'}
 
-    def __init__(self, nodes_count=10, data_size=16, avg_wake_up_time=30, sim_time=3600, render_mode=None):
+class LoRaEnvParallel(ParallelEnv):
+    metadata = {"render_modes": ["human"], "name": "multiagent_v0"}
+
+    def __init__(
+        self,
+        nodes_count=10,
+        data_size=16,
+        avg_wake_up_time=30,
+        sim_time=3600,
+        render_mode=None,
+    ):
         self.possible_agents = [f"agent_{i}" for i in range(nodes_count)]
-        self.agent_name_mapping = dict(zip(self.possible_agents, range(len(self.possible_agents))))
+        self.agent_name_mapping = dict(
+            zip(self.possible_agents, range(len(self.possible_agents)))
+        )
         self.render_mode = render_mode
         self.nodes_count = nodes_count
         self.data_size = data_size
@@ -38,11 +48,13 @@ def __init__(self, nodes_count=10, data_size=16, avg_wake_up_time=30, sim_time=3
 
     @functools.lru_cache(maxsize=None)
     def observation_space(self, agent):
-        return spaces.Dict({
-            "prr": spaces.Box(low=0, high=1, shape=(), dtype=np.float32),
-            "rssi": spaces.Box(low=-200, high=0, shape=(), dtype=np.float32),
-            "sf": spaces.Box(low=7, high=12, shape=(), dtype=np.int64)
-        })
+        return spaces.Dict(
+            {
+                "prr": spaces.Box(low=0.0, high=1.0, shape=(), dtype=np.float32),
+                "rssi": spaces.Box(low=-200.0, high=0.0, shape=(), dtype=np.float32),
+                "sf": spaces.Box(low=7, high=12, shape=(), dtype=np.int64),
+            }
+        )
 
     @functools.lru_cache(maxsize=None)
     def action_space(self, agent):
@@ -76,11 +88,13 @@ def step(self, actions):
         if self.current_step >= self.sim_time:
             self.done = True
             reward = self._calculate_reward()
-            observations = {agent: self.observe(agent) for agent in self.possible_agents}
+            observations = {
+                agent: self.observe(agent) for agent in self.possible_agents
+            }
             infos = {agent: {} for agent in self.possible_agents}
             logging.info("Simulation done.")
             return observations, reward, self.done, infos
-        
+
         for agent in actions:
             agent_index = self.agent_name_mapping[agent]
             if not self.done:
@@ -96,36 +110,42 @@ def step(self, actions):
         self.simulator.env.run(until=timestep)
 
         observations = {agent: self.observe(agent) for agent in self.possible_agents}
-        rewards = {agent: self._calculate_reward(self.agent_name_mapping[agent]) for agent in self.possible_agents}
+        rewards = {
+            agent: self._calculate_reward(self.agent_name_mapping[agent])
+            for agent in self.possible_agents
+        }
         self.done = self.current_step >= self.sim_time
         infos = {agent: {} for agent in self.possible_agents}
 
-        if self.render_mode == 'human':
+        if self.render_mode == "human":
             self.render()
 
         dones = {agent: self.done for agent in self.possible_agents}
-        truckations = {agent: self.truncated for agent in self.possible_agents}
+        truncations = {agent: self.truncated for agent in self.possible_agents}
 
-        return observations, rewards, dones, truckations, infos
+        return observations, rewards, dones, truncations, infos
 
     def observe(self, agent):
         idx = self.agent_name_mapping[agent]
         return {
-            "prr": consts.nodes[idx].prr_value,
-            "rssi": consts.nodes[idx].rssi_value,
-            "sf": consts.nodes[idx].sf_value
+            "prr": np.array(consts.nodes[idx].prr_value, dtype=np.float32),
+            "rssi": np.array(consts.nodes[idx].rssi_value, dtype=np.float32),
+            "sf": np.array(consts.nodes[idx].sf_value, dtype=np.int64),
         }
 
     def render(self):
-        if self.render_mode == 'human':
+        if self.render_mode == "human":
             print({agent: self.observe(agent) for agent in self.possible_agents})
 
     def _calculate_reward(self, agent_index):
         lambda_value = 0.0001
         mean_prr = consts.nodes[agent_index].calculate_prr()
-        retransmission_penalty = lambda_value * consts.nodes[agent_index].packets_sent_count
+        retransmission_penalty = (
+            lambda_value * consts.nodes[agent_index].packets_sent_count
+        )
         return mean_prr - retransmission_penalty
 
+
 def env_creator(env_config):
     env_instance = LoRaEnvParallel(**env_config)
     env_instance = parallel_to_aec(env_instance)
diff --git a/simulator/singleton.py b/simulator/singleton.py
index 58d39c3..c7481e2 100644
--- a/simulator/singleton.py
+++ b/simulator/singleton.py
@@ -19,10 +19,14 @@ def __init__(self):
             self.sim_time = int(sys.argv[4])
 
         else:
-            print(
-                "usage: ./main <number_of_nodes> <data_size(bytes)> <avg_wake_up_time(secs)> <sim_time(secs)>"
-            )
-            exit(-1)
+            self.nodes_count = 10
+            self.data_size = 16
+            self.avg_wake_up_time = 30 * 1000
+            self.sim_time = 3600 * 1000
+            # print(
+            #     "usage: ./main <number_of_nodes> <data_size(bytes)> <avg_wake_up_time(secs)> <sim_time(secs)>"
+            # )
+            # exit(-1)
 
 
 class DataGatewaySingleton:

From 0588526bad424a2d2ffa3f69725e7ddeba37a1ea Mon Sep 17 00:00:00 2001
From: ltwmori <ltwmori@gmail.com>
Date: Thu, 16 May 2024 16:11:23 +0500
Subject: [PATCH 10/14] fix: observe live PRR and seed sf_value from the computed SF

---
 multienv/env/multienv.py |  2 +-
 simulator/entities.py    | 17 ++++++++++-------
 2 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/multienv/env/multienv.py b/multienv/env/multienv.py
index a297ac7..6ddcff4 100644
--- a/multienv/env/multienv.py
+++ b/multienv/env/multienv.py
@@ -128,7 +128,7 @@ def step(self, actions):
     def observe(self, agent):
         idx = self.agent_name_mapping[agent]
         return {
-            "prr": np.array(consts.nodes[idx].prr_value, dtype=np.float32),
+            "prr": np.array(consts.nodes[idx].calculate_prr(), dtype=np.float32),
             "rssi": np.array(consts.nodes[idx].rssi_value, dtype=np.float32),
             "sf": np.array(consts.nodes[idx].sf_value, dtype=np.int64),
         }
diff --git a/simulator/entities.py b/simulator/entities.py
index f24e8eb..8fdedbf 100644
--- a/simulator/entities.py
+++ b/simulator/entities.py
@@ -112,9 +112,17 @@ def __init__(self, node_id, env, gateway=None):
         self.packets_sent_count = 0
         self.packets_received_count = 0
 
+        self.x, self.y = EndNode.find_place_for_new_node()
+        self.dist = np.sqrt(
+            (self.x - consts.bsx) * (self.x - consts.bsx)
+            + (self.y - consts.bsy) * (self.y - consts.bsy)
+        )
+
+        self.sf = self.find_optimal_sf()
+
         self.prr_value = 0
         self.rssi_value = 0
-        self.sf_value = 0
+        self.sf_value = self.sf
 
         self.nr_lost = 0
         self.nr_collisions = 0
@@ -139,13 +147,8 @@ def __init__(self, node_id, env, gateway=None):
         self.data_packet = None
         self.sack_packet_received = env.event()
 
-        self.x, self.y = EndNode.find_place_for_new_node()
-        self.dist = np.sqrt(
-            (self.x - consts.bsx) * (self.x - consts.bsx)
-            + (self.y - consts.bsy) * (self.y - consts.bsy)
-        )
 
-        self.sf = self.find_optimal_sf()
+        
 
     def __str__(self):
         # return "EndNode: " + str(self.node_id) + " x: " + str(self.x) + " y: " + str(self.y) + " sf: " + str(self.sf)

From 09da482e656a8cca34c218df7f0df851e2cfd3ac Mon Sep 17 00:00:00 2001
From: DeTrix2712 <detrix27122002@gmail.com>
Date: Wed, 22 May 2024 13:30:48 +0500
Subject: [PATCH 11/14] fix: replace ArgumentSingleton with simulator.consts parameters

---
 main.py                     |   6 ++
 main2.py                    |  24 ++++--
 main3.py                    | 156 ++++++++++++++++++++++--------------
 simulator/communications.py |   3 +-
 simulator/consts.py         |  14 +++-
 simulator/entities.py       |  10 +--
 simulator/singleton.py      |  27 -------
 simulator/utils.py          |  13 ++-
 8 files changed, 136 insertions(+), 117 deletions(-)

diff --git a/main.py b/main.py
index 681e6bc..8f6d828 100644
--- a/main.py
+++ b/main.py
@@ -2,6 +2,7 @@
 import gymnasium as gym
 import loraenv
 import simulator.utils as utils
+import simulator.consts as consts
 import matplotlib.pyplot as plt
 
 from simulator.lora_simulator import LoraSimulator
@@ -16,6 +17,11 @@
         avg_wake_up_time = int(sys.argv[3])
         sim_time = int(sys.argv[4])
 
+        consts.nodes_count = nodes_count
+        consts.data_size = data_size
+        consts.avg_wake_up_time = avg_wake_up_time
+        consts.sim_time = sim_time
+
         # Gymnasium environment
         gym_env = gym.make(
             "loraenv/LoRa-v0",
diff --git a/main2.py b/main2.py
index b372bf7..bb23d89 100644
--- a/main2.py
+++ b/main2.py
@@ -2,6 +2,7 @@
 import gymnasium as gym
 import loraenv
 import simulator.utils as utils
+import simulator.consts as consts
 import matplotlib.pyplot as plt
 
 import numpy as np
@@ -19,6 +20,11 @@
         avg_wake_up_time = int(sys.argv[3])
         sim_time = int(sys.argv[4])
 
+        consts.nodes_count = nodes_count
+        consts.data_size = data_size
+        consts.avg_wake_up_time = avg_wake_up_time
+        consts.sim_time = sim_time
+
         # Gymnasium environment
         gym_env = gym.make(
             "loraenv/LoRa-v0",
@@ -28,7 +34,7 @@
             sim_time=sim_time,
         )
 
-        train = True
+        train = False
         if train:
             # Create new model
             model = PPO("MultiInputPolicy", gym_env, verbose=1)
@@ -66,15 +72,21 @@
         model = PPO.load("lora_model")
         utils.log(f"!-- EVALUATION START --!")
         obs, info = gym_env.reset()
-        rewards_per_evaluation = [[] for _ in range(nodes_count)]  # List to hold rewards for each node
-        total_rewards_per_node = [0] * nodes_count  # List to hold total rewards for each node
+        rewards_per_evaluation = [
+            [] for _ in range(nodes_count)
+        ]  # List to hold rewards for each node
+        total_rewards_per_node = [
+            0
+        ] * nodes_count  # List to hold total rewards for each node
 
         done = False
         while True:
             action, _states = model.predict(obs, deterministic=True)
             obs, reward, done, terminated, info = gym_env.step(action)
             for i in range(nodes_count):
-                rewards_per_evaluation[i].append(reward[i])  # Log each reward for each node
+                rewards_per_evaluation[i].append(
+                    reward[i]
+                )  # Log each reward for each node
                 total_rewards_per_node[i] += reward[i]  # Sum rewards for each node
 
             if done or terminated:
@@ -90,7 +102,7 @@
                 rewards_per_evaluation[i],
                 marker="o",
                 linestyle="-",
-                label=f'Node {i+1}'
+                label=f"Node {i+1}",
             )
         plt.title("Rewards per Step During Evaluation for Each Node")
         plt.xlabel("Step")
@@ -103,4 +115,4 @@
         print(
             "usage: ./main <number_of_nodes> <data_size(bytes)> <avg_wake_up_time(secs)> <sim_time(secs)>"
         )
-        exit(-1)
\ No newline at end of file
+        exit(-1)
diff --git a/main3.py b/main3.py
index 2b41b89..0dbdf9e 100644
--- a/main3.py
+++ b/main3.py
@@ -1,84 +1,116 @@
 import os
+import sys
 import ray
+import logging
+import simulator.consts as consts
+
 from ray import tune
 from ray.rllib.algorithms.dqn import DQNConfig
 from ray.rllib.algorithms.dqn import DQN
 from ray.rllib.env import PettingZooEnv
 from ray.tune.registry import register_env
-import logging
 
 from multienv.multienv_v0 import env
 
 logging.basicConfig(level=logging.INFO)
 
 if __name__ == "__main__":
-    ray.init()
+    if len(sys.argv) == 5:
+        nodes_count = int(sys.argv[1])
+        data_size = int(sys.argv[2])
+        avg_wake_up_time = int(sys.argv[3])
+        sim_time = int(sys.argv[4])
 
-    # Register the environment
-    def create_env(config):
-        env_instance = env()
-        logging.info(f"Custom Env possible_agents: {env_instance.possible_agents}")
-        return PettingZooEnv(env_instance)
+        consts.nodes_count = nodes_count
+        consts.data_size = data_size
+        consts.avg_wake_up_time = avg_wake_up_time
+        consts.sim_time = sim_time
 
-    register_env("LoRaEnvParallel", create_env)
+        ray.init()
 
-    # Create a test environment to get observation and action spaces
-    test_env = create_env({})
-    logging.info(f"Wrapped Env possible_agents: {test_env.env.possible_agents}")
+        # Register the environment
+        def create_env(config):
+            env_instance = env(
+                nodes_count=nodes_count,
+                data_size=data_size,
+                avg_wake_up_time=avg_wake_up_time,
+                sim_time=sim_time,
+            )
+            logging.info(f"Custom Env possible_agents: {env_instance.possible_agents}")
+            return PettingZooEnv(env_instance)
 
-    # Check if possible_agents exists
-    if hasattr(test_env.env, "possible_agents"):
-        obs_space = test_env.env.observation_space(test_env.env.possible_agents[0])
-        act_space = test_env.env.action_space(test_env.env.possible_agents[0])
-    else:
-        raise AttributeError(
-            "The environment does not have 'possible_agents' attribute."
-        )
+        register_env("LoRaEnvParallel", create_env)
 
-    config = (
-        DQNConfig()
-        .environment(
-            env="LoRaEnvParallel",
-            env_config={
-                "nodes_count": 10,
-                "data_size": 16,
-                "avg_wake_up_time": 30,
-                "sim_time": 3600,
-            },
-        )
-        .env_runners(num_env_runners=1, rollout_fragment_length=30)
-        .training(
-            train_batch_size=200,
-            hiddens=[],
-            dueling=False,
-        )
-        .multi_agent(
-            policies={
-                agent: (None, obs_space, act_space, {})
-                for agent in test_env.env.possible_agents
-            },
-            policy_mapping_fn=(lambda agent_id, *args, **kwargs: agent_id),
-        )
-        .resources(num_gpus=int(os.environ.get("RLLIB_NUM_GPUS", "0")))
-        .framework(framework="torch")
-        .exploration(
-            exploration_config={
-                "type": "EpsilonGreedy",
-                "initial_epsilon": 0.1,
-                "final_epsilon": 0.0,
-                "epsilon_timesteps": 100000,
+        # Create a test environment to get observation and action spaces
+        test_env = create_env(
+            {
+                "nodes_count": nodes_count,
+                "data_size": data_size,
+                "avg_wake_up_time": avg_wake_up_time,
+                "sim_time": sim_time,
             }
         )
-    )
+        logging.info(f"Wrapped Env possible_agents: {test_env.env.possible_agents}")
+
+        # Check if possible_agents exists
+        if hasattr(test_env.env, "possible_agents"):
+            obs_space = test_env.env.observation_space(test_env.env.possible_agents[0])
+            act_space = test_env.env.action_space(test_env.env.possible_agents[0])
+        else:
+            raise AttributeError(
+                "The environment does not have 'possible_agents' attribute."
+            )
+
+        config = (
+            DQNConfig()
+            .environment(
+                env="LoRaEnvParallel",
+                env_config={
+                    "nodes_count": nodes_count,
+                    "data_size": data_size,
+                    "avg_wake_up_time": avg_wake_up_time,
+                    "sim_time": sim_time,
+                },
+            )
+            .env_runners(
+                num_env_runners=1,
+                rollout_fragment_length=30,
+                exploration_config={
+                    "type": "EpsilonGreedy",
+                    "initial_epsilon": 0.1,
+                    "final_epsilon": 0.0,
+                    "epsilon_timesteps": 100000,
+                },
+            )
+            .training(
+                train_batch_size=200,
+                hiddens=[],
+                dueling=False,
+            )
+            .multi_agent(
+                policies={
+                    agent: (None, obs_space, act_space, {})
+                    for agent in test_env.env.possible_agents
+                },
+                policy_mapping_fn=(lambda agent_id, *args, **kwargs: agent_id),
+            )
+            .resources(num_gpus=1)
+            .framework(framework="torch")
+        )
 
-    try:
-        tune.run(
-            DQN,
-            name="DQN_LoRaEnvParallel",
-            stop={"timesteps_total": 1000000},
-            checkpoint_freq=10,
-            config=config.to_dict(),
+        try:
+            tune.run(
+                DQN,
+                name="DQN_LoRaEnvParallel",
+                stop={"timesteps_total": 1000000},
+                checkpoint_freq=10,
+                config=config.to_dict(),
+            )
+        except Exception as e:
+            logging.error(f"An error occurred during training: {e}")
+            raise
+    else:
+        print(
+            "usage: python main3.py <number_of_nodes> <data_size(bytes)> <avg_wake_up_time(secs)> <sim_time(secs)>"
         )
-    except Exception as e:
-        logging.error(f"An error occurred during training: {e}")
-        raise
+        exit(-1)
diff --git a/simulator/communications.py b/simulator/communications.py
index 41fa209..f7a6bbb 100644
--- a/simulator/communications.py
+++ b/simulator/communications.py
@@ -6,7 +6,6 @@
 import math
 import random
 from simulator.channels import Channels
-from simulator.singleton import ArgumentSingleton
 
 
 class Packet:
@@ -174,7 +173,7 @@ def __init__(self, sf=None, node=None):
         self.sf = sf
         self.bw = 125
         self.freq = Channels.get_sf_freq(sf)
-        self.pl = ArgumentSingleton.get_instance().data_size
+        self.pl = consts.data_size
         self.rec_time = self.airtime()
 
     def update_statistics(self):
diff --git a/simulator/consts.py b/simulator/consts.py
index ad2c5c0..72eaed6 100644
--- a/simulator/consts.py
+++ b/simulator/consts.py
@@ -1,4 +1,5 @@
 import numpy as np
+
 from collections import defaultdict
 
 # CONSTANTS
@@ -24,10 +25,6 @@
 pow_cons = [75, 45, 30]
 V = 3.3  # voltage
 
-# global
-data_gateway = None
-nodes = []
-
 coding_rate = 1
 drifting_range = [-0.2, 0.2]
 mean = 0  # Mean of the normal distribution
@@ -75,3 +72,12 @@
 total_energy = 0
 erx = 0
 etx = 0
+
+# Default values for parameters (to be overwritten)
+nodes_count = 10
+data_size = 16
+avg_wake_up_time = 30
+sim_time = 3600
+
+data_gateway = None
+nodes = []
diff --git a/simulator/entities.py b/simulator/entities.py
index 8fdedbf..ef54196 100644
--- a/simulator/entities.py
+++ b/simulator/entities.py
@@ -2,7 +2,6 @@
 import numpy as np
 from simulator.singleton import (
     DataGatewaySingleton,
-    ArgumentSingleton,
 )
 import random, math
 from simulator.utils import *
@@ -10,8 +9,6 @@
 from simulator.broadcast_traffic import BroadcastTraffic
 from simulator.frame import Frame
 
-args = ArgumentSingleton.get_instance()
-
 
 class NetworkNode:
     def __init__(self, node_id=None):
@@ -41,7 +38,7 @@ def __init__(self, node_id):
     def frame(self, sf):
         if sf > 6:
             return self.frames[sf - 7]
-        raise ValueError("sf must be greater than 6")
+        raise ValueError("SF must be greater than 6")
 
     def transmit_sack(self, env, sf):
         from simulator.communications import SackPacket
@@ -147,9 +144,6 @@ def __init__(self, node_id, env, gateway=None):
         self.data_packet = None
         self.sack_packet_received = env.event()
 
-
-        
-
     def __str__(self):
         # return "EndNode: " + str(self.node_id) + " x: " + str(self.x) + " y: " + str(self.y) + " sf: " + str(self.sf)
         return f"node {self.node_id}: \t x {self.x:3f} \t y {self.y:3f} \t dist {self.dist:4.3f} \t SF {self.sf}"
@@ -226,7 +220,7 @@ def transmit(self, env):
         while True:
 
             # calculating round start time
-            yield env.timeout(random.uniform(0.0, float(2 * args.avg_wake_up_time)))
+            yield env.timeout(random.uniform(0.0, float(2 * consts.avg_wake_up_time)))
             if self.waiting_first_sack:
                 yield self.sack_packet_received
                 self.waiting_first_sack = False
diff --git a/simulator/singleton.py b/simulator/singleton.py
index c7481e2..524470e 100644
--- a/simulator/singleton.py
+++ b/simulator/singleton.py
@@ -2,33 +2,6 @@
 import simpy
 
 
-class ArgumentSingleton:
-    _instance = None
-
-    @staticmethod
-    def get_instance():
-        if ArgumentSingleton._instance is None:
-            ArgumentSingleton._instance = ArgumentSingleton()
-        return ArgumentSingleton._instance
-
-    def __init__(self):
-        if len(sys.argv) == 5:
-            self.nodes_count = int(sys.argv[1])
-            self.data_size = int(sys.argv[2])
-            self.avg_wake_up_time = int(sys.argv[3])
-            self.sim_time = int(sys.argv[4])
-
-        else:
-            self.nodes_count = 10
-            self.data_size = 16
-            self.avg_wake_up_time = 30 * 1000
-            self.sim_time = 3600 * 1000
-            # print(
-            #     "usage: ./main <number_of_nodes> <data_size(bytes)> <avg_wake_up_time(secs)> <sim_time(secs)>"
-            # )
-            # exit(-1)
-
-
 class DataGatewaySingleton:
     _instance = None
 
diff --git a/simulator/utils.py b/simulator/utils.py
index 388c326..a217acd 100644
--- a/simulator/utils.py
+++ b/simulator/utils.py
@@ -2,10 +2,7 @@
 import datetime
 import simulator.consts as consts
 
-from simulator.singleton import ArgumentSingleton, DataGatewaySingleton
-
-args = ArgumentSingleton.get_instance()
-nodes_count = args.nodes_count
+from simulator.singleton import DataGatewaySingleton
 
 
 def get_log_filename():
@@ -123,7 +120,7 @@ def show_final_statistics():
             f"{node.nr_lost:{max_length}} packets lost, "
             f"{node.nr_collisions:{max_length}} collisions"
         )
-    log(f"Average PRR: {(sum / nodes_count):.3f}")
+    log(f"Average PRR: {(sum / consts.nodes_count):.3f}")
 
     log("\n!-- NETWORK STATISTICS --!\n")
     log(f"Data collisions: {consts.nr_data_collisions}")
@@ -133,10 +130,10 @@ def show_final_statistics():
     log(f"Transmitted SACK packets: {consts.nr_sack_sent}")
     log(f"Missed SACK packets: {consts.nr_sack_missed_count}")
     log(f"Data Retransmissions: {consts.nr_data_retransmissions}")
-    log(f"Average energy consumption (Rx): {(consts.erx / nodes_count):.3f} J")
-    log(f"Average energy consumption (Tx): {(consts.etx / nodes_count):.3f} J")
+    log(f"Average energy consumption (Rx): {(consts.erx / consts.nodes_count):.3f} J")
+    log(f"Average energy consumption (Tx): {(consts.etx / consts.nodes_count):.3f} J")
     log(
-        f"Average energy consumption per node: {consts.total_energy / nodes_count:.3f} J"
+        f"Average energy consumption per node: {consts.total_energy / consts.nodes_count:.3f} J"
     )
     log(
         f"Network PRR (version 1): {(consts.nr_data_packets_sent - consts.nr_data_retransmissions) / consts.nr_data_packets_sent:.3f}"

From 46db3964e4bb690d3f1f7af3ea4474a2d9cf674f Mon Sep 17 00:00:00 2001
From: ltwmori <ltwmori@gmail.com>
Date: Thu, 23 May 2024 15:24:44 +0500
Subject: [PATCH 12/14] feat: added tensorboard

---
 main3.py | 52 +++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 47 insertions(+), 5 deletions(-)

diff --git a/main3.py b/main3.py
index 0dbdf9e..21e02ab 100644
--- a/main3.py
+++ b/main3.py
@@ -3,17 +3,47 @@
 import ray
 import logging
 import simulator.consts as consts
+import matplotlib.pyplot as plt
 
 from ray import tune
 from ray.rllib.algorithms.dqn import DQNConfig
 from ray.rllib.algorithms.dqn import DQN
 from ray.rllib.env import PettingZooEnv
 from ray.tune.registry import register_env
+from ray.tune.logger import TBXLoggerCallback
 
 from multienv.multienv_v0 import env
 
 logging.basicConfig(level=logging.INFO)
 
+def plot_metrics(df):
+    """Show df's mean episode reward and mean episode length on two vertically stacked subplots."""
+    fig, ax = plt.subplots(2, 1, figsize=(10, 8))
+
+    # Plot mean episode reward on the first subplot
+    if 'episode_reward_mean' in df.columns:
+        df['episode_reward_mean'].plot(ax=ax[0])
+        ax[0].set_title('Mean Episode Reward')
+        ax[0].set_xlabel('Training Iterations')
+        ax[0].set_ylabel('Reward')
+    else:
+        logging.warning("No 'episode_reward_mean' column found in results.")
+        ax[0].text(0.5, 0.5, 'No Data', horizontalalignment='center', verticalalignment='center')
+
+    # Plot mean episode length on the second subplot
+    if 'episode_len_mean' in df.columns:
+        df['episode_len_mean'].plot(ax=ax[1])
+        ax[1].set_title('Mean Episode Length')
+        ax[1].set_xlabel('Training Iterations')
+        ax[1].set_ylabel('Length')
+    else:
+        logging.warning("No 'episode_len_mean' column found in results.")
+        ax[1].text(0.5, 0.5, 'No Data', horizontalalignment='center', verticalalignment='center')
+
+    # Adjust layout and display the plots
+    plt.tight_layout()
+    plt.show()
+
 if __name__ == "__main__":
     if len(sys.argv) == 5:
         nodes_count = int(sys.argv[1])
@@ -79,7 +109,7 @@ def create_env(config):
                     "type": "EpsilonGreedy",
                     "initial_epsilon": 0.1,
                     "final_epsilon": 0.0,
-                    "epsilon_timesteps": 100000,
+                    "epsilon_timesteps": 1000000,
                 },
             )
             .training(
@@ -94,18 +124,30 @@ def create_env(config):
                 },
                 policy_mapping_fn=(lambda agent_id, *args, **kwargs: agent_id),
             )
-            .resources(num_gpus=1)
+            .resources(num_gpus=0)
             .framework(framework="torch")
         )
 
         try:
-            tune.run(
+            analysis = tune.run(
                 DQN,
                 name="DQN_LoRaEnvParallel",
-                stop={"timesteps_total": 1000000},
-                checkpoint_freq=10,
+                stop={"timesteps_total": 1000},
+                checkpoint_freq=100,
+                keep_checkpoints_num=5,
+                checkpoint_score_attr="training_iteration",
                 config=config.to_dict(),
+                local_dir="~/ray_results",  # Specify the directory for logging
+                callbacks=[TBXLoggerCallback()],
+                log_to_file=True,
             )
+
+            # Get the best trial
+            best_trial = analysis.get_best_trial("episode_reward_mean", mode="max")
+
+            # Use TensorBoard to visualize results
+            print(f"Training completed. Use TensorBoard to visualize results: tensorboard --logdir {best_trial.local_path}")
+
         except Exception as e:
             logging.error(f"An error occurred during training: {e}")
             raise

From 0ed0f81edddddc0aa9948e21a8288e355fd47e8e Mon Sep 17 00:00:00 2001
From: ltwmori <ltwmori@gmail.com>
Date: Thu, 23 May 2024 17:44:24 +0500
Subject: [PATCH 13/14] feat: restored prev version to main 2

---
 main2.py                                      | 224 +++++++++++-------
 main3.py                                      |   2 +-
 ...1716462842.MacBook-Air-Assel.local.44116.0 | Bin 0 -> 88 bytes
 ...1716462901.MacBook-Air-Assel.local.44462.0 | Bin 0 -> 88 bytes
 ...1716464109.MacBook-Air-Assel.local.50289.0 | Bin 0 -> 88 bytes
 ...1716464382.MacBook-Air-Assel.local.51656.0 | Bin 0 -> 88 bytes
 ...1716464601.MacBook-Air-Assel.local.52747.0 | Bin 0 -> 88 bytes
 ...1716464781.MacBook-Air-Assel.local.53639.0 | Bin 0 -> 88 bytes
 ...1716465092.MacBook-Air-Assel.local.55124.0 | Bin 0 -> 88 bytes
 ...1716465333.MacBook-Air-Assel.local.56249.0 | Bin 0 -> 88 bytes
 ...1716467317.MacBook-Air-Assel.local.65894.0 | Bin 0 -> 88 bytes
 11 files changed, 134 insertions(+), 92 deletions(-)
 create mode 100644 ~/ray_results/events.out.tfevents.1716462842.MacBook-Air-Assel.local.44116.0
 create mode 100644 ~/ray_results/events.out.tfevents.1716462901.MacBook-Air-Assel.local.44462.0
 create mode 100644 ~/ray_results/events.out.tfevents.1716464109.MacBook-Air-Assel.local.50289.0
 create mode 100644 ~/ray_results/events.out.tfevents.1716464382.MacBook-Air-Assel.local.51656.0
 create mode 100644 ~/ray_results/events.out.tfevents.1716464601.MacBook-Air-Assel.local.52747.0
 create mode 100644 ~/ray_results/events.out.tfevents.1716464781.MacBook-Air-Assel.local.53639.0
 create mode 100644 ~/ray_results/events.out.tfevents.1716465092.MacBook-Air-Assel.local.55124.0
 create mode 100644 ~/ray_results/events.out.tfevents.1716465333.MacBook-Air-Assel.local.56249.0
 create mode 100644 ~/ray_results/events.out.tfevents.1716467317.MacBook-Air-Assel.local.65894.0

diff --git a/main2.py b/main2.py
index bb23d89..085c9aa 100644
--- a/main2.py
+++ b/main2.py
@@ -1,17 +1,58 @@
+import os
 import sys
-import gymnasium as gym
-import loraenv
-import simulator.utils as utils
+import ray
+import logging
 import simulator.consts as consts
-import matplotlib.pyplot as plt
 
-import numpy as np
-import simulator.consts as consts
+from ray import tune
+from ray.rllib.algorithms.dqn import DQNConfig
+from ray.rllib.algorithms.dqn import DQN
+from ray.rllib.env import PettingZooEnv
+from ray.tune.registry import register_env
+from ray.tune.logger import TBXLoggerCallback
+from torch.utils.tensorboard import SummaryWriter
+
+from multienv.multienv_v0 import env
+
+logging.basicConfig(level=logging.INFO)
+
+class CustomMetricsCallback(tune.Callback):
+    """Log per-episode uplink and reward metrics to TensorBoard through a SummaryWriter."""
+    def __init__(self, logdir):
+        self.logdir = os.path.expanduser(logdir)  # SummaryWriter would otherwise create a literal "~" directory
+        self.writer = SummaryWriter(log_dir=self.logdir)
+
+    # NOTE(review): on_episode_end looks like an RLlib hook and on_trial_end is not a documented tune.Callback hook - confirm Tune ever calls them.
+    def on_episode_end(self, *, worker, base_env, policies, episode, **kwargs):
+        env = base_env.get_unwrapped()[0]
+        total_steps = episode.length
+        total_uplinks = sum(episode.custom_metrics[agent]["uplink_attempts"] for agent in env.possible_agents)
+        total_reward = sum(episode.reward[agent] for agent in env.possible_agents)
+        uplinks_per_node = {agent: episode.custom_metrics[agent]["uplink_attempts"] for agent in env.possible_agents}
 
-from simulator.lora_simulator import LoraSimulator
-from reward_caller_callback import RewardLoggerCallback
+        uplinks_per_step = total_uplinks / total_steps if total_steps > 0 else 0
+        reward_per_uplink = total_reward / total_uplinks if total_uplinks > 0 else 0
 
-from stable_baselines3 import PPO
+        episode.custom_metrics["total_uplinks"] = total_uplinks
+        episode.custom_metrics["total_steps"] = total_steps
+        episode.custom_metrics["uplinks_per_step"] = uplinks_per_step
+        episode.custom_metrics["reward_per_uplink"] = reward_per_uplink
+        episode.custom_metrics["uplinks_per_node"] = uplinks_per_node
+
+        logging.info(f"Episode {episode.episode_id} ended with {total_uplinks} total uplinks, "
+                     f"{total_steps} steps, {uplinks_per_step:.4f} uplinks per step, "
+                     f"{reward_per_uplink:.4f} reward per uplink, and {uplinks_per_node} uplink attempts.")
+        # Log custom metrics to TensorBoard
+        self.writer.add_scalar("Metrics/Total_Uplinks", total_uplinks, episode.episode_id)
+        self.writer.add_scalar("Metrics/Reward_Per_Uplink", reward_per_uplink, episode.episode_id)
+        for agent, uplinks in uplinks_per_node.items():
+            self.writer.add_scalar(f"Metrics/Uplinks_Per_Node/{agent}", uplinks, episode.episode_id)
+
+    def on_trial_end(self, iteration, trials, trial, **info):
+        self.writer.flush()
+
+    def on_experiment_end(self, **kwargs):
+        self.writer.close()
 
 if __name__ == "__main__":
     if len(sys.argv) == 5:
@@ -25,94 +66,95 @@
         consts.avg_wake_up_time = avg_wake_up_time
         consts.sim_time = sim_time
 
-        # Gymnasium environment
-        gym_env = gym.make(
-            "loraenv/LoRa-v0",
-            nodes_count=nodes_count,
-            data_size=data_size,
-            avg_wake_up_time=avg_wake_up_time,
-            sim_time=sim_time,
+        ray.init()
+
+        # Register the environment
+        def create_env(config):
+            env_instance = env(
+                nodes_count=nodes_count,
+                data_size=data_size,
+                avg_wake_up_time=avg_wake_up_time,
+                sim_time=sim_time,
+            )
+            logging.info(f"Custom Env possible_agents: {env_instance.possible_agents}")
+            return PettingZooEnv(env_instance)
+
+        register_env("LoRaEnvParallel", create_env)
+
+        # Create a test environment to get observation and action spaces
+        test_env = create_env(
+            {
+                "nodes_count": nodes_count,
+                "data_size": data_size,
+                "avg_wake_up_time": avg_wake_up_time,
+                "sim_time": sim_time,
+            }
         )
+        logging.info(f"Wrapped Env possible_agents: {test_env.env.possible_agents}")
+
+        # Check if possible_agents exists
+        if hasattr(test_env.env, "possible_agents"):
+            obs_space = test_env.env.observation_space(test_env.env.possible_agents[0])
+            act_space = test_env.env.action_space(test_env.env.possible_agents[0])
+        else:
+            raise AttributeError(
+                "The environment does not have 'possible_agents' attribute."
+            )
 
-        train = False
-        if train:
-            # Create new model
-            model = PPO("MultiInputPolicy", gym_env, verbose=1)
-            reward_logger = RewardLoggerCallback()
-
-            # Training Phase
-            # --------------
-            utils.logging = False
-            utils.log(f"!-- TRAINING START --!")
-            # Calculate total timesteps for training
-            episodes = 10
-            total_timesteps = (
-                sim_time * episodes
-            )  # Assuming 1 timestep = 1 second in simulation
-            model.learn(
-                total_timesteps=total_timesteps,
-                log_interval=4,
-                progress_bar=True,
-                callback=reward_logger,
+        config = (
+            DQNConfig()
+            .environment(
+                env="LoRaEnvParallel",
+                env_config={
+                    "nodes_count": nodes_count,
+                    "data_size": data_size,
+                    "avg_wake_up_time": avg_wake_up_time,
+                    "sim_time": sim_time,
+                },
             )
-            model.save("lora_model")
-            utils.log(f"!-- TRAINING END --!")
-
-            # Plot the rewards collected during the training
-            plt.figure(figsize=(10, 5))
-            plt.plot(reward_logger.episode_rewards, marker="o", linestyle="-")
-            plt.title("Total Reward per Episode During Training")
-            plt.xlabel("Episode")
-            plt.ylabel("Total Reward")
-            plt.grid(True)
-            plt.savefig("training_phase.png")
-
-        # Evaluation Phase
-        # ----------------
-        model = PPO.load("lora_model")
-        utils.log(f"!-- EVALUATION START --!")
-        obs, info = gym_env.reset()
-        rewards_per_evaluation = [
-            [] for _ in range(nodes_count)
-        ]  # List to hold rewards for each node
-        total_rewards_per_node = [
-            0
-        ] * nodes_count  # List to hold total rewards for each node
-
-        done = False
-        while True:
-            action, _states = model.predict(obs, deterministic=True)
-            obs, reward, done, terminated, info = gym_env.step(action)
-            for i in range(nodes_count):
-                rewards_per_evaluation[i].append(
-                    reward[i]
-                )  # Log each reward for each node
-                total_rewards_per_node[i] += reward[i]  # Sum rewards for each node
-
-            if done or terminated:
-                utils.show_final_statistics()
-                utils.log(f"!-- EVALUATION END --!")
-                break
-
-        # Plot the rewards collected during the evaluation for each node
-        plt.figure(figsize=(10, 5))
-        for i in range(nodes_count):
-            plt.plot(
-                range(1, len(rewards_per_evaluation[i]) + 1),
-                rewards_per_evaluation[i],
-                marker="o",
-                linestyle="-",
-                label=f"Node {i+1}",
+            .env_runners(
+                num_env_runners=1,
+                rollout_fragment_length=30,
+                exploration_config={
+                    "type": "EpsilonGreedy",
+                    "initial_epsilon": 0.1,
+                    "final_epsilon": 0.0,
+                    "epsilon_timesteps": 100000,
+                },
             )
-        plt.title("Rewards per Step During Evaluation for Each Node")
-        plt.xlabel("Step")
-        plt.ylabel("Reward")
-        plt.legend()
-        plt.grid(True)
-        plt.savefig("evaluation_phase_per_node.png")
+            .training(
+                train_batch_size=200,
+                hiddens=[],
+                dueling=False,
+            )
+            .multi_agent(
+                policies={
+                    agent: (None, obs_space, act_space, {})
+                    for agent in test_env.env.possible_agents
+                },
+                policy_mapping_fn=(lambda agent_id, *args, **kwargs: agent_id),
+            )
+            .resources(num_gpus=0)
+            .framework(framework="torch")
+        )
 
+        try:
+            logdir = "~/ray_results"
+            tune.run(
+                DQN,
+                name="DQN_LoRaEnvParallel",
+                stop={"timesteps_total": 100000},
+                checkpoint_freq=10,
+                config=config.to_dict(),
+                local_dir=logdir,  # Specify the directory for logging
+                callbacks=[TBXLoggerCallback(), CustomMetricsCallback(logdir)],
+                log_to_file=True,
+            )
+        except Exception as e:
+            logging.error(f"An error occurred during training: {e}")
+            raise
     else:
         print(
-            "usage: ./main <number_of_nodes> <data_size(bytes)> <avg_wake_up_time(secs)> <sim_time(secs)>"
+            "usage: python main2.py <number_of_nodes> <data_size(bytes)> <avg_wake_up_time(secs)> <sim_time(secs)>"
         )
         exit(-1)
diff --git a/main3.py b/main3.py
index 21e02ab..dd377c1 100644
--- a/main3.py
+++ b/main3.py
@@ -132,7 +132,7 @@ def create_env(config):
             analysis = tune.run(
                 DQN,
                 name="DQN_LoRaEnvParallel",
-                stop={"timesteps_total": 1000},
+                stop={"timesteps_total": 100000},
                 checkpoint_freq=100,
                 keep_checkpoints_num=5,
                 checkpoint_score_attr="training_iteration",
diff --git a/~/ray_results/events.out.tfevents.1716462842.MacBook-Air-Assel.local.44116.0 b/~/ray_results/events.out.tfevents.1716462842.MacBook-Air-Assel.local.44116.0
new file mode 100644
index 0000000000000000000000000000000000000000..8d025e05caf28d666fc4f1ff432b947ef5e17077
GIT binary patch
literal 88
zcmeZZfPjCKJmzwWs4TNPIr*le6mL>dVrHJ6YguYuiIq{19+yr@YF=@EQBr<lQHox1
hX>M*}QKepaQD#YMkzOiDReV}zPHH?vMEs-OCjk7{APWEh

literal 0
HcmV?d00001

diff --git a/~/ray_results/events.out.tfevents.1716462901.MacBook-Air-Assel.local.44462.0 b/~/ray_results/events.out.tfevents.1716462901.MacBook-Air-Assel.local.44462.0
new file mode 100644
index 0000000000000000000000000000000000000000..465ab83c4cc619f47dc15580f4ca1ef8409e1b22
GIT binary patch
literal 88
zcmeZZfPjCKJmzvH>E-&KoP5(!iZ`h!F*8rkwJbHS#L6g0k4vW{HLp0oC@DX&C`GTh
hG&eV~s8X-ID6=HBNG}znDn2bUCp8`-a=EU|0ssr>AOHXW

literal 0
HcmV?d00001

diff --git a/~/ray_results/events.out.tfevents.1716464109.MacBook-Air-Assel.local.50289.0 b/~/ray_results/events.out.tfevents.1716464109.MacBook-Air-Assel.local.50289.0
new file mode 100644
index 0000000000000000000000000000000000000000..2292004e70c3e33ff5781fd1fa71d0ca061b5d15
GIT binary patch
literal 88
zcmeZZfPjCKJmzwSJPD{iHTkBa6mL>dVrHJ6YguYuiIq{19+yr@YF=@EQBr<lQHox1
hX>M*}QKepaQD#YMkzOiDReV}zPHH?vWZL1R2mnDZAkF{)

literal 0
HcmV?d00001

diff --git a/~/ray_results/events.out.tfevents.1716464382.MacBook-Air-Assel.local.51656.0 b/~/ray_results/events.out.tfevents.1716464382.MacBook-Air-Assel.local.51656.0
new file mode 100644
index 0000000000000000000000000000000000000000..b2ab0df859f9acdfd440eec57106170320f886b5
GIT binary patch
literal 88
zcmeZZfPjCKJmzvT^iSG<YVu7-Dc+=_#LPTB*Rs^S5-X!1JuaP+)V$*SqNM!9q7=R2
h(%js{qDsB;qRf)iBE3|Qs`#|boYZ)TNZ#q7N&q!%Ae;aI

literal 0
HcmV?d00001

diff --git a/~/ray_results/events.out.tfevents.1716464601.MacBook-Air-Assel.local.52747.0 b/~/ray_results/events.out.tfevents.1716464601.MacBook-Air-Assel.local.52747.0
new file mode 100644
index 0000000000000000000000000000000000000000..b21706d4bb7f7b3ce706fc26d98c2dab8757ed0f
GIT binary patch
literal 88
zcmeZZfPjCKJmzvLT4a4YHTkBa6mL>dVrHJ6YguYuiIq{19+yr@YF=@EQBr<lQHox1
hX>M*}QKepaQD#YMkzOiDReV}zPHH?v<W!T4762*RAOipZ

literal 0
HcmV?d00001

diff --git a/~/ray_results/events.out.tfevents.1716464781.MacBook-Air-Assel.local.53639.0 b/~/ray_results/events.out.tfevents.1716464781.MacBook-Air-Assel.local.53639.0
new file mode 100644
index 0000000000000000000000000000000000000000..b4e02904d28e39fcedb513df8bbaa93d5df3add2
GIT binary patch
literal 88
zcmeZZfPjCKJmzvXB<3rho_y0$iZ`h!F*8rkwJbHS#L6g0k4vW{HLp0oC@DX&C`GTh
hG&eV~s8X-ID6=HBNG}znDn2bUCp8`-61sHl1^^f%AcFt^

literal 0
HcmV?d00001

diff --git a/~/ray_results/events.out.tfevents.1716465092.MacBook-Air-Assel.local.55124.0 b/~/ray_results/events.out.tfevents.1716465092.MacBook-Air-Assel.local.55124.0
new file mode 100644
index 0000000000000000000000000000000000000000..f6ad4b560a047f123b5c2809da8a919b9ad0e9c8
GIT binary patch
literal 88
zcmeZZfPjCKJmzw;eP=E_J^7}i6mL>dVrHJ6YguYuiIq{19+yr@YF=@EQBr<lQHox1
hX>M*}QKepaQD#YMkzOiDReV}zPHH?v#N5l*2mllwA1VL<

literal 0
HcmV?d00001

diff --git a/~/ray_results/events.out.tfevents.1716465333.MacBook-Air-Assel.local.56249.0 b/~/ray_results/events.out.tfevents.1716465333.MacBook-Air-Assel.local.56249.0
new file mode 100644
index 0000000000000000000000000000000000000000..d2194fa659a2cdb643f9cbbf0ca7d1bbfcab7d98
GIT binary patch
literal 88
zcmeZZfPjCKJmzwy<z=otJ^7}i6mL>dVrHJ6YguYuiIq{19+yr@YF=@EQBr<lQHox1
hX>M*}QKepaQD#YMkzOiDReV}zPHH?vMB-XN6#zI8AX@+c

literal 0
HcmV?d00001

diff --git a/~/ray_results/events.out.tfevents.1716467317.MacBook-Air-Assel.local.65894.0 b/~/ray_results/events.out.tfevents.1716467317.MacBook-Air-Assel.local.65894.0
new file mode 100644
index 0000000000000000000000000000000000000000..70b4a424d7dca9da0968baf4f2a41c47f7b289e6
GIT binary patch
literal 88
zcmeZZfPjCKJmzxF_*p;q?Btt{QoKn;iJ5tNu4SotC00g3dR#gssd>fuMM?RIMJam4
hrMbC@MU{HxMVTe3MS7_qRq<(=IjQjwk^3A4yZ~{%Aj|*&

literal 0
HcmV?d00001


From 587069351c4766c011b7dbdb43db90d21f431e20 Mon Sep 17 00:00:00 2001
From: DeTrix2712 <detrix27122002@gmail.com>
Date: Tue, 25 Jun 2024 22:42:56 +0500
Subject: [PATCH 14/14] fix: minor fixes

---
 main3.py                 | 46 +++++++++++++++++++++++++++-------------
 multienv/env/multienv.py | 16 ++++++++------
 2 files changed, 41 insertions(+), 21 deletions(-)

diff --git a/main3.py b/main3.py
index dd377c1..97e4542 100644
--- a/main3.py
+++ b/main3.py
@@ -16,34 +16,48 @@
 
 logging.basicConfig(level=logging.INFO)
 
+
 def plot_metrics(df):
     # Create a figure with 2 subplots arranged vertically
     fig, ax = plt.subplots(2, 1, figsize=(10, 8))
 
     # Plot mean episode reward on the first subplot
-    if 'episode_reward_mean' in df.columns:
-        df['episode_reward_mean'].plot(ax=ax[0])
-        ax[0].set_title('Mean Episode Reward')
-        ax[0].set_xlabel('Training Iterations')
-        ax[0].set_ylabel('Reward')
+    if "episode_reward_mean" in df.columns:
+        df["episode_reward_mean"].plot(ax=ax[0])
+        ax[0].set_title("Mean Episode Reward")
+        ax[0].set_xlabel("Training Iterations")
+        ax[0].set_ylabel("Reward")
     else:
         logging.warning("No 'episode_reward_mean' column found in results.")
-        ax[0].text(0.5, 0.5, 'No Data', horizontalalignment='center', verticalalignment='center')
+        ax[0].text(
+            0.5,
+            0.5,
+            "No Data",
+            horizontalalignment="center",
+            verticalalignment="center",
+        )
 
     # Plot mean episode length on the second subplot
-    if 'episode_len_mean' in df.columns:
-        df['episode_len_mean'].plot(ax=ax[1])
-        ax[1].set_title('Mean Episode Length')
-        ax[1].set_xlabel('Training Iterations')
-        ax[1].set_ylabel('Length')
+    if "episode_len_mean" in df.columns:
+        df["episode_len_mean"].plot(ax=ax[1])
+        ax[1].set_title("Mean Episode Length")
+        ax[1].set_xlabel("Training Iterations")
+        ax[1].set_ylabel("Length")
     else:
         logging.warning("No 'episode_len_mean' column found in results.")
-        ax[1].text(0.5, 0.5, 'No Data', horizontalalignment='center', verticalalignment='center')
+        ax[1].text(
+            0.5,
+            0.5,
+            "No Data",
+            horizontalalignment="center",
+            verticalalignment="center",
+        )
 
     # Adjust layout and display the plots
     plt.tight_layout()
     plt.show()
 
+
 if __name__ == "__main__":
     if len(sys.argv) == 5:
         nodes_count = int(sys.argv[1])
@@ -56,7 +70,7 @@ def plot_metrics(df):
         consts.avg_wake_up_time = avg_wake_up_time
         consts.sim_time = sim_time
 
-        ray.init()
+        ray.init(num_cpus=12, num_gpus=0)
 
         # Register the environment
         def create_env(config):
@@ -109,7 +123,7 @@ def create_env(config):
                     "type": "EpsilonGreedy",
                     "initial_epsilon": 0.1,
                     "final_epsilon": 0.0,
-                    "epsilon_timesteps": 1000000,
+                    "epsilon_timesteps": 100000,
                 },
             )
             .training(
@@ -146,7 +160,9 @@ def create_env(config):
             best_trial = analysis.get_best_trial("episode_reward_mean", mode="max")
 
             # Use TensorBoard to visualize results
-            print(f"Training completed. Use TensorBoard to visualize results: tensorboard --logdir {best_trial.local_path}")
+            print(
+                f"Training completed. Use TensorBoard to visualize results: tensorboard --logdir {best_trial.local_path}"
+            )
 
         except Exception as e:
             logging.error(f"An error occurred during training: {e}")
diff --git a/multienv/env/multienv.py b/multienv/env/multienv.py
index 6ddcff4..d27c101 100644
--- a/multienv/env/multienv.py
+++ b/multienv/env/multienv.py
@@ -87,13 +87,17 @@ def step(self, actions):
             self.simulator.start_simulation()
         if self.current_step >= self.sim_time:
             self.done = True
-            reward = self._calculate_reward()
             observations = {
                 agent: self.observe(agent) for agent in self.possible_agents
             }
+            rewards = {
+                agent: self._calculate_reward(self.agent_name_mapping[agent])
+                for agent in self.possible_agents
+            }
+            dones = {agent: self.done for agent in self.possible_agents}
             infos = {agent: {} for agent in self.possible_agents}
             logging.info("Simulation done.")
-            return observations, reward, self.done, infos
+            return observations, rewards, dones, dones, infos
 
         for agent in actions:
             agent_index = self.agent_name_mapping[agent]
@@ -114,15 +118,15 @@ def step(self, actions):
             agent: self._calculate_reward(self.agent_name_mapping[agent])
             for agent in self.possible_agents
         }
-        self.done = self.current_step >= self.sim_time
+        dones = {
+            agent: self.current_step >= self.sim_time for agent in self.possible_agents
+        }
+        truncations = {agent: self.truncated for agent in self.possible_agents}
         infos = {agent: {} for agent in self.possible_agents}
 
         if self.render_mode == "human":
             self.render()
 
-        dones = {agent: self.done for agent in self.possible_agents}
-        truncations = {agent: self.truncated for agent in self.possible_agents}
-
         return observations, rewards, dones, truncations, infos
 
     def observe(self, agent):