From 966ef43f1552ea8aa4fa2bb2affa13da6b6fa6e6 Mon Sep 17 00:00:00 2001
From: Yu Ishihara
Date: Fri, 14 Jul 2023 12:31:40 +0900
Subject: [PATCH] Fix testing code errors

Fix np.int deprecation errors
Fix non_terminal's shape in test
---
 nnabla_rl/algorithms/a2c.py           | 2 +-
 nnabla_rl/algorithms/ppo.py           | 2 +-
 nnabla_rl/distributions/bernoulli.py  | 2 +-
 nnabla_rl/distributions/softmax.py    | 2 +-
 tests/algorithms/test_common_utils.py | 4 ++--
 5 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/nnabla_rl/algorithms/a2c.py b/nnabla_rl/algorithms/a2c.py
index 61511880..8a42dd8d 100644
--- a/nnabla_rl/algorithms/a2c.py
+++ b/nnabla_rl/algorithms/a2c.py
@@ -523,7 +523,7 @@ def array_and_dtype(mp_arrays_item):
     def _compute_action(self, s, *, begin_of_episode=False):
         action, info = self._exploration_actor(s, begin_of_episode=begin_of_episode)
         if self._env_info.is_discrete_action_env():
-            return np.int(action), info
+            return np.int32(action), info
         else:
             return action, info
diff --git a/nnabla_rl/algorithms/ppo.py b/nnabla_rl/algorithms/ppo.py
index a26d514b..3d31b0b3 100644
--- a/nnabla_rl/algorithms/ppo.py
+++ b/nnabla_rl/algorithms/ppo.py
@@ -638,7 +638,7 @@ def _compute_action(self, s, *, begin_of_episode=False):
         info = {}
         info['log_prob'] = log_prob
         if self._env_info.is_discrete_action_env():
-            return np.int(action), info
+            return np.int32(action), info
         else:
             return action, info
diff --git a/nnabla_rl/distributions/bernoulli.py b/nnabla_rl/distributions/bernoulli.py
index 2eb02417..55818845 100644
--- a/nnabla_rl/distributions/bernoulli.py
+++ b/nnabla_rl/distributions/bernoulli.py
@@ -41,7 +41,7 @@ def __init__(self, z):
         self._distribution = NF.concatenate(self._p, 1 - self._p)
         self._log_distribution = NF.concatenate(self._log_p, self._log_1_minus_p)
-        labels = np.array([1, 0], dtype=np.int)
+        labels = np.array([1, 0], dtype=np.int32)
         labels = nn.Variable.from_numpy_array(labels)
         self._labels = labels
         for size in reversed(z.shape[0:-1]):
diff --git a/nnabla_rl/distributions/softmax.py b/nnabla_rl/distributions/softmax.py
index d62996ff..f58f3316 100644
--- a/nnabla_rl/distributions/softmax.py
+++ b/nnabla_rl/distributions/softmax.py
@@ -42,7 +42,7 @@ def __init__(self, z):
         self._num_class = z.shape[-1]
         labels = np.array(
-            [label for label in range(self._num_class)], dtype=np.int)
+            [label for label in range(self._num_class)], dtype=np.int32)
         self._labels = nn.Variable.from_numpy_array(labels)
         self._actions = self._labels
         for size in reversed(z.shape[0:-1]):
diff --git a/tests/algorithms/test_common_utils.py b/tests/algorithms/test_common_utils.py
index f9d3c10f..a403f174 100644
--- a/tests/algorithms/test_common_utils.py
+++ b/tests/algorithms/test_common_utils.py
@@ -1,5 +1,5 @@
 # Copyright 2020,2021 Sony Corporation.
-# Copyright 2021,2022 Sony Group Corporation.
+# Copyright 2021,2022,2023 Sony Group Corporation.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -55,7 +55,7 @@ def _collect_dummy_experience(self, num_episodes=1, episode_length=3, tupled_sta
             r = np.ones(1, )
             non_terminal = np.ones(1, )
             if i == episode_length-1:
-                non_terminal = 0
+                non_terminal = np.zeros(1, )
             experience.append((s_current, a, r, non_terminal, s_next))
         return experience
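
For context: np.int was only an alias for Python's built-in int; NumPy deprecated it in 1.20 and removed it in 1.24, so the replaced calls above raise AttributeError on recent NumPy releases. The sketch below is a standalone illustration, not part of the patch (variable names are for demonstration only); it shows the failure mode, the two replacement patterns the patch uses, and the shape fix applied in the test helper.

import numpy as np

# On NumPy >= 1.24 the removed alias fails:
#   np.int(3)                       -> AttributeError: module 'numpy' has no attribute 'int'
#   np.array([1, 0], dtype=np.int)  -> same error

# Replacement pattern 1: cast the scalar with an explicit fixed-width type (a2c.py, ppo.py).
action = np.int32(3)

# Replacement pattern 2: pass the fixed-width type as the dtype (bernoulli.py, softmax.py).
labels = np.array([1, 0], dtype=np.int32)

# Test fix: keep non_terminal as a shape-(1,) array on the final step so it matches
# the shapes of the reward and the other per-step arrays in the dummy experience.
non_terminal = np.zeros(1, )  # previously the scalar 0
assert non_terminal.shape == (1,)

The built-in int would also remove the deprecation error for the scalar case; np.int32 additionally pins the integer width.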