From 966ef43f1552ea8aa4fa2bb2affa13da6b6fa6e6 Mon Sep 17 00:00:00 2001
From: Yu Ishihara
Date: Fri, 14 Jul 2023 12:31:40 +0900
Subject: [PATCH] Fix testing code errors

Fix np.int deprecation errors
Fix non_terminal's shape in test
---
 nnabla_rl/algorithms/a2c.py           | 2 +-
 nnabla_rl/algorithms/ppo.py           | 2 +-
 nnabla_rl/distributions/bernoulli.py  | 2 +-
 nnabla_rl/distributions/softmax.py    | 2 +-
 tests/algorithms/test_common_utils.py | 4 ++--
 5 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/nnabla_rl/algorithms/a2c.py b/nnabla_rl/algorithms/a2c.py
index 61511880..8a42dd8d 100644
--- a/nnabla_rl/algorithms/a2c.py
+++ b/nnabla_rl/algorithms/a2c.py
@@ -523,7 +523,7 @@ def array_and_dtype(mp_arrays_item):
     def _compute_action(self, s, *, begin_of_episode=False):
         action, info = self._exploration_actor(s, begin_of_episode=begin_of_episode)
         if self._env_info.is_discrete_action_env():
-            return np.int(action), info
+            return np.int32(action), info
         else:
             return action, info
diff --git a/nnabla_rl/algorithms/ppo.py b/nnabla_rl/algorithms/ppo.py
index a26d514b..3d31b0b3 100644
--- a/nnabla_rl/algorithms/ppo.py
+++ b/nnabla_rl/algorithms/ppo.py
@@ -638,7 +638,7 @@ def _compute_action(self, s, *, begin_of_episode=False):
         info = {}
         info['log_prob'] = log_prob
         if self._env_info.is_discrete_action_env():
-            return np.int(action), info
+            return np.int32(action), info
         else:
             return action, info
diff --git a/nnabla_rl/distributions/bernoulli.py b/nnabla_rl/distributions/bernoulli.py
index 2eb02417..55818845 100644
--- a/nnabla_rl/distributions/bernoulli.py
+++ b/nnabla_rl/distributions/bernoulli.py
@@ -41,7 +41,7 @@ def __init__(self, z):
         self._distribution = NF.concatenate(self._p, 1 - self._p)
         self._log_distribution = NF.concatenate(self._log_p, self._log_1_minus_p)
-        labels = np.array([1, 0], dtype=np.int)
+        labels = np.array([1, 0], dtype=np.int32)
         labels = nn.Variable.from_numpy_array(labels)
         self._labels = labels
         for size in reversed(z.shape[0:-1]):
diff --git a/nnabla_rl/distributions/softmax.py b/nnabla_rl/distributions/softmax.py
index d62996ff..f58f3316 100644
--- a/nnabla_rl/distributions/softmax.py
+++ b/nnabla_rl/distributions/softmax.py
@@ -42,7 +42,7 @@ def __init__(self, z):
         self._num_class = z.shape[-1]
         labels = np.array(
-            [label for label in range(self._num_class)], dtype=np.int)
+            [label for label in range(self._num_class)], dtype=np.int32)
         self._labels = nn.Variable.from_numpy_array(labels)
         self._actions = self._labels
         for size in reversed(z.shape[0:-1]):
diff --git a/tests/algorithms/test_common_utils.py b/tests/algorithms/test_common_utils.py
index f9d3c10f..a403f174 100644
--- a/tests/algorithms/test_common_utils.py
+++ b/tests/algorithms/test_common_utils.py
@@ -1,5 +1,5 @@
 # Copyright 2020,2021 Sony Corporation.
-# Copyright 2021,2022 Sony Group Corporation.
+# Copyright 2021,2022,2023 Sony Group Corporation.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -55,7 +55,7 @@ def _collect_dummy_experience(self, num_episodes=1, episode_length=3, tupled_sta
             r = np.ones(1, )
             non_terminal = np.ones(1, )
             if i == episode_length-1:
-                non_terminal = 0
+                non_terminal = np.zeros(1, )
             experience.append((s_current, a, r, non_terminal, s_next))
         return experience
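
For context: np.int was only an alias for Python's built-in int; NumPy deprecated it in 1.20 and removed it in 1.24, so the replaced calls above raise AttributeError on recent NumPy releases. The sketch below is a standalone illustration, not part of the patch (variable names are for demonstration only); it shows the failure mode, the two replacement patterns the patch uses, and the shape fix applied in the test helper.

import numpy as np

# On NumPy >= 1.24 the removed alias fails:
#   np.int(3)                       -> AttributeError: module 'numpy' has no attribute 'int'
#   np.array([1, 0], dtype=np.int)  -> same error

# Replacement pattern 1: cast the scalar with an explicit fixed-width type (a2c.py, ppo.py).
action = np.int32(3)

# Replacement pattern 2: pass the fixed-width type as the dtype (bernoulli.py, softmax.py).
labels = np.array([1, 0], dtype=np.int32)

# Test fix: keep non_terminal as a shape-(1,) array on the final step so it matches
# the shapes of the reward and the other per-step arrays in the dummy experience.
non_terminal = np.zeros(1, )  # previously the scalar 0
assert non_terminal.shape == (1,)

The built-in int would also remove the deprecation error for the scalar case; np.int32 additionally pins the integer width.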