diff --git a/.github/workflows/pylint-test.yml b/.github/workflows/pylint-test.yml index c80fe2877..1f2c4c377 100644 --- a/.github/workflows/pylint-test.yml +++ b/.github/workflows/pylint-test.yml @@ -7,6 +7,8 @@ jobs: runs-on: ubuntu-latest strategy: matrix: + # Randomly hitting TypeError: object int can't be used in 'await' expression in 3.11 + # So, excluding 3.11 for now python-version: ["3.8", "3.9", "3.10"] steps: - uses: actions/checkout@v3 @@ -16,8 +18,9 @@ jobs: python-version: ${{ matrix.python-version }} - name: Install dependencies run: | - python -m pip install --upgrade pip setuptools wheel + python -m pip install --upgrade pip setuptools wheel cython pip install . + python setup.py build_ext --inplace - name: Running unit tests run: pytest - name: Analysing the code with pylint diff --git a/.gitignore b/.gitignore index 011a34c5c..8b7f009b4 100644 --- a/.gitignore +++ b/.gitignore @@ -16,8 +16,9 @@ __pycache__/ *.py[cod] *$py.class -# C extensions +# C extensions, cython *.so +*.c # Distribution / packaging .Python @@ -126,13 +127,14 @@ celerybeat.pid # Environments .env -.venv +.*venv env/ venv/ ENV/ env.bak/ venv.bak/ + # Spyder project settings .spyderproject .spyproject diff --git a/nmmo/core/action.py b/nmmo/core/action.py index 5a136be3e..f40d27d7a 100644 --- a/nmmo/core/action.py +++ b/nmmo/core/action.py @@ -1,14 +1,12 @@ -# CHECK ME: Should these be fixed as well? 
# pylint: disable=no-method-argument,unused-argument,no-self-argument,no-member - from enum import Enum, auto import numpy as np -from nmmo.core.observation import Observation from nmmo.lib import utils from nmmo.lib.utils import staticproperty from nmmo.systems.item import Stack -from nmmo.lib.log import EventCode +from nmmo.lib.event_code import EventCode +from nmmo.core.observation import Observation class NodeType(Enum): @@ -48,7 +46,7 @@ def leaf(): def N(cls, config): return len(cls.edges) - def deserialize(realm, entity, index, obs: Observation): + def deserialize(realm, entity, index: int, obs: Observation): return index class Fixed: @@ -76,7 +74,7 @@ def hook(config): action.init(config) for args in action.edges: # pylint: disable=not-an-iterable args.init(config) - if not 'edges' in args.__dict__: + if not "edges" in args.__dict__: continue for arg in args.edges: arguments.append(arg) @@ -92,7 +90,7 @@ def n(): # pylint: disable=invalid-overridden-method @classmethod def edges(cls, config): - '''List of valid actions''' + """List of valid actions""" edges = [Move] if config.COMBAT_SYSTEM_ENABLED: edges.append(Attack) @@ -124,12 +122,15 @@ def call(realm, entity, direction): realm.map.tiles[r_new, c_new].impassible: return - if entity.status.freeze > 0: + # ALLOW_MOVE_INTO_OCCUPIED_TILE only applies to players, NOT npcs + if entity.is_player and not realm.config.ALLOW_MOVE_INTO_OCCUPIED_TILE and \ + realm.map.tiles[r_new, c_new].occupied: return - entity.row.update(r_new) - entity.col.update(c_new) + if entity.status.freeze > 0: + return + entity.set_pos(r_new, c_new) realm.map.tiles[r, c].remove_entity(ent_id) realm.map.tiles[r_new, c_new].add_entity(entity) @@ -165,7 +166,7 @@ class Direction(Node): def edges(): return [North, South, East, West, Stay] - def deserialize(realm, entity, index, obs: Observation): + def deserialize(realm, entity, index: int, obs): return deserialize_fixed_arg(Direction, index) # a quick helper function @@ -243,8 +244,8 @@ def 
call(realm, entity, style, target): target.history.time_alive < immunity: return None - #Check if self targeted - if entity.ent_id == target.ent_id: + #Check if self targeted or target already dead + if entity.ent_id == target.ent_id or not target.alive: return None #Can't attack out of range @@ -253,17 +254,14 @@ def call(realm, entity, style, target): #Execute attack entity.history.attack = {} - entity.history.attack['target'] = target.ent_id - entity.history.attack['style'] = style.__name__ + entity.history.attack["target"] = target.ent_id + entity.history.attack["style"] = style.__name__ target.attacker = entity target.attacker_id.update(entity.ent_id) from nmmo.systems import combat dmg = combat.attack(realm, entity, target, style.skill) - if style.freeze and dmg > 0: - target.status.freeze.update(config.COMBAT_FREEZE_TIME) - # record the combat tick for both entities # players and npcs both have latest_combat_tick in EntityState for ent in [entity, target]: @@ -277,7 +275,7 @@ class Style(Node): def edges(): return [Melee, Range, Mage] - def deserialize(realm, entity, index, obs: Observation): + def deserialize(realm, entity, index: int, obs): return deserialize_fixed_arg(Style, index) class Target(Node): @@ -438,8 +436,11 @@ def call(realm, entity, item, target): if not (config.ITEM_ALLOW_GIFT and entity.ent_id != target.ent_id and # but not self - target.is_player and - entity.pos == target.pos): # the same tile + target.is_player): + return + + # NOTE: allow give within the visual range + if utils.linf_single(entity.pos, target.pos) > config.PLAYER_VISION_RADIUS: return if not target.inventory.space: @@ -486,8 +487,11 @@ def call(realm, entity, amount, target): if not (config.ITEM_ALLOW_GIFT and entity.ent_id != target.ent_id and # but not self - target.is_player and - entity.pos == target.pos): # the same tile + target.is_player): + return + + # NOTE: allow give within the visual range + if utils.linf_single(entity.pos, target.pos) > 
config.PLAYER_VISION_RADIUS: return if not isinstance(amount, int): @@ -511,7 +515,6 @@ def N(cls, config): def deserialize(realm, entity, index: int, obs: Observation): if index >= len(obs.market.ids): return None - return realm.items.get(obs.market.ids[index]) class Buy(Node): @@ -532,7 +535,7 @@ def call(realm, entity, item): assert entity.alive, "Dead entity cannot act" assert entity.is_player, "Npcs cannot buy an item" assert item.quantity.val > 0, "Item quantity cannot be 0" # indicates item leak - assert item.equipped.val == 0, 'Listed item must not be equipped' + assert item.equipped.val == 0, "Listed item must not be equipped" if not realm.config.EXCHANGE_SYSTEM_ENABLED: return @@ -601,8 +604,8 @@ def call(realm, entity, item, price): def init_discrete(values): classes = [] for i in values: - name = f'Discrete_{i}' - cls = type(name, (object,), {'val': i}) + name = f"Discrete_{i}" + cls = type(name, (object,), {"val": i}) classes.append(cls) return classes @@ -628,7 +631,7 @@ def index(cls, price): def edges(): return Price.classes - def deserialize(realm, entity, index, obs: Observation): + def deserialize(realm, entity, index: int, obs): return deserialize_fixed_arg(Price, index) class Token(Node): @@ -636,13 +639,13 @@ class Token(Node): @classmethod def init(cls, config): - Token.classes = init_discrete(range(config.COMMUNICATION_NUM_TOKENS)) + Token.classes = init_discrete(range(1, config.COMMUNICATION_NUM_TOKENS+1)) @staticproperty def edges(): return Token.classes - def deserialize(realm, entity, index, obs: Observation): + def deserialize(realm, entity, index: int, obs): return deserialize_fixed_arg(Token, index) class Comm(Node): diff --git a/nmmo/core/config.py b/nmmo/core/config.py index 08b60c15f..9940f4691 100644 --- a/nmmo/core/config.py +++ b/nmmo/core/config.py @@ -4,20 +4,36 @@ import os import sys import logging +import re import nmmo from nmmo.core.agent import Agent from nmmo.core.terrain import MapGenerator from nmmo.lib import utils, 
material, spawn +CONFIG_ATTR_PATTERN = r"^[A-Z_]+$" +GAME_SYSTEMS = ["TERRAIN", "RESOURCE", "COMBAT", "NPC", "PROGRESSION", "ITEM", + "EQUIPMENT", "PROFESSION", "EXCHANGE", "COMMUNICATION"] + +# These attributes are critical for trainer and must not change from the initial values +OBS_ATTRS = set(["MAX_HORIZON", "PLAYER_N", "MAP_N_OBS", "PLAYER_N_OBS", "TASK_EMBED_DIM", + "ITEM_INVENTORY_CAPACITY", "MARKET_N_OBS", "PRICE_N_OBS", + "COMMUNICATION_NUM_TOKENS", "COMMUNICATION_N_OBS", "PROVIDE_ACTION_TARGETS", + "PROVIDE_DEATH_FOG_OBS", "PROVIDE_NOOP_ACTION_TARGET"]) +IMMUTABLE_ATTRS = set(["USE_CYTHON", "CURRICULUM_FILE_PATH", "PLAYER_VISION_RADIUS", "MAP_SIZE", + "PLAYER_BASE_HEALTH", "RESOURCE_BASE", "PROGRESSION_LEVEL_MAX"]) + + class Template(metaclass=utils.StaticIterable): def __init__(self): - self.data = {} - cls = type(self) + self._data = {} + cls = type(self) - #Set defaults from static properties - for k, v in cls: - self.set(k, v) + # Set defaults from static properties + for attr in dir(cls): + val = getattr(cls, attr) + if re.match(CONFIG_ATTR_PATTERN, attr) and not isinstance(val, property): + self._data[attr] = val def override(self, **kwargs): for k, v in kwargs.items(): @@ -32,34 +48,35 @@ def set(self, k, v): except AttributeError: logging.error('Cannot set attribute: %s to %s', str(k), str(v)) sys.exit() - self.data[k] = v + self._data[k] = v # pylint: disable=bad-builtin def print(self): key_len = 0 - for k in self.data: + for k in self._data: key_len = max(key_len, len(k)) print('Configuration') - for k, v in self.data.items(): + for k, v in self._data.items(): print(f' {k:{key_len}s}: {v}') def items(self): - return self.data.items() + return self._data.items() def __iter__(self): - for k in self.data: + for k in self._data: yield k def keys(self): - return self.data.keys() + return self._data.keys() def values(self): - return self.data.values() + return self._data.values() def validate(config): err = 'config.Config is a base class. 
Use config.{Small, Medium Large}''' assert isinstance(config, Config), err + assert config.HORIZON < config.MAX_HORIZON, 'HORIZON must be <= MAX_HORIZON' if not config.TERRAIN_SYSTEM_ENABLED: err = 'Invalid Config: {} requires Terrain' @@ -78,56 +95,18 @@ def validate(config): class Config(Template): - '''An environment configuration object - - Global constants are defined as static class variables. You can override - any Config variable using standard CLI syntax (e.g. --NENT=128). - - The default config as of v1.5 uses 1024x1024 maps with up to 2048 agents - and 1024 NPCs. It is suitable to time horizons of 8192+ steps. For smaller - experiments, consider the SmallMaps config. - - Notes: - We use Google Fire internally to replace standard manual argparse - definitions for each Config property. This means you can subclass - Config to add new static attributes -- CLI definitions will be - generated automatically. - ''' + '''An environment configuration object''' + env_initialized = False def __init__(self): super().__init__() + self._attr_to_reset = [] # TODO: Come up with a better way # to resolve mixin MRO conflicts - if not hasattr(self, 'TERRAIN_SYSTEM_ENABLED'): - self.TERRAIN_SYSTEM_ENABLED = False - - if not hasattr(self, 'RESOURCE_SYSTEM_ENABLED'): - self.RESOURCE_SYSTEM_ENABLED = False - - if not hasattr(self, 'COMBAT_SYSTEM_ENABLED'): - self.COMBAT_SYSTEM_ENABLED = False - - if not hasattr(self, 'NPC_SYSTEM_ENABLED'): - self.NPC_SYSTEM_ENABLED = False - - if not hasattr(self, 'PROGRESSION_SYSTEM_ENABLED'): - self.PROGRESSION_SYSTEM_ENABLED = False - - if not hasattr(self, 'ITEM_SYSTEM_ENABLED'): - self.ITEM_SYSTEM_ENABLED = False - - if not hasattr(self, 'EQUIPMENT_SYSTEM_ENABLED'): - self.EQUIPMENT_SYSTEM_ENABLED = False - - if not hasattr(self, 'PROFESSION_SYSTEM_ENABLED'): - self.PROFESSION_SYSTEM_ENABLED = False - - if not hasattr(self, 'EXCHANGE_SYSTEM_ENABLED'): - self.EXCHANGE_SYSTEM_ENABLED = False - - if not hasattr(self, 
'COMMUNICATION_SYSTEM_ENABLED'): - self.COMMUNICATION_SYSTEM_ENABLED = False + for system in GAME_SYSTEMS: + if not hasattr(self, f'{system}_SYSTEM_ENABLED'): + self.set(f'{system}_SYSTEM_ENABLED', False) if __debug__: validate(self) @@ -138,24 +117,90 @@ def __init__(self): for attr in deprecated_attrs: assert not hasattr(self, attr), f'{attr} has been deprecated or renamed' + @property + def original(self): + return self._data - ############################################################################ - ### Meta-Parameters - def game_system_enabled(self, name) -> bool: - return hasattr(self, name) + def reset(self): + '''Reset all attributes changed during the episode''' + for attr in self._attr_to_reset: + setattr(self, attr, self.original[attr]) - PROVIDE_ACTION_TARGETS = True - '''Provide action targets mask''' + def set(self, k, v): + assert self.env_initialized is False, 'Cannot set config attr after env init' + super().set(k, v) + + def set_for_episode(self, k, v): + '''Set a config property for the current episode''' + assert hasattr(self, k), f'Invalid config property: {k}' + assert k not in OBS_ATTRS, f'Cannot change OBS config {k} during the episode' + assert k not in IMMUTABLE_ATTRS, f'Cannot change {k} during the episode' + # Cannot turn on a game system that was not enabled when the env was created + if k.endswith('_SYSTEM_ENABLED') and self._data[k] is False and v is True: + raise AssertionError(f'Cannot turn on {k} because it was not enabled during env init') + + # Change only the attribute and keep the original value in the data dict + setattr(self, k, v) + self._attr_to_reset.append(k) - PROVIDE_NOOP_ACTION_TARGET = True - '''Provide a no-op option for each action''' + @property + def enabled_systems(self): + '''Return a list of the enabled systems from Env.__init__()''' + return [k[:-len('_SYSTEM_ENABLED')] + for k, v in self._data.items() if k.endswith('_SYSTEM_ENABLED') and v is True] + + @property + def system_states(self): + '''Return a 
one-hot encoding of each system enabled/disabled, + which can be used as an observation and changed from episode to episode''' + return [int(getattr(self, f'{system}_SYSTEM_ENABLED')) for system in GAME_SYSTEMS] + + def are_systems_enabled(self, systems): # systems is a list of strings + '''Check if all provided systems are enabled''' + return all(s.upper() in self.enabled_systems for s in systems) + + def toggle_systems(self, target_systems): # systems is a list of strings + '''Activate only the provided game systems and turn off the others''' + target_systems = [s.upper() for s in target_systems] + for system in target_systems: + assert system in self.enabled_systems, f'Invalid game system: {system}' + self.set_for_episode(f'{system}_SYSTEM_ENABLED', True) + + for system in self.enabled_systems: + if system not in target_systems: + self.set_for_episode(f'{system}_SYSTEM_ENABLED', False) + ############################################################################ + ### Meta-Parameters PLAYERS = [Agent] '''Player classes from which to spawn''' + @property + def PLAYER_POLICIES(self): + '''Number of player policies''' + return len(self.PLAYERS) + + PLAYER_N = None + '''Maximum number of players spawnable in the environment''' + + @property + def POSSIBLE_AGENTS(self): + '''List of possible agents to spawn''' + return list(range(1, self.PLAYER_N + 1)) + + # TODO: CHECK if there could be 100+ entities within one's vision + PLAYER_N_OBS = 100 + '''Number of distinct agent observations''' + + MAX_HORIZON = 2**15 - 1 # this is arbitrary + '''Maximum number of steps the environment can run for''' + HORIZON = 1024 '''Number of steps before the environment resets''' + GAME_PACKS = None + '''List of game packs to load and sample: [(game class, sampling weight)]''' + CURRICULUM_FILE_PATH = None '''Path to a curriculum task file containing a list of task specs for training''' @@ -165,38 +210,31 @@ def game_system_enabled(self, name) -> bool: ALLOW_MULTI_TASKS_PER_AGENT = 
False '''Whether to allow multiple tasks per agent''' - ############################################################################ - ### Population Parameters - LOG_VERBOSE = False - '''Whether to log server messages or just stats''' - - LOG_ENV = False - '''Whether to log env steps (expensive)''' + PROVIDE_ACTION_TARGETS = True + '''Provide action targets mask''' - LOG_MILESTONES = True - '''Whether to log server-firsts (semi-expensive)''' + PROVIDE_NOOP_ACTION_TARGET = True + '''Provide a no-op option for each action''' - LOG_EVENTS = True - '''Whether to log events (semi-expensive)''' + PROVIDE_DEATH_FOG_OBS = False + '''Provide death fog observation''' - LOG_FILE = None - '''Where to write logs (defaults to console)''' + ALLOW_MOVE_INTO_OCCUPIED_TILE = True + '''Whether agents can move into tiles occupied by other agents/npcs + However, this does not apply to spawning''' ############################################################################ - ### Player Parameters - PLAYER_N = None - '''Maximum number of players spawnable in the environment''' + ### System/debug Parameters + USE_CYTHON = True + '''Whether to use cython modules for performance''' - # TODO: CHECK if there could be 100+ entities within one's vision - PLAYER_N_OBS = 100 - '''Number of distinct agent observations''' + IMMORTAL = False + '''Debug parameter: prevents agents from dying except by void''' - @property - def PLAYER_POLICIES(self): - '''Number of player policies''' - return len(self.PLAYERS) + ############################################################################ + ### Player Parameters PLAYER_BASE_HEALTH = 100 '''Initial agent health''' @@ -208,34 +246,27 @@ def PLAYER_VISION_DIAMETER(self): '''Size of the square tile crop visible to an agent''' return 2*self.PLAYER_VISION_RADIUS + 1 - PLAYER_DEATH_FOG = None + PLAYER_HEALTH_INCREMENT = 0 + '''The amount to increment health by 1 per tick for players, like npcs''' + + DEATH_FOG_ONSET = None '''How long before spawning death 
fog. None for no death fog''' - PLAYER_DEATH_FOG_SPEED = 1 + DEATH_FOG_SPEED = 1 '''Number of tiles per tick that the fog moves in''' - PLAYER_DEATH_FOG_FINAL_SIZE = 8 + DEATH_FOG_FINAL_SIZE = 8 '''Number of tiles from the center that the fog stops''' PLAYER_LOADER = spawn.SequentialLoader '''Agent loader class specifying spawn sampling''' - PLAYER_SPAWN_TEAMMATE_DISTANCE = 1 - '''Buffer tiles between teammates at spawn''' - - @property - def PLAYER_TEAM_SIZE(self): - if __debug__: - assert not self.PLAYER_N % len(self.PLAYERS) - return self.PLAYER_N // len(self.PLAYERS) ############################################################################ - ### Debug Parameters - IMMORTAL = False - '''Debug parameter: prevents agents from dying except by void''' + ### Team Parameters + TEAMS = None # Dict[Any, List[int]] + '''A dictionary of team assignments: key is team_id, value is a list of agent_ids''' - RESET_ON_DEATH = False - '''Debug parameter: whether to reset the environment whenever an agent dies''' ############################################################################ ### Map Parameters @@ -250,15 +281,16 @@ def MAP_N_OBS(self): '''Number of distinct tile observations''' return int(self.PLAYER_VISION_DIAMETER ** 2) - MAP_CENTER = None - '''Size of each map (number of tiles along each side)''' + MAP_SIZE = None + '''Size of the whole map, including the center and borders''' - MAP_BORDER = 16 - '''Number of void border tiles surrounding each side of the map''' + MAP_CENTER = None + '''Size of each map (number of tiles along each side), where agents can move around''' @property - def MAP_SIZE(self): - return int(self.MAP_CENTER + 2*self.MAP_BORDER) + def MAP_BORDER(self): + '''Number of background, void border tiles surrounding each side of the map''' + return int((self.MAP_SIZE - self.MAP_CENTER) // 2) MAP_GENERATOR = MapGenerator '''Specifies a user map generator. 
Uses default generator if unspecified.''' @@ -266,6 +298,9 @@ def MAP_SIZE(self): MAP_FORCE_GENERATION = True '''Whether to regenerate and overwrite existing maps''' + MAP_RESET_FROM_FRACTAL = True + '''Whether to regenerate the map from the fractal source''' + MAP_GENERATE_PREVIEWS = False '''Whether map generation should also save .png previews (slow + large file size)''' @@ -293,8 +328,8 @@ def MAP_SIZE(self): PATH_MAP_SUFFIX = 'map{}/map.npy' '''Map file name''' - PATH_MAP_SUFFIX = 'map{}/map.npy' - '''Map file name''' + PATH_FRACTAL_SUFFIX = 'map{}/fractal.npy' + '''Fractal file name''' ############################################################################ @@ -335,6 +370,16 @@ class Terrain: TERRAIN_FOILAGE = 0.85 '''Noise threshold for foilage (food tile)''' + TERRAIN_RESET_TO_GRASS = False + '''Whether to make all tiles grass. + Only works when MAP_RESET_FROM_FRACTAL is True''' + + TERRAIN_DISABLE_STONE = False + '''Disable stone (obstacle) tiles''' + + TERRAIN_SCATTER_EXTRA_RESOURCES = True + '''Whether to scatter extra food, water on the map. 
+ Only works when MAP_RESET_FROM_FRACTAL is True''' class Resource: '''Resource Game System''' @@ -378,6 +423,15 @@ class Resource: '''Fraction of health restored per tick when above half food+water''' +# NOTE: Included self to be picklable (in torch.save) since lambdas are not picklable +def original_combat_damage_formula(self, offense, defense, multiplier, minimum_proportion): + # pylint: disable=unused-argument + return int(multiplier * (offense * (15 / (15 + defense)))) + +def alt_combat_damage_formula(self, offense, defense, multiplier, minimum_proportion): + # pylint: disable=unused-argument + return int(max(multiplier * offense - defense, offense * minimum_proportion)) + class Combat: '''Combat Game System''' @@ -387,6 +441,9 @@ class Combat: COMBAT_SPAWN_IMMUNITY = 20 '''Agents older than this many ticks cannot attack agents younger than this many ticks''' + COMBAT_ALLOW_FLEXIBLE_STYLE = True + '''Whether to allow agents to attack with any style in a given turn''' + COMBAT_STATUS_DURATION = 3 '''Combat status lasts for this many ticks after the last combat event. 
Combat events include both attacking and being attacked.''' @@ -394,32 +451,35 @@ class Combat: COMBAT_WEAKNESS_MULTIPLIER = 1.5 '''Multiplier for super-effective attacks''' - def COMBAT_DAMAGE_FORMULA(self, offense, defense, multiplier): - '''Damage formula''' - return int(multiplier * (offense * (15 / (15 + defense)))) + COMBAT_MINIMUM_DAMAGE_PROPORTION = 0.25 + '''Minimum proportion of damage to inflict on a target''' - COMBAT_MELEE_DAMAGE = 30 + # NOTE: When using a custom function, include "self" as the first arg + COMBAT_DAMAGE_FORMULA = alt_combat_damage_formula + '''Damage formula''' + + COMBAT_MELEE_DAMAGE = 10 '''Melee attack damage''' COMBAT_MELEE_REACH = 3 '''Reach of attacks using the Melee skill''' - COMBAT_RANGE_DAMAGE = 30 + COMBAT_RANGE_DAMAGE = 10 '''Range attack damage''' COMBAT_RANGE_REACH = 3 '''Reach of attacks using the Range skill''' - COMBAT_MAGE_DAMAGE = 30 + COMBAT_MAGE_DAMAGE = 10 '''Mage attack damage''' COMBAT_MAGE_REACH = 3 '''Reach of attacks using the Mage skill''' -def default_exp_threshold(max_level): +def default_exp_threshold(base_exp, max_level): import math - additional_exp_per_level = [round(90*math.sqrt(lvl)) + additional_exp_per_level = [round(base_exp * math.sqrt(lvl)) for lvl in range(1, max_level+1)] return [sum(additional_exp_per_level[:lvl]) for lvl in range(max_level)] @@ -435,10 +495,10 @@ class Progression: PROGRESSION_LEVEL_MAX = 10 '''Max skill level''' - PROGRESSION_EXP_THRESHOLD = default_exp_threshold(PROGRESSION_LEVEL_MAX) + PROGRESSION_EXP_THRESHOLD = default_exp_threshold(90, PROGRESSION_LEVEL_MAX) '''A list of experience thresholds for each level''' - PROGRESSION_COMBAT_XP_SCALE = 3 + PROGRESSION_COMBAT_XP_SCALE = 6 '''Additional XP for each attack for skills Melee, Range, and Mage''' PROGRESSION_AMMUNITION_XP_SCALE = 15 @@ -447,19 +507,19 @@ class Progression: PROGRESSION_CONSUMABLE_XP_SCALE = 30 '''Multiplier XP for each harvest for Fishing and Herbalism''' - PROGRESSION_MELEE_BASE_DAMAGE = 20 + 
PROGRESSION_MELEE_BASE_DAMAGE = 10 '''Base Melee attack damage''' PROGRESSION_MELEE_LEVEL_DAMAGE = 5 '''Bonus Melee attack damage per level''' - PROGRESSION_RANGE_BASE_DAMAGE = 20 + PROGRESSION_RANGE_BASE_DAMAGE = 10 '''Base Range attack damage''' PROGRESSION_RANGE_LEVEL_DAMAGE = 5 '''Bonus Range attack damage per level''' - PROGRESSION_MAGE_BASE_DAMAGE = 20 + PROGRESSION_MAGE_BASE_DAMAGE = 10 '''Base Mage attack damage ''' PROGRESSION_MAGE_LEVEL_DAMAGE = 5 @@ -481,6 +541,9 @@ class NPC: NPC_N = None '''Maximum number of NPCs spawnable in the environment''' + NPC_DEFAULT_REFILL_DEAD_NPCS = True + '''Whether to refill dead NPCs''' + NPC_SPAWN_ATTEMPTS = 25 '''Number of NPC spawn attempts per tick''' @@ -502,15 +565,21 @@ class NPC: NPC_BASE_DEFENSE = 0 '''Base NPC defense''' - NPC_LEVEL_DEFENSE = 15 + NPC_LEVEL_DEFENSE = 8 '''Bonus NPC defense per level''' - NPC_BASE_DAMAGE = 15 + NPC_BASE_DAMAGE = 0 '''Base NPC damage''' - NPC_LEVEL_DAMAGE = 15 + NPC_LEVEL_DAMAGE = 8 '''Bonus NPC damage per level''' + NPC_LEVEL_MULTIPLIER = 1.0 + '''Multiplier for NPC level damage and defense, for easier difficulty tuning''' + + NPC_ALLOW_ATTACK_OTHER_NPCS = False + '''Whether NPCs can attack other NPCs''' + class Item: '''Inventory Game System''' @@ -542,19 +611,19 @@ class Equipment: WEAPON_DROP_PROB = 0.025 '''Chance of getting a weapon while harvesting ammunition''' - EQUIPMENT_WEAPON_BASE_DAMAGE = 15 + EQUIPMENT_WEAPON_BASE_DAMAGE = 5 '''Base weapon damage''' - EQUIPMENT_WEAPON_LEVEL_DAMAGE = 15 + EQUIPMENT_WEAPON_LEVEL_DAMAGE = 5 '''Added weapon damage per level''' - EQUIPMENT_AMMUNITION_BASE_DAMAGE = 15 + EQUIPMENT_AMMUNITION_BASE_DAMAGE = 5 '''Base ammunition damage''' - EQUIPMENT_AMMUNITION_LEVEL_DAMAGE = 15 + EQUIPMENT_AMMUNITION_LEVEL_DAMAGE = 10 '''Added ammunition damage per level''' - EQUIPMENT_TOOL_BASE_DEFENSE = 30 + EQUIPMENT_TOOL_BASE_DEFENSE = 15 '''Base tool defense''' EQUIPMENT_TOOL_LEVEL_DEFENSE = 0 @@ -563,7 +632,7 @@ class Equipment: 
EQUIPMENT_ARMOR_BASE_DEFENSE = 0 '''Base armor defense''' - EQUIPMENT_ARMOR_LEVEL_DEFENSE = 10 + EQUIPMENT_ARMOR_LEVEL_DEFENSE = 3 '''Base equipment defense''' @@ -603,8 +672,8 @@ class Profession: PROFESSION_FISH_RESPAWN = 0.02 '''Probability that a harvested fish tile will regenerate each tick''' - @staticmethod - def PROFESSION_CONSUMABLE_RESTORE(level): + def PROFESSION_CONSUMABLE_RESTORE(self, level): + '''Amount of food/water restored by consuming a consumable item''' return 50 + 5*level @@ -617,13 +686,13 @@ class Exchange: EXCHANGE_BASE_GOLD = 1 '''Initial gold amount''' - EXCHANGE_LISTING_DURATION = 5 + EXCHANGE_LISTING_DURATION = 3 '''The number of ticks, during which the item is listed for sale''' - MARKET_N_OBS = 1024 + MARKET_N_OBS = 384 # this should be proportion to PLAYER_N '''Number of distinct item observations''' - PRICE_N_OBS = 99 # make it different from PLAYER_N_OBS + PRICE_N_OBS = 99 # make it different from PLAYER_N_OBS '''Number of distinct price observations This also determines the maximum price one can set for an item ''' @@ -632,11 +701,13 @@ class Exchange: class Communication: '''Exchange Game System''' - COMMUNICATION_SYSTEM_ENABLED = True + COMMUNICATION_SYSTEM_ENABLED = True '''Game system flag''' - # CHECK ME: When do we actually use this? - COMMUNICATION_NUM_TOKENS = 50 + COMMUNICATION_N_OBS = 32 + '''Number of players that share the same communication obs, i.e. 
the same team''' + + COMMUNICATION_NUM_TOKENS = 127 '''Number of distinct COMM tokens''' @@ -656,6 +727,7 @@ class Small(Config): PLAYER_N = 64 MAP_PREVIEW_DOWNSCALE = 4 + MAP_SIZE = 64 MAP_CENTER = 32 TERRAIN_LOG_INTERPOLATE_MIN = 0 @@ -678,6 +750,7 @@ class Medium(Config): PLAYER_N = 128 MAP_PREVIEW_DOWNSCALE = 16 + MAP_SIZE = 160 MAP_CENTER = 128 NPC_N = 128 @@ -698,6 +771,7 @@ class Large(Config): PLAYER_N = 1024 MAP_PREVIEW_DOWNSCALE = 64 + MAP_SIZE = 1056 MAP_CENTER = 1024 NPC_N = 1024 diff --git a/nmmo/core/env.py b/nmmo/core/env.py index db0340335..a876a01a3 100644 --- a/nmmo/core/env.py +++ b/nmmo/core/env.py @@ -1,21 +1,22 @@ +import os import functools from typing import Any, Dict, List, Callable from collections import defaultdict -from copy import copy, deepcopy -import dill +from copy import deepcopy -import gym +import gymnasium as gym +import dill import numpy as np from pettingzoo.utils.env import AgentID, ParallelEnv import nmmo from nmmo.core import realm +from nmmo.core import game_api from nmmo.core.config import Default from nmmo.core.observation import Observation from nmmo.core.tile import Tile from nmmo.entity.entity import Entity from nmmo.systems.item import Item -from nmmo.task import task_api, task_spec from nmmo.task.game_state import GameStateGenerator from nmmo.lib import seeding @@ -26,6 +27,13 @@ class Env(ParallelEnv): def __init__(self, config: Default = nmmo.config.Default(), seed = None): + '''Initializes the Neural MMO environment. + + Args: + config (Default, optional): Configuration object for the environment. + Defaults to nmmo.config.Default(). + seed (int, optional): Random seed for the environment. Defaults to None. 
+ ''' self._np_random = None self._np_seed = None self._reset_required = True @@ -33,31 +41,46 @@ def __init__(self, super().__init__() self.config = config + self.config.env_initialized = True + + # Generate maps if they do not exist + config.MAP_GENERATOR(config).generate_all_maps(self._np_seed) self.realm = realm.Realm(config, self._np_random) - self.obs = None - self._dummy_obs = None + self.tile_map = None + self.tile_obs_shape = None - self.possible_agents = list(range(1, config.PLAYER_N + 1)) - self._agents = None - self._dead_agents = set() + self.possible_agents = self.config.POSSIBLE_AGENTS + self._alive_agents = None + self._current_agents = None self._dead_this_tick = None self.scripted_agents = set() + self.obs = {agent_id: Observation(self.config, agent_id) + for agent_id in self.possible_agents} + self._dummy_task_embedding = np.zeros(self.config.TASK_EMBED_DIM, dtype=np.float16) + self._dummy_obs = Observation(self.config, 0).empty_obs + self._comm_obs = {} + self._gamestate_generator = GameStateGenerator(self.realm, self.config) self.game_state = None - # Default task: rewards 1 each turn agent is alive - self.tasks = task_api.nmmo_default_task(self.possible_agents) - self.agent_task_map = None - self._dummy_task_embedding = np.zeros(self.config.TASK_EMBED_DIM, dtype=np.float16) + self.tasks = None + self.agent_task_map = {} # curriculum file path, if provided, should exist self.curriculum_file_path = config.CURRICULUM_FILE_PATH if self.curriculum_file_path is not None: # try to open the file to check if it exists with open(self.curriculum_file_path, 'rb') as f: - curriculum = dill.load(f) # pylint: disable=unused-variable + dill.load(f) f.close() + self.game = None + # NOTE: The default game runs with the full provided config and unmodded realm.reset() + self.default_game = game_api.DefaultGame(self) + self.game_packs: List[game_api.Game] = None + if config.GAME_PACKS: # assume List[Tuple(class, weight)] + self.game_packs = [game_cls(self, weight) 
for game_cls, weight in config.GAME_PACKS] + @functools.cached_property def _obs_space(self): def box(rows, cols): @@ -68,23 +91,31 @@ def box(rows, cols): def mask_box(length): return gym.spaces.Box(low=0, high=1, shape=(length,), dtype=np.int8) + # NOTE: obs space-related config attributes must NOT be changed after init + num_tile_attributes = len(Tile.State.attr_name_to_col) + num_tile_attributes += 1 if self.config.original["PROVIDE_DEATH_FOG_OBS"] else 0 obs_space = { - "CurrentTick": gym.spaces.Discrete(self.config.HORIZON+1), + "CurrentTick": gym.spaces.Discrete(self.config.MAX_HORIZON), "AgentId": gym.spaces.Discrete(self.config.PLAYER_N+1), - "Tile": box(self.config.MAP_N_OBS, Tile.State.num_attributes), + "Tile": box(self.config.MAP_N_OBS, num_tile_attributes), "Entity": box(self.config.PLAYER_N_OBS, Entity.State.num_attributes), "Task": gym.spaces.Box(low=-2**15, high=2**15-1, shape=(self.config.TASK_EMBED_DIM,), dtype=np.float16), } - if self.config.ITEM_SYSTEM_ENABLED: + # NOTE: cannot turn on a game system that was not enabled during env init + if self.config.original["ITEM_SYSTEM_ENABLED"]: obs_space["Inventory"] = box(self.config.INVENTORY_N_OBS, Item.State.num_attributes) - if self.config.EXCHANGE_SYSTEM_ENABLED: + if self.config.original["EXCHANGE_SYSTEM_ENABLED"]: obs_space["Market"] = box(self.config.MARKET_N_OBS, Item.State.num_attributes) - if self.config.PROVIDE_ACTION_TARGETS: + if self.config.original["COMMUNICATION_SYSTEM_ENABLED"]: + # Comm obs cols: id, row, col, message + obs_space["Communication"] = box(self.config.COMMUNICATION_N_OBS, 4) + + if self.config.original["PROVIDE_ACTION_TARGETS"]: mask_spec = deepcopy(self._atn_space) for atn_str in mask_spec: for arg_str in mask_spec[atn_str]: @@ -98,14 +129,15 @@ def mask_box(length): def observation_space(self, agent: AgentID): '''Neural MMO Observation Space - Args: - agent: Agent ID - - Returns: - observation: gym.spaces object contained the structured observation - for the specified 
agent.''' + Args: + agent (AgentID): The ID of the agent. + + Returns: + gym.spaces.Dict: The observation space for the agent. + ''' return self._obs_space + # NOTE: make sure this runs once during trainer init and does NOT change afterwards @functools.cached_property def _atn_space(self): actions = {} @@ -133,68 +165,109 @@ def _str_atn_map(self): def action_space(self, agent: AgentID): '''Neural MMO Action Space - Args: - agent: Agent ID - - Returns: - actions: gym.spaces object contained the structured actions - for the specified agent. Each action is parameterized by a list - of discrete-valued arguments. These consist of both fixed, k-way - choices (such as movement direction) and selections from the - observation space (such as targeting)''' + Args: + agent (AgentID): The ID of the agent. + + Returns: + gym.spaces.Dict: The action space for the agent. + ''' return self._atn_space ############################################################################ # Core API - # TODO: This doesn't conform to the PettingZoo API - # pylint: disable=arguments-renamed - def reset(self, map_id=None, seed=None, options=None, - make_task_fn: Callable=None): - '''OpenAI Gym API reset function - - Loads a new game map and returns initial observations - - Args: - map_id: Map index to load. Selects a random map by default - seed: random seed to use - make_task_fn: A function to make tasks - - Returns: - observations, as documented by _compute_observations() - - Notes: - Neural MMO simulates a persistent world. Ideally, you should reset - the environment only once, upon creation. In practice, this approach - limits the number of parallel environment simulations to the number - of CPU cores available. 
At small and medium hardware scale, we - therefore recommend the standard approach of resetting after a long - but finite horizon: ~1000 timesteps for small maps and - 5000+ timesteps for large maps + def reset(self, seed=None, options=None, # PettingZoo API args + map_id=None, + make_task_fn: Callable=None, + game: game_api.Game=None): + '''Resets the environment and returns the initial observations. + + Args: + seed (int, optional): Random seed for the environment. Defaults to None. + options (dict, optional): Additional options for resetting the environment. + Defaults to None. + map_id (int, optional): The ID of the map to load. Defaults to None. + make_task_fn (callable, optional): Function to create tasks. Defaults to None. + game (Game, optional): The game to be played. Defaults to None. + + Returns: + tuple: A tuple containing: + - obs (dict): Dictionary mapping agent IDs to their initial observations. + - info (dict): Dictionary containing additional information. ''' + # If options are provided, override the kwargs + if options is not None: + map_id = options.get('map_id', None) or map_id + make_task_fn = options.get('make_task_fn', None) or make_task_fn + game = options.get('game', None) or game + self.seed(seed) - self.realm.reset(self._np_random, map_id) - self._agents = list(self.realm.players.keys()) - self._dead_agents = set() + map_dict = self._load_map_file(map_id) + + # Choose and reset the game, realm, and tasks + if make_task_fn is not None: + # Use the provided tasks with the default game (full config, unmodded realm) + self.tasks = make_task_fn() + self.game = self.default_game + self.game.reset(self._np_random, map_dict, self.tasks) # also does realm.reset() + elif game is not None: + # Use the provided game, which comes with its own tasks + self.game = game + self.game.reset(self._np_random, map_dict) + self.tasks = self.game.tasks + elif self.curriculum_file_path is not None or self.game_packs is not None: + # Assume training -- pick a 
random game from the game packs + self.game = self.default_game + if self.game_packs: + weights = [game.sampling_weight for game in self.game_packs] + self.game = self._np_random.choice(self.game_packs, p=weights/np.sum(weights)) + self.game.reset(self._np_random, map_dict) + # use the sampled tasks from self.game + self.tasks = self.game.tasks + else: + # Just reset the same game and tasks as before + self.game = self.default_game # full config, unmodded realm + self.game.reset(self._np_random, map_dict, self.tasks) # use existing tasks + if self.tasks is None: + self.tasks = self.game.tasks + else: + for task in self.tasks: + task.reset() + + # Reset the agent vars + self._alive_agents = self.possible_agents self._dead_this_tick = {} + self._map_task_to_agent() + self._current_agents = self.possible_agents # tracking alive + dead_this_tick - # check if there are scripted agents + # Check scripted agents + self.scripted_agents.clear() for eid, ent in self.realm.players.items(): if isinstance(ent.agent, nmmo.Scripted): self.scripted_agents.add(eid) ent.agent.set_rng(self._np_random) - if self.curriculum_file_path is not None: - self.tasks = self._sample_training_tasks() - elif make_task_fn is not None: - self.tasks = make_task_fn() - else: - for task in self.tasks: - task.reset() - self.agent_task_map = self._map_task_to_agent() - - self._dummy_obs = self._make_dummy_obs() - self.obs = self._compute_observations() + # Tile map placeholder, to reduce redundant obs computation + self.tile_map = Tile.Query.get_map(self.realm.datastore, self.config.MAP_SIZE) + if self.config.PROVIDE_DEATH_FOG_OBS: + fog_map = np.round(self.realm.fog_map)[:,:,np.newaxis].astype(np.int16) + self.tile_map = np.concatenate((self.tile_map, fog_map), axis=-1) + self.tile_obs_shape = (self.config.PLAYER_VISION_DIAMETER**2, self.tile_map.shape[-1]) + + # Reset the obs, game state generator + infos = {} + for agent_id in self.possible_agents: + # NOTE: the tasks for each agent is in 
self.agent_task_map, and task embeddings are + # available in each task instance, via task.embedding + # For now, each agent is assigned to a single task, so we just use the first task + # TODO: can the embeddings of multiple tasks be superposed while preserving the + # task-specific information? This needs research + task_embedding = self._dummy_task_embedding + if agent_id in self.agent_task_map: + task_embedding = self.agent_task_map[agent_id][0].embedding + infos[agent_id] = {"task": self.agent_task_map[agent_id][0].name} + self.obs[agent_id].reset(self.realm.map.habitable_tiles, task_embedding) + self._compute_observations() self._gamestate_generator = GameStateGenerator(self.realm, self.config) if self.game_state is not None: self.game_state.clear_cache() @@ -202,132 +275,56 @@ def reset(self, map_id=None, seed=None, options=None, self._reset_required = False - return {a: o.to_gym() for a,o in self.obs.items()} - - def _sample_training_tasks(self): - with open(self.curriculum_file_path, 'rb') as f: - # curriculum file may have been changed, so read the file when sampling - curriculum = dill.load(f) # a list of TaskSpec - - sampling_weights = [spec.sampling_weight for spec in curriculum] - sampled_spec = self._np_random.choice(curriculum, size=len(self.possible_agents), - p=sampling_weights/np.sum(sampling_weights)) - - return task_spec.make_task_from_spec(self.possible_agents, sampled_spec) + return {a: o.to_gym() for a,o in self.obs.items()}, infos + + def _load_map_file(self, map_id: int=None): + '''Loads a map file, which is a 2D numpy array''' + map_dict= {} + map_id = map_id or self._np_random.integers(self.config.MAP_N) + 1 + map_file_path = os.path.join(self.config.PATH_CWD, self.config.PATH_MAPS, + self.config.PATH_MAP_SUFFIX.format(map_id)) + map_dict["map"] = np.load(map_file_path) + if self.config.MAP_RESET_FROM_FRACTAL: + fractal_file_path = os.path.join(self.config.PATH_CWD, self.config.PATH_MAPS, + self.config.PATH_FRACTAL_SUFFIX.format(map_id)) 
+ map_dict["fractal"] = np.load(fractal_file_path).astype(float) + return map_dict def _map_task_to_agent(self): - agent_task_map: Dict[int, List[task_api.Task]] = {} + self.agent_task_map.clear() + for agent_id in self.agents: + self.realm.players[agent_id].my_task = None for task in self.tasks: if task.embedding is None: task.set_embedding(self._dummy_task_embedding) - # validate task embedding - assert self._obs_space['Task'].contains(task.embedding), "Task embedding is not valid" - # map task to agents for agent_id in task.assignee: - if agent_id in agent_task_map: - agent_task_map[agent_id].append(task) + if agent_id in self.agent_task_map: + self.agent_task_map[agent_id].append(task) else: - agent_task_map[agent_id] = [task] + self.agent_task_map[agent_id] = [task] # for now we only support one task per agent if self.config.ALLOW_MULTI_TASKS_PER_AGENT is False: - for agent_tasks in agent_task_map.values(): + for agent_id, agent_tasks in self.agent_task_map.items(): assert len(agent_tasks) == 1, "Only one task per agent is supported" - - return agent_task_map + self.realm.players[agent_id].my_task = agent_tasks[0] def step(self, actions: Dict[int, Dict[str, Dict[str, Any]]]): - '''Simulates one game tick or timestep - - Args: - actions: A dictionary of agent decisions of format:: - - { - agent_1: { - action_1: [arg_1, arg_2], - action_2: [...], - ... - }, - agent_2: { - ... - }, - ... - } - - Where agent_i is the integer index of the i\'th agent - - The environment only evaluates provided actions for provided - gents. Unprovided action types are interpreted as no-ops and - illegal actions are ignored - - It is also possible to specify invalid combinations of valid - actions, such as two movements or two attacks. In this case, - one will be selected arbitrarily from each incompatible sets. - - A well-formed algorithm should do none of the above. We only - Perform this conditional processing to make batched action - computation easier. 
- - Returns: - (dict, dict, dict, None): - - observations: - A dictionary of agent observations of format:: - - { - agent_1: obs_1, - agent_2: obs_2, - ... - } - - Where agent_i is the integer index of the i\'th agent and - obs_i is specified by the observation_space function. - - rewards: - A dictionary of agent rewards of format:: - - { - agent_1: reward_1, - agent_2: reward_2, - ... - } - - Where agent_i is the integer index of the i\'th agent and - reward_i is the reward of the i\'th' agent. - - By default, agents receive -1 reward for dying and 0 reward for - all other circumstances. Override Env.reward to specify - custom reward functions - - dones: - A dictionary of agent done booleans of format:: - - { - agent_1: done_1, - agent_2: done_2, - ... - } - - Where agent_i is the integer index of the i\'th agent and - done_i is a boolean denoting whether the i\'th agent has died. - - Note that obs_i will be a garbage placeholder if done_i is true. - This is provided only for conformity with PettingZoo. Your - algorithm should not attempt to leverage observations outside of - trajectory bounds. You can omit garbage obs_i values by setting - omitDead=True. - - infos: - A dictionary of agent infos of format: - - { - agent_1: None, - agent_2: None, - ... - } - - Provided for conformity with PettingZoo + '''Performs one step in the environment given the provided actions. + + Args: + actions (dict): Dictionary mapping agent IDs to their actions. + + Returns: + tuple: A tuple containing: + - obs (dict): Dictionary mapping agent IDs to their new observations. + - rewards (dict): Dictionary mapping agent IDs to their rewards. + - terminated (dict): Dictionary mapping agent IDs to whether they reached + a terminal state. + - truncated (dict): Dictionary mapping agent IDs to whether the episode was + truncated (e.g. reached maximum number of steps). + - infos (dict): Dictionary containing additional information. 
''' assert not self._reset_required, 'step() called before reset' # Add in scripted agents' actions, if any @@ -338,28 +335,45 @@ def step(self, actions: Dict[int, Dict[str, Dict[str, Any]]]): # we don't need _deserialize_scripted_actions() anymore actions = self._validate_actions(actions) # Execute actions - self._dead_this_tick = self.realm.step(actions) - # the list of "current" agents, both alive and dead_this_tick - self._agents = list(set(list(self.realm.players.keys()) + list(self._dead_this_tick.keys()))) + self._dead_this_tick, dead_npcs = self.realm.step(actions) + self._alive_agents = list(self.realm.players.keys()) + self._current_agents = list(set(self._alive_agents + list(self._dead_this_tick.keys()))) + + terminated = {} + for agent_id in self._current_agents: + if agent_id in self._dead_this_tick: + # NOTE: Even though players can be resurrected, the time of death must be marked. + terminated[agent_id] = True + else: + terminated[agent_id] = False - dones = {} - for agent_id in self.agents: - if agent_id in self._dead_this_tick or \ - self.realm.tick >= self.config.HORIZON or \ - (self.config.RESET_ON_DEATH and len(self._dead_agents) > 0): - self._dead_agents.add(agent_id) - dones[agent_id] = True + if self.realm.tick >= self.config.HORIZON: + self._alive_agents = [] # pettingzoo requires agents to be empty + + # Update the game stats, determine winners, etc. 
+ # Also, resurrect dead agents and/or spawn new npcs if the game allows it + self.game.update(terminated, self._dead_this_tick, dead_npcs) + + # Some games do additional player cull during update(), so process truncated here + truncated = {} + for agent_id in self._current_agents: + if self.realm.tick >= self.config.HORIZON: + truncated[agent_id] = agent_id in self.realm.players else: - dones[agent_id] = False + truncated[agent_id] = False # Store the observations, since actions reference them - self.obs = self._compute_observations() - gym_obs = {a: o.to_gym() for a,o in self.obs.items()} + self._compute_observations() + gym_obs = {a: self.obs[a].to_gym() for a in self._current_agents} rewards, infos = self._compute_rewards() # NOTE: all obs, rewards, dones, infos have data for each agent in self.agents - return gym_obs, rewards, dones, infos + return gym_obs, rewards, terminated, truncated, infos + + @property + def dead_this_tick(self): + return self._dead_this_tick def _validate_actions(self, actions: Dict[int, Dict[str, Dict[str, Any]]]): '''Deserialize action arg values and validate actions @@ -382,10 +396,16 @@ def _validate_actions(self, actions: Dict[int, Dict[str, Dict[str, Any]]]): for atn_key, args in sorted(atns.items()): action_valid = True deserialized_action = {} + + # If action/system is not enabled, it's not in self._str_atn_map + if isinstance(atn_key, str) and atn_key not in self._str_atn_map: + action_valid = False + continue + atn = self._str_atn_map[atn_key] if isinstance(atn_key, str) else atn_key - if not atn.enabled(self.config): + if not atn.enabled(self.config): # This can change from episode to episode action_valid = False - break + continue for arg_key, val in sorted(args.items()): arg = self._str_atn_map[arg_key] if isinstance(arg_key, str) else arg_key @@ -415,82 +435,50 @@ def _compute_scripted_agent_actions(self, actions: Dict[int, Dict[str, Dict[str, return actions - def _make_dummy_obs(self): - dummy_tiles = np.zeros((0, 
len(Tile.State.attr_name_to_col)), dtype=np.int16) - dummy_entities = np.zeros((0, len(Entity.State.attr_name_to_col)), dtype=np.int16) - dummy_inventory = np.zeros((0, len(Item.State.attr_name_to_col)), dtype=np.int16) - dummy_market = np.zeros((0, len(Item.State.attr_name_to_col)), dtype=np.int16) - return Observation(self.config, self.realm.tick, 0, self._dummy_task_embedding, - dummy_tiles, dummy_entities, dummy_inventory, dummy_market) - def _compute_observations(self): - # Clean up unnecessary observations, which cause memory leaks - if self.obs is not None: - for agent_id, agent_obs in self.obs.items(): - agent_obs.clear_cache() # clear the lru_cache - self.obs[agent_id] = None - del agent_obs - self.obs = None - - obs = {} - market = Item.Query.for_sale(self.realm.datastore) - - # get tile map, to bypass the expensive tile window query - tile_map = Tile.Query.get_map(self.realm.datastore, self.config.MAP_SIZE) radius = self.config.PLAYER_VISION_RADIUS - tile_obs_size = ((2*radius+1)**2, len(Tile.State.attr_name_to_col)) + market = Item.Query.for_sale(self.realm.datastore) \ + if self.config.EXCHANGE_SYSTEM_ENABLED else None + self._update_comm_obs() + if self.config.PROVIDE_DEATH_FOG_OBS: + self.tile_map[:, :, -1] = np.round(self.realm.fog_map) - for agent_id in self.agents: + for agent_id in self._current_agents: if agent_id not in self.realm.players: - # return dummy obs for the agents in dead_this_tick - dummy_obs = copy(self._dummy_obs) - dummy_obs.current_tick = self.realm.tick - dummy_obs.agent_id = agent_id - obs[agent_id] = dummy_obs + self.obs[agent_id].set_agent_dead() else: - agent = self.realm.players.get(agent_id) - agent_r = agent.row.val - agent_c = agent.col.val - - visible_entities = Entity.Query.window( - self.realm.datastore, - agent_r, agent_c, - radius - ) - visible_tiles = tile_map[agent_r-radius:agent_r+radius+1, - agent_c-radius:agent_c+radius+1,:].reshape(tile_obs_size) - - inventory = Item.Query.owned_by(self.realm.datastore, 
agent_id) - - # NOTE: the tasks for each agent is in self.agent_task_map, and task embeddings are - # available in each task instance, via task.embedding - # For now, each agent is assigned to a single task, so we just use the first task - # TODO: can the embeddings of multiple tasks be superposed while preserving the - # task-specific information? This needs research - task_embedding = self._dummy_task_embedding - if agent_id in self.agent_task_map: - task_embedding = self.agent_task_map[agent_id][0].embedding # NOTE: first task only - obs[agent_id] = Observation(self.config, self.realm.tick, agent_id, task_embedding, - visible_tiles, visible_entities, inventory, market) - return obs + r, c = self.realm.players.get(agent_id).pos + visible_entities = Entity.Query.window(self.realm.datastore, r, c, radius) + visible_tiles = self.tile_map[r-radius:r+radius+1, + c-radius:c+radius+1, :].reshape(self.tile_obs_shape) + inventory = Item.Query.owned_by(self.realm.datastore, agent_id) \ + if self.config.ITEM_SYSTEM_ENABLED else None + comm_obs = self._comm_obs[agent_id] \ + if self.config.COMMUNICATION_SYSTEM_ENABLED else None + self.obs[agent_id].update(self.realm.tick, visible_tiles, visible_entities, + inventory=inventory, market=market, comm=comm_obs) + + def _update_comm_obs(self): + if not self.config.COMMUNICATION_SYSTEM_ENABLED: + return + comm_obs = Entity.Query.comm_obs(self.realm.datastore) + agent_ids = comm_obs[:, Entity.State.attr_name_to_col['id']] + self._comm_obs.clear() + for agent_id in self.realm.players: + if agent_id not in self._comm_obs: + my_team = [agent_id] if agent_id not in self.agent_task_map \ + else self.agent_task_map[agent_id][0].assignee # NOTE: first task only + team_obs = [comm_obs[agent_ids == eid] for eid in my_team] + if len(team_obs) == 1: + team_obs = team_obs[0] + else: + team_obs = np.concatenate(team_obs, axis=0) + for eid in my_team: + self._comm_obs[eid] = team_obs def _compute_rewards(self): - '''Computes the reward for the 
specified agent - - Override this method to create custom reward functions. You have full - access to the environment state via self.realm. Our baselines do not - modify this method; specify any changes when comparing to baselines - - Args: - player: player object - - Returns: - reward: - The reward for the actions on the previous timestep of the - entity identified by ent_id. - ''' # Initialization - agents = set(self.agents) + agents = set(self._current_agents) infos = {agent_id: {'task': {}} for agent_id in agents} rewards = defaultdict(int) @@ -511,9 +499,17 @@ def _compute_rewards(self): else: task.close() # To prevent memory leak - # Make sure the dead agents return the rewards of -1 - for agent_id in self._dead_this_tick: - rewards[agent_id] = -1 + # Reward for frozen agents (recon, resurrected, frozen) is 0 because they cannot act + for agent_id, agent in self.realm.players.items(): + if agent.status.frozen: + rewards[agent_id] = 0 + + # Reward for dead agents is defined by the game + # NOTE: Resurrected agents are frozen and in the realm.players, so run through + # self._dead_this_tick to give out the dead reward + if self.game.assign_dead_reward: + for agent_id in self._dead_this_tick: + rewards[agent_id] = -1 return rewards, infos @@ -526,9 +522,8 @@ def render(self, mode='human'): @property def agents(self) -> List[AgentID]: - '''For conformity with the PettingZoo API''' - # returns the list of "current" agents, both alive and dead_this_tick - return self._agents + '''For conformity with the PettingZoo API; returning only the alive agents''' + return self._alive_agents def close(self): '''For conformity with the PettingZoo API only; rendering is external''' diff --git a/nmmo/core/game_api.py b/nmmo/core/game_api.py new file mode 100644 index 000000000..48f88bea4 --- /dev/null +++ b/nmmo/core/game_api.py @@ -0,0 +1,353 @@ +# pylint: disable=no-member,bare-except +from abc import ABC, abstractmethod +from typing import Dict, List +import dill +import numpy 
as np + +from nmmo.task import task_api, task_spec, base_predicates +from nmmo.lib import team_helper, utils + +GAME_MODE = ["agent_training", "team_training", "team_battle"] + + +class Game(ABC): + game_mode = None + + def __init__(self, env, sampling_weight=None): + self.config = env.config + self.realm = env.realm + self._np_random = env._np_random + self.sampling_weight = sampling_weight or 1.0 + self.tasks = None + self.assign_dead_reward = True + self._next_tasks = None + self._agent_stats = {} + self._winners = None + self._game_done = False + self.history: List[Dict] = [] + assert self.is_compatible(), "Game is not compatible with the config" + + @abstractmethod + def is_compatible(self): + """Check if the game is compatible with the config (e.g., required systems)""" + raise NotImplementedError + + @property + def name(self): + return self.__class__.__name__ + + @property + def winners(self): + return self._winners + + @property + def winning_score(self): + if self._winners: + # CHECK ME: should we return the winners' tasks' reward multiplier? + return 1.0 # default score for task completion + return 0.0 + + def reset(self, np_random, map_dict, tasks=None): + self._np_random = np_random + self._set_config() + self._set_realm(map_dict) + if tasks: + # tasks comes from env.reset() + self.tasks = tasks + elif self._next_tasks: + # env.reset() cannot take both game and tasks + # so set next_tasks in the game first + self.tasks = self._next_tasks + self._next_tasks = None + else: + self.tasks = self._define_tasks() + self._post_setup() + self._reset_stats() + + def _set_config(self): # pylint: disable=unused-argument + """Set config for the episode. Can customize config using config.set_for_episode()""" + self.config.reset() + + def _set_realm(self, map_dict): + """Set up the realm for the episode. 
Can customize map and spawn""" + self.realm.reset(self._np_random, map_dict, custom_spawn=False) + + def _post_setup(self): + """Post-setup processes, e.g., attach team tags, etc.""" + + def _reset_stats(self): + """Reset stats for the episode""" + self._agent_stats.clear() + self._winners = None + self._game_done = False + # result = False means the game ended without a winner + self.history.append({"result": False, "winners": None, "winning_score": None}) + + @abstractmethod + def _define_tasks(self): + """Define tasks for the episode.""" + # NOTE: Task embeddings should be provided somehow, e.g., from curriculum file. + # Otherwise, policies cannot be task-conditioned. + raise NotImplementedError + + def set_next_tasks(self, tasks): + """Set the next task to be completed""" + self._next_tasks = tasks + + def update(self, terminated, dead_players, dead_npcs): + """Process dead players/npcs, update the game stats, winners, etc.""" + self._process_dead_players(terminated, dead_players) + self._process_dead_npcs(dead_npcs) + self._winners = self._check_winners(terminated) + if self._winners and not self._game_done: + self._game_done = self.history[-1]["result"] = True + self.history[-1]["winners"] = self._winners + self.history[-1]["winning_score"] = self.winning_score + self.history[-1]["winning_tick"] = self.realm.tick + self.history[-1].update(self.get_episode_stats()) + + def _process_dead_players(self, terminated, dead_players): + for agent_id in terminated: + if terminated[agent_id]: + agent = dead_players[agent_id] if agent_id in dead_players\ + else self.realm.players[agent_id] + self._agent_stats[agent_id] = {"time_alive": self.realm.tick, + "progress_to_center": agent.history.exploration} + + def _process_dead_npcs(self, dead_npcs): + if self.config.NPC_SYSTEM_ENABLED and self.config.NPC_DEFAULT_REFILL_DEAD_NPCS: + for npc in dead_npcs.values(): + if npc.spawn_danger: + self.realm.npcs.spawn_dangers.append(npc.spawn_danger) + # refill npcs to target 
config.NPC_N, within config.NPC_SPAWN_ATTEMPTS + self.realm.npcs.default_spawn() + + def _check_winners(self, terminated): + # Determine winners for the default task + if self.realm.num_players == 1: # only one survivor + return list(self.realm.players.keys()) + if all(terminated.values()): + # declare all winners when they died at the same time + return list(terminated.keys()) + if self.realm.tick >= self.config.HORIZON: + # declare all survivors as winners when the time is up + return [agent_id for agent_id, done in terminated.items() if not done] + return None + + @property + def is_over(self): + return self.winners is not None or self.realm.num_players == 0 or \ + self.realm.tick >= self.config.HORIZON + + def get_episode_stats(self): + """A helper function for trainers""" + total_agent_steps = 0 + progress_to_center = 0 + max_progress = self.config.PLAYER_N * self.config.MAP_SIZE // 2 + for stat in self._agent_stats.values(): + total_agent_steps += stat["time_alive"] + progress_to_center += stat["progress_to_center"] + return { + "total_agent_steps": total_agent_steps, + "norm_progress_to_center": float(progress_to_center) / max_progress + } + + ############################ + # Helper functions for Game + def _who_completed_task(self): + # Return all assignees who completed their tasks + winners = [] + for task in self.tasks: + if task.completed: + winners += task.assignee + return winners or None + + +class DefaultGame(Game): + """The default NMMO game""" + game_mode = "agent_training" + + def is_compatible(self): + return True + + def _define_tasks(self): + return task_api.nmmo_default_task(self.config.POSSIBLE_AGENTS) + +class AgentTraining(Game): + """Game setting for agent training tasks""" + game_mode = "agent_training" + + @property + def winning_score(self): + return 0.0 + + def is_compatible(self): + try: + # Check if the curriculum file exists and opens + with open(self.config.CURRICULUM_FILE_PATH, "rb") as f: + dill.load(f) # a list of TaskSpec + 
except: + return False + return True + + def _define_tasks(self): + with open(self.config.CURRICULUM_FILE_PATH, "rb") as f: + # curriculum file may have been changed, so read the file when sampling + curriculum = dill.load(f) # a list of TaskSpec + cand_specs = [spec for spec in curriculum if spec.reward_to == "agent"] + assert len(cand_specs) > 0, "No agent task is defined in the curriculum file" + + sampling_weights = [spec.sampling_weight for spec in cand_specs] + sampled_spec = self._np_random.choice(cand_specs, size=self.config.PLAYER_N, + p=sampling_weights/np.sum(sampling_weights)) + return task_spec.make_task_from_spec(self.config.POSSIBLE_AGENTS, sampled_spec) + +class TeamGameTemplate(Game): + """A helper class with common utils for team games""" + assign_dead_reward = False # Do NOT always assign -1 to dead agents + + def is_compatible(self): + try: + assert self.config.TEAMS is not None, "Team game requires TEAMS to be defined" + num_agents = sum(len(v) for v in self.config.TEAMS.values()) + assert self.config.PLAYER_N == num_agents,\ + "PLAYER_N must match the number of agents in TEAMS" + # Check if the curriculum file exists and opens + with open(self.config.CURRICULUM_FILE_PATH, "rb") as f: + dill.load(f) # a list of TaskSpec + except: + return False + return True + + def _set_realm(self, map_dict): + self.realm.reset(self._np_random, map_dict, custom_spawn=True) + # Custom spawning + team_loader = team_helper.TeamLoader(self.config, self._np_random) + self.realm.players.spawn(team_loader) + self.realm.npcs.default_spawn() + + def _post_setup(self): + self._attach_team_tag() + + @property + def teams(self): + return self.config.TEAMS + + def _attach_team_tag(self): + # setup team names + for team_id, members in self.teams.items(): + if isinstance(team_id, int): + team_id = f"Team{team_id:02d}" + for idx, agent_id in enumerate(members): + self.realm.players[agent_id].name = f"{team_id}_{agent_id}" + if idx == 0: + self.realm.players[agent_id].name = 
f"{team_id}_leader" + + def _get_cand_team_tasks(self, num_tasks, tags=None): + # NOTE: use different file to store different set of tasks? + with open(self.config.CURRICULUM_FILE_PATH, "rb") as f: + curriculum = dill.load(f) # a list of TaskSpec + cand_specs = [spec for spec in curriculum if spec.reward_to == "team"] + if tags: + cand_specs = [spec for spec in cand_specs if tags in spec.tags] + assert len(cand_specs) > 0, "No team task is defined in the curriculum file" + + sampling_weights = [spec.sampling_weight for spec in cand_specs] + sampled_spec = self._np_random.choice(cand_specs, size=num_tasks, + p=sampling_weights/np.sum(sampling_weights)) + return sampled_spec + +class TeamTraining(TeamGameTemplate): + """Game setting for team training tasks""" + game_mode = "team_training" + + def _define_tasks(self): + sampled_spec = self._get_cand_team_tasks(len(self.config.TEAMS)) + return task_spec.make_task_from_spec(self.config.TEAMS, sampled_spec) + +def team_survival_task(num_tick, embedding=None): + return task_spec.TaskSpec( + eval_fn=base_predicates.TickGE, + eval_fn_kwargs={"num_tick": num_tick}, + reward_to="team", + embedding=embedding) + +class TeamBattle(TeamGameTemplate): + """Game setting for team battle""" + game_mode = "team_battle" + + def __init__(self, env, sampling_weight=None): + super().__init__(env, sampling_weight) + self.task_embedding = utils.get_hash_embedding(base_predicates.TickGE, + self.config.TASK_EMBED_DIM) + + def is_compatible(self): + assert self.config.are_systems_enabled(["COMBAT"]), "Combat system must be enabled" + assert self.config.TEAMS is not None, "Team battle mode requires TEAMS to be defined" + num_agents = sum(len(v) for v in self.config.TEAMS.values()) + assert self.config.PLAYER_N == num_agents,\ + "PLAYER_N must match the number of agents in TEAMS" + return True + + def _define_tasks(self): + # NOTE: Teams can win by eliminating all other teams, + # or fully cooperating to survive for the entire episode + 
survive_task = team_survival_task(self.config.HORIZON, self.task_embedding) + return task_spec.make_task_from_spec(self.config.TEAMS, + [survive_task] * len(self.config.TEAMS)) + + def _check_winners(self, terminated): + # A team wins when their task is completed first or only one team remains + current_teams = self._check_remaining_teams() + if len(current_teams) == 1: + winner_team = list(current_teams.keys())[0] + return self.config.TEAMS[winner_team] + + # Return all assignees who completed their tasks + # Assuming the episode gets ended externally + return self._who_completed_task() + + def _check_remaining_teams(self): + current_teams = {} + for team_id, team in self.config.TEAMS.items(): + alive_members = [agent_id for agent_id in team if agent_id in self.realm.players] + if len(alive_members) > 0: + current_teams[team_id] = alive_members + return current_teams + +class ProtectTheKing(TeamBattle): + def __init__(self, env, sampling_weight=None): + super().__init__(env, sampling_weight) + self.team_helper = team_helper.TeamHelper(self.config.TEAMS) + self.task_embedding = utils.get_hash_embedding(base_predicates.ProtectLeader, + self.config.TASK_EMBED_DIM) + + def _define_tasks(self): + protect_task = task_spec.TaskSpec( + eval_fn=base_predicates.ProtectLeader, + eval_fn_kwargs={ + "target_protect": "my_team_leader", + "target_destroy": "all_foe_leaders", + }, + reward_to="team" + ) + return task_spec.make_task_from_spec(self.config.TEAMS, + [protect_task] * len(self.config.TEAMS)) + + def update(self, terminated, dead_players, dead_npcs): + # If a team's leader is dead, the whole team is dead + for team_id, members in self.config.TEAMS.items(): + if self.team_helper.get_target_agent(team_id, "my_team_leader") in dead_players: + for agent_id in members: + if agent_id in self.realm.players: + self.realm.players[agent_id].health.update(0) + + # Additional dead players cull + for agent in [agent for agent in self.realm.players.values() if not agent.alive]: + 
agent_id = agent.ent_id + self.realm.players.dead_this_tick[agent_id] = agent + self.realm.players.cull_entity(agent) + agent.datastore_record.delete() + terminated[agent_id] = True + + super().update(terminated, dead_players, dead_npcs) diff --git a/nmmo/core/log_helper.py b/nmmo/core/log_helper.py deleted file mode 100644 index 8f6a318c6..000000000 --- a/nmmo/core/log_helper.py +++ /dev/null @@ -1,135 +0,0 @@ -from __future__ import annotations - -from typing import Dict - -from nmmo.core.agent import Agent -from nmmo.entity.player import Player -from nmmo.lib.log import Logger, MilestoneLogger - - -class LogHelper: - @staticmethod - def create(realm) -> LogHelper: - if realm.config.LOG_ENV: - return SimpleLogHelper(realm) - return DummyLogHelper() - -class DummyLogHelper(LogHelper): - def reset(self) -> None: - pass - - def update(self, dead_players: Dict[int, Player]) -> None: - pass - - def log_milestone(self, milestone: str, value: float) -> None: - pass - - def log_event(self, event: str, value: float) -> None: - pass - -class SimpleLogHelper(LogHelper): - def __init__(self, realm) -> None: - self.realm = realm - self.config = realm.config - - self.reset() - - def reset(self): - self._env_logger = Logger() - self._player_logger = Logger() - self._event_logger = DummyLogHelper() - self._milestone_logger = DummyLogHelper() - - if self.config.LOG_EVENTS: - self._event_logger = Logger() - - if self.config.LOG_MILESTONES: - self._milestone_logger = MilestoneLogger(self.config.LOG_FILE) - - self._player_stats_funcs = {} - self._register_player_stats() - - def log_milestone(self, milestone: str, value: float) -> None: - if self.config.LOG_MILESTONES: - self._milestone_logger.log(milestone, value) - - def log_event(self, event: str, value: float) -> None: - if self.config.LOG_EVENTS: - self._event_logger.log(event, value) - - @property - def packet(self): - packet = {'Env': self._env_logger.stats, - 'Player': self._player_logger.stats} - - if self.config.LOG_EVENTS: 
- packet['Event'] = self._event_logger.stats - else: - packet['Event'] = 'Unavailable: config.LOG_EVENTS = False' - - if self.config.LOG_MILESTONES: - packet['Milestone'] = self._event_logger.stats - else: - packet['Milestone'] = 'Unavailable: config.LOG_MILESTONES = False' - - return packet - - def _register_player_stat(self, name: str, func: callable): - assert name not in self._player_stats_funcs - self._player_stats_funcs[name] = func - - def _register_player_stats(self): - self._register_player_stat('Basic/TimeAlive', lambda player: player.history.time_alive.val) - # Skills - if self.config.PROGRESSION_SYSTEM_ENABLED: - if self.config.COMBAT_SYSTEM_ENABLED: - self._register_player_stat('Skill/Mage', lambda player: player.skills.mage.level.val) - self._register_player_stat('Skill/Range', lambda player: player.skills.range.level.val) - self._register_player_stat('Skill/Melee', lambda player: player.skills.melee.level.val) - if self.config.PROFESSION_SYSTEM_ENABLED: - self._register_player_stat('Skill/Fishing', lambda player: player.skills.fishing.level.val) - self._register_player_stat('Skill/Herbalism', - lambda player: player.skills.herbalism.level.val) - self._register_player_stat('Skill/Prospecting', - lambda player: player.skills.prospecting.level.val) - self._register_player_stat('Skill/Carving', - lambda player: player.skills.carving.level.val) - self._register_player_stat('Skill/Alchemy', - lambda player: player.skills.alchemy.level.val) - if self.config.EQUIPMENT_SYSTEM_ENABLED: - self._register_player_stat('Item/Held-Level', - lambda player: player.inventory.equipment.held.item.level.val \ - if player.inventory.equipment.held.item else 0) - self._register_player_stat('Item/Equipment-Total', - lambda player: player.equipment.total(lambda e: e.level)) - - if self.config.EXCHANGE_SYSTEM_ENABLED: - self._register_player_stat('Exchange/Player-Sells', lambda player: player.sells) - self._register_player_stat('Exchange/Player-Buys', lambda player: 
player.buys) - self._register_player_stat('Exchange/Player-Wealth', lambda player: player.gold.val) - - # Item usage - if self.config.PROFESSION_SYSTEM_ENABLED: - self._register_player_stat('Item/Ration-Consumed', lambda player: player.ration_consumed) - self._register_player_stat('Item/Poultice-Consumed', lambda player: player.poultice_consumed) - self._register_player_stat('Item/Ration-Level', lambda player: player.ration_level_consumed) - self._register_player_stat('Item/Poultice-Level', - lambda player: player.poultice_level_consumed) - - def update(self, dead_players: Dict[int, Player]) -> None: - for player in dead_players.values(): - for key, val in self._player_stats(player).items(): - self._player_logger.log(key, val) - - # TODO: handle env logging - - def _player_stats(self, player: Agent) -> Dict[str, float]: - stats = {} - policy = player.policy - - for key, stat_func in self._player_stats_funcs.items(): - stats[f'{key}_{policy}'] = stat_func(player) - - stats['Time_Alive'] = player.history.time_alive.val - - return stats diff --git a/nmmo/core/map.py b/nmmo/core/map.py index 870ddabe5..d0d7b54df 100644 --- a/nmmo/core/map.py +++ b/nmmo/core/map.py @@ -1,10 +1,15 @@ -import os -import logging +from typing import List, Tuple import numpy as np from ordered_set import OrderedSet from nmmo.core.tile import Tile -from nmmo.lib import material +from nmmo.lib import material, utils +from nmmo.core.terrain import ( + fractal_to_material, + process_map_border, + spawn_profession_resources, + scatter_extra_resources, +) class Map: @@ -20,16 +25,21 @@ def __init__(self, config, realm, np_random): self.pathfinding_cache = {} # Avoid recalculating A*, paths don't move sz = config.MAP_SIZE - self.tiles = np.zeros((sz, sz), dtype=object) - self.habitable_tiles = np.zeros((sz,sz)) + self.tiles = np.zeros((sz,sz), dtype=object) + self.habitable_tiles = np.zeros((sz,sz), dtype=np.int8) for r in range(sz): for c in range(sz): self.tiles[r, c] = Tile(realm, r, c, 
def reset(self, map_dict, np_random, seize_targets=None):
  '''Reuse the current tile objects to load a new map

  Args:
    map_dict: holds the material-index array under "map", which must have
      shape (MAP_SIZE, MAP_SIZE); an optional truthy "mark_center" entry
      marks the map center. The dict is also passed to _process_map.
    np_random: random generator passed to _process_map and to each tile reset
    seize_targets: optional list of "center", "first", "second", "third"
      or "fourth", naming the coordinates to register as seize targets
  '''
  cfg = self.config
  assert map_dict["map"].shape == (cfg.MAP_SIZE, cfg.MAP_SIZE),\
    "Map shape is inconsistent with config.MAP_SIZE"

  # NOTE: MAP_CENTER and MAP_BORDER can change from episode to episode
  mid_r = mid_c = cfg.MAP_SIZE // 2
  self.center_coord = (mid_r, mid_c)
  self.dist_border_center = cfg.MAP_CENTER // 2
  offset = self.dist_border_center // 2
  self.quad_centers = {
    "first": (mid_r + offset, mid_c + offset),
    "second": (mid_r - offset, mid_c + offset),
    "third": (mid_r - offset, mid_c - offset),
    "fourth": (mid_r + offset, mid_c - offset),
  }
  assert cfg.MAP_BORDER > cfg.PLAYER_VISION_RADIUS,\
    "MAP_BORDER must be greater than PLAYER_VISION_RADIUS"

  self._repr = None
  self.update_list = OrderedSet() # critical for determinism
  self.seize_targets = []
  if seize_targets:
    assert isinstance(seize_targets, list), "seize_targets must be a list of reserved words"
    for name in seize_targets:
      assert name in [*self.quad_centers, "center"], "Invalid seize target"
      if name == "center":
        self.seize_targets.append(self.center_coord)
      else:
        self.seize_targets.append(self.quad_centers[name])

  # build the material map according to config, then mark the key tiles
  matl_map = self._process_map(map_dict, np_random)
  if "mark_center" in map_dict and map_dict["mark_center"]:
    self._mark_tile(matl_map, *self.center_coord)
  for tgt_r, tgt_c in self.seize_targets:
    self._mark_tile(matl_map, tgt_r, tgt_c)

  # reset the existing tile objects with the new materials
  mat_by_index = {mat.index: mat for mat in material.All}
  for r, row in enumerate(matl_map):
    for c, idx in enumerate(row):
      tile = self.tiles[r, c]
      tile.reset(mat_by_index[idx], cfg, np_random)
      self.habitable_tiles[r, c] = tile.habitable
def make_spawnable(self, row, col, radius=2):
  '''Make the area centered around row, col spawnable

  Resets every tile in the (2*radius+1)-sided square around (row, col) to
  grass and records its habitable flag. Only allowed while the cached
  material representation (self._repr) has not been built, and only when
  the square lies strictly inside the map border.
  '''
  assert self._repr is None, "Cannot make spawnable after map is generated"
  assert radius > 0, "Radius must be positive"

  border = self.config.MAP_BORDER
  inner_limit = self.config.MAP_SIZE - border
  row_lo, row_hi = row - radius, row + radius
  col_lo, col_hi = col - radius, col + radius
  assert border < row_lo and border < col_lo \
         and row_hi < inner_limit and col_hi < inner_limit,\
    "Cannot make spawnable near the border"

  # pylint: disable=protected-access
  rng = self.realm._np_random
  for r in range(row_lo, row_hi + 1):
    for c in range(col_lo, col_hi + 1):
      tile = self.tiles[r, c]
      tile.reset(material.Grass, self.config, rng)
      self.habitable_tiles[r, c] = tile.habitable # must be true
class BasicObs:
  """Reusable holder for one observation table and its id column.

  ``values`` keeps at most ``obs_dim`` rows of the latest array passed to
  ``update``; ``ids`` is the ``id_col`` column of those same rows. Both are
  None until the first ``update`` and again after ``reset``.
  """

  def __init__(self, id_col, obs_dim):
    self.values = None
    self.ids = None
    self.id_col = id_col
    self.obs_dim = obs_dim

  def reset(self):
    """Drop the stored rows and ids."""
    self.values = None
    self.ids = None

  def update(self, values):
    """Store the first ``obs_dim`` rows of ``values`` and their ids."""
    self.values = values[:self.obs_dim]
    # Take the ids from the truncated rows, so that len/id/index always
    # agree with self.values even when more rows than obs_dim are passed.
    self.ids = self.values[:, self.id_col]

  @property
  def len(self):
    """Number of stored rows."""
    return self.ids.shape[0]

  def id(self, i):
    """Return the id stored at row ``i``, or None if ``i`` is past the end."""
    return self.ids[i] if i < self.len else None

  def index(self, val):
    """Return the first row index whose id equals ``val``, or None."""
    hits = np.flatnonzero(self.ids == val)
    return hits[0] if hits.size else None
class GymObs:
  """Preallocated gym-style observation dict that is reused across ticks.

  The arrays are created once by ``_make_empty_obs``; ``clear`` zeroes them
  in place and ``set_arr_values`` copies new rows into them.
  """
  keys_to_clear = ["Tile", "Entity", "Inventory", "Market", "Communication"]

  def __init__(self, config, agent_id):
    self.config = config
    self.agent_id = agent_id
    self.values = self._make_empty_obs()

  def reset(self, task_embedding=None):
    """Clear all arrays and set the task embedding (zeros when None)."""
    self.clear()
    self.values["Task"][:] = 0 if task_embedding is None else task_embedding

  def clear(self, tick=None):
    """Set CurrentTick (0 when ``tick`` is falsy) and zero the active arrays.

    Arrays that belong to a system that is currently disabled in the config
    are left untouched.
    """
    self.values["CurrentTick"] = tick or 0
    for key in self.keys_to_clear:
      if key in self.values:
        if key == "Inventory" and not self.config.ITEM_SYSTEM_ENABLED:
          continue
        if key == "Market" and not self.config.EXCHANGE_SYSTEM_ENABLED:
          continue
        if key == "Communication" and not self.config.COMMUNICATION_SYSTEM_ENABLED:
          continue
        self.values[key][:] = 0

  def _make_empty_obs(self):
    """Allocate the observation dict, sized from the config.

    Optional tables are allocated according to ``config.original`` rather
    than the current config values.
    """
    num_tile_attributes = TileState.State.num_attributes
    num_tile_attributes += 1 if self.config.original["PROVIDE_DEATH_FOG_OBS"] else 0
    gym_obs = {
      "CurrentTick": 0,
      "AgentId": self.agent_id,
      "Task": np.zeros(self.config.TASK_EMBED_DIM, dtype=np.float16),
      "Tile": np.zeros((self.config.MAP_N_OBS, num_tile_attributes), dtype=np.int16),
      "Entity": np.zeros((self.config.PLAYER_N_OBS,
                          EntityState.State.num_attributes), dtype=np.int16)}
    if self.config.original["ITEM_SYSTEM_ENABLED"]:
      gym_obs["Inventory"] = np.zeros((self.config.INVENTORY_N_OBS,
                                       ItemState.State.num_attributes), dtype=np.int16)
    if self.config.original["EXCHANGE_SYSTEM_ENABLED"]:
      gym_obs["Market"] = np.zeros((self.config.MARKET_N_OBS,
                                    ItemState.State.num_attributes), dtype=np.int16)
    if self.config.original["COMMUNICATION_SYSTEM_ENABLED"]:
      gym_obs["Communication"] = np.zeros((self.config.COMMUNICATION_N_OBS,
                                           len(EntityState.State.comm_attr_map)),
                                          dtype=np.int16)
    return gym_obs

  def set_arr_values(self, key, values):
    """Copy ``values`` into the top-left corner of the ``key`` array.

    Rows and columns beyond the preallocated shape are dropped; rows that
    ``values`` does not cover keep their current content.
    """
    obs_rows, obs_cols = self.values[key].shape
    # Clip the rows as well as the columns: an input taller than the buffer
    # would otherwise fail with a shape-mismatch error.
    num_rows = min(values.shape[0], obs_rows)
    self.values[key][:num_rows, :] = values[:num_rows, :obs_cols]

  def export(self):
    """Return a shallow copy of the dict; the arrays themselves are shared."""
    return self.values.copy() # shallow copy
np.ones(len(action.Style.edges), dtype=np.int8), + "Target": np.zeros(self.config.PLAYER_N_OBS + self._no_op, dtype=np.int8)} + if self.config.original["ITEM_SYSTEM_ENABLED"]: + masks["Use"] = { + "InventoryItem": np.zeros(self.config.INVENTORY_N_OBS + self._no_op, dtype=np.int8)} + masks["Give"] = { + "InventoryItem": np.zeros(self.config.INVENTORY_N_OBS + self._no_op, dtype=np.int8), + "Target": np.zeros(self.config.PLAYER_N_OBS + self._no_op, dtype=np.int8)} + masks["Destroy"] = { + "InventoryItem": np.zeros(self.config.INVENTORY_N_OBS + self._no_op, dtype=np.int8)} + if self.config.original["EXCHANGE_SYSTEM_ENABLED"]: + masks["Sell"] = { + "InventoryItem": np.zeros(self.config.INVENTORY_N_OBS + self._no_op, dtype=np.int8), + "Price": np.ones(self.config.PRICE_N_OBS, dtype=np.int8)} + masks["Buy"] = { + "MarketItem": np.zeros(self.config.MARKET_N_OBS + self._no_op, dtype=np.int8)} + masks["GiveGold"] = { + "Price": np.ones(self.config.PRICE_N_OBS, dtype=np.int8), + "Target": np.zeros(self.config.PLAYER_N_OBS + self._no_op, dtype=np.int8)} + if self.config.original["COMMUNICATION_SYSTEM_ENABLED"]: + masks["Comm"] = {"Token": np.ones(self.config.COMMUNICATION_NUM_TOKENS, dtype=np.int8)} + return masks + +class Observation: + def __init__(self, config, agent_id: int) -> None: self.config = config - self.current_tick = current_tick self.agent_id = agent_id - self.task_embedding = task_embedding + self.agent = None + + self.current_tick = None + self._is_agent_dead = None + self.habitable_tiles = None + self.agent_in_combat = None + self.gym_obs = GymObs(config, agent_id) + self.empty_obs = GymObs(config, agent_id).export() + self.action_targets = ActionTargets(config) + if self.config.original["PROVIDE_ACTION_TARGETS"]: + self.empty_obs["ActionTargets"] = ActionTargets(config).values + + self.vision_radius = self.config.PLAYER_VISION_RADIUS + self.vision_diameter = self.config.PLAYER_VISION_DIAMETER + self._noop_action = 1 if 
def reset(self, habitable_tiles, task_embedding=None):
  '''Prepare this observation object for a new episode and return it

  Resets the gym buffer (with the given task embedding) and the action
  targets, stores the habitable-tile map, clears the per-episode state, and
  resets the entity table plus each optional table whose system is enabled.
  '''
  self.gym_obs.reset(task_embedding)
  self.action_targets.reset()

  # per-episode state
  self.habitable_tiles = habitable_tiles
  self._is_agent_dead = False
  self.agent_in_combat = None
  self.current_tick = 0
  self.tiles = None

  self.entities.reset()
  optional_tables = (
    ("ITEM_SYSTEM_ENABLED", "inventory"),
    ("EXCHANGE_SYSTEM_ENABLED", "market"),
    ("COMMUNICATION_SYSTEM_ENABLED", "comm"),
  )
  for flag_name, table_name in optional_tables:
    if getattr(self.config, flag_name):
      getattr(self, table_name).reset()
  return self
@lru_cache
def tile(self, r_delta, c_delta):
  '''Return the array object corresponding to a nearby tile

  Args:
    r_delta: row offset from the agent's own tile
    c_delta: column offset from the agent's own tile

  Returns:
    Vector corresponding to the specified tile, or EMPTY_TILE when the
    offset lies outside the agent's vision window
  '''
  radius = self.vision_radius
  # Reject out-of-view offsets explicitly. Otherwise a negative flat index
  # wraps around to the end of self.tiles, and a too-large column offset
  # spills into the next row, so a wrong tile is returned instead of
  # EMPTY_TILE.
  # NOTE(review): assumes vision_diameter == 2 * vision_radius + 1 -- confirm.
  if abs(r_delta) > radius or abs(c_delta) > radius:
    return EMPTY_TILE

  idx_1d = (radius+r_delta)*self.vision_diameter + radius+c_delta
  try:
    return TileState.parse_array(self.tiles[idx_1d])
  except IndexError:
    # fewer visible tiles than a full vision window
    return EMPTY_TILE
self.empty_obs + self.gym_obs.clear(self.current_tick) # NOTE: assume that all len(self.tiles) == self.config.MAP_N_OBS - gym_obs['Tile'] = self.tiles - gym_obs['Entity'][:self.entities.values.shape[0],:] = self.entities.values - + self.gym_obs.set_arr_values('Tile', self.tiles) + self.gym_obs.set_arr_values('Entity', self.entities.values) if self.config.ITEM_SYSTEM_ENABLED: - gym_obs["Inventory"][:self.inventory.values.shape[0],:] = self.inventory.values - + self.gym_obs.set_arr_values('Inventory', self.inventory.values) if self.config.EXCHANGE_SYSTEM_ENABLED: - gym_obs["Market"][:self.market.values.shape[0],:] = self.market.values + self.gym_obs.set_arr_values('Market', self.market.values) + if self.config.COMMUNICATION_SYSTEM_ENABLED: + self.gym_obs.set_arr_values('Communication', self.comm.values) + gym_obs = self.gym_obs.export() if self.config.PROVIDE_ACTION_TARGETS: gym_obs["ActionTargets"] = self._make_action_targets() @@ -165,112 +318,81 @@ def to_gym(self): return gym_obs def _make_action_targets(self): - masks = {} - masks["Move"] = { - "Direction": self._make_move_mask() - } - + self.action_targets.clear() + masks = self.action_targets.values + self._make_move_mask(masks["Move"]) if self.config.COMBAT_SYSTEM_ENABLED: # Test below. 
def _make_move_mask(self, move_mask, use_cython=None):
  '''Fill move_mask["Direction"] in place from the habitable-tile map

  Args:
    move_mask: dict holding the "Direction" mask array to write into
    use_cython: True/False to force the cython or numpy path; None (the
      default) follows config.USE_CYTHON
  '''
  # Only fall back to the config when the caller gave no preference.
  # "use_cython or config.USE_CYTHON" would override an explicit False.
  if use_cython is None:
    use_cython = self.config.USE_CYTHON

  if use_cython:
    chp.make_move_mask(move_mask["Direction"], self.habitable_tiles,
                       self.agent.row, self.agent.col, ROW_DELTA, COL_DELTA)
    return

  # the first four slots follow the order of ROW_DELTA / COL_DELTA
  move_mask["Direction"][:4] = self.habitable_tiles[self.agent.row+ROW_DELTA,
                                                    self.agent.col+COL_DELTA]
def _make_attack_mask(self, attack_mask, use_cython=None):
  '''Fill attack_mask["Target"] in place with the attackable entities

  Args:
    attack_mask: dict holding the "Target" mask array to write into
    use_cython: True/False to force the cython or numpy path; None (the
      default) follows config.USE_CYTHON
  '''
  if self.config.COMBAT_ALLOW_FLEXIBLE_STYLE:
    # NOTE: if the style is flexible, then the reach of all styles should be the same
    assert self.config.COMBAT_MELEE_REACH == self.config.COMBAT_RANGE_REACH
    assert self.config.COMBAT_MELEE_REACH == self.config.COMBAT_MAGE_REACH
    assert self.config.COMBAT_RANGE_REACH == self.config.COMBAT_MAGE_REACH

  if not self.config.COMBAT_SYSTEM_ENABLED or self.return_dummy_obs:
    return

  # Only fall back to the config when the caller gave no preference.
  # "use_cython or config.USE_CYTHON" would override an explicit False.
  if use_cython is None:
    use_cython = self.config.USE_CYTHON

  if use_cython:
    # NOTE(review): this path passes COMBAT_RANGE_REACH while the numpy path
    # below uses COMBAT_MELEE_REACH; they agree only when the reaches are equal.
    chp.make_attack_mask(
      attack_mask["Target"], self.entities.values, EntityState.State.attr_name_to_col,
      {"agent_id": self.agent_id, "row": self.agent.row, "col": self.agent.col,
       "immunity": self.config.COMBAT_SPAWN_IMMUNITY,
       "attack_range": self.config.COMBAT_RANGE_REACH})
    return

  col_of = EntityState.State.attr_name_to_col
  ent_values = self.entities.values

  # allow friendly fire but no self shooting
  targetable = self.entities.ids != self.agent.id

  # NOTE: this is a hack. Only target "normal" agents, which has npc_type of 0, 1, 2, 3
  # For example, immortal "scout" agents has npc_type of -1
  targetable &= ent_values[:,col_of["npc_type"]] >= 0

  immunity = self.config.COMBAT_SPAWN_IMMUNITY
  if self.agent.time_alive < immunity:
    # NOTE: CANNOT attack players during immunity, thus mask should set to 0
    targetable &= ~(self.entities.ids > 0)  # ids > 0 equals entity.is_player

  within_range = np.maximum( # calculating the l-inf dist
    np.abs(ent_values[:,col_of["row"]] - self.agent.row),
    np.abs(ent_values[:,col_of["col"]] - self.agent.col)
  ) <= self.config.COMBAT_MELEE_REACH

  num_entities = self.entities.len
  attack_mask["Target"][:num_entities] = targetable & within_range
  # Only touch the last slot when it really is the no-op slot. Without a
  # no-op slot, index -1 is the last entity slot and must not be cleared.
  if self._noop_action and np.count_nonzero(attack_mask["Target"][:num_entities]):
    # Mask the no-op option, since there should be at least one allowed move
    # NOTE: this will make agents always attack if there is a valid target
    attack_mask["Target"][-1] = 0
self.inventory.values[:,ItemState.State.attr_name_to_col["listed_price"]] == 0 item_type = self.inventory.values[:,ItemState.State.attr_name_to_col["type_id"]] item_level = self.inventory.values[:,ItemState.State.attr_name_to_col["level"]] @@ -281,12 +403,10 @@ def _make_use_mask(self): item_type = np.tile(np.transpose(np.atleast_2d(item_type)), (1,len(item_skill))) item_level = np.tile(np.transpose(np.atleast_2d(item_level)), (1,len(item_skill))) level_satisfied = np.any((item_type==type_flt) & (item_level<=level_flt), axis=1) - - use_mask[:self.inventory.len] = not_listed & level_satisfied - return use_mask + use_mask["InventoryItem"][:self.inventory.len] = not_listed & level_satisfied def _item_skill(self): - agent = self.agent() + agent = self.agent # the minimum agent level is 1 level = max(1, agent.melee_level, agent.range_level, agent.mage_level, @@ -311,110 +431,81 @@ def _item_skill(self): item_system.Potion.ITEM_TYPE_ID: level } - def _make_destroy_item_mask(self): - destroy_mask = np.zeros(self.config.INVENTORY_N_OBS + self._noop_action, dtype=np.int8) - if self.config.PROVIDE_NOOP_ACTION_TARGET: - destroy_mask[-1] = 1 - + def _make_destroy_item_mask(self, destroy_mask): # empty inventory -- nothing to destroy if not (self.config.ITEM_SYSTEM_ENABLED and self.inventory.len > 0)\ - or self.dummy_obs or self.agent_in_combat: - return destroy_mask - + or self.return_dummy_obs or self.agent_in_combat: + return # not equipped items in the inventory can be destroyed not_equipped = self.inventory.values[:,ItemState.State.attr_name_to_col["equipped"]] == 0 + destroy_mask["InventoryItem"][:self.inventory.len] = not_equipped - destroy_mask[:self.inventory.len] = not_equipped - return destroy_mask - - def _make_give_target_mask(self): - give_mask = np.zeros(self.config.PLAYER_N_OBS + self._noop_action, dtype=np.int8) - if self.config.PROVIDE_NOOP_ACTION_TARGET: - give_mask[-1] = 1 - - if not self.config.ITEM_SYSTEM_ENABLED or self.dummy_obs or self.agent_in_combat\ 
+ def _make_give_mask(self, give_mask): + if not self.config.ITEM_SYSTEM_ENABLED or self.return_dummy_obs or self.agent_in_combat\ or self.inventory.len == 0: - return give_mask + return - agent = self.agent() - entities_pos = self.entities.values[:,[EntityState.State.attr_name_to_col["row"], - EntityState.State.attr_name_to_col["col"]]] - same_tile = utils.linf(entities_pos, (agent.row, agent.col)) == 0 - not_me = self.entities.ids != self.agent_id - player = (self.entities.values[:,EntityState.State.attr_name_to_col["npc_type"]] == 0) - - give_mask[:self.entities.len] = same_tile & player & not_me - return give_mask - - def _make_give_gold_target_mask(self): - give_mask = np.zeros(self.config.PLAYER_N_OBS + self._noop_action, dtype=np.int8) - if self.config.PROVIDE_NOOP_ACTION_TARGET: - give_mask[-1] = 1 + # InventoryItem + not_equipped = self.inventory.values[:,ItemState.State.attr_name_to_col["equipped"]] == 0 + not_listed = self.inventory.values[:,ItemState.State.attr_name_to_col["listed_price"]] == 0 + give_mask["InventoryItem"][:self.inventory.len] = not_equipped & not_listed - if not self.config.EXCHANGE_SYSTEM_ENABLED or self.dummy_obs or self.agent_in_combat\ - or int(self.agent().gold) == 0: - return give_mask + # Give Target + # NOTE: Allow give to entities within visual range. 
So no distance check is needed + # entities_pos = self.entities.values[:,[EntityState.State.attr_name_to_col["row"], + # EntityState.State.attr_name_to_col["col"]]] + # same_tile = utils.linf(entities_pos, (self.agent.row, self.agent.col)) == 0 - agent = self.agent() - entities_pos = self.entities.values[:,[EntityState.State.attr_name_to_col["row"], - EntityState.State.attr_name_to_col["col"]]] - same_tile = utils.linf(entities_pos, (agent.row, agent.col)) == 0 not_me = self.entities.ids != self.agent_id player = (self.entities.values[:,EntityState.State.attr_name_to_col["npc_type"]] == 0) + give_mask["Target"][:self.entities.len] = player & not_me - give_mask[:self.entities.len] = same_tile & player & not_me - return give_mask - - def _make_give_gold_mask(self): - mask = np.zeros(self.config.PRICE_N_OBS, dtype=np.int8) - mask[0] = 1 # To avoid all-0 masks. If the agent has no gold, this action will be ignored. - if self.dummy_obs or self.agent_in_combat: - return mask - - gold = int(self.agent().gold) - mask[:gold] = 1 # NOTE that action.Price starts from Discrete_1 - return mask - - def _make_sell_mask(self): - sell_mask = np.zeros(self.config.INVENTORY_N_OBS + self._noop_action, dtype=np.int8) - if self.config.PROVIDE_NOOP_ACTION_TARGET: - sell_mask[-1] = 1 - + def _make_sell_mask(self, sell_mask): # empty inventory -- nothing to sell if not (self.config.EXCHANGE_SYSTEM_ENABLED and self.inventory.len > 0) \ - or self.dummy_obs or self.agent_in_combat: - return sell_mask + or self.return_dummy_obs or self.agent_in_combat: + return not_equipped = self.inventory.values[:,ItemState.State.attr_name_to_col["equipped"]] == 0 not_listed = self.inventory.values[:,ItemState.State.attr_name_to_col["listed_price"]] == 0 + sell_mask["InventoryItem"][:self.inventory.len] = not_equipped & not_listed + + def _make_give_gold_mask(self, give_mask): + if not self.config.EXCHANGE_SYSTEM_ENABLED or self.return_dummy_obs or self.agent_in_combat\ + or int(self.agent.gold) <= 2: # 
NOTE: this is a hack to reduce mask computation + return + + # GiveGold Target + # NOTE: Allow give to entities within visual range. So no distance check is needed + # entities_pos = self.entities.values[:,[EntityState.State.attr_name_to_col["row"], + # EntityState.State.attr_name_to_col["col"]]] + # same_tile = utils.linf(entities_pos, (self.agent.row, self.agent.col)) == 0 + not_me = self.entities.ids != self.agent_id + player = (self.entities.values[:,EntityState.State.attr_name_to_col["npc_type"]] == 0) + give_mask["Target"][:self.entities.len] = player & not_me - sell_mask[:self.inventory.len] = not_equipped & not_listed - return sell_mask - - def _make_buy_mask(self): - buy_mask = np.zeros(self.config.MARKET_N_OBS + self._noop_action, dtype=np.int8) - if self.config.PROVIDE_NOOP_ACTION_TARGET: - buy_mask[-1] = 1 + # GiveGold Amount (Price) + gold = int(self.agent.gold) + give_mask["Price"][gold:] = 0 # NOTE: Price masks starts with all ones - if not self.config.EXCHANGE_SYSTEM_ENABLED or self.dummy_obs or self.agent_in_combat \ + def _make_buy_mask(self, buy_mask): + if not self.config.EXCHANGE_SYSTEM_ENABLED or self.return_dummy_obs or self.agent_in_combat \ or self.market.len == 0: - return buy_mask + return - agent = self.agent() market_items = self.market.values not_mine = market_items[:,ItemState.State.attr_name_to_col["owner_id"]] != self.agent_id - # if the inventory is full, one can only buy existing ammo stack # otherwise, one can buy anything owned by other, having enough money if self.inventory.len >= self.config.ITEM_INVENTORY_CAPACITY: exist_ammo_listings = self._existing_ammo_listings() if not np.any(exist_ammo_listings): - return buy_mask + return not_mine &= exist_ammo_listings - enough_gold = market_items[:,ItemState.State.attr_name_to_col["listed_price"]] <= agent.gold - buy_mask[:self.market.len] = not_mine & enough_gold - return buy_mask + enough_gold = market_items[:,ItemState.State.attr_name_to_col["listed_price"]] \ + <= self.agent.gold 
+ buy_mask["MarketItem"][:self.market.len] = not_mine & enough_gold def _existing_ammo_listings(self): sig_col = (ItemState.State.attr_name_to_col["type_id"], diff --git a/nmmo/core/realm.py b/nmmo/core/realm.py index eb8723ff1..a1291af1b 100644 --- a/nmmo/core/realm.py +++ b/nmmo/core/realm.py @@ -1,19 +1,18 @@ from __future__ import annotations - -import logging from collections import defaultdict from typing import Dict +import numpy as np import nmmo -from nmmo.core.log_helper import LogHelper from nmmo.core.map import Map from nmmo.core.tile import TileState -from nmmo.core.action import Action, Buy +from nmmo.core.action import Action, Buy, Comm from nmmo.entity.entity import EntityState -from nmmo.entity.entity_manager import NPCManager, PlayerManager +from nmmo.entity.entity_manager import PlayerManager +from nmmo.entity.npc_manager import NPCManager from nmmo.datastore.numpy_datastore import NumpyDatastore from nmmo.systems.exchange import Exchange -from nmmo.systems.item import Item, ItemState +from nmmo.systems.item import ItemState from nmmo.lib.event_log import EventLogger, EventState from nmmo.render.replay_helper import ReplayHelper @@ -37,22 +36,17 @@ def __init__(self, config, np_random): Action.hook(config) - # Generate maps if they do not exist - # NOTE: Map generation interferes with determinism. 
- # To ensure determinism, provide seed to env.reset() - config.MAP_GENERATOR(config).generate_all_maps(self._np_random) - self.datastore = NumpyDatastore() for s in [TileState, EntityState, ItemState, EventState]: self.datastore.register_object_type(s._name, s.State.num_attributes) self.tick = None # to use as a "reset" checker - self.exchange = None # Load the world file self.map = Map(config, self, self._np_random) + self.fog_map = np.zeros((config.MAP_SIZE, config.MAP_SIZE), dtype=np.float16) - self.log_helper = LogHelper.create(self) + # Event logger self.event_log = EventLogger(self) # Entity handlers @@ -62,32 +56,37 @@ def __init__(self, config, np_random): # Global item registry self.items = {} + # Global item exchange + self.exchange = Exchange(self) + # Replay helper self._replay_helper = None # Initialize actions nmmo.Action.init(config) - def reset(self, np_random, map_id: int = None): - """Reset the environment and load the specified map - - Args: - idx: Map index to load - """ + def reset(self, np_random, map_dict, + custom_spawn=False, + seize_targets=None, + delete_dead_player=True): + """Reset the sub-systems and load the provided map""" self._np_random = np_random - self.log_helper.reset() - self.event_log.reset() - - map_id = map_id or self._np_random.integers(self.config.MAP_N) + 1 - self.map.reset(map_id, self._np_random) self.tick = 0 + self.update_fog_map(reset=True) + #self.event_log.reset() + self.items.clear() + self.exchange.reset() + if self._replay_helper is not None: + self._replay_helper.reset() + + # Load the map np array into the map, tiles and reset + self.map.reset(map_dict, self._np_random, seize_targets) # EntityState and ItemState tables must be empty after players/npcs.reset() - self.players.reset(self._np_random) + self.players.reset(self._np_random, delete_dead_player) self.npcs.reset(self._np_random) - assert EntityState.State.table(self.datastore).is_empty(), \ - "EntityState table is not empty" - # TODO: fix the item 
leak, then uncomment the below -- print out the table? + # assert EntityState.State.table(self.datastore).is_empty(), \ + # "EntityState table is not empty" # assert ItemState.State.table(self.datastore).is_empty(), \ # "ItemState table is not empty" @@ -95,18 +94,12 @@ def reset(self, np_random, map_id: int = None): EntityState.State.table(self.datastore).reset() ItemState.State.table(self.datastore).reset() - self.players.spawn() - self.npcs.spawn() + self.event_log.reset() # reset this last for debugging - # Global item exchange - self.exchange = Exchange(self) - - # Global item registry - Item.INSTANCE_ID = 0 - self.items = {} - - if self._replay_helper is not None: - self._replay_helper.reset() + if custom_spawn is False: + # NOTE: custom spawning npcs and agents can be done outside, after reset() + self.npcs.default_spawn() + self.players.spawn() def packet(self): """Client packet""" @@ -122,9 +115,13 @@ def packet(self): @property def num_players(self): - """Number of player agents""" + """Number of alive player agents""" return len(self.players.entities) + @property + def seize_status(self): + return self.map.seize_status + def entity(self, ent_id): e = self.entity_or_none(ent_id) assert e is not None, f"Entity {ent_id} does not exist" @@ -150,7 +147,7 @@ def step(self, actions): dead: List of dead agents """ # Prioritize actions - npc_actions = self.npcs.actions(self) + npc_actions = self.npcs.actions() merged = defaultdict(list) prioritized(actions, merged) prioritized(npc_actions, merged) @@ -179,36 +176,52 @@ def step(self, actions): # ent_id, (atn, args) = merged[priority][0] for ent_id, (atn, args) in merged[priority]: ent = self.entity(ent_id) - if ent.alive: + if (ent.alive and not ent.status.frozen) or \ + (ent.is_recon and priority == Comm.priority): # recons can always comm atn.call(self, ent, *args) - dead = self.players.cull() - self.npcs.cull() + dead_players = self.players.cull() + dead_npcs = self.npcs.cull() - # Update map + self.tick += 1 
+ + # These require the updated tick self.map.step() - self.exchange.step(self.tick) - self.log_helper.update(dead) + self.update_fog_map() + self.exchange.step() self.event_log.update() if self._replay_helper is not None: self._replay_helper.update() - self.tick += 1 - - return dead - - def log_milestone(self, category: str, value: float, message: str = None, tags: Dict = None): - self.log_helper.log_milestone(category, value) - self.log_helper.log_event(category, value) - - if self.config.LOG_VERBOSE: - # TODO: more general handling of tags, if necessary - if tags and 'player_id' in tags: - logging.info("Milestone (Player %d): %s %s %s", tags['player_id'], category, value, message) - else: - logging.info("Milestone: %s %s %s", category, value, message) + return dead_players, dead_npcs + + def update_fog_map(self, reset=False): + fog_start_tick = self.config.DEATH_FOG_ONSET + if fog_start_tick is None: + return + + fog_speed = self.config.DEATH_FOG_SPEED + center = self.config.MAP_SIZE // 2 + safe = self.config.DEATH_FOG_FINAL_SIZE + + if reset: + dist = -self.config.MAP_BORDER + for i in range(center): + l, r = i, self.config.MAP_SIZE - i + # positive value represents the poison strength + # negative value represents the shortest distance to poison area + self.fog_map[l:r, l:r] = -dist + dist += 1 + # mark the safe area + self.fog_map[center-safe:center+safe+1, center-safe:center+safe+1] = -self.config.MAP_SIZE + return + + # consider the map border so that the fog can hit the border at fog_start_tick + if self.tick >= fog_start_tick: + self.fog_map += fog_speed + # mark the safe area + self.fog_map[center-safe:center+safe+1, center-safe:center+safe+1] = -self.config.MAP_SIZE def record_replay(self, replay_helper: ReplayHelper) -> ReplayHelper: self._replay_helper = replay_helper self._replay_helper.set_realm(self) - return replay_helper diff --git a/nmmo/core/terrain.py b/nmmo/core/terrain.py index 4aa983d8f..76d7accdc 100644 --- a/nmmo/core/terrain.py +++ 
b/nmmo/core/terrain.py @@ -6,7 +6,7 @@ from imageio.v2 import imread, imsave from scipy import stats -from nmmo import material +from nmmo.lib import material, seeding, utils def sharp(noise): @@ -24,9 +24,9 @@ def render(mats, lookup, path): @staticmethod def fractal(terrain, path): - '''Render raw noise fractal to png''' - frac = (256*terrain).astype(np.uint8) - imsave(path, frac) + '''Save fractal to both png and npy''' + imsave(os.path.join(path, 'fractal.png'), (256*terrain).astype(np.uint8)) + np.save(os.path.join(path, 'fractal.npy'), terrain.astype(np.float16)) @staticmethod def as_numpy(mats, path): @@ -73,10 +73,7 @@ def generate_terrain(config, map_id, interpolaters): val[:, :, idx] = vec_noise.snoise2(seed*size + freq*X, idx*size + freq*Y) #Compute L1 distance - x = np.abs(np.arange(size) - size//2) - X, Y = np.meshgrid(x, x) - data = np.stack((X, Y), -1) - l1 = np.max(abs(data), -1) + l1 = utils.l1_map(size) #Interpolation Weights rrange = np.linspace(-1, 1, 2*octaves-1) @@ -118,47 +115,71 @@ def generate_terrain(config, map_id, interpolaters): val = std * val / np.std(val) val = 0.5 + np.clip(val, -1, 1)/2 - #Threshold to materials - matl = np.zeros((size, size), dtype=object) - for y in range(size): - for x in range(size): - v = val[y, x] - if v <= config.TERRAIN_WATER: - mat = Terrain.WATER - elif v <= config.TERRAIN_GRASS: - mat = Terrain.GRASS - elif v <= config.TERRAIN_FOILAGE: - mat = Terrain.FOILAGE - else: - mat = Terrain.STONE - matl[y, x] = mat - - # Void and grass border - matl[l1 > size/2 - border] = Terrain.VOID - matl[l1 == size//2 - border] = Terrain.GRASS - - edge = l1 == size//2 - border - 1 - stone = (matl == Terrain.STONE) | (matl == Terrain.WATER) - matl[edge & stone] = Terrain.FOILAGE + # Transform fractal noise to terrain + matl = fractal_to_material(config, val) + matl = process_map_border(config, matl, l1) return val, matl, interpolaters -def place_fish(tiles, np_random): - placed = False - allow = {Terrain.GRASS} - +def 
fractal_to_material(config, fractal, all_grass=False): + size = config.MAP_SIZE + matl_map = np.zeros((size, size), dtype=np.int16) + for y in range(size): + for x in range(size): + if all_grass: + matl_map[y, x] = Terrain.GRASS + continue + + v = fractal[y, x] + if v <= config.TERRAIN_WATER: + mat = Terrain.WATER + elif v <= config.TERRAIN_GRASS: + mat = Terrain.GRASS + elif v <= config.TERRAIN_FOILAGE: + mat = Terrain.FOILAGE + else: + mat = Terrain.STONE + matl_map[y, x] = mat + return matl_map + +def process_map_border(config, matl_map, l1=None): + size = config.MAP_SIZE + border = config.MAP_BORDER + if l1 is None: + l1 = utils.l1_map(size) + + # Void and grass border + matl_map[l1 > size/2 - border] = material.Void.index + matl_map[l1 == size//2 - border] = material.Grass.index + edge = l1 == size//2 - border - 1 + stone = (matl_map == material.Stone.index) | (matl_map == material.Water.index) + matl_map[edge & stone] = material.Foilage.index + return matl_map + +def place_fish(tiles, mmin, mmax, np_random, num_fish): + placed = 0 + + # if USE_CYTHON: + # water_loc = chp.tile_where(tiles, Terrain.WATER, mmin, mmax) + # else: water_loc = np.where(tiles == Terrain.WATER) - water_loc = list(zip(water_loc[0], water_loc[1])) + water_loc = [(r, c) for r, c in zip(water_loc[0], water_loc[1]) + if mmin < r < mmax and mmin < c < mmax] + if len(water_loc) < num_fish: + raise RuntimeError('Not enough water tiles to place fish.') + np_random.shuffle(water_loc) + allow = {Terrain.GRASS} # Fish should be placed adjacent to grass for r, c in water_loc: if tiles[r-1, c] in allow or tiles[r+1, c] in allow or \ tiles[r, c-1] in allow or tiles[r, c+1] in allow: tiles[r, c] = Terrain.FISH - placed = True + placed += 1 + if placed == num_fish: break - if not placed: + if placed < num_fish: raise RuntimeError('Could not find the water tile to place fish.') def uniform(config, tiles, mat, mmin, mmax, np_random): @@ -206,7 +227,34 @@ def spawn_profession_resources(config, tiles, 
np_random=None): for _ in range(config.PROGRESSION_SPAWN_UNIFORMS): uniform(config, tiles, Terrain.HERB, mmin, mmax, np_random) - place_fish(tiles, np_random) + place_fish(tiles, mmin, mmax, np_random, + config.PROGRESSION_SPAWN_UNIFORMS) + +def try_add_tile(map_tiles, row, col, tile_to_add): + if map_tiles[row, col] == Terrain.GRASS: + map_tiles[row, col] = tile_to_add + return True + return False + +def scatter_extra_resources(config, tiles, np_random=None, + density_factor=6): + if np_random is None: + np_random = np.random + center = config.MAP_CENTER + mmin = config.MAP_BORDER + 1 + mmax = config.MAP_SIZE - config.MAP_BORDER - 1 + + water_to_add, water_added = (center//density_factor)**2, 0 + food_to_add, food_added = (center//density_factor)**2, 0 + while True: + if water_added >= water_to_add and food_added >= food_to_add: + break + r, c = tuple(np_random.integers(mmin, mmax, size=(2,))) + if water_added < water_to_add: + water_added += 1 if try_add_tile(tiles, r, c, Terrain.WATER) else 0 + if food_added < food_to_add: + food_added += 1 if try_add_tile(tiles, r, c, Terrain.FOILAGE) else 0 + class MapGenerator: '''Procedural map generation''' @@ -227,12 +275,13 @@ def load_textures(self): setattr(Terrain, key.upper(), mat.index) self.textures = lookup - def generate_all_maps(self, np_random=None): - '''Generates NMAPS maps according to generate_map + def generate_all_maps(self, seed=None): + '''Generates MAP_N maps according to generate_map Provides additional utilities for saving to .npy and rendering png previews''' config = self.config + np_random, _ = seeding.np_random(seed) #Only generate if maps are not cached path_maps = os.path.join(config.PATH_CWD, config.PATH_MAPS) @@ -258,10 +307,10 @@ def generate_all_maps(self, np_random=None): #Save/render Save.as_numpy(tiles, path) + Save.fractal(terrain, path) if config.MAP_GENERATE_PREVIEWS: b = config.MAP_BORDER tiles = [e[b:-b+1] for e in tiles][b:-b+1] - Save.fractal(terrain, path+'/fractal.png') 
Save.render(tiles, self.textures, path+'/map.png') def generate_map(self, idx, np_random=None): diff --git a/nmmo/core/tile.py b/nmmo/core/tile.py index 9d9cb33ca..69852154d 100644 --- a/nmmo/core/tile.py +++ b/nmmo/core/tile.py @@ -1,7 +1,7 @@ from types import SimpleNamespace from nmmo.datastore.serialized import SerializedState -from nmmo.lib import material +from nmmo.lib import material, event_code # pylint: disable=no-member,protected-access TileState = SerializedState.subclass( @@ -40,9 +40,16 @@ def __init__(self, realm, r, c, np_random): self.state = None self.material = None self.depleted = False - self.tex = None - self.entities = {} + self.seize_history = [] + + @property + def occupied(self): + # NOTE: ONLY players consider whether the tile is occupied or not + # NPCs can move into occupied tiles. + # Surprisingly, this has huge effect on training, so be careful. + # Tried this -- "sum(1 for ent_id in self.entities if ent_id > 0) > 0" + return len(self.entities) > 0 @property def repr(self): @@ -64,16 +71,26 @@ def impassible(self): def void(self): return self.material == material.Void + @property + def tex(self): + return self.state.tex + def reset(self, mat, config, np_random): self._np_random = np_random # reset the RNG - self.state = mat(config) + self.entities = {} + self.seize_history.clear() self.material = mat(config) + self._respawn() + + def set_depleted(self): + self.depleted = True + self.state = self.material.deplete self.material_id.update(self.state.index) + def _respawn(self): self.depleted = False - self.tex = self.material.tex - - self.entities = {} + self.state = self.material + self.material_id.update(self.state.index) def add_entity(self, ent): assert ent.ent_id not in self.entities @@ -81,23 +98,31 @@ def add_entity(self, ent): def remove_entity(self, ent_id): assert ent_id in self.entities - del self.entities[ent_id] + self.entities.pop(ent_id) def step(self): - if not self.depleted or self._np_random.random() > 
self.material.respawn: + if not self.depleted or self.material.respawn == 0: return - - self.depleted = False - self.state = self.material - self.material_id.update(self.state.index) + if self._np_random.random() < self.material.respawn: + self._respawn() def harvest(self, deplete): assert not self.depleted, f'{self.state} is depleted' assert self.state in material.Harvestable, f'{self.state} not harvestable' - if deplete: - self.depleted = True - self.state = self.material.deplete(self.config) - self.material_id.update(self.state.index) - + self.set_depleted() return self.material.harvest() + + def update_seize(self): + if len(self.entities) != 1: # only one entity can seize a tile + return + ent_id, entity = list(self.entities.items())[0] + if ent_id < 0: # not counting npcs + return + team_members = entity.my_task.assignee # NOTE: only one task per player + if self.seize_history and self.seize_history[-1][0] in team_members: + # no need to add another entry if the last entry is from the same team (incl. 
self) + return + self.seize_history.append((ent_id, self.realm.tick)) + if self.realm.event_log: + self.realm.event_log.record(event_code.EventCode.SEIZE_TILE, entity, tile=self.pos) diff --git a/nmmo/datastore/numpy_datastore.py b/nmmo/datastore/numpy_datastore.py index 2bced2d46..52f6ab55e 100644 --- a/nmmo/datastore/numpy_datastore.py +++ b/nmmo/datastore/numpy_datastore.py @@ -32,8 +32,11 @@ def where_eq(self, col: int, value): def where_neq(self, col: int, value): return self._data[self._data[:,col] != value] + def where_gt(self, col: int, value): + return self._data[self._data[:,col] > value] + def where_in(self, col: int, values: List): - return self._data[np.isin(self._data[:,col], values)] + return self._data[np.in1d(self._data[:,col], values)] def window(self, row_idx: int, col_idx: int, row: int, col: int, radius: int): return self._data[( @@ -60,7 +63,7 @@ def _expand(self, max_rows: int): self._data = data def is_empty(self) -> bool: - all_data_zero = np.sum(self._data)==0 + all_data_zero = np.all(self._data == 0) # 0th row is reserved as padding, so # of free ids is _max_rows-1 all_id_free = len(self._id_allocator.free) == self._max_rows-1 return all_data_zero and all_id_free diff --git a/nmmo/datastore/serialized.py b/nmmo/datastore/serialized.py index a6201ba6a..6c91e70bf 100644 --- a/nmmo/datastore/serialized.py +++ b/nmmo/datastore/serialized.py @@ -1,3 +1,4 @@ +# pylint: disable=bare-except,c-extension-no-member from __future__ import annotations from ast import Tuple @@ -5,6 +6,11 @@ from types import SimpleNamespace from typing import Dict, List from nmmo.datastore.datastore import Datastore, DatastoreRecord +try: + import nmmo.lib.cython_helper as chp + USE_CYTHON = True +except: + USE_CYTHON = False """ This code defines classes for serializing and deserializing data @@ -106,7 +112,7 @@ def __init__(self, datastore: Datastore, SerializedAttribute(attr, self.datastore_record, col, *limits.get(attr, (-math.inf, math.inf)))) except Exception as 
exc: - raise RuntimeError('Failed to set attribute' + attr) from exc + raise RuntimeError('Failed to set attribute "' + attr + '"') from exc @classmethod def parse_array(cls, data) -> SimpleNamespace: @@ -115,6 +121,10 @@ def parse_array(cls, data) -> SimpleNamespace: # data array. assert len(data) == cls.State.num_attributes, \ f"Expected {cls.State.num_attributes} attributes, got {len(data)}" + + if USE_CYTHON: + return chp.parse_array(data, cls.State.attr_name_to_col) + return SimpleNamespace(**{ attr: data[col] for attr, col in cls.State.attr_name_to_col.items() }) diff --git a/nmmo/entity/entity.py b/nmmo/entity/entity.py index 41620f442..84ede6eba 100644 --- a/nmmo/entity/entity.py +++ b/nmmo/entity/entity.py @@ -1,13 +1,10 @@ - import math from types import SimpleNamespace - import numpy as np -from nmmo.core.config import Config from nmmo.datastore.serialized import SerializedState from nmmo.systems import inventory -from nmmo.lib.log import EventCode +from nmmo.lib.event_code import EventCode # pylint: disable=no-member EntityState = SerializedState.subclass( @@ -56,13 +53,13 @@ EntityState.Limits = lambda config: { **{ "id": (-math.inf, math.inf), - "npc_type": (0, 4), + "npc_type": (-1, 3), # -1 for immortal "row": (0, config.MAP_SIZE-1), "col": (0, config.MAP_SIZE-1), "damage": (0, math.inf), "time_alive": (0, math.inf), - "freeze": (0, 3), - "item_level": (0, 5*config.NPC_LEVEL_MAX), + "freeze": (0, math.inf), + "item_level": (0, math.inf), "attacker_id": (-np.inf, math.inf), "latest_combat_tick": (0, math.inf), "health": (0, config.PLAYER_BASE_HEALTH), @@ -95,6 +92,10 @@ } if config.PROGRESSION_SYSTEM_ENABLED else {}), } +EntityState.State.comm_attr_map = {name: EntityState.State.attr_name_to_col[name] + for name in ["id", "row", "col", "message"]} +CommAttr = np.array(list(EntityState.State.comm_attr_map.values()), dtype=np.int64) + EntityState.Query = SimpleNamespace( # Whole table table=lambda ds: ds.table("Entity").where_neq( @@ -113,8 +114,13 @@ 
EntityState.State.attr_name_to_col["row"], EntityState.State.attr_name_to_col["col"], r, c, radius), + + # Communication obs + comm_obs=lambda ds: ds.table("Entity").where_gt( + EntityState.State.attr_name_to_col["id"], 0)[:, CommAttr] ) + class Resources: def __init__(self, ent, config): self.config = config @@ -129,8 +135,8 @@ def __init__(self, ent, config): self.water.update(config.RESOURCE_BASE) self.food.update(config.RESOURCE_BASE) - def update(self): - if not self.config.RESOURCE_SYSTEM_ENABLED: + def update(self, immortal=False): + if not self.config.RESOURCE_SYSTEM_ENABLED or immortal: return regen = self.config.RESOURCE_HEALTH_RESTORE_FRACTION @@ -162,16 +168,19 @@ def update(self): def packet(self): data = {} data['health'] = { 'val': self.health.val, 'max': self.config.PLAYER_BASE_HEALTH } - data['food'] = { 'val': self.food.val, 'max': self.config.RESOURCE_BASE } - data['water'] = { 'val': self.water.val, 'max': self.config.RESOURCE_BASE } + data['food'] = data['water'] = { 'val': 0, 'max': 0 } + if self.config.RESOURCE_SYSTEM_ENABLED: + data['food'] = { 'val': self.food.val, 'max': self.config.RESOURCE_BASE } + data['water'] = { 'val': self.water.val, 'max': self.config.RESOURCE_BASE } return data + class Status: def __init__(self, ent): self.freeze = ent.freeze def update(self): - if self.freeze.val > 0: + if self.frozen: self.freeze.decrement(1) def packet(self): @@ -179,6 +188,10 @@ def packet(self): data['freeze'] = self.freeze.val return data + @property + def frozen(self): + return self.freeze.val > 0 + # NOTE: History.packet() is actively used in visulazing attacks class History: @@ -214,7 +227,6 @@ def packet(self): data['timeAlive'] = self.time_alive.val data['damage_inflicted'] = self.damage_inflicted data['damage_received'] = self.damage_received - if self.attack is not None: data['attack'] = self.attack @@ -245,20 +257,18 @@ def __init__(self, realm, pos, entity_id, name): super().__init__(realm.datastore, 
EntityState.Limits(realm.config)) self.realm = realm - self.config: Config = realm.config + self.config = realm.config # TODO: do not access realm._np_random directly # related to the whole NPC, scripted logic # pylint: disable=protected-access self._np_random = realm._np_random - self.policy = name - self.entity_id = entity_id self.repr = None - self.name = name + str(entity_id) - self.row.update(pos[0]) - self.col.update(pos[1]) + self._pos = None + self.set_pos(*pos) + self.ent_id = entity_id self.id.update(entity_id) self.vision = self.config.PLAYER_VISION_RADIUS @@ -267,6 +277,8 @@ def __init__(self, realm, pos, entity_id, name): self.target = None self.closest = None self.spawn_pos = pos + self._immortal = False # used for testing/player recon + self._recon = False # Submodules self.status = Status(self) @@ -274,9 +286,9 @@ def __init__(self, realm, pos, entity_id, name): self.resources = Resources(self, self.config) self.inventory = inventory.Inventory(realm, self) - @property - def ent_id(self): - return self.id.val + # @property + # def ent_id(self): + # return self.id.val def packet(self): data = {} @@ -285,17 +297,17 @@ def packet(self): data['inventory'] = self.inventory.packet() data['alive'] = self.alive data['base'] = { - 'r': self.row.val, - 'c': self.col.val, + 'r': self.pos[0], + 'c': self.pos[1], 'name': self.name, 'level': self.attack_level, - 'item_level': self.item_level.val, - } - + 'item_level': self.item_level.val,} return data def update(self, realm, actions): '''Update occurs after actions, e.g. 
does not include history''' + self._pos = None + if self.history.damage == 0: self.attacker = None self.attacker_id.update(0) @@ -341,14 +353,22 @@ def apply_damage(self, dmg, style): @property def pos(self): - return self.row.val, self.col.val + if self._pos is None: + self._pos = (self.row.val, self.col.val) + return self._pos + + def set_pos(self, row, col): + self._pos = (row, col) + self.row.update(row) + self.col.update(col) @property def alive(self): - if self.resources.health.empty: - return False + return self.resources.health.val > 0 - return True + @property + def immortal(self): + return self._immortal @property def is_player(self) -> bool: @@ -358,12 +378,15 @@ def is_player(self) -> bool: def is_npc(self) -> bool: return False + @property + def is_recon(self): + return self._recon + @property def attack_level(self) -> int: melee = self.skills.melee.level.val ranged = self.skills.range.level.val mage = self.skills.mage.level.val - return int(max(melee, ranged, mage)) @property @@ -371,5 +394,4 @@ def in_combat(self) -> bool: # NOTE: the initial latest_combat_tick is 0, and valid values are greater than 0 if not self.config.COMBAT_SYSTEM_ENABLED or self.latest_combat_tick.val == 0: return False - return (self.realm.tick - self.latest_combat_tick.val) < self.config.COMBAT_STATUS_DURATION diff --git a/nmmo/entity/entity_manager.py b/nmmo/entity/entity_manager.py index 956363232..319c8c8a6 100644 --- a/nmmo/entity/entity_manager.py +++ b/nmmo/entity/entity_manager.py @@ -1,11 +1,9 @@ from collections.abc import Mapping from typing import Dict -from nmmo.entity.entity import Entity -from nmmo.entity.npc import NPC +from nmmo.entity.entity import Entity, EntityState from nmmo.entity.player import Player -from nmmo.lib import spawn -from nmmo.systems import combat +from nmmo.lib import spawn, event_code class EntityGroup(Mapping): @@ -14,9 +12,11 @@ def __init__(self, realm, np_random): self.realm = realm self.config = realm.config self._np_random = np_random 
+ self._entity_table = EntityState.Query.table(self.datastore) self.entities: Dict[int, Entity] = {} self.dead_this_tick: Dict[int, Entity] = {} + self._delete_dead_entity = True # is default def __len__(self): return len(self.entities) @@ -41,8 +41,9 @@ def corporeal(self): def packet(self): return {k: v.packet() for k, v in self.corporeal.items()} - def reset(self, np_random): + def reset(self, np_random, delete_dead_entity=True): self._np_random = np_random # reset the RNG + self._delete_dead_entity = delete_dead_entity for ent in self.entities.values(): # destroy the items if self.config.ITEM_SYSTEM_ENABLED: @@ -50,113 +51,54 @@ def reset(self, np_random): item.destroy() ent.datastore_record.delete() - self.entities = {} - self.dead_this_tick = {} + self.entities.clear() + self.dead_this_tick.clear() - def spawn(self, entity): + def spawn_entity(self, entity): pos, ent_id = entity.pos, entity.id.val self.realm.map.tiles[pos].add_entity(entity) self.entities[ent_id] = entity - def cull(self): - self.dead_this_tick = {} - for ent_id in list(self.entities): - player = self.entities[ent_id] - if not player.alive: - r, c = player.pos - ent_id = player.ent_id - self.dead_this_tick[ent_id] = player - - self.realm.map.tiles[r, c].remove_entity(ent_id) - - # destroy the remaining items (of starved/dehydrated players) - # of the agents who don't go through receive_damage() - if self.config.ITEM_SYSTEM_ENABLED: - for item in list(player.inventory.items): - item.destroy() - - self.entities[ent_id].datastore_record.delete() - del self.entities[ent_id] + def cull_entity(self, entity): + pos, ent_id = entity.pos, entity.id.val + self.realm.map.tiles[pos].remove_entity(ent_id) + self.entities.pop(ent_id) + # destroy the remaining items (of starved/dehydrated players) + # of the agents who don't go through receive_damage() + if self.config.ITEM_SYSTEM_ENABLED: + for item in list(entity.inventory.items): + item.destroy() + if ent_id > 0: + 
self.realm.event_log.record(event_code.EventCode.AGENT_CULLED, entity) + def cull(self): + self.dead_this_tick.clear() + for ent in [ent for ent in self.entities.values() if not ent.alive]: + self.dead_this_tick[ent.ent_id] = ent + self.cull_entity(ent) + if self._delete_dead_entity: + ent.datastore_record.delete() return self.dead_this_tick def update(self, actions): + # # batch updates + # # time_alive, damage are from entity.py, History.update() + # ent_idx = self._entity_table[:, EntityState.State.attr_name_to_col["id"]] != 0 + # self._entity_table[ent_idx, EntityState.State.attr_name_to_col["time_alive"]] += 1 + # self._entity_table[ent_idx, EntityState.State.attr_name_to_col["damage"]] = 0 + # # freeze from entity.py, Status.update() + # freeze_idx = self._entity_table[:, EntityState.State.attr_name_to_col["freeze"]] > 0 + # self._entity_table[freeze_idx, EntityState.State.attr_name_to_col["freeze"]] -= 1 + for entity in self.entities.values(): entity.update(self.realm, actions) - -class NPCManager(EntityGroup): - def __init__(self, realm, np_random): - super().__init__(realm, np_random) - self.next_id = -1 - self.spawn_dangers = [] - - def reset(self, np_random): - super().reset(np_random) - self.next_id = -1 - self.spawn_dangers = [] - - def spawn(self): - config = self.config - - if not config.NPC_SYSTEM_ENABLED: - return - - for _ in range(config.NPC_SPAWN_ATTEMPTS): - if len(self.entities) >= config.NPC_N: - break - - if self.spawn_dangers: - danger = self.spawn_dangers[-1] - r, c = combat.spawn(config, danger, self._np_random) - else: - center = config.MAP_CENTER - border = self.config.MAP_BORDER - # pylint: disable=unbalanced-tuple-unpacking - r, c = self._np_random.integers(border, center+border, 2).tolist() - - npc = NPC.spawn(self.realm, (r, c), self.next_id, self._np_random) - if npc: - super().spawn(npc) - self.next_id -= 1 - - if self.spawn_dangers: - self.spawn_dangers.pop() - - def cull(self): - for entity in super().cull().values(): - 
self.spawn_dangers.append(entity.spawn_danger) - - # refill npcs to target config.NPC_N, within config.NPC_SPAWN_ATTEMPTS - self.spawn() - - def actions(self, realm): - actions = {} - for idx, entity in self.entities.items(): - actions[idx] = entity.decide(realm) - return actions - class PlayerManager(EntityGroup): - def __init__(self, realm, np_random): - super().__init__(realm, np_random) - self.loader_class = self.realm.config.PLAYER_LOADER - self._agent_loader: spawn.SequentialLoader = None - self.spawned = None - - def reset(self, np_random): - super().reset(np_random) - self._agent_loader = self.loader_class(self.config, self._np_random) - self.spawned = set() - - def spawn_individual(self, r, c, idx, resilient=False): - agent = next(self._agent_loader) - agent = agent(self.config, idx) - player = Player(self.realm, (r, c), agent, resilient) - super().spawn(player) - self.spawned.add(idx) - - def spawn(self): - # Check and assign the constant heal flag + def spawn(self, agent_loader: spawn.SequentialLoader = None): + if agent_loader is None: + agent_loader = self.config.PLAYER_LOADER(self.config, self._np_random) + + # Check and assign the resilient flag resilient_flag = [False] * self.config.PLAYER_N if self.config.RESOURCE_SYSTEM_ENABLED: num_resilient = round(self.config.RESOURCE_RESILIENT_POPULATION * self.config.PLAYER_N) @@ -165,15 +107,14 @@ def spawn(self): self._np_random.shuffle(resilient_flag) # Spawn the players - idx = 0 - while idx < self.config.PLAYER_N: - idx += 1 - r, c = self._agent_loader.get_spawn_position(idx) - - if idx in self.entities: - continue + for agent_id in self.config.POSSIBLE_AGENTS: + r, c = agent_loader.get_spawn_position(agent_id) - if idx in self.spawned: + if agent_id in self.entities: continue - self.spawn_individual(r, c, idx, resilient_flag[idx-1]) + # NOTE: put spawn_individual() here. Is a separate function necessary? 
+ agent = next(agent_loader) # get agent cls from config.PLAYERS + agent = agent(self.config, agent_id) + player = Player(self.realm, (r, c), agent, resilient_flag[agent_id-1]) + super().spawn_entity(player) diff --git a/nmmo/entity/npc.py b/nmmo/entity/npc.py index 211eb4076..7afb30e56 100644 --- a/nmmo/entity/npc.py +++ b/nmmo/entity/npc.py @@ -1,11 +1,65 @@ +import numpy as np from nmmo.entity import entity from nmmo.core import action as Action from nmmo.systems import combat, droptable -from nmmo.systems.ai import policy from nmmo.systems import item as Item from nmmo.systems import skill from nmmo.systems.inventory import EquipmentSlot -from nmmo.lib.log import EventCode +from nmmo.lib.event_code import EventCode +from nmmo.lib import utils, astar + + +DIRECTIONS = [ # row delta, col delta, action + (-1, 0, Action.North), + (1, 0, Action.South), + (0, -1, Action.West), + (0, 1, Action.East)] * 2 +DELTA_TO_DIR = {(r, c): atn for r, c, atn in DIRECTIONS} +DELTA_TO_DIR[(0, 0)] = None + +def get_habitable_dir(ent): + r, c = ent.pos + is_habitable = ent.realm.map.habitable_tiles + start = ent._np_random.get_direction() # pylint: disable=protected-access + for i in range(4): + delta_r, delta_c, direction = DIRECTIONS[start + i] + if is_habitable[r + delta_r, c + delta_c]: + return direction + return Action.North + +def meander_toward(ent, goal, dist_crit=10, toward_weight=3): + r, c = ent.pos + delta_r, delta_c = goal[0] - r, goal[1] - c + abs_dr, abs_dc = abs(delta_r), abs(delta_c) + dist_l1 = abs_dr + abs_dc + # If close (less than dist_crit), use expensive aStar + if dist_l1 <= dist_crit: + delta = astar.aStar(ent.realm.map, ent.pos, goal) + return move_action(DELTA_TO_DIR[delta] if delta in DELTA_TO_DIR else None) + + # Otherwise, use a weighted random walk + cand_dirs = [] + weights = [] + for i in range(4): + r_offset, c_offset, direction = DIRECTIONS[i] + if ent.realm.map.habitable_tiles[r + r_offset, c + c_offset]: + cand_dirs.append(direction) + 
weights.append(1) + if r_offset * delta_r > 0: + weights[-1] += toward_weight * abs_dr/dist_l1 + if c_offset * delta_c > 0: + weights[-1] += toward_weight * abs_dc/dist_l1 + if len(cand_dirs) == 0: + return move_action(Action.North) + if len(cand_dirs) == 1: + return move_action(cand_dirs[0]) + weights = np.array(weights) + # pylint: disable=protected-access + return move_action(ent._np_random.choice(cand_dirs, p=weights/np.sum(weights))) + +def move_action(direction): + return {Action.Move: {Action.Direction: direction}} if direction else {} + class Equipment: def __init__(self, total, @@ -30,15 +84,13 @@ def total(self, getter): @property def packet(self): packet = {} - - packet['item_level'] = self.total - packet['melee_attack'] = self.melee_attack - packet['range_attack'] = self.range_attack - packet['mage_attack'] = self.mage_attack - packet['melee_defense'] = self.melee_defense - packet['range_defense'] = self.range_defense - packet['mage_defense'] = self.mage_defense - + packet["item_level"] = self.total + packet["melee_attack"] = self.melee_attack + packet["range_attack"] = self.range_attack + packet["mage_attack"] = self.mage_attack + packet["melee_defense"] = self.melee_defense + packet["range_defense"] = self.range_defense + packet["mage_defense"] = self.mage_defense return packet @@ -54,6 +106,10 @@ def __init__(self, realm, pos, iden, name, npc_type): self.equipment = None self.npc_type.update(npc_type) + @property + def is_npc(self) -> bool: + return True + def update(self, realm, actions): super().update(realm, actions) @@ -63,6 +119,46 @@ def update(self, realm, actions): self.resources.health.increment(1) self.last_action = actions + def can_see(self, target): + if target is None or target.immortal: + return False + distance = utils.linf_single(self.pos, target.pos) + return distance <= self.vision + + def _move_toward(self, goal): + delta = astar.aStar(self.realm.map, self.pos, goal) + return move_action(DELTA_TO_DIR[delta] if delta in 
DELTA_TO_DIR else None) + + def _meander(self): + return move_action(get_habitable_dir(self)) + + def can_attack(self, target): + if target is None or not self.config.NPC_SYSTEM_ENABLED or target.immortal: + return False + if not self.config.NPC_ALLOW_ATTACK_OTHER_NPCS and target.is_npc: + return False + distance = utils.linf_single(self.pos, target.pos) + return distance <= self.skills.style.attack_range(self.realm.config) + + def _has_target(self, search=False): + if self.target and (not self.target.alive or not self.can_see(self.target)): + self.target = None + # NOTE: when attacked by several agents, this will always target the last attacker + if self.attacker and self.target is None: + self.target = self.attacker + if self.target is None and search is True: + self.target = utils.identify_closest_target(self) + return self.target + + def _add_attack_action(self, actions, target): + actions.update({Action.Attack: {Action.Style: self.skills.style, Action.Target: target}}) + + def _charge_toward(self, target): + actions = self._move_toward(target.pos) + if self.can_attack(target): + self._add_attack_action(actions, target) + return actions + # Returns True if the entity is alive def receive_damage(self, source, dmg): if super().receive_damage(source, dmg): @@ -73,24 +169,23 @@ def receive_damage(self, source, dmg): # pylint: disable=no-member if self.gold.val > 0: source.gold.increment(self.gold.val) - self.realm.event_log.record(EventCode.EARN_GOLD, source, amount=self.gold.val) + self.realm.event_log.record(EventCode.LOOT_GOLD, source, amount=self.gold.val, target=self) self.gold.update(0) - for item in self.droptable.roll(self.realm, self.attack_level): - if source.is_player and source.inventory.space: - # inventory.receive() returns True if the item is received - # if source doesn't have space, inventory.receive() destroys the item - if source.inventory.receive(item): - self.realm.event_log.record(EventCode.LOOT_ITEM, source, item=item) - else: - 
item.destroy() + if self.droptable: + for item in self.droptable.roll(self.realm, self.attack_level): + if source.is_player and source.inventory.space: + # inventory.receive() returns True if the item is received + # if source does not have space, inventory.receive() destroys the item + if source.inventory.receive(item): + self.realm.event_log.record(EventCode.LOOT_ITEM, source, item=item, target=self) + else: + item.destroy() return False - # NOTE: passing np_random here is a hack - # Ideally, it should be passed to __init__ and also used in action generation @staticmethod - def spawn(realm, pos, iden, np_random): + def default_spawn(realm, pos, iden, np_random, danger=None): config = realm.config # check the position @@ -98,7 +193,7 @@ def spawn(realm, pos, iden, np_random): return None # Select AI Policy - danger = combat.danger(config, pos) + danger = danger or combat.danger(config, pos) if danger >= config.NPC_SPAWN_AGGRESSIVE: ent = Aggressive(realm, pos, iden) elif danger >= config.NPC_SPAWN_NEUTRAL: @@ -147,8 +242,11 @@ def spawn(realm, pos, iden, np_random): lvl = level - np_random.random() ilvl = int(5 * lvl) - offense = int(config.NPC_BASE_DAMAGE + lvl*config.NPC_LEVEL_DAMAGE) - defense = int(config.NPC_BASE_DEFENSE + lvl*config.NPC_LEVEL_DEFENSE) + level_damage = config.NPC_LEVEL_DAMAGE * config.NPC_LEVEL_MULTIPLIER + level_defense = config.NPC_LEVEL_DEFENSE * config.NPC_LEVEL_MULTIPLIER + + offense = int(config.NPC_BASE_DAMAGE + lvl * level_damage) + defense = int(config.NPC_BASE_DEFENSE + lvl * level_defense) ent.equipment = Equipment(ilvl, offense, offense, offense, defense, defense, defense) @@ -163,34 +261,84 @@ def spawn(realm, pos, iden, np_random): def packet(self): data = super().packet() - - data['skills'] = self.skills.packet() - data['resource'] = { 'health': { - 'val': self.resources.health.val, 'max': self.config.PLAYER_BASE_HEALTH } } - + data["skills"] = self.skills.packet() + data["resource"] = { "health": { + "val": 
self.resources.health.val, "max": self.config.PLAYER_BASE_HEALTH } } return data - @property - def is_npc(self) -> bool: - return True - class Passive(NPC): - def __init__(self, realm, pos, iden): - super().__init__(realm, pos, iden, 'Passive', 1) + def __init__(self, realm, pos, iden, name=None): + super().__init__(realm, pos, iden, name or "Passive", 1) - def decide(self, realm): - return policy.passive(realm, self) + def decide(self): + # Move only, no attack + return self._meander() class PassiveAggressive(NPC): - def __init__(self, realm, pos, iden): - super().__init__(realm, pos, iden, 'Neutral', 2) + def __init__(self, realm, pos, iden, name=None): + super().__init__(realm, pos, iden, name or "Neutral", 2) - def decide(self, realm): - return policy.neutral(realm, self) + def decide(self): + if self._has_target() is None: + return self._meander() + return self._charge_toward(self.target) class Aggressive(NPC): - def __init__(self, realm, pos, iden): - super().__init__(realm, pos, iden, 'Hostile', 3) - - def decide(self, realm): - return policy.hostile(realm, self) + def __init__(self, realm, pos, iden, name=None): + super().__init__(realm, pos, iden, name or "Hostile", 3) + + def decide(self): + if self._has_target(search=True) is None: + return self._meander() + return self._charge_toward(self.target) + +class Soldier(NPC): + def __init__(self, realm, pos, iden, name, order): + super().__init__(realm, pos, iden, name or "Soldier", 3) # Hostile with order + self.target_entity = None + self.rally_point = None + self._process_order(order) + + def _process_order(self, order): + if order is None: + return + if "destroy" in order: # destroy the specified entity id + self.target_entity = self.realm.entity(order["destroy"]) + if "rally" in order: + # rally until spotting an enemy + self.rally_point = order["rally"] # (row, col) + + def _is_order_done(self, radius=5): + if self.target_entity and not self.target_entity.alive: + self.target_entity = None + if 
self.rally_point and utils.linf_single(self.pos, self.rally_point) <= radius: + self.rally_point = None + + def decide(self): + self._is_order_done() + # NOTE: destroying the target entity is the highest priority + if self.target_entity is None and self._has_target(search=True): + if self.can_attack(self.target): + return self._charge_toward(self.target) + + actions = self._decide_move_action() + self._decide_attack_action(actions) + return actions + + def _decide_move_action(self): + # in the order of priority + if self.target_entity: + return self._move_toward(self.target_entity.pos) + if self.target: + # If it's close enough, it will use A*. Otherwise, random. + return meander_toward(self, self.target.pos) + if self.rally_point: + return meander_toward(self, self.rally_point) + return self._meander() + + def _decide_attack_action(self, actions): + # The default is to attack the target entity, if within range + if self.target_entity and self.can_attack(self.target_entity): + self._add_attack_action(actions, self.target_entity) + elif self.can_attack(self.target): + self._add_attack_action(actions, self.target) diff --git a/nmmo/entity/npc_manager.py b/nmmo/entity/npc_manager.py new file mode 100644 index 000000000..1b6dd6de2 --- /dev/null +++ b/nmmo/entity/npc_manager.py @@ -0,0 +1,90 @@ +from typing import Callable +from nmmo.entity.entity_manager import EntityGroup +from nmmo.entity.npc import NPC, Soldier, Aggressive, PassiveAggressive, Passive +from nmmo.core import action +from nmmo.systems import combat +from nmmo.lib import spawn + + +class NPCManager(EntityGroup): + def __init__(self, realm, np_random): + super().__init__(realm, np_random) + self.next_id = -1 + self.spawn_dangers = [] + + def reset(self, np_random): + super().reset(np_random) + self.next_id = -1 + self.spawn_dangers.clear() + + def actions(self): + return {idx: entity.decide() for idx, entity in self.entities.items()} + + def default_spawn(self): + config = self.config + if not 
config.NPC_SYSTEM_ENABLED: + return + + for _ in range(config.NPC_SPAWN_ATTEMPTS): + if len(self.entities) >= config.NPC_N: + break + + if len(self.spawn_dangers) > 0: + danger = self.spawn_dangers.pop(0) # FIFO + r, c = combat.spawn(config, danger, self._np_random) + else: + center = config.MAP_CENTER + border = self.config.MAP_BORDER + # pylint: disable=unbalanced-tuple-unpacking + r, c = self._np_random.integers(border, center+border, 2).tolist() + + npc = NPC.default_spawn(self.realm, (r, c), self.next_id, self._np_random) + if npc: + super().spawn_entity(npc) + self.next_id -= 1 + + def spawn_npc(self, r, c, danger=None, name=None, order=None, + apply_beta_to_danger=True): + if not self.realm.map.tiles[r, c].habitable: + return None + + if danger and apply_beta_to_danger: + danger = min(1.0, max(0.0, danger)) # normalize + danger = self._np_random.beta(10*danger+0.01, 10.01-10*danger) # beta cannot take 0 + if danger is None: + npc = Soldier(self.realm, (r, c), self.next_id, name, order) + elif danger >= self.config.NPC_SPAWN_AGGRESSIVE: + npc = Aggressive(self.realm, (r, c), self.next_id, name) + elif danger >= self.config.NPC_SPAWN_NEUTRAL: + npc = PassiveAggressive(self.realm, (r, c), self.next_id, name) + elif danger >= self.config.NPC_SPAWN_PASSIVE: + npc = Passive(self.realm, (r, c), self.next_id, name) + else: + return None + + if npc: + super().spawn_entity(npc) + self.next_id -= 1 + # NOTE: randomly set the combat style. 
revisit later + npc.skills.style = self._np_random.choice([action.Melee, action.Range, action.Mage]) + return npc + + def area_spawn(self, r_min, r_max, c_min, c_max, num_spawn, + npc_init_fn: Callable): + assert r_min < r_max and c_min < c_max, "Invalid area" + assert num_spawn > 0, "Invalid number of spawns" + while num_spawn > 0: + r = self._np_random.integers(r_min, r_max+1) + c = self._np_random.integers(c_min, c_max+1) + if npc_init_fn(r, c): + num_spawn -= 1 + + def edge_spawn(self, num_spawn, npc_init_fn: Callable): + assert num_spawn > 0, "Invalid number of spawns" + edge_locs = spawn.get_edge_tiles(self.config, self._np_random, shuffle=True) + assert len(edge_locs) >= num_spawn, "Not enough edge locations" + while num_spawn > 0: + r, c = edge_locs.pop() + npc = npc_init_fn(r, c) + if npc: + num_spawn -= 1 diff --git a/nmmo/entity/player.py b/nmmo/entity/player.py index 73c9b4bc7..5bec24e98 100644 --- a/nmmo/entity/player.py +++ b/nmmo/entity/player.py @@ -1,6 +1,7 @@ from nmmo.systems.skill import Skills from nmmo.entity import entity -from nmmo.lib.log import EventCode +from nmmo.lib.event_code import EventCode +from nmmo.lib import spawn # pylint: disable=no-member class Player(entity.Entity): @@ -8,8 +9,10 @@ def __init__(self, realm, pos, agent, resilient=False): super().__init__(realm, pos, agent.iden, agent.policy) self.agent = agent - self.immortal = realm.config.IMMORTAL + self._immortal = realm.config.IMMORTAL self.resources.resilient = resilient + self.my_task = None + self._make_mortal_tick = None # set to realm.tick when the player is made mortal # Scripted hooks self.target = None @@ -49,6 +52,27 @@ def level(self) -> int: # which are harvesting food/water and don't progress return max(e.level.val for e in self.skills.skills) + def _set_immortal(self, value=True, duration=None): + self._immortal = value + # NOTE: a hack to mark the player as immortal in action targets + self.npc_type.update(-1 if value else 0) + + if value and duration is not 
None: + self._make_mortal_tick = self.realm.tick + duration + if value is False: + self._make_mortal_tick = None + + def make_recon(self, new_pos=None): + # NOTE: scout cannot act and cannot die + self.status.freeze.update(self.config.MAX_HORIZON) + self._set_immortal() + self._recon = True + if new_pos is not None: + if self.ent_id in self.realm.map.tiles[self.pos].entities: + self.realm.map.tiles[self.pos].remove_entity(self.ent_id) + self.realm.map.tiles[new_pos].add_entity(self) + self.set_pos(*new_pos) + def apply_damage(self, dmg, style): super().apply_damage(dmg, style) self.skills.apply_damage(style) @@ -69,7 +93,7 @@ def receive_damage(self, source, dmg): if self.config.EXCHANGE_SYSTEM_ENABLED and source is not None: if self.gold.val > 0: source.gold.increment(self.gold.val) - self.realm.event_log.record(EventCode.EARN_GOLD, source, amount=self.gold.val) + self.realm.event_log.record(EventCode.LOOT_GOLD, source, amount=self.gold.val, target=self) self.gold.update(0) # TODO: make source receive the highest-level items first @@ -84,7 +108,7 @@ def receive_damage(self, source, dmg): # inventory.receive() returns True if the item is received # if source doesn't have space, inventory.receive() destroys the item if source.inventory.receive(item): - self.realm.event_log.record(EventCode.LOOT_ITEM, source, item=item) + self.realm.event_log.record(EventCode.LOOT_ITEM, source, item=item, target=self) else: item.destroy() @@ -102,38 +126,64 @@ def packet(self): data['resource'] = self.resources.packet() data['skills'] = self.skills.packet() data['inventory'] = self.inventory.packet() - + # added for the 2.0 web client + data["metrics"] = { + "PlayerDefeats": self.history.player_kills, + "TimeAlive": self.time_alive.val, + "Gold": self.gold.val, + "DamageTaken": self.history.damage_received,} return data def update(self, realm, actions): '''Post-action update. 
Do not include history''' super().update(realm, actions) - # Spawsn battle royale style death fog + # Spawn battle royale style death fog # Starts at 0 damage on the specified config tick # Moves in from the edges by 1 damage per tile per tick # So after 10 ticks, you take 10 damage at the edge and 1 damage # 10 tiles in, 0 damage in farther # This means all agents will be force killed around # MAP_CENTER / 2 + 100 ticks after spawning - fog = self.config.PLAYER_DEATH_FOG + fog = self.config.DEATH_FOG_ONSET if fog is not None and self.realm.tick >= fog: - row, col = self.pos - cent = self.config.MAP_BORDER + self.config.MAP_CENTER // 2 - - # Distance from center of the map - dist = max(abs(row - cent), abs(col - cent)) - - # Safe final area - if dist > self.config.PLAYER_DEATH_FOG_FINAL_SIZE: - # Damage based on time and distance from center - time_dmg = self.config.PLAYER_DEATH_FOG_SPEED * (self.realm.tick - fog + 1) - dist_dmg = dist - self.config.MAP_CENTER // 2 - dmg = max(0, dist_dmg + time_dmg) - self.receive_damage(None, dmg) + dmg = self.realm.fog_map[self.pos] + if dmg > 0.5: # fog_map has float values + self.receive_damage(None, round(dmg)) if not self.alive: return - self.resources.update() + if self.config.PLAYER_HEALTH_INCREMENT > 0: + self.resources.health.increment(self.config.PLAYER_HEALTH_INCREMENT) + self.resources.update(self.immortal) self.skills.update() + + if self._make_mortal_tick is not None and self.realm.tick >= self._make_mortal_tick: + self._set_immortal(False) + + def resurrect(self, health_prop=0.5, freeze_duration=10, edge_spawn=True): + # Respawn dead players at the edge + assert not self.alive, "Player is not dead" + self.status.freeze.update(freeze_duration) + self.resources.health.update(self.config.PLAYER_BASE_HEALTH*health_prop) + if self.config.RESOURCE_SYSTEM_ENABLED: + self.resources.water.update(self.config.RESOURCE_BASE) + self.resources.food.update(self.config.RESOURCE_BASE) + + if edge_spawn: + new_spawn_pos = 
spawn.get_random_coord(self.config, self._np_random, edge=True) + else: + while True: + new_spawn_pos = spawn.get_random_coord(self.config, self._np_random, edge=False) + if self.realm.map.tiles[new_spawn_pos].habitable: + break + + self.set_pos(*new_spawn_pos) + self.message.update(0) + self.realm.players.spawn_entity(self) # put back to the system + self._set_immortal(duration=freeze_duration) + if self.my_task and len(self.my_task.assignee) == 1: + # NOTE: Only one task per agent is supported for now + # Agent's task progress need to be reset ONLY IF the task is an agent task + self.my_task.reset() diff --git a/nmmo/lib/astar.py b/nmmo/lib/astar.py new file mode 100644 index 000000000..f8bd113a2 --- /dev/null +++ b/nmmo/lib/astar.py @@ -0,0 +1,75 @@ +#pylint: disable=invalid-name +import heapq +from nmmo.lib.utils import in_bounds + +CUTOFF = 100 + +def l1(start, goal): + sr, sc = start + gr, gc = goal + return abs(gr - sr) + abs(gc - sc) + +def adjacentPos(pos): + r, c = pos + return [(r - 1, c), (r, c - 1), (r + 1, c), (r, c + 1)] + +def aStar(realm_map, start, goal, cutoff = CUTOFF): + tiles = realm_map.tiles + if start == goal: + return (0, 0) + if (start, goal) in realm_map.pathfinding_cache: + return realm_map.pathfinding_cache[(start, goal)] + initial_goal = goal + pq = [(0, start)] + + backtrace = {} + cost = {start: 0} + + closestPos = start + closestHeuristic = l1(start, goal) + closestCost = closestHeuristic + + while pq: + # Use approximate solution if budget exhausted + cutoff -= 1 + if cutoff <= 0: + if goal not in backtrace: + goal = closestPos + break + + priority, cur = heapq.heappop(pq) + + if cur == goal: + break + + for nxt in adjacentPos(cur): + if not in_bounds(*nxt, tiles.shape) or realm_map.habitable_tiles[nxt] == 0: + continue + + newCost = cost[cur] + 1 + if nxt not in cost or newCost < cost[nxt]: + cost[nxt] = newCost + heuristic = l1(goal, nxt) + priority = newCost + heuristic + + # Compute approximate solution + if heuristic < 
closestHeuristic or ( + heuristic == closestHeuristic and priority < closestCost): + closestPos = nxt + closestHeuristic = heuristic + closestCost = priority + + heapq.heappush(pq, (priority, nxt)) + backtrace[nxt] = cur + + while goal in backtrace and backtrace[goal] != start: + gr, gc = goal + goal = backtrace[goal] + sr, sc = goal + realm_map.pathfinding_cache[(goal, initial_goal)] = (gr - sr, gc - sc) + + sr, sc = start + gr, gc = goal + realm_map.pathfinding_cache[(start, initial_goal)] = (gr - sr, gc - sc) + return (gr - sr, gc - sc) +# End A* diff --git a/nmmo/lib/cython_helper.pyx b/nmmo/lib/cython_helper.pyx new file mode 100644 index 000000000..06cb77ccd --- /dev/null +++ b/nmmo/lib/cython_helper.pyx @@ -0,0 +1,64 @@ +#cython: boundscheck=True +#cython: wraparound=True +#cython: nonecheck=True + +from types import SimpleNamespace +import numpy as np +cimport numpy as cnp + +# for array indexing +cnp.import_array() + +def make_move_mask(cnp.ndarray[cnp.int8_t] mask, + cnp.ndarray[cnp.int8_t, ndim=2] habitable_tiles, + short row, short col, + cnp.ndarray[cnp.int64_t] row_delta, + cnp.ndarray[cnp.int64_t] col_delta): + for i in range(4): + mask[i] = habitable_tiles[row_delta[i] + row, col_delta[i] + col] + +# NOTE: assume that incoming mask are all zeros +def make_attack_mask(cnp.ndarray[cnp.int8_t] mask, + cnp.ndarray[cnp.int16_t, ndim=2] entities, + dict entity_attr, + dict my_info): + cdef short idx + cdef short num_valid_target = 0 + cdef short attr_id = entity_attr["id"] + cdef short attr_time_alive = entity_attr["time_alive"] + cdef short attr_npc_type = entity_attr["npc_type"] + cdef short attr_row = entity_attr["row"] + cdef short attr_col = entity_attr["col"] + + for idx in range(len(entities)): + # skip empty row + if entities[idx, attr_id] == 0: + continue + # out of range + if abs(entities[idx, attr_row] - my_info["row"]) > my_info["attack_range"] or \ + abs(entities[idx, attr_col] - my_info["col"]) > my_info["attack_range"]: + continue + # 
cannot attack during immunity + if entities[idx, attr_id] > 0 and \ + entities[idx, attr_time_alive] < my_info["immunity"]: + continue + # cannot attack self + if entities[idx, attr_id] == my_info["agent_id"]: + continue + # npc_type must be 0, 1, 2, 3 + if entities[idx, attr_npc_type] < 0: # immortal (-1) + continue + mask[idx] = 1 + num_valid_target += 1 + + # cython: wraparound need to be True + # if any valid target, set the no-op to 0 + mask[-1] = 0 if num_valid_target > 0 else 1 + +def parse_array(short[:] data, dict attr_name_to_col): + cdef short col + cdef str attr + cdef dict result = {} + for attr, col in attr_name_to_col.items(): + result[attr] = data[col] + return SimpleNamespace(**result) diff --git a/nmmo/lib/event_code.py b/nmmo/lib/event_code.py new file mode 100644 index 000000000..5c11a6a36 --- /dev/null +++ b/nmmo/lib/event_code.py @@ -0,0 +1,32 @@ +class EventCode: + # Move + EAT_FOOD = 1 + DRINK_WATER = 2 + GO_FARTHEST = 3 # record when breaking the previous record + SEIZE_TILE = 4 + + # Attack + SCORE_HIT = 11 + PLAYER_KILL = 12 + FIRE_AMMO = 13 + + # Item + CONSUME_ITEM = 21 + GIVE_ITEM = 22 + DESTROY_ITEM = 23 + HARVEST_ITEM = 24 + EQUIP_ITEM = 25 + LOOT_ITEM = 26 + + # Exchange + GIVE_GOLD = 31 + LIST_ITEM = 32 + EARN_GOLD = 33 + BUY_ITEM = 34 + LOOT_GOLD = 35 + + # Level up + LEVEL_UP = 41 + + # System-related + AGENT_CULLED = 91 # player is removed from the realm (culled) diff --git a/nmmo/lib/event_log.py b/nmmo/lib/event_log.py index e11b1bee1..0614ff01a 100644 --- a/nmmo/lib/event_log.py +++ b/nmmo/lib/event_log.py @@ -1,13 +1,14 @@ from types import SimpleNamespace from typing import List from copy import deepcopy +from collections import defaultdict import numpy as np from nmmo.datastore.serialized import SerializedState from nmmo.entity import Entity from nmmo.systems.item import Item -from nmmo.lib.log import EventCode +from nmmo.lib.event_code import EventCode # pylint: disable=no-member EventState = 
SerializedState.subclass("Event", [ @@ -37,16 +38,16 @@ # defining col synoyms for different event types ATTACK_COL_MAP = { 'combat_style': EventAttr['type'], - 'damage': EventAttr['number'] } - + 'damage': EventAttr['number']} ITEM_COL_MAP = { 'item_type': EventAttr['type'], 'quantity': EventAttr['number'], - 'price': EventAttr['gold'] } - -LEVEL_COL_MAP = { 'skill': EventAttr['type'] } - -EXPLORE_COL_MAP = { 'distance': EventAttr['number'] } + 'price': EventAttr['gold'], + 'item_id': EventAttr['target_ent']} +LEVEL_COL_MAP = {'skill': EventAttr['type']} +EXPLORE_COL_MAP = {'distance': EventAttr['number']} +TILE_COL_MAP = {'tile_row': EventAttr['number'], + 'tile_col': EventAttr['gold']} class EventLogger(EventCode): @@ -67,6 +68,7 @@ def __init__(self, realm): self.attr_to_col.update(ITEM_COL_MAP) self.attr_to_col.update(LEVEL_COL_MAP) self.attr_to_col.update(EXPLORE_COL_MAP) + self.attr_to_col.update(TILE_COL_MAP) def reset(self): EventState.State.table(self.datastore).reset() @@ -85,7 +87,7 @@ def _create_event(self, entity: Entity, event_code: int): def record(self, event_code: int, entity: Entity, **kwargs): if event_code in [EventCode.EAT_FOOD, EventCode.DRINK_WATER, EventCode.GIVE_ITEM, EventCode.DESTROY_ITEM, - EventCode.GIVE_GOLD]: + EventCode.GIVE_GOLD, EventCode.AGENT_CULLED]: # Logs for these events are for counting only self._create_event(entity, event_code) return @@ -99,10 +101,12 @@ def record(self, event_code: int, entity: Entity, **kwargs): if event_code == EventCode.SCORE_HIT: # kwargs['combat_style'] should be Skill.CombatSkill if ('combat_style' in kwargs and kwargs['combat_style'].SKILL_ID in [1, 2, 3]) & \ + ('target' in kwargs and isinstance(kwargs['target'], Entity)) & \ ('damage' in kwargs and kwargs['damage'] >= 0): log = self._create_event(entity, event_code) log.type.update(kwargs['combat_style'].SKILL_ID) log.number.update(kwargs['damage']) + log.target_ent.update(kwargs['target'].ent_id) return if event_code == EventCode.PLAYER_KILL: 
@@ -110,23 +114,37 @@ def record(self, event_code: int, entity: Entity, **kwargs): target = kwargs['target'] log = self._create_event(entity, event_code) log.target_ent.update(target.ent_id) - - # CHECK ME: attack_level or "general" level?? need to clarify log.level.update(target.attack_level) return + if event_code == EventCode.LOOT_ITEM: + if ('item' in kwargs and isinstance(kwargs['item'], Item)) & \ + ('target' in kwargs and isinstance(kwargs['target'], Entity)): + item = kwargs['item'] + log = self._create_event(entity, event_code) + log.type.update(item.ITEM_TYPE_ID) + log.level.update(item.level.val) + log.number.update(item.quantity.val) + log.target_ent.update(item.id.val) + return + + if event_code == EventCode.LOOT_GOLD: + if ('amount' in kwargs and kwargs['amount'] > 0) & \ + ('target' in kwargs and isinstance(kwargs['target'], Entity)): + log = self._create_event(entity, event_code) + log.gold.update(kwargs['amount']) + log.target_ent.update(kwargs['target'].ent_id) + return + if event_code in [EventCode.CONSUME_ITEM, EventCode.HARVEST_ITEM, EventCode.EQUIP_ITEM, - EventCode.LOOT_ITEM]: - # CHECK ME: item types should be checked. For example, - # Only Ration and Potion can be consumed - # Only Ration, Potion, Whetstone, Arrow, Runes can be produced - # The quantity should be 1 for all of these events + EventCode.FIRE_AMMO]: if ('item' in kwargs and isinstance(kwargs['item'], Item)): item = kwargs['item'] log = self._create_event(entity, event_code) log.type.update(item.ITEM_TYPE_ID) log.level.update(item.level.val) log.number.update(item.quantity.val) + log.target_ent.update(item.id.val) return if event_code in [EventCode.LIST_ITEM, EventCode.BUY_ITEM]: @@ -138,9 +156,9 @@ def record(self, event_code: int, entity: Entity, **kwargs): log.level.update(item.level.val) log.number.update(item.quantity.val) log.gold.update(kwargs['price']) + log.target_ent.update(item.id.val) return - # NOTE: do we want to separate the source of income? 
from selling vs looting if event_code == EventCode.EARN_GOLD: if ('amount' in kwargs and kwargs['amount'] > 0): log = self._create_event(entity, event_code) @@ -156,18 +174,27 @@ def record(self, event_code: int, entity: Entity, **kwargs): log.level.update(kwargs['level']) return + if event_code == EventCode.SEIZE_TILE: + if ('tile' in kwargs and isinstance(kwargs['tile'], tuple)): + log = self._create_event(entity, event_code) + log.number.update(kwargs['tile'][0]) # row + log.gold.update(kwargs['tile'][1]) # col + return + # If reached here, then something is wrong # CHECK ME: The below should be commented out after debugging raise ValueError(f"Event code: {event_code}", kwargs) def update(self): - curr_tick = self.realm.tick + 1 # update happens before the tick update + curr_tick = self.realm.tick if curr_tick > self._last_tick: self._data_by_tick[curr_tick] = EventState.Query.by_tick(self.datastore, curr_tick) self._last_tick = curr_tick def get_data(self, event_code=None, agents: List[int]=None, tick: int=None) -> np.ndarray: if tick is not None: + if tick == -1: + tick = self._last_tick if tick not in self._data_by_tick: return self._empty_data event_data = self._data_by_tick[tick] @@ -184,3 +211,54 @@ def get_data(self, event_code=None, agents: List[int]=None, tick: int=None) -> n return event_data[flt_idx] return self._empty_data + + def get_stat(self): + event_stat = defaultdict(lambda: defaultdict(int)) + event_data = EventState.Query.table(self.datastore) + for row in event_data: + agent_id = row[EventAttr['ent_id']] + if agent_id > 0: + key = extract_event_key(row) + if key is None: + continue + + if key[0] == EventCode.GO_FARTHEST: + event_stat[agent_id][key] = max(event_stat[agent_id][key], + row[EventAttr['number']]) # distance + elif key[0] in [EventCode.LEVEL_UP, EventCode.EQUIP_ITEM]: + event_stat[agent_id][key] = max(event_stat[agent_id][key], + row[EventAttr['level']]) + elif key[0] == EventCode.AGENT_CULLED: + event_stat[agent_id][key] = 
row[EventAttr['tick']] # lifespan + else: + event_stat[agent_id][key] += 1 + + return event_stat + +def extract_event_key(event_row): + event_code = event_row[EventAttr['event']] + + if event_code in [ + EventCode.EAT_FOOD, + EventCode.DRINK_WATER, + EventCode.GO_FARTHEST, + EventCode.AGENT_CULLED, + ]: + return (event_code,) + + if event_code in [ + EventCode.SCORE_HIT, + EventCode.FIRE_AMMO, + EventCode.LEVEL_UP, + EventCode.HARVEST_ITEM, + EventCode.CONSUME_ITEM, + EventCode.EQUIP_ITEM, + EventCode.LIST_ITEM, + EventCode.BUY_ITEM, + ]: + return (event_code, event_row[EventAttr['type']]) + + if event_code == EventCode.PLAYER_KILL: + return (event_code, int(event_row[EventAttr['target_ent']] > 0)) # if target is agent or npc + + return None diff --git a/nmmo/lib/log.py b/nmmo/lib/log.py deleted file mode 100644 index 8ced7f7a4..000000000 --- a/nmmo/lib/log.py +++ /dev/null @@ -1,64 +0,0 @@ -from collections import defaultdict - -import logging - - -class Logger: - def __init__(self): - self.stats = defaultdict(list) - - def log(self, key, val): - if not isinstance(val, (int, float)): - raise RuntimeError(f'{val} must be int or float') - - self.stats[key].append(val) - return True - -class MilestoneLogger(Logger): - def __init__(self, log_file): - super().__init__() - logging.basicConfig(format='%(levelname)s:%(message)s', - level=logging.INFO, filename=log_file, filemode='w') - - def log_min(self, key, val): - if key in self.stats and val >= self.stats[key][-1]: - return False - - self.log(key, val) - return True - - def log_max(self, key, val): - if key in self.stats and val <= self.stats[key][-1]: - return False - - self.log(key, val) - return True - - -class EventCode: - # Move - EAT_FOOD = 1 - DRINK_WATER = 2 - GO_FARTHEST = 3 # record when breaking the previous record - - # Attack - SCORE_HIT = 11 - PLAYER_KILL = 12 - - # Item - CONSUME_ITEM = 21 - GIVE_ITEM = 22 - DESTROY_ITEM = 23 - HARVEST_ITEM = 24 - EQUIP_ITEM = 25 - LOOT_ITEM = 26 - - # Exchange - 
GIVE_GOLD = 31 - LIST_ITEM = 32 - EARN_GOLD = 33 - BUY_ITEM = 34 - #SPEND_GOLD = 35 # BUY_ITEM, price has the same info - - # Level up - LEVEL_UP = 41 diff --git a/nmmo/lib/seeding.py b/nmmo/lib/seeding.py index 1e75c066c..46593483e 100644 --- a/nmmo/lib/seeding.py +++ b/nmmo/lib/seeding.py @@ -1,12 +1,8 @@ # copied from https://github.com/openai/gym/blob/master/gym/utils/seeding.py - """Set of random number generator functions: seeding, generator, hashing seeds.""" from typing import Any, Optional, Tuple - import numpy as np -from gym import error - class RandomNumberGenerator(np.random.Generator): def __init__(self, bit_generator): @@ -34,7 +30,7 @@ def np_random(seed: Optional[int] = None) -> Tuple[np.random.Generator, Any]: Error: Seed must be a non-negative integer or omitted """ if seed is not None and not (isinstance(seed, int) and 0 <= seed): - raise error.Error(f"Seed must be a non-negative integer or omitted, not {seed}") + raise ValueError(f"Seed must be a non-negative integer or omitted, not {seed}") seed_seq = np.random.SeedSequence(seed) np_seed = seed_seq.entropy diff --git a/nmmo/lib/spawn.py b/nmmo/lib/spawn.py index aed9949c7..81e04e5c9 100644 --- a/nmmo/lib/spawn.py +++ b/nmmo/lib/spawn.py @@ -1,13 +1,19 @@ +from itertools import chain + class SequentialLoader: '''config.PLAYER_LOADER that spreads out agent populations''' - def __init__(self, config, np_random): + def __init__(self, config, np_random, + candidate_spawn_pos=None): items = config.PLAYERS self.items = items self.idx = -1 - # np_random is the env-level rng - self.candidate_spawn_pos = spawn_concurrent(config, np_random) + if candidate_spawn_pos: + self.candidate_spawn_pos = candidate_spawn_pos + else: + # np_random is the env-level rng + self.candidate_spawn_pos = get_edge_tiles(config, np_random, shuffle=True) def __iter__(self): return self @@ -21,7 +27,7 @@ def get_spawn_position(self, agent_id): # the basic SequentialLoader just provides a random spawn position return 
self.candidate_spawn_pos.pop() -def spawn_continuous(config, np_random): +def get_random_coord(config, np_random, edge=True): '''Generates spawn positions for new agents Randomly selects spawn positions around @@ -33,20 +39,24 @@ def spawn_continuous(config, np_random): position: The position (row, col) to spawn the given agent ''' - #Spawn at edges mmax = config.MAP_CENTER + config.MAP_BORDER mmin = config.MAP_BORDER # np_random is the env-level RNG, a drop-in replacement of numpy.random - var = np_random.integers(mmin, mmax) - fixed = np_random.choice([mmin, mmax]) - r, c = int(var), int(fixed) - if np_random.random() > 0.5: - r, c = c, r + if edge: + var = np_random.integers(mmin, mmax) + fixed = np_random.choice([mmin, mmax]) + r, c = int(var), int(fixed) + if np_random.random() > 0.5: + r, c = c, r + else: + r, c = np_random.integers(mmin, mmax, 2).tolist() return (r, c) -def get_edge_tiles(config): - '''Returns a list of all edge tiles''' +def get_edge_tiles(config, np_random=None, shuffle=False): + '''Returns a list of all edge tiles. 
+ To shuffle the tile, provide a np_random object + ''' # Accounts for void borders in coord calcs left = config.MAP_BORDER right = config.MAP_CENTER + config.MAP_BORDER @@ -60,56 +70,7 @@ def get_edge_tiles(config): sides.append(list(zip(inc, highs))) sides.append(list(zip(highs, inc[::-1]))) sides.append(list(zip(inc[::-1], lows))) - - return sides - -def spawn_concurrent(config, np_random): - '''Generates spawn positions for new agents - - Evenly spaces agents around the borders - of the square game map, assuming the edge tiles are all habitable - - Returns: - list of tuple(int, int): - - position: - The position (row, col) to spawn the given agent - ''' - team_size = config.PLAYER_TEAM_SIZE - team_n = len(config.PLAYERS) - teammate_sep = config.PLAYER_SPAWN_TEAMMATE_DISTANCE - - # Number of total border tiles - total_tiles = 4 * config.MAP_CENTER - - # Number of tiles, including within-team sep, occupied by each team - tiles_per_team = teammate_sep*(team_size-1) + team_size - - # Number of total tiles dedicated to separating teams - buffer_tiles = 0 - if team_n > 1: - buffer_tiles = total_tiles - tiles_per_team*team_n - - # Number of tiles between teams - team_sep = buffer_tiles // team_n - - sides = [] - for side in get_edge_tiles(config): - sides += side - - if team_n > 1: - # Space across and within teams - spawn_positions = [] - for idx in range(team_sep//2, len(sides), tiles_per_team+team_sep): - for offset in list(range(0, tiles_per_team, teammate_sep+1)): - if len(spawn_positions) >= config.PLAYER_N: - continue - pos = sides[idx + offset] - spawn_positions.append(pos) - else: - # team_n = 1: to fit 128 agents in a small map, ignore spacing and spawn randomly - # np_random is the env-level RNG, a drop-in replacement of numpy.random - np_random.shuffle(sides) - spawn_positions = sides[:config.PLAYER_N] - - return spawn_positions + tiles = list(chain(*sides)) + if shuffle and np_random: + np_random.shuffle(tiles) + return tiles diff --git 
a/nmmo/lib/team_helper.py b/nmmo/lib/team_helper.py index 9256cd036..19a437ce1 100644 --- a/nmmo/lib/team_helper.py +++ b/nmmo/lib/team_helper.py @@ -1,9 +1,21 @@ -from typing import Dict, List +from typing import Any, Dict, List +import numpy.random +from nmmo.lib import spawn -class TeamHelper(): - def __init__(self, teams: Dict[int, List[int]]): + +def make_teams(config, num_teams): + num_per_team = config.PLAYER_N // num_teams + teams = {} + for team_id in range(num_teams): + range_max = (team_id+1)*num_per_team+1 if team_id < num_teams-1 else config.PLAYER_N+1 + teams[team_id] = list(range(team_id*num_per_team+1, range_max)) + return teams + +class TeamHelper: + def __init__(self, teams: Dict[Any, List[int]], np_random=None): self.teams = teams self.num_teams = len(teams) + self.team_list = list(teams.keys()) self.team_size = {} self.team_and_position_for_agent = {} self.agent_for_team_and_position = {} @@ -14,36 +26,116 @@ def __init__(self, teams: Dict[int, List[int]]): self.team_and_position_for_agent[agent_id] = (team_id, position) self.agent_for_team_and_position[team_id, position] = agent_id + # Left/right team order is determined by team_list, so shuffling it + # TODO: check if this is correct + np_random = np_random or numpy.random + # np_random.shuffle(self.team_list) + def agent_position(self, agent_id: int) -> int: return self.team_and_position_for_agent[agent_id][1] - def agent_id(self, team_id: int, position: int) -> int: + def agent_id(self, team_id: Any, position: int) -> int: return self.agent_for_team_and_position[team_id, position] - def is_agent_in_team(self, agent_id:int , team_id: int) -> bool: + def is_agent_in_team(self, agent_id:int , team_id: Any) -> bool: return agent_id in self.teams[team_id] - def get_target_agent(self, team_id: int, target: str): - team_ids = list(self.teams.keys()) - idx = team_ids.index(team_id) + def get_team_idx(self, agent_id:int) -> int: + team_id, _ = self.team_and_position_for_agent[agent_id] + return 
self.team_list.index(team_id) + + def get_target_agent(self, team_id: Any, target: str): + idx = self.team_list.index(team_id) if target == "left_team": - target_id = team_ids[(idx+1) % self.num_teams] + target_id = self.team_list[(idx+1) % self.num_teams] return self.teams[target_id] if target == "left_team_leader": - target_id = team_ids[(idx+1) % self.num_teams] + target_id = self.team_list[(idx+1) % self.num_teams] return self.teams[target_id][0] if target == "right_team": - target_id = team_ids[(idx-1) % self.num_teams] + target_id = self.team_list[(idx-1) % self.num_teams] return self.teams[target_id] if target == "right_team_leader": - target_id = team_ids[(idx-1) % self.num_teams] + target_id = self.team_list[(idx-1) % self.num_teams] return self.teams[target_id][0] if target == "my_team_leader": return self.teams[team_id][0] if target == "all_foes": all_foes = [] - for foe_team_id in team_ids: + for foe_team_id in self.team_list: if foe_team_id != team_id: all_foes += self.teams[foe_team_id] return all_foes + if target == "all_foe_leaders": + leaders = [] + for foe_team_id in self.team_list: + if foe_team_id != team_id: + leaders.append(self.teams[foe_team_id][0]) + return leaders return None + +class RefillPopper: + def __init__(self, original_list, np_random=None): + assert isinstance(original_list, list), "original_list must be a list of (row, col) tuples" + self._original_list = original_list + self._np_random = np_random or numpy.random + self._refill_list = list(original_list) # copy + + def pop(self): + if len(self._original_list) == 1: + return self._original_list[0] + if not self._refill_list: + self._refill_list = list(self._original_list) + pop_idx = self._np_random.integers(len(self._refill_list)) + return self._refill_list.pop(pop_idx) + +class TeamLoader(spawn.SequentialLoader): + def __init__(self, config, np_random, + candidate_spawn_pos: List[List] = None): + assert config.TEAMS is not None, "config.TEAMS must be specified" + 
self.team_helper = TeamHelper(config.TEAMS, np_random) + # Check if the team specification is valid for spawning + assert len(self.team_helper.team_and_position_for_agent.keys()) == config.PLAYER_N,\ + "Number of agents in config.TEAMS must be equal to config.PLAYER_N" + for agent_id in range(1, config.PLAYER_N + 1): + assert agent_id in self.team_helper.team_and_position_for_agent,\ + f"Agent id {agent_id} is not specified in config.TEAMS" + super().__init__(config, np_random) + + if candidate_spawn_pos is None: + candidate_spawn_pos = spawn_team_together(config, self.team_helper.num_teams) + elif not isinstance(candidate_spawn_pos[0], list): + # candidate_spawn_pos for teams should be List[List] + candidate_spawn_pos = [[pos] for pos in candidate_spawn_pos] + + np_random.shuffle(candidate_spawn_pos) + self.candidate_spawn_pos = [RefillPopper(pos_list, np_random) + for pos_list in candidate_spawn_pos] + + def get_spawn_position(self, agent_id): + idx = self.team_helper.get_team_idx(agent_id) + return self.candidate_spawn_pos[idx].pop() + +def spawn_team_together(config, num_teams): + '''Generates spawn positions for new teams + Agents in the same team spawn together in the same tile + Evenly spaces teams around the square map borders + Returns: + list of tuple(int, int): + position: + The position (row, col) to spawn the given teams + ''' + teams_per_sides = (num_teams + 3) // 4 # 1-4 -> 1, 5-8 -> 2, etc. 
+ + tiles = spawn.get_edge_tiles(config) + each_side = len(tiles) // 4 + assert each_side > 4*teams_per_sides, 'Map too small for teams' + sides = [tiles[i*each_side:(i+1)*each_side] for i in range(4)] + + team_spawn_positions = [] + for side in sides: + for i in range(teams_per_sides): + idx = int(len(side)*(i+1)/(teams_per_sides + 1)) + team_spawn_positions.append([side[idx]]) + + return team_spawn_positions diff --git a/nmmo/lib/utils.py b/nmmo/lib/utils.py index 31b73d0b5..1aba60375 100644 --- a/nmmo/lib/utils.py +++ b/nmmo/lib/utils.py @@ -1,10 +1,11 @@ # pylint: disable=all - import inspect from collections import deque - +import hashlib import numpy as np +from nmmo.entity.entity import Entity, EntityState +EntityAttr = EntityState.State.attr_name_to_col class staticproperty(property): def __get__(self, cls, owner): @@ -85,3 +86,50 @@ def in_bounds(r, c, shape, border=0): c < C - border ) +def l1_map(size): + # l1 distance from the center tile (size//2, size//2) + x = np.abs(np.arange(size) - size//2) + X, Y = np.meshgrid(x, x) + data = np.stack((X, Y), -1) + return np.max(abs(data), -1) + +def get_hash_embedding(func, embed_dim): + # NOTE: This is a hacky way to get a hash embedding for a function + # TODO: Can we get more meaningful embedding? 
coding LLMs are good but huge + func_src = inspect.getsource(func) + hash_object = hashlib.sha256(func_src.encode()) + hex_digest = hash_object.hexdigest() + + # Convert the hexadecimal hash to a numpy array with float16 data type + hash_bytes = bytes.fromhex(hex_digest) + hash_array = np.frombuffer(hash_bytes, dtype=np.float16) + hash_array = np.nan_to_num(hash_array, nan=1, posinf=1, neginf=1) + hash_array = np.log(abs(hash_array.astype(float))) + hash_array -= hash_array.mean() + hash_array /= hash_array.std() + embedding = np.zeros(embed_dim, dtype=np.float16) + embedding[:len(hash_array)] = hash_array + return embedding + +def identify_closest_target(entity): + realm = entity.realm + radius = realm.config.PLAYER_VISION_RADIUS + visible_entities = Entity.Query.window( + realm.datastore, entity.pos[0], entity.pos[1], radius) + dist = linf(visible_entities[:,EntityAttr["row"]:EntityAttr["col"]+1], entity.pos) + entity_ids = visible_entities[:,EntityAttr["id"]] + + # Filter out the entities that are not attackable + flt_idx = visible_entities[:,EntityAttr["npc_type"]] >= 0 # no immortal (-1) + if entity.config.NPC_SYSTEM_ENABLED and not entity.config.NPC_ALLOW_ATTACK_OTHER_NPCS: + flt_idx &= entity_ids > 0 + dist = dist[flt_idx] + entity_ids = entity_ids[flt_idx] + + # TODO: this could be made smarter/faster, or perhaps consider health + if len(dist) > 1: + closest_idx = np.argmin(dist) + return realm.entity(entity_ids[closest_idx]) + if len(dist) == 1: + return realm.entity(entity_ids[0]) + return None diff --git a/nmmo/minigames/__init__.py b/nmmo/minigames/__init__.py new file mode 100644 index 000000000..ed5fc5806 --- /dev/null +++ b/nmmo/minigames/__init__.py @@ -0,0 +1,7 @@ +from .center_race import RacetoCenter, ProgressTowardCenter +from .king_hill import KingoftheHill +from .sandwich import Sandwich +from .comm_together import CommTogether +from .radio_raid import RadioRaid + +AVAILABLE_GAMES = [RacetoCenter, KingoftheHill, Sandwich, CommTogether, 
RadioRaid] diff --git a/nmmo/minigames/center_race.py b/nmmo/minigames/center_race.py new file mode 100644 index 000000000..008dec1d8 --- /dev/null +++ b/nmmo/minigames/center_race.py @@ -0,0 +1,139 @@ +# pylint: disable=invalid-name, duplicate-code, unused-argument +import time +from nmmo.core.game_api import Game +from nmmo.task import task_api +from nmmo.task.base_predicates import ProgressTowardCenter +from nmmo.lib import utils + + +class RacetoCenter(Game): + required_systems = ["TERRAIN", "RESOURCE"] + + def __init__(self, env, sampling_weight=None): + super().__init__(env, sampling_weight) + + self._map_size = 40 # determines the difficulty + self.adaptive_difficulty = True + self.num_game_won = 1 # at the same map size, threshold to increase the difficulty + self.step_size = 8 + self.num_player_resurrect = 0 + + # NOTE: This is a hacky way to get a hash embedding for a function + # TODO: Can we get more meaningful embedding? coding LLMs are good but huge + self.task_embedding = utils.get_hash_embedding(ProgressTowardCenter, + self.config.TASK_EMBED_DIM) + + @property + def map_size(self): + return self._map_size + + def set_map_size(self, map_size): + self._map_size = map_size + + def is_compatible(self): + return self.config.are_systems_enabled(self.required_systems) + + def reset(self, np_random, map_dict, tasks=None): + assert self.map_size >= self.config.PLAYER_N//4,\ + f"self.map_size({self.map_size}) must be >= {self.config.PLAYER_N//4}" + map_dict["mark_center"] = True # mark the center tile + super().reset(np_random, map_dict) + self.history[-1]["map_size"] = self.map_size + self.num_player_resurrect = 0 + + def _set_config(self): + self.config.reset() + self.config.toggle_systems(self.required_systems) + self.config.set_for_episode("ALLOW_MOVE_INTO_OCCUPIED_TILE", False) + + # Regenerate the map from fractal to have less obstacles + self.config.set_for_episode("MAP_RESET_FROM_FRACTAL", True) + self.config.set_for_episode("TERRAIN_WATER", 0.05) + 
self.config.set_for_episode("TERRAIN_FOILAGE", 0.95) # prop of stone tiles: 0.05 + self.config.set_for_episode("TERRAIN_SCATTER_EXTRA_RESOURCES", True) + + # Activate death fog + self.config.set_for_episode("DEATH_FOG_ONSET", None) # 32 + # self.config.set_for_episode("DEATH_FOG_SPEED", 1/6) + # # Only the center tile is safe + # self.config.set_for_episode("DEATH_FOG_FINAL_SIZE", 0) + + self._determine_difficulty() # sets the map_size + self.config.set_for_episode("MAP_CENTER", self.map_size) + + def _determine_difficulty(self): + # Determine the difficulty (the map size) based on the previous results + if self.adaptive_difficulty and self.history \ + and self.history[-1]["result"]: # the last game was won + last_results = [r["result"] for r in self.history if r["map_size"] == self.map_size] + if sum(last_results) >= self.num_game_won \ + and self.map_size <= self.config.original["MAP_CENTER"] - self.step_size: + self._map_size += self.step_size + + def _set_realm(self, map_dict): + # NOTE: this game respawns dead players at the edge, so setting delete_dead_entity=False + self.realm.reset(self._np_random, map_dict, delete_dead_player=False) + + def _define_tasks(self): + return task_api.make_same_task(ProgressTowardCenter, self.config.POSSIBLE_AGENTS, + task_kwargs={"embedding": self.task_embedding}) + + def _process_dead_players(self, terminated, dead_players): + # Respawn dead players at the edge + for player in dead_players.values(): + player.resurrect(freeze_duration=10, health_prop=1, edge_spawn=True) + self.num_player_resurrect += 1 + + @property + def winning_score(self): + if self._winners: + time_limit = self.config.HORIZON + return (time_limit - self.realm.tick) / time_limit # speed bonus + # No one reached the center + return 0.0 + + def _check_winners(self, terminated): + return self._who_completed_task() + + @staticmethod + def test(env, horizon=30, seed=0): + game = RacetoCenter(env) + env.reset(game=game, seed=seed) + + # Check configs + config = 
env.config + assert config.are_systems_enabled(game.required_systems) + assert config.COMBAT_SYSTEM_ENABLED is False + assert config.ALLOW_MOVE_INTO_OCCUPIED_TILE is False + + start_time = time.time() + for _ in range(horizon): + _, r, terminated, _, _ = env.step({}) + print(f"Time taken: {time.time() - start_time:.3f} s") # pylint: disable=bad-builtin + + # Test if the difficulty increases + org_map_size = game.map_size + for result in [False]*7 + [True]*game.num_game_won: + game.history.append({"result": result, "map_size": game.map_size}) + game._determine_difficulty() # pylint: disable=protected-access + assert game.map_size == (org_map_size + game.step_size) + + # Check if returns of resurrect/frozen players are correct + for agent_id, player in env._dead_this_tick.items(): # pylint: disable=protected-access + assert player.alive, "Resurrected players should be alive" + assert player.status.frozen, "Resurrected players should be frozen" + assert player.my_task.progress == 0, "Resurrected players should have 0 progress" + assert terminated[agent_id], "Resurrected players should be done = True" + assert r[agent_id] == -1, "Resurrected players should have -1 reward" + +if __name__ == "__main__": + import nmmo + test_config = nmmo.config.Default() # Medium, AllGameSystems + test_env = nmmo.Env(test_config) + RacetoCenter.test(test_env) # 0.85 s + + # performance test + from tests.testhelpers import profile_env_step + test_tasks = task_api.make_same_task(ProgressTowardCenter, test_env.possible_agents) + profile_env_step(tasks=test_tasks) + # env._compute_rewards(): 1.9577480710031523 diff --git a/nmmo/minigames/comm_together.py b/nmmo/minigames/comm_together.py new file mode 100644 index 000000000..293e401fc --- /dev/null +++ b/nmmo/minigames/comm_together.py @@ -0,0 +1,163 @@ +# pylint: disable=duplicate-code, invalid-name, unused-argument +import time +from nmmo.core.game_api import TeamBattle +from nmmo.task import task_spec +from nmmo.task.base_predicates 
import AllMembersWithinRange +from nmmo.lib import utils, team_helper + + +def seek_task(within_dist): + return task_spec.TaskSpec( + eval_fn=AllMembersWithinRange, + eval_fn_kwargs={"dist": within_dist}, + reward_to="team") + +class CommTogether(TeamBattle): + _required_systems = ["TERRAIN", "COMMUNICATION", "COMBAT"] + + def __init__(self, env, sampling_weight=None): + super().__init__(env, sampling_weight) + + # NOTE: all should fit in a 8x8 square, in which all can see each other + self.team_within_dist = 7 # gather all team members within this distance + + self._map_size = 128 # determines the difficulty + self._spawn_immunity = 128 # so that agents can attack each other later + self.adaptive_difficulty = False + self.num_game_won = 1 # at the same map size, threshold to increase the difficulty + self.step_size = 8 + self._grass_map = False + self.num_player_resurrect = 0 + + # NOTE: This is a hacky way to get a hash embedding for a function + # TODO: Can we get more meaningful embedding? 
coding LLMs are good but heavy + self.task_embedding = utils.get_hash_embedding(seek_task, self.config.TASK_EMBED_DIM) + + @property + def required_systems(self): + return self._required_systems + + @property + def map_size(self): + return self._map_size + + def set_map_size(self, map_size): + self._map_size = map_size + + def set_spawn_immunity(self, spawn_immunity): + self._spawn_immunity = spawn_immunity + + def set_grass_map(self, grass_map): + self._grass_map = grass_map + + def is_compatible(self): + return self.config.are_systems_enabled(self.required_systems) + + def reset(self, np_random, map_dict, tasks=None): + super().reset(np_random, map_dict) + self.history[-1]["map_size"] = self.map_size + self._grass_map = False # reset to default + self.num_player_resurrect = 0 + + def _set_config(self): + self.config.reset() + self.config.toggle_systems(self.required_systems) + self.config.set_for_episode("ALLOW_MOVE_INTO_OCCUPIED_TILE", False) + # Regenerate the map from fractal to have less obstacles + self.config.set_for_episode("MAP_RESET_FROM_FRACTAL", True) + self.config.set_for_episode("TERRAIN_WATER", 0.1) + self.config.set_for_episode("TERRAIN_FOILAGE", 0.9) + self.config.set_for_episode("TERRAIN_RESET_TO_GRASS", self._grass_map) + # NO death fog + self.config.set_for_episode("DEATH_FOG_ONSET", None) + # Enable +10 hp per tick, so that getting hit once doesn't damage the agent + self.config.set_for_episode("PLAYER_HEALTH_INCREMENT", 10) + + self._determine_difficulty() # sets the map size + self.config.set_for_episode("MAP_CENTER", self.map_size) + self.config.set_for_episode("COMBAT_SPAWN_IMMUNITY", self._spawn_immunity) + + def _determine_difficulty(self): + # Determine the difficulty (the map size) based on the previous results + if self.adaptive_difficulty and self.history \ + and self.history[-1]["result"]: # the last game was won + last_results = [r["result"] for r in self.history if r["map_size"] == self.map_size] + if sum(last_results) >= 
self.num_game_won: + self._map_size = min(self.map_size + self.step_size, + self.config.original["MAP_CENTER"]) + # # Decrease the spawn immunity, to increase attack window + # if self._spawn_immunity > self.history[-1]["winning_tick"]: + # next_immunity = (self._spawn_immunity + self.history[-1]["winning_tick"]) / 2 + # self._spawn_immunity = max(next_immunity, 64) # 64 is the minimum + + def _set_realm(self, map_dict): + # NOTE: this game respawns dead players at the edge, so setting delete_dead_entity=False + self.realm.reset(self._np_random, map_dict, delete_dead_player=False) + + def _define_tasks(self): + spec_list = [seek_task(self.team_within_dist)] * len(self.teams) + return task_spec.make_task_from_spec(self.teams, spec_list) + + def _process_dead_players(self, terminated, dead_players): + # Respawn dead players at a random location + for player in dead_players.values(): + player.resurrect(freeze_duration=30, health_prop=1, edge_spawn=False) + self.num_player_resurrect += 1 + + def _check_winners(self, terminated): + # No winner game is possible + return self._who_completed_task() + + @property + def winning_score(self): + if self._winners: + time_limit = self.config.HORIZON + speed_bonus = (time_limit - self.realm.tick) / time_limit + return speed_bonus + return 0.0 + + @staticmethod + def test(env, horizon=30, seed=0): + # pylint: disable=protected-access + game = CommTogether(env) + env.reset(game=game, seed=seed) + + # Check configs + config = env.config + assert config.are_systems_enabled(game.required_systems) + assert config.DEATH_FOG_ONSET is None + assert config.ITEM_SYSTEM_ENABLED is False + assert config.ALLOW_MOVE_INTO_OCCUPIED_TILE is False + + start_time = time.time() + for _ in range(horizon): + env.step({}) + print(f"Time taken: {time.time() - start_time:.3f} s") # pylint: disable=bad-builtin + + # These should run without errors + game.history.append({"result": False, "map_size": 0, "winning_tick": 512}) + game._determine_difficulty() + 
game.history.append({"result": True, "winners": None, "map_size": 0, "winning_tick": 512}) + game._determine_difficulty() + + # Test if the difficulty changes + org_map_size = game.map_size + for result in [False]*7 + [True]*game.num_game_won: + game.history.append({"result": result, "map_size": game.map_size, "winning_tick": 128}) + game._determine_difficulty() + if game.adaptive_difficulty: + assert game.map_size == (org_map_size + game.step_size) + +if __name__ == "__main__": + import nmmo + test_config = nmmo.config.Default() # Medium, AllGameSystems + teams = team_helper.make_teams(test_config, num_teams=7) + test_config.set("TEAMS", teams) + test_env = nmmo.Env(test_config) + CommTogether.test(test_env) # 0.65 s + + # performance test + from tests.testhelpers import profile_env_step + test_tasks = task_spec.make_task_from_spec(teams, [seek_task(5)]*len(teams)) + profile_env_step(tasks=test_tasks) + # env._compute_rewards(): 0.27938533399719745 diff --git a/nmmo/minigames/king_hill.py b/nmmo/minigames/king_hill.py new file mode 100644 index 000000000..f08bdfa9e --- /dev/null +++ b/nmmo/minigames/king_hill.py @@ -0,0 +1,171 @@ +# pylint: disable=invalid-name, duplicate-code, unused-argument +import time +from nmmo.core.game_api import TeamBattle +from nmmo.task import task_spec, base_predicates +from nmmo.lib import utils, team_helper + + +def seize_task(dur_to_win): + return task_spec.TaskSpec( + eval_fn=base_predicates.SeizeCenter, + eval_fn_kwargs={"num_ticks": dur_to_win}, + reward_to="team") + +class KingoftheHill(TeamBattle): + required_systems = ["TERRAIN", "COMBAT", "RESOURCE", "COMMUNICATION"] + + def __init__(self, env, sampling_weight=None): + super().__init__(env, sampling_weight) + + self._seize_duration = 10 # determines the difficulty + self.dur_step_size = 10 + self.max_seize_duration = 200 + self.adaptive_difficulty = True + self.num_game_won = 2 # at the same duration, threshold to increase the difficulty + self.map_size = 40 + 
self.score_scaler = .5 + + # NOTE: This is a hacky way to get a hash embedding for a function + # TODO: Can we get more meaningful embedding? coding LLMs are good but huge + self.task_embedding = utils.get_hash_embedding(seize_task, + self.config.TASK_EMBED_DIM) + + @property + def seize_duration(self): + return self._seize_duration + + def set_seize_duration(self, seize_duration): + self._seize_duration = seize_duration + + def is_compatible(self): + return self.config.are_systems_enabled(self.required_systems) + + def reset(self, np_random, map_dict, tasks=None): + super().reset(np_random, map_dict) + self.history[-1]["map_size"] = self.map_size + self.history[-1]["seize_duration"] = self.seize_duration + + def _set_config(self): + self.config.reset() + self.config.toggle_systems(self.required_systems) + self.config.set_for_episode("MAP_CENTER", self.map_size) + self.config.set_for_episode("ALLOW_MOVE_INTO_OCCUPIED_TILE", False) + + # Regenerate the map from fractal to have less obstacles + self.config.set_for_episode("MAP_RESET_FROM_FRACTAL", True) + self.config.set_for_episode("TERRAIN_WATER", 0.05) + self.config.set_for_episode("TERRAIN_FOILAGE", 0.95) # prop of stone tiles: 0.05 + self.config.set_for_episode("TERRAIN_SCATTER_EXTRA_RESOURCES", True) + + # Activate death fog + self.config.set_for_episode("DEATH_FOG_ONSET", 32) + self.config.set_for_episode("DEATH_FOG_SPEED", 1/16) + self.config.set_for_episode("DEATH_FOG_FINAL_SIZE", 5) + + self._determine_difficulty() # sets the seize duration + + def _determine_difficulty(self): + # Determine the difficulty (the seize duration) based on the previous results + if self.adaptive_difficulty and self.history \ + and self.history[-1]["result"]: # the last game was won + last_results = [r["result"] for r in self.history + if r["seize_duration"] == self.seize_duration] + if sum(last_results) >= self.num_game_won: + self._seize_duration = min(self.seize_duration + self.dur_step_size, + self.max_seize_duration) + + def 
_set_realm(self, map_dict): + self.realm.reset(self._np_random, map_dict, custom_spawn=True, seize_targets=["center"]) + # team spawn requires custom spawning + team_loader = team_helper.TeamLoader(self.config, self._np_random) + self.realm.players.spawn(team_loader) + + def _define_tasks(self): + spec_list = [seize_task(self.seize_duration)] * len(self.teams) + return task_spec.make_task_from_spec(self.teams, spec_list) + + @property + def winning_score(self): + if self._winners: + time_limit = self.config.HORIZON + speed_bonus = (time_limit - self.realm.tick) / time_limit + alive_bonus = sum(1.0 for agent_id in self._winners if agent_id in self.realm.players)\ + / len(self._winners) + return (speed_bonus + alive_bonus) / 2 # set max to 1.0 + # No one succeeded + return 0.0 + + def _check_winners(self, terminated): + assert self.config.TEAMS is not None, "Team battle mode requires TEAMS to be defined" + winners = self._who_completed_task() + if winners is not None: + return winners + + if len(self.realm.seize_status) == 0: + return None + + seize_results = list(self.realm.seize_status.values()) + + # Time's up, and a team has seized the center + if self.realm.tick == self.config.HORIZON: + winners = [] + # Declare the latest seizing agent as the winner + for agent_id, _ in seize_results: + for task in self.tasks: + if agent_id in task.assignee: + winners += task.assignee + return winners or None + + # Only one team remains and they have seized the center + current_teams = self._check_remaining_teams() + if len(current_teams) == 1: + winning_team = list(current_teams.keys())[0] + team_members = self.config.TEAMS[winning_team] + for agent_id, _ in seize_results: + # Check if the agent is in the winning team + if agent_id in team_members: + return team_members + + # No team has seized the center + return None + + @staticmethod + def test(env, horizon=30, seed=0): + game = KingoftheHill(env) + env.reset(game=game, seed=seed) + + # Check configs + config = env.config + 
def hunt_task(num_npc):
  """Build the team-rewarded TaskSpec evaluated by DefeatEntity.

  The predicate receives agent_type "npc", level 0 and num_agent=num_npc.
  """
  predicate_kwargs = {"agent_type": "npc", "level": 0, "num_agent": num_npc}
  spec = task_spec.TaskSpec(eval_fn=DefeatEntity,
                            eval_fn_kwargs=predicate_kwargs,
                            reward_to="team")
  return spec
@property
def teams(self):
  """Split agent ids 1..PLAYER_N into num_teams consecutive teams.

  Returns a dict keyed by team id (1..num_teams). Every team but the last holds
  PLAYER_N // num_teams ids; the last team takes all remaining ids up to PLAYER_N.
  """
  n_players, n_teams = self.config.PLAYER_N, self.num_teams
  per_team = n_players // n_teams

  roster = {}
  first_id = 1
  for team_id in range(1, n_teams):
    roster[team_id] = list(range(first_id, first_id + per_team))
    first_id += per_team
  # The last team absorbs the remainder of the integer division
  roster[n_teams] = list(range(first_id, n_players + 1))
  return roster
self.config.set_for_episode("NPC_SPAWN_AGGRESSIVE", 0.8) + + self._determine_difficulty() # sets the goal_num_npc + + def _determine_difficulty(self): + # Determine the difficulty (the map size) based on the previous results + if self.adaptive_difficulty and self.history \ + and self.history[-1]["result"]: # the last game was won + last_results = [r["result"] for r in self.history if r["goal_num_npc"] == self.goal_num_npc] + if sum(last_results) >= self.num_game_won: + self._goal_num_npc = self._goal_num_npc + self.step_size + + def _set_realm(self, map_dict): + self.realm.reset(self._np_random, map_dict, custom_spawn=True) + # team spawn requires custom spawning + team_loader = team_helper.TeamLoader(self.config, self._np_random) + self.realm.players.spawn(team_loader) + + # from each team, pick 4 agents and place on each quad center as recons + self.quad_centers = list(self.realm.map.quad_centers.values()) + for members in self.teams.values(): + recons = self._np_random.choice(members, size=4, replace=False) + for idx, agent_id in enumerate(recons): + self.realm.players[agent_id].make_recon(new_pos=self.quad_centers[idx]) + + def _define_tasks(self): + spec_list = [hunt_task(self.goal_num_npc)] * len(self.teams) + return task_spec.make_task_from_spec(self.teams, spec_list) + + def _process_dead_npcs(self, dead_npcs): + npc_manager = self.realm.npcs + diff_player_npc = (self.realm.num_players - self.num_teams*4) - len(npc_manager) + # Spawn more NPCs if there are more players than NPCs + # If the gap is large, spawn in waves + # If the gap is small, spawn in small batches + if diff_player_npc >= 0 and (len(npc_manager) <= self.npc_spawn_crit or \ + self.realm.tick - self._last_wave_tick > self.max_wave_interval): + if self._npc_danger < self._spawn_center_crit: + spawn_pos = self.realm.map.center_coord + else: + spawn_pos = self._np_random.choice(self.quad_centers) + r_min, r_max = spawn_pos[0] - self.npc_spawn_radius, spawn_pos[0] + self.npc_spawn_radius + c_min, 
c_max = spawn_pos[1] - self.npc_spawn_radius, spawn_pos[1] + self.npc_spawn_radius + npc_manager.area_spawn(r_min, r_max, c_min, c_max, self.npc_wave_num, + lambda r, c: npc_manager.spawn_npc(r, c, danger=self._npc_danger)) + self._npc_danger += min(self._danger_step_size, 1) # max danger = 1 + self._last_wave_tick = self.realm.tick + + def _check_winners(self, terminated): + # No winner game is possible + return self._who_completed_task() + + @property + def is_over(self): + return self.winners is not None or self.realm.tick >= self.config.HORIZON or \ + self.realm.num_players <= (self.num_teams*4) # 4 immortal recons per team + + @property + def winning_score(self): + if self._winners: + time_limit = self.config.HORIZON + speed_bonus = (time_limit - self.realm.tick) / time_limit + alive_bonus = sum(1.0 for agent_id in self._winners if agent_id in self.realm.players)\ + / len(self._winners) + return (speed_bonus + alive_bonus) / 2 # set max to 1.0 + return 0.0 + + @staticmethod + def test(env, horizon=30, seed=0): + game = RadioRaid(env) + env.reset(game=game, seed=seed) + + # Check configs + config = env.config + assert config.are_systems_enabled(game.required_systems) + assert config.COMBAT_SYSTEM_ENABLED is True + assert config.RESOURCE_SYSTEM_ENABLED is False + assert config.COMMUNICATION_SYSTEM_ENABLED is True + assert config.ITEM_SYSTEM_ENABLED is False + assert config.DEATH_FOG_ONSET is None + assert config.ALLOW_MOVE_INTO_OCCUPIED_TILE is False + assert config.NPC_SYSTEM_ENABLED is True + assert config.NPC_DEFAULT_REFILL_DEAD_NPCS is False + + start_time = time.time() + for _ in range(horizon): + env.step({}) + print(f"Time taken: {time.time() - start_time:.3f} s") # pylint: disable=bad-builtin + + # pylint: disable=protected-access + # These should run without errors + game.history.append({"result": False, "goal_num_npc": game.goal_num_npc}) + game._determine_difficulty() + + # Test if the difficulty changes + org_goal_npc = game.goal_num_npc + for result 
def secure_order(pos, radius=5):
  """Build a "secure" order dict holding the given position and radius (default 5)."""
  zone = {"position": pos, "radius": radius}
  return {"secure": zone}
@property
def outer_npc_num(self):
  """Number of NPCs handed to the edge spawn.

  Returns the explicitly configured _outer_npc_num when it is set -- including 0,
  which the former `or` fallback silently replaced by the default. When it is None,
  the default is _inner_npc_num * num_teams, capped at 2 * map_size.
  """
  if self._outer_npc_num is not None:
    return self._outer_npc_num
  return min(self._inner_npc_num*self.num_teams, self.map_size*2)
Determine the difficulty based on the previous results + if self.adaptive_difficulty and self.history \ + and self.history[-1]["result"]: # the last game was won + last_results = [r["result"] for r in self.history + if r["inner_npc_num"] == self.inner_npc_num] + if sum(last_results) >= self.num_game_won: + # Increase the npc num, when there were only few npcs left at the end + self._inner_npc_num += self.npc_step_size + self._inner_npc_num = min(self._inner_npc_num, self.max_npc_num) + + def _generate_spawn_locs(self): + center = self.config.MAP_SIZE // 2 + radius = self.map_size // 4 + angles = np.linspace(0, 2*np.pi, self.num_teams, endpoint=False) + return [(center + int(radius*np.cos(a)), center + int(radius*np.sin(a))) for a in angles] + + def _set_realm(self, map_dict): + self.realm.reset(self._np_random, map_dict, custom_spawn=True) + # team spawn requires custom spawning + spawn_locs = self._generate_spawn_locs() + team_loader = team_helper.TeamLoader(self.config, self._np_random, spawn_locs) + self.realm.players.spawn(team_loader) + + # spawn NPCs + npc_manager = self.realm.npcs + center = self.config.MAP_SIZE // 2 + offset = self.config.MAP_CENTER // 8 + for i in range(self.num_teams): + r, c = spawn_locs[i] + if r < center: + r_min, r_max = center - offset, center - 1 + else: + r_min, r_max = center + 1, center + offset + if c < center: + c_min, c_max = center - offset, center - 1 + else: + c_min, c_max = center + 1, center + offset + # pylint: disable=cell-var-from-loop + npc_manager.area_spawn(r_min, r_max, c_min, c_max, self.inner_npc_num, + lambda r, c: npc_manager.spawn_npc( + r, c, name=f"NPC{i+1}", order={"rally": spawn_locs[i]})) + npc_manager.edge_spawn(self.outer_npc_num, + lambda r, c: npc_manager.spawn_npc( + r, c, name="NPC5", order={"rally": (center,center)})) + + def _process_dead_npcs(self, dead_npcs): + npc_manager = self.realm.npcs + target_num = min(self.realm.num_players, self.inner_npc_num) // 2 + if len(npc_manager) < target_num: + 
@property
def winning_score(self):
  """Score of the finished game: a speed bonus when there are winners, else 0.0.

  The speed bonus is the fraction of HORIZON that was still left at the current
  realm tick, so it is 1.0 at tick 0 and decreases as the tick grows.
  """
  if not self._winners:
    # No one succeeded
    return 0.0
  horizon = self.config.HORIZON
  return (horizon - self.realm.tick) / horizon
Sandwich.test(test_env) # 0.74 s + + # performance test + from tests.testhelpers import profile_env_step + test_tasks = task_spec.make_task_from_spec(test_config.TEAMS, + [team_survival_task(30)]*len(test_config.TEAMS)) + profile_env_step(tasks=test_tasks) + # env._compute_rewards(): 0.1768564050034911 diff --git a/nmmo/render/render_client.py b/nmmo/render/render_client.py index d861c3fed..9cb0b7135 100644 --- a/nmmo/render/render_client.py +++ b/nmmo/render/render_client.py @@ -1,15 +1,15 @@ from __future__ import annotations import numpy as np -from nmmo.render import websocket from nmmo.render.overlay import OverlayRegistry from nmmo.render.render_utils import patch_packet # Render is external to the game -class WebsocketRenderer: +# NOTE: WebsocketRenderer has been renamed to DummyRenderer +class DummyRenderer: def __init__(self, realm=None) -> None: - self._client = websocket.Application(realm) + self._client = None # websocket.Application(realm) self.overlay_pos = [256, 256] self._realm = realm @@ -53,7 +53,24 @@ def render_realm(self) -> None: self.packet = packet # pass the packet to renderer - pos, cmd = self._client.update(self.packet) + pos, cmd = None, None # self._client.update(self.packet) + + # NOTE: copy pasted from nmmo/render/websocket.py + # def update(self, packet): + # self.tick += 1 + # uptime = np.round(self.tickRate*self.tick, 1) + # delta = time.time() - self.time + # print('Wall Clock: ', str(delta)[:5], 'Uptime: ', uptime, ', Tick: ', self.tick) + # delta = self.tickRate - delta + # if delta > 0: + # time.sleep(delta) + # self.time = time.time() + # for client in self.clients: + # client.sendUpdate(packet) + # if client.pos is not None: + # self.pos = client.pos + # self.cmd = client.cmd + # return self.pos, self.cmd self.overlay_pos = pos self.registry.step(cmd) diff --git a/nmmo/render/replay_helper.py b/nmmo/render/replay_helper.py index 1a1bab1e6..177e9a962 100644 --- a/nmmo/render/replay_helper.py +++ b/nmmo/render/replay_helper.py 
@@ -2,9 +2,10 @@ import json import logging import lzma -import pickle from typing import Dict +import dill + from .render_utils import np_encoder, patch_packet class ReplayHelper: @@ -33,12 +34,17 @@ def __init__(self, realm=None): self.packets = None self.map = None self._i = 0 + self._agent_task = None def reset(self): self.packets = [] self.map = None self._i = 0 self.update() # to capture the initial packet + self._agent_task = { + agent_id: agent.my_task.name + for agent_id, agent in self._realm.players.items() + } def __len__(self): return len(self.packets) @@ -67,12 +73,24 @@ def _packet(self): if "config" in packet: del packet["config"] + # Include the attributes that the web client refers + packet["config"] = { + "PLAYER_DEATH_FOG": self._realm.config.DEATH_FOG_ONSET, + "PLAYER_DEATH_FOG_FINAL_SIZE": self._realm.config.DEATH_FOG_FINAL_SIZE, + "PLAYER_DEATH_FOG_SPEED": self._realm.config.DEATH_FOG_SPEED, + } + return packet + # NOTE: Added data for analysis def _metadata(self) -> Dict: return { + 'config': self._realm.config.original, # returns config Dict + 'task': self._agent_task, + 'tick': self._realm.tick, # agents that don't have AGENT_CULL (91) event are alive 'event_log': self._realm.event_log.get_data(), - 'event_attr_col': self._realm.event_log.attr_to_col + 'event_attr_col': self._realm.event_log.attr_to_col, + 'event_stats': self._realm.event_log.get_stat(), } def update(self): @@ -97,7 +115,7 @@ def save(self, filename_prefix, compress=False): out.write(data) with open(metadata_file, 'wb') as out: - pickle.dump(self._metadata(), out) + dill.dump(self._metadata(), out) @classmethod def load(cls, replay_file): diff --git a/nmmo/render/websocket.py b/nmmo/render/websocket.py deleted file mode 100644 index b3b70123c..000000000 --- a/nmmo/render/websocket.py +++ /dev/null @@ -1,163 +0,0 @@ -# pylint: disable=all - -import numpy as np - -from signal import signal, SIGINT -import json -import os -import sys -import time -import threading - -from 
twisted.internet import reactor -from twisted.python import log -from twisted.web.server import Site -from twisted.web.static import File - -from autobahn.twisted.websocket import WebSocketServerFactory, \ - WebSocketServerProtocol -from autobahn.twisted.resource import WebSocketResource - -from .render_utils import np_encoder - -class GodswordServerProtocol(WebSocketServerProtocol): - def __init__(self): - super().__init__() - print("Created a server") - self.frame = 0 - - #"connected" is already used by WSSP - self.sent_environment = False - self.isConnected = False - - self.pos = [0, 0] - self.cmd = None - - def onOpen(self): - print("Opened connection to server") - - def onClose(self, wasClean, code=None, reason=None): - self.isConnected = False - print('Connection closed') - - def connectionMade(self): - super().connectionMade() - self.factory.clientConnectionMade(self) - - def connectionLost(self, reason): - super().connectionLost(reason) - self.factory.clientConnectionLost(self) - self.sent_environment = False - - #Not used without player interaction - def onMessage(self, packet, isBinary): - print("Server packet", packet) - packet = packet.decode() - _, packet = packet.split(';') #Strip headeer - r, c, cmd = packet.split(' ') #Split camera coords - if len(cmd) == 0 or cmd == '\t': - cmd = None - - self.pos = [int(r), int(c)] - self.cmd = cmd - - self.isConnected = True - - def onConnect(self, request): - print("WebSocket connection request: {}".format(request)) - realm = self.factory.realm - self.realm = realm - self.frame += 1 - - def serverPacket(self): - data = self.realm.packet - return data - - def sendUpdate(self, data): - packet = {} - packet['resource'] = data['resource'] - packet['player'] = data['player'] - packet['npc'] = data['npc'] - packet['pos'] = data['pos'] - packet['wilderness'] = data['wilderness'] - packet['market'] = data['market'] - - print('Is Connected? 
: {}'.format(self.isConnected)) - if not self.sent_environment: - packet['map'] = data['environment'] - packet['border'] = data['border'] - packet['size'] = data['size'] - self.sent_environment=True - - if 'overlay' in data: - packet['overlay'] = data['overlay'] - print('SENDING OVERLAY: ', len(packet['overlay'])) - - packet = json.dumps(packet, default=np_encoder).encode('utf8') - self.sendMessage(packet, False) - - -class WSServerFactory(WebSocketServerFactory): - def __init__(self, ip, realm): - super().__init__(ip) - self.realm = realm - self.time = time.time() - self.clients = [] - - self.pos = [0, 0] - self.cmd = None - self.tickRate = 0.6 - self.tick = 0 - - def update(self, packet): - self.tick += 1 - uptime = np.round(self.tickRate*self.tick, 1) - delta = time.time() - self.time - print('Wall Clock: ', str(delta)[:5], 'Uptime: ', uptime, ', Tick: ', self.tick) - delta = self.tickRate - delta - if delta > 0: - time.sleep(delta) - self.time = time.time() - - for client in self.clients: - client.sendUpdate(packet) - if client.pos is not None: - self.pos = client.pos - self.cmd = client.cmd - - return self.pos, self.cmd - - def clientConnectionMade(self, client): - self.clients.append(client) - - def clientConnectionLost(self, client): - self.clients.remove(client) - -class Application: - def __init__(self, realm): - signal(SIGINT, self.kill) - log.startLogging(sys.stdout) - - port = 8080 - self.factory = WSServerFactory(u'ws://localhost:{}'.format(port), realm) - self.factory.protocol = GodswordServerProtocol - resource = WebSocketResource(self.factory) - - root = File(".") - root.putChild(b"ws", resource) - site = Site(root) - - reactor.listenTCP(port, site) - - def run(): - reactor.run(installSignalHandlers=0) - - threading.Thread(target=run).start() - - def update(self, packet): - return self.factory.update(packet) - - def kill(*args): - print("Killed by user") - reactor.stop() - os._exit(0) \ No newline at end of file diff --git 
a/nmmo/systems/ai/__init__.py b/nmmo/systems/ai/__init__.py deleted file mode 100644 index 5c46b3697..000000000 --- a/nmmo/systems/ai/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -# pylint: disable=import-self -from . import utils, move, behavior, policy diff --git a/nmmo/systems/ai/behavior.py b/nmmo/systems/ai/behavior.py deleted file mode 100644 index 81075415b..000000000 --- a/nmmo/systems/ai/behavior.py +++ /dev/null @@ -1,80 +0,0 @@ -#pylint: disable=protected-access, invalid-name - -import numpy as np - -import nmmo -from nmmo.systems.ai import move, utils - -def update(entity): - '''Update validity of tracked entities''' - if not utils.validTarget(entity, entity.attacker, entity.vision): - entity.attacker = None - if not utils.validTarget(entity, entity.target, entity.vision): - entity.target = None - if not utils.validTarget(entity, entity.closest, entity.vision): - entity.closest = None - - if entity.__class__.__name__ != 'Player': - return - - if not utils.validResource(entity, entity.food, entity.vision): - entity.food = None - if not utils.validResource(entity, entity.water, entity.vision): - entity.water = None - - -def pathfind(realm, actions, entity, target): - # TODO: do not access realm._np_random directly. 
ALSO see below for all other uses - actions[nmmo.action.Move] = { - nmmo.action.Direction: move.pathfind(realm.map, entity, target, realm._np_random)} - - -def explore(realm, actions, entity): - sz = realm.config.TERRAIN_SIZE - r, c = entity.pos - - spawnR, spawnC = entity.spawnPos - centR, centC = sz//2, sz//2 - - vR, vC = centR-spawnR, centC-spawnC - - mmag = max(abs(vR), abs(vC)) - rr = r + int(np.round(entity.vision*vR/mmag)) - cc = c + int(np.round(entity.vision*vC/mmag)) - - tile = realm.map.tiles[rr, cc] - pathfind(realm, actions, entity, tile) - - -def meander(realm, actions, entity): - actions[nmmo.action.Move] = { - nmmo.action.Direction: move.habitable(realm.map, entity, realm._np_random)} - -def evade(realm, actions, entity): - actions[nmmo.action.Move] = { - nmmo.action.Direction: move.antipathfind(realm.map, entity, entity.attacker, - realm._np_random)} - -def hunt(realm, actions, entity): - # Move args - distance = utils.lInfty(entity.pos, entity.target.pos) - - if distance > 1: - actions[nmmo.action.Move] = {nmmo.action.Direction: move.pathfind(realm.map, - entity, - entity.target, - realm._np_random)} - elif distance == 0: - actions[nmmo.action.Move] = { - nmmo.action.Direction: move.habitable(realm.map, entity, realm._np_random)} - - attack(realm, actions, entity) - -def attack(realm, actions, entity): - distance = utils.lInfty(entity.pos, entity.target.pos) - if distance > entity.skills.style.attack_range(realm.config): - return - - actions[nmmo.action.Attack] = { - nmmo.action.Style: entity.skills.style, - nmmo.action.Target: entity.target} diff --git a/nmmo/systems/ai/move.py b/nmmo/systems/ai/move.py deleted file mode 100644 index d2d398f2b..000000000 --- a/nmmo/systems/ai/move.py +++ /dev/null @@ -1,47 +0,0 @@ -# pylint: disable=cyclic-import -from nmmo.core import action -from nmmo.systems.ai import utils - -DIRECTIONS = [ # row delta, col delta, action - (-1, 0, action.North), - (1, 0, action.South), - (0, -1, action.West), - (0, 1, 
action.East)] * 2 - -def habitable(realm_map, ent, np_random): - r, c = ent.pos - is_habitable = realm_map.habitable_tiles - start = np_random.get_direction() - for i in range(4): - dr, dc, act = DIRECTIONS[start + i] - if is_habitable[r + dr, c + dc]: - return act - - return action.North - -def towards(direction, np_random): - if direction == (-1, 0): - return action.North - if direction == (1, 0): - return action.South - if direction == (0, -1): - return action.West - if direction == (0, 1): - return action.East - - return np_random.choice(action.Direction.edges) - -def bullrush(ent, targ, np_random): - direction = utils.directionTowards(ent, targ) - return towards(direction, np_random) - -def pathfind(realm_map, ent, targ, np_random): - direction = utils.aStar(realm_map, ent.pos, targ.pos) - return towards(direction, np_random) - -def antipathfind(realm_map, ent, targ, np_random): - er, ec = ent.pos - tr, tc = targ.pos - goal = (2*er - tr , 2*ec-tc) - direction = utils.aStar(realm_map, ent.pos, goal) - return towards(direction, np_random) diff --git a/nmmo/systems/ai/policy.py b/nmmo/systems/ai/policy.py deleted file mode 100644 index ff5b6642e..000000000 --- a/nmmo/systems/ai/policy.py +++ /dev/null @@ -1,38 +0,0 @@ - -from nmmo.systems.ai import behavior, utils - -def passive(realm, entity): - behavior.update(entity) - actions = {} - - behavior.meander(realm, actions, entity) - - return actions - -def neutral(realm, entity): - behavior.update(entity) - actions = {} - - if not entity.attacker: - behavior.meander(realm, actions, entity) - else: - entity.target = entity.attacker - behavior.hunt(realm, actions, entity) - - return actions - -def hostile(realm, entity): - behavior.update(entity) - actions = {} - - # This is probably slow - if not entity.target: - entity.target = utils.closestTarget(entity, realm.map.tiles, - rng=entity.vision) - - if not entity.target: - behavior.meander(realm, actions, entity) - else: - behavior.hunt(realm, actions, entity) - - 
return actions diff --git a/nmmo/systems/ai/utils.py b/nmmo/systems/ai/utils.py deleted file mode 100644 index 3fd7165ca..000000000 --- a/nmmo/systems/ai/utils.py +++ /dev/null @@ -1,195 +0,0 @@ -#pylint: disable=protected-access, invalid-name - -import heapq -from typing import Tuple - -import numpy as np - -from nmmo.lib.utils import in_bounds - - -def validTarget(ent, targ, rng): - if targ is None or not targ.alive or lInfty(ent.pos, targ.pos) > rng: - return False - return True - - -def validResource(ent, tile, rng): - return tile is not None and tile.state.tex in ( - 'foilage', 'water') and lInfty(ent.pos, tile.pos) <= rng - - -def directionTowards(ent, targ): - sr, sc = ent.pos - tr, tc = targ.pos - - if abs(sc - tc) > abs(sr - tr): - direction = (0, np.sign(tc - sc)) - else: - direction = (np.sign(tr - sr), 0) - - return direction - - -def closestTarget(ent, tiles, rng=1): - sr, sc = ent.pos - for d in range(rng+1): - for r in range(-d, d+1): - for e in tiles[sr+r, sc-d].entities.values(): - if e is not ent and validTarget(ent, e, rng): - return e - - for e in tiles[sr + r, sc + d].entities.values(): - if e is not ent and validTarget(ent, e, rng): - return e - - for e in tiles[sr - d, sc + r].entities.values(): - if e is not ent and validTarget(ent, e, rng): - return e - - for e in tiles[sr + d, sc + r].entities.values(): - if e is not ent and validTarget(ent, e, rng): - return e - return None - - -def lInf(ent, targ): - sr, sc = ent.pos - gr, gc = targ.pos - return abs(gr - sr) + abs(gc - sc) - - -def adjacentPos(pos): - r, c = pos - return [(r - 1, c), (r, c - 1), (r + 1, c), (r, c + 1)] - - -def cropTilesAround(position: Tuple[int, int], horizon: int, tiles): - line, column = position - - return tiles[max(line - horizon, 0): min(line + horizon + 1, len(tiles)), - max(column - horizon, 0): min(column + horizon + 1, len(tiles[0]))] - -# A* Search - - -def l1(start, goal): - sr, sc = start - gr, gc = goal - return abs(gr - sr) + abs(gc - sc) - - -def 
l2(start, goal): - sr, sc = start - gr, gc = goal - return 0.5*((gr - sr)**2 + (gc - sc)**2)**0.5 - -# TODO: unify lInfty and lInf - - -def lInfty(start, goal): - sr, sc = start - gr, gc = goal - return max(abs(gr - sr), abs(gc - sc)) - - -CUTOFF = 100 - - -def aStar(realm_map, start, goal): - cutoff = CUTOFF - tiles = realm_map.tiles - if start == goal: - return (0, 0) - if (start, goal) in realm_map.pathfinding_cache: - return realm_map.pathfinding_cache[(start, goal)] - initial_goal = goal - pq = [(0, start)] - - backtrace = {} - cost = {start: 0} - - closestPos = start - closestHeuristic = l1(start, goal) - closestCost = closestHeuristic - - while pq: - # Use approximate solution if budget exhausted - cutoff -= 1 - if cutoff <= 0: - if goal not in backtrace: - goal = closestPos - break - - priority, cur = heapq.heappop(pq) - - if cur == goal: - break - - for nxt in adjacentPos(cur): - if not in_bounds(*nxt, tiles.shape): - continue - - newCost = cost[cur] + 1 - if nxt not in cost or newCost < cost[nxt]: - cost[nxt] = newCost - heuristic = lInfty(goal, nxt) - priority = newCost + heuristic - - # Compute approximate solution - if heuristic < closestHeuristic or ( - heuristic == closestHeuristic and priority < closestCost): - closestPos = nxt - closestHeuristic = heuristic - closestCost = priority - - heapq.heappush(pq, (priority, nxt)) - backtrace[nxt] = cur - - while goal in backtrace and backtrace[goal] != start: - gr, gc = goal - goal = backtrace[goal] - sr, sc = goal - realm_map.pathfinding_cache[(goal, initial_goal)] = (gr - sr, gc - sc) - - sr, sc = start - gr, gc = goal - realm_map.pathfinding_cache[(start, initial_goal)] = (gr - sr, gc - sc) - return (gr - sr, gc - sc) -# End A* - -# Adjacency functions -def adjacentDeltas(): - return [(-1, 0), (1, 0), (0, 1), (0, -1)] - - -def l1Deltas(s): - rets = [] - for r in range(-s, s + 1): - for c in range(-s, s + 1): - rets.append((r, c)) - return rets - - -def posSum(pos1, pos2): - return pos1[0] + pos2[0], 
pos1[1] + pos2[1] - - -def adjacentEmptyPos(env, pos): - return [p for p in adjacentPos(pos) - if in_bounds(*p, env.size)] - - -def adjacentTiles(env, pos): - return [env.tiles[p] for p in adjacentPos(pos) - if in_bounds(*p, env.size)] - - -def adjacentMats(tiles, pos): - return [type(tiles[p].state) for p in adjacentPos(pos) - if in_bounds(*p, tiles.shape)] - - -def adjacencyDelMatPairs(env, pos): - return zip(adjacentDeltas(), adjacentMats(env.tiles, pos)) -### End### diff --git a/nmmo/systems/combat.py b/nmmo/systems/combat.py index b944c7763..bfcda75a9 100644 --- a/nmmo/systems/combat.py +++ b/nmmo/systems/combat.py @@ -3,7 +3,7 @@ import numpy as np from nmmo.systems import skill as Skill -from nmmo.lib.log import EventCode +from nmmo.lib.event_code import EventCode def level(skills): return max(e.level.val for e in skills.skills) @@ -24,9 +24,9 @@ def damage_multiplier(config, skill, targ): return 1.0 # pylint: disable=unnecessary-lambda-assignment -def attack(realm, player, target, skill_fn): - config = player.config - skill = skill_fn(player) +def attack(realm, attacker, target, skill_fn): + config = attacker.config + skill = skill_fn(attacker) skill_type = type(skill) skill_name = skill_type.__name__ @@ -67,22 +67,29 @@ def attack(realm, player, target, skill_fn): # Compute modifiers multiplier = damage_multiplier(config, skill, target) - skill_offense = base_damage + level_damage * skill.level.val - if config.PROGRESSION_SYSTEM_ENABLED: - skill_defense = config.PROGRESSION_BASE_DEFENSE + \ + # NOTE: skill offense and defense are only for agents, NOT npcs + skill_offense = base_damage + if attacker.is_player: + skill_offense += level_damage * skill.level.val + if attacker.is_npc and config.EQUIPMENT_SYSTEM_ENABLED: + # NOTE: In this case, npc off/def is set only with equipment. Revisit this. 
+ skill_offense = 0 + + if config.PROGRESSION_SYSTEM_ENABLED and target.is_player: + skill_defense = config.PROGRESSION_BASE_DEFENSE + \ config.PROGRESSION_LEVEL_DEFENSE*level(target.skills) else: - skill_defense = 0 + skill_defense = 0 if config.EQUIPMENT_SYSTEM_ENABLED: - equipment_offense = player.equipment.total(offense_fn) + equipment_offense = attacker.equipment.total(offense_fn) equipment_defense = target.equipment.total(defense_fn) # after tallying ammo damage, consume ammo (i.e., fire) when the skill type matches - ammunition = player.equipment.ammunition.item + ammunition = attacker.equipment.ammunition.item if ammunition is not None and getattr(ammunition, skill_name.lower() + '_attack').val > 0: - ammunition.fire(player) + ammunition.fire(attacker) else: equipment_offense = 0 @@ -91,28 +98,16 @@ def attack(realm, player, target, skill_fn): # Total damage calculation offense = skill_offense + equipment_offense defense = skill_defense + equipment_defense - damage = config.COMBAT_DAMAGE_FORMULA(offense, defense, multiplier) - #damage = multiplier * (offense - defense) + min_damage_prop = config.COMBAT_MINIMUM_DAMAGE_PROPORTION + damage = config.COMBAT_DAMAGE_FORMULA(offense, defense, multiplier, min_damage_prop) damage = max(int(damage), 0) - if player.is_player: - equipment_level_offense = 0 - equipment_level_defense = 0 - if config.EQUIPMENT_SYSTEM_ENABLED: - equipment_level_offense = player.equipment.total(lambda e: e.level) - equipment_level_defense = target.equipment.total(lambda e: e.level) - - realm.event_log.record(EventCode.SCORE_HIT, player, + if attacker.is_player: + realm.event_log.record(EventCode.SCORE_HIT, attacker, target=target, combat_style=skill_type, damage=damage) - realm.log_milestone(f'Damage_{skill_name}', damage, - f'COMBAT: Inflicted {damage} {skill_name} damage ' + - f'(attack equip lvl {equipment_level_offense} vs ' + - f'defense equip lvl {equipment_level_defense})', - tags={"player_id": player.ent_id}) - - 
player.apply_damage(damage, skill.__class__.__name__.lower()) - target.receive_damage(player, damage) + attacker.apply_damage(damage, skill.__class__.__name__.lower()) + target.receive_damage(attacker, damage) return damage diff --git a/nmmo/systems/exchange.py b/nmmo/systems/exchange.py index 96d04c170..36b2df2a9 100644 --- a/nmmo/systems/exchange.py +++ b/nmmo/systems/exchange.py @@ -5,7 +5,7 @@ from typing import Dict from nmmo.systems.item import Item, Stack -from nmmo.lib.log import EventCode +from nmmo.lib.event_code import EventCode """ The Exchange class is a simulation of an in-game item exchange. @@ -38,6 +38,10 @@ def __init__(self, realm): self._realm = realm self._config = realm.config + def reset(self): + self._listings_queue.clear() + self._item_listings.clear() + def _list_item(self, item: Item, seller, price: int, tick: int): item.listed_price.update(price) self._item_listings[item.id.val] = ItemListing(item, seller, price, tick) @@ -51,11 +55,10 @@ def _unlist_item(self, item_id: int): item = self._item_listings.pop(item_id).item item.listed_price.update(0) - def step(self, current_tick: int): + def step(self): """ Remove expired listings from the exchange's listings queue - and item listings dictionary. It takes in one parameter, - current_tick, which is the current time in the game. + and item listings dictionary. The method starts by checking the oldest listing in the listings queue using a while loop. If the current tick minus the @@ -71,6 +74,10 @@ def step(self, current_tick: int): the item's listed price. The process repeats until all expired listings are removed from the queue and dictionary. 
""" + if self._config.EXCHANGE_SYSTEM_ENABLED is False: + return + + current_tick = self._realm.tick # Remove expired listings while self._listings_queue: @@ -97,15 +104,9 @@ def sell(self, seller, item: Item, price: int, tick: int): assert item.listed_price.val == 0, 'Item is already listed' assert item.equipped.val == 0, 'Item has been equiped so cannot be listed' assert price > 0, 'Price must be larger than 0' - self._list_item(item, seller, price, tick) - self._realm.event_log.record(EventCode.LIST_ITEM, seller, item=item, price=price) - self._realm.log_milestone(f'Sell_{item.__class__.__name__}', item.level.val, - f'EXCHANGE: Offered level {item.level.val} {item.__class__.__name__} for {price} gold', - tags={"player_id": seller.ent_id}) - def buy(self, buyer, item: Item): assert item.quantity.val > 0, f'{item} purchase has quantity {item.quantity.val}' assert item.equipped.val == 0, 'Listed item must not be equipped' @@ -133,9 +134,6 @@ def buy(self, buyer, item: Item): buyer.gold.decrement(price) listing.seller.gold.increment(price) - # TODO(kywch): tidy up the logs - milestone, event, etc ... 
- #self._realm.log_milestone(f'Buy_{item.__name__}', item.level.val) - #self._realm.log_milestone('Transaction_Amount', item.listed_price.val) self._realm.event_log.record(EventCode.BUY_ITEM, buyer, item=item, price=price) self._realm.event_log.record(EventCode.EARN_GOLD, listing.seller, amount=price) diff --git a/nmmo/systems/inventory.py b/nmmo/systems/inventory.py index ae2024500..c29c2bab6 100644 --- a/nmmo/systems/inventory.py +++ b/nmmo/systems/inventory.py @@ -155,10 +155,6 @@ def receive(self, item: Item.Item) -> bool: item.destroy() return False - self.realm.log_milestone(f'Receive_{item.__class__.__name__}', item.level.val, - f'INVENTORY: Received level {item.level.val} {item.__class__.__name__}', - tags={"player_id": self.entity.ent_id}) - item.owner_id.update(self.entity.id.val) self.items.add(item) return True diff --git a/nmmo/systems/item.py b/nmmo/systems/item.py index 4df13516c..73b1f911c 100644 --- a/nmmo/systems/item.py +++ b/nmmo/systems/item.py @@ -7,7 +7,7 @@ from nmmo.datastore.serialized import SerializedState from nmmo.lib.colors import Tier -from nmmo.lib.log import EventCode +from nmmo.lib.event_code import EventCode # pylint: disable=no-member ItemState = SerializedState.subclass("Item", [ @@ -35,7 +35,7 @@ # TODO: These limits should be defined in the config. 
ItemState.Limits = lambda config: { "id": (0, math.inf), - "type_id": (0, (config.ITEM_N + 1) if config.ITEM_SYSTEM_ENABLED else 0), + "type_id": (0, 99), "owner_id": (-math.inf, math.inf), "level": (0, 99), "capacity": (0, 99), @@ -111,6 +111,8 @@ def __init__(self, realm, level, def destroy(self): # NOTE: we may want to track the item lifecycle and # and see how many high-level items are wasted + if self.config.EXCHANGE_SYSTEM_ENABLED: + self.realm.exchange.unlist_item(self) if self.owner_id.val in self.realm.players: self.realm.players[self.owner_id.val].inventory.remove(self) self.realm.items.pop(self.id.val, None) @@ -183,19 +185,6 @@ def equip(self, entity, equip_slot): self.equipped.update(1) equip_slot.equip(self) - if self.config.LOG_MILESTONES and entity.is_player and self.config.LOG_VERBOSE: - for (label, level) in [ - (f"{self.__class__.__name__}_Level", self.level.val), - ("Item_Level", entity.equipment.item_level), - ("Melee_Attack", entity.equipment.melee_attack), - ("Range_Attack", entity.equipment.range_attack), - ("Mage_Attack", entity.equipment.mage_attack), - ("Melee_Defense", entity.equipment.melee_defense), - ("Range_Defense", entity.equipment.range_defense), - ("Mage_Defense", entity.equipment.mage_defense)]: - - self.realm.log_milestone(label, level, f'EQUIPMENT: {label} {level}') - def _slot(self, entity): raise NotImplementedError @@ -329,6 +318,7 @@ def fire(self, entity) -> int: # delete this empty item instance from the datastore self.destroy() + self.realm.event_log.record(EventCode.FIRE_AMMO, entity, item=self) return self.damage class Whetstone(Ammunition): @@ -382,14 +372,7 @@ def use(self, entity) -> bool: assert self.listed_price == 0, "Listed item cannot be used" assert self._level(entity) >= self.level.val, "Entity's level is not sufficient to use the item" - self.realm.log_milestone( - f'Consumed_{self.__class__.__name__}', self.level.val, - f"PROF: Consumed {self.level.val} {self.__class__.__name__} " - f"by Entity level 
{entity.attack_level}", - tags={"player_id": entity.ent_id}) - self.realm.event_log.record(EventCode.CONSUME_ITEM, entity, item=self) - self._apply_effects(entity) entity.inventory.remove(self) self.destroy() @@ -422,3 +405,11 @@ def _apply_effects(self, entity): entity.poultice_consumed += 1 entity.poultice_level_consumed = max( entity.poultice_level_consumed, self.level.val) + +# Item groupings +ARMOR = [Hat, Top, Bottom] +WEAPON = [Spear, Bow, Wand] +TOOL = [Rod, Gloves, Pickaxe, Axe, Chisel] +AMMUNITION = [Whetstone, Arrow, Runes] +CONSUMABLE = [Ration, Potion] +ALL_ITEM = ARMOR + WEAPON + TOOL + AMMUNITION + CONSUMABLE diff --git a/nmmo/systems/skill.py b/nmmo/systems/skill.py index f0d60dde4..729aacbff 100644 --- a/nmmo/systems/skill.py +++ b/nmmo/systems/skill.py @@ -7,7 +7,7 @@ from nmmo.lib import material from nmmo.systems import combat -from nmmo.lib.log import EventCode +from nmmo.lib.event_code import EventCode ### Infrastructure ### class ExperienceCalculator: @@ -75,10 +75,6 @@ def add_xp(self, xp): self.realm.event_log.record(EventCode.LEVEL_UP, self.entity, skill=self, level=new_level) - self.realm.log_milestone(f'Level_{self.__class__.__name__}', new_level, - f"PROGRESSION: Reached level {new_level} {self.__class__.__name__}", - tags={"player_id": self.entity.ent_id}) - def set_experience_by_level(self, level): self.exp.update(self.experience_calculator.level_at_exp(level)) self.level.update(int(level)) @@ -128,15 +124,11 @@ def process_drops(self, matl, drop_table): #TODO: double-check drop table quantity for drop in drop_table.roll(self.realm, level): assert drop.level.val == level, 'Drop level does not match roll specification' - - self.realm.log_milestone(f'Gather_{drop.__class__.__name__}', - level, f"PROFESSION: Gathered level {level} {drop.__class__.__name__} " - f"(level {self.level.val} {self.__class__.__name__})", - tags={"player_id": entity.ent_id}) - if entity.inventory.space: entity.inventory.receive(drop) 
self.realm.event_log.record(EventCode.HARVEST_ITEM, entity, item=drop) + else: + drop.destroy() # this was the source of the item leak def harvest(self, matl, deplete=True): entity = self.entity @@ -303,7 +295,7 @@ def update(self): if not config.RESOURCE_SYSTEM_ENABLED: return - if config.IMMORTAL: + if config.IMMORTAL or self.entity.immortal: return depletion = config.RESOURCE_DEPLETION_RATE @@ -325,7 +317,7 @@ def update(self): if not config.RESOURCE_SYSTEM_ENABLED: return - if config.IMMORTAL: + if config.IMMORTAL or self.entity.immortal: return depletion = config.RESOURCE_DEPLETION_RATE @@ -410,3 +402,7 @@ def exp(self): def update(self): self.harvest(material.Crystal) + +# Skill groupings +COMBAT_SKILL = [Melee, Range, Mage] +HARVEST_SKILL = [Fishing, Herbalism, Prospecting, Carving, Alchemy] diff --git a/nmmo/task/base_predicates.py b/nmmo/task/base_predicates.py index cce7d97fd..a85dfe595 100644 --- a/nmmo/task/base_predicates.py +++ b/nmmo/task/base_predicates.py @@ -20,10 +20,12 @@ def Success(gs: GameState, subject: Group): ''' return True -def TickGE(gs: GameState, subject: Group, num_tick: int): +def TickGE(gs: GameState, subject: Group, num_tick: int = None): """True if the current tick is greater than or equal to the specified num_tick. Is progress counter. 
""" + if num_tick is None: + num_tick = gs.config.HORIZON return norm(gs.current_tick / num_tick) def CanSeeTile(gs: GameState, subject: Group, tile_type: type[Material]): @@ -41,21 +43,24 @@ def AllDead(gs: GameState, subject: Group): """ return norm(1.0 - count(subject.health) / len(subject)) +def CheckAgentStatus(gs: GameState, subject: Group, target: Iterable[int], status: str): + """Check if target agents are alive or dead using the game status""" + if isinstance(target, int): + target = [target] + num_agents = len(target) + num_alive = sum(1 for agent in target if agent in gs.alive_agents) + if status == 'alive': + return num_alive / num_agents + if status == 'dead': + return (num_agents - num_alive) / num_agents + # invalid status + return 0.0 + def OccupyTile(gs: GameState, subject: Group, row: int, col: int): """True if any subject agent is on the desginated tile. """ return np.any((subject.row == row) & (subject.col == col)) -def AllMembersWithinRange(gs: GameState, subject: Group, dist: int): - """True if the max l-inf distance of teammates is - less than or equal to dist - """ - current_dist = max(subject.row.max()-subject.row.min(), - subject.col.max()-subject.col.min()) - if current_dist <= 0: - return 1.0 - return norm(dist / current_dist) - def CanSeeAgent(gs: GameState, subject: Group, target: int): """True if obj_agent is present in the subjects' entities obs. """ @@ -129,7 +134,8 @@ def HoardGold(gs: GameState, subject: Group, amount: int): def EarnGold(gs: GameState, subject: Group, amount: int): """ True if the total amount of gold earned is greater than or equal to amount. """ - return norm(subject.event.EARN_GOLD.gold.sum() / amount) + gold = subject.event.EARN_GOLD.gold.sum() + subject.event.LOOT_GOLD.gold.sum() + return norm(gold / amount) def SpendGold(gs: GameState, subject: Group, amount: int): """ True if the total amount of gold spent is greater than or equal to amount. 
@@ -139,7 +145,7 @@ def SpendGold(gs: GameState, subject: Group, amount: int): def MakeProfit(gs: GameState, subject: Group, amount: int): """ True if the total amount of gold earned-spent is greater than or equal to amount. """ - profits = subject.event.EARN_GOLD.gold.sum() + profits = subject.event.EARN_GOLD.gold.sum() + subject.event.LOOT_GOLD.gold.sum() costs = subject.event.BUY_ITEM.gold.sum() return norm((profits-costs) / amount) @@ -207,6 +213,13 @@ def HarvestItem(gs: GameState, subject: Group, item: type[Item], level: int, qua lvl_flt = subject.event.HARVEST_ITEM.level >= level return norm(subject.event.HARVEST_ITEM.number[type_flt & lvl_flt].sum() / quantity) +def FireAmmo(gs: GameState, subject: Group, item: type[Item], level: int, quantity: int): + """True if total quantity consumed of item type above level is >= quantity + """ + type_flt = subject.event.FIRE_AMMO.type == item.ITEM_TYPE_ID + lvl_flt = subject.event.FIRE_AMMO.level >= level + return norm(subject.event.FIRE_AMMO.number[type_flt & lvl_flt].sum() / quantity) + def ListItem(gs: GameState, subject: Group, item: type[Item], level: int, quantity: int): """True if total quantity listed of item type above level is >= quantity """ @@ -220,3 +233,102 @@ def BuyItem(gs: GameState, subject: Group, item: type[Item], level: int, quantit type_flt = subject.event.BUY_ITEM.type == item.ITEM_TYPE_ID lvl_flt = subject.event.BUY_ITEM.level >= level return norm(subject.event.BUY_ITEM.number[type_flt & lvl_flt].sum() / quantity) + + +############################################################################################ +# Below are used for the mini games, so these need to be fast + +def ProgressTowardCenter(gs, subject): + if not any(a in gs.alive_agents for a in subject.agents): # subject should be alive + return 0.0 + center = gs.config.MAP_SIZE // 2 + max_dist = center - gs.config.MAP_BORDER + + r = subject.row + c = subject.col + # distance to the center tile, so dist = 0 when subject is on the 
center tile + if len(r) == 1: + dists = utils.linf_single((r[0], c[0]), (center, center)) + else: + coords = np.hstack([r, c]) + # NOTE: subject can be multiple agents (e.g., team), so taking the minimum + dists = np.min(utils.linf(coords, (center, center))) + return 1.0 - dists/max_dist + +def AllMembersWithinRange(gs: GameState, subject: Group, dist: int): + """True if the max l-inf distance of teammates is + less than or equal to dist + """ + if dist < 0 or \ + not any(a in gs.alive_agents for a in subject.agents): # subject should be alive + return 0.0 + + max_dist = gs.config.MAP_CENTER + r = subject.row + c = subject.col + current_dist = max(r.max()-r.min(), c.max()-c.min()) + if current_dist <= dist: + return 1.0 + + # progress bonus, which takes account of the overall distribution + max_dist_score = (max_dist - current_dist) / (max_dist - dist) + r_sd_score = dist / max(3*np.std(r), dist) # becomes 1 if 3*std(r) < dist + c_sd_score = dist / max(3*np.std(c), dist) # becomes 1 if 3*std(c) < dist + return (max_dist_score + r_sd_score + c_sd_score) / 3.0 + +def SeizeTile(gs: GameState, subject: Group, row: int, col: int, num_ticks: int, + progress_bonus = 0.4, seize_bonus = 0.3): + if not any(subject.health > 0): # subject should be alive + return 0.0 + target_tile = (row, col) + + # When the subject seizes the target tile + if target_tile in gs.seize_status and gs.seize_status[target_tile][0] in subject.agents: + seize_duration = gs.current_tick - gs.seize_status[target_tile][1] + hold_bonus = (1.0 - progress_bonus - seize_bonus) * seize_duration/num_ticks + return norm(progress_bonus + seize_bonus + hold_bonus) + + # motivate agents to seize the target tile + #max_dist = utils.linf_single(target_tile, gs.spawn_pos[subject.agents[0]]) + max_dist = gs.config.MAP_CENTER // 2 # does not have to be precise + r = subject.row + c = subject.col + # distance to the center tile, so dist = 0 when subject is on the center tile + if len(r) == 1: + dists = 
utils.linf_single((r[0], c[0]), target_tile) + else: + coords = np.hstack([r.reshape(-1,1), c.reshape(-1,1)]) + # NOTE: subject can be multiple agents (e.g., team), so taking the minimum + dists = np.min(utils.linf(coords, target_tile)) + + return norm(progress_bonus * (1.0 - dists/max_dist)) + +def SeizeCenter(gs: GameState, subject: Group, num_ticks: int, + progress_bonus = 0.3): + row = col = gs.config.MAP_SIZE // 2 # center tile + return SeizeTile(gs, subject, row, col, num_ticks, progress_bonus) + +def SeizeQuadCenter(gs: GameState, subject: Group, num_ticks: int, quadrant: str, + progress_bonus = 0.3): + center = gs.config.MAP_SIZE // 2 + half_dist = gs.config.MAP_CENTER // 4 + if quadrant == "first": + row = col = center + half_dist + elif quadrant == "second": + row, col = center - half_dist, center + half_dist + elif quadrant == "third": + row = col = center - half_dist + elif quadrant == "fourth": + row, col = center + half_dist, center - half_dist + else: + raise ValueError(f"Invalid quadrant {quadrant}") + return SeizeTile(gs, subject, row, col, num_ticks, progress_bonus) + +def ProtectLeader(gs, subject, target_protect: int, target_destroy: Iterable[int]): + """target_destory is not used for reward, but used as info for the reward wrapper""" + # Failed to protect the leader + if target_protect not in gs.alive_agents: + return 0 + + # Reward each tick the target is alive + return gs.current_tick / gs.config.HORIZON diff --git a/nmmo/task/constraint.py b/nmmo/task/constraint.py deleted file mode 100644 index d19686bc5..000000000 --- a/nmmo/task/constraint.py +++ /dev/null @@ -1,183 +0,0 @@ -from __future__ import annotations - -import random -from numbers import Number -from typing import Union, Callable, Dict -from abc import ABC, abstractmethod - -from nmmo.systems import skill, item -from nmmo.lib import material -from nmmo.lib.log import EventCode -from nmmo.core.config import Config - -class InvalidConstraint(Exception): - pass - -class 
Constraint(ABC): - """ To check the validity of predicates - and assist generating new predicates. Similar to gym spaces. - """ - def __init__(self, systems=None): - if systems is None: - systems = [] - self._systems = systems - - # pylint: disable=unused-argument - def check(self, config: Config, value): - """ Checks value is in bounds given config - """ - for system in self._systems: - try: - if not getattr(config,system): - return False - except AttributeError: - return False - return True - - @abstractmethod - def sample(self, config: Config): - """ Generator to sample valid values given config - """ - raise NotImplementedError - - def __str__(self): - return self.__class__.__name__ - -# This is a dummy function for GroupConstraint -# NOTE: config does not have team info -def sample_one_big_team(config): - from nmmo.task.group import Group - team = list(range(1, config.PLAYER_N+1)) - return [Group(team, 'All')] - -class GroupConstraint(Constraint): - """ Ensures that all agents of a group exist in a config - """ - def __init__(self, - sample_fn = sample_one_big_team, - systems = None): - """ - Params - sample_fn: given a Config, return groups to select from - systems: systems required to operate - """ - super().__init__(systems) - self._sample_fn = sample_fn - - def check(self, config, value): - if not super().check(config,value): - return False - for agent in value.agents: - if agent > config.PLAYER_N: - return False - return True - - def sample(self, config): - return random.choice(self._sample_fn(config)) - - def sample_from_teams(self, teams: Dict[int, Dict]): - from nmmo.task.group import Group - team_id = random.choice(list(teams.keys())) - return Group(teams[team_id], str(team_id)) - -class AgentListConstraint(Constraint): - """ Ensures that all agents of the list exist in a config - """ - def check(self, config, value): - for agent in value: - if agent > config.PLAYER_N: - return False - return True - - def sample(self, config): - return None - -class 
ScalarConstraint(Constraint): - def __init__(self, - low: Union[Callable, Number] = 0, - high: Union[Callable, Number] = 1024, - dtype = int, - systems = None): - super().__init__(systems) - self._low = low - self._high = high - if isinstance(low, Number): - self._low = lambda _ : low - if isinstance(high, Number): - self._high = lambda _ : high - self._dtype = dtype - - def check(self, config, value): - if not super().check(config,value): - return False - if self._low(config) <= value < self._high(config): - return True - return False - - def sample(self, config): - l, h = self._low(config), self._high(config) - return self._dtype(random.random()*(h-l)+l) - -class DiscreteConstraint(Constraint): - def __init__(self, space, systems=None): - super().__init__(systems) - self._space = set(space) - - def check(self, config: Config, value): - if not super().check(config,value): - return False - return value in self._space - - def sample(self, config: Config): - # NOTE: this does NOT need to be deterministic - return random.choice(self._space) - -# Group Constraints -TEAM_GROUPS = GroupConstraint() -INDIVIDUAL_GROUPS=GroupConstraint() -AGENT_LIST_CONSTRAINT = AgentListConstraint() - -# Tile Constraints -MATERIAL_CONSTRAINT = DiscreteConstraint(space=list(material.All.materials), - systems=['TERRAIN_SYSTEM_ENABLED', - 'RESOURCE_SYSTEM_ENABLED']) -HABITABLE_CONSTRAINT = DiscreteConstraint(space=list(material.Habitable.materials), - systems=['TERRAIN_SYSTEM_ENABLED']) - -# Event Constraints -event_names = [k for k, v in EventCode.__dict__.items() if isinstance(v,int)] -EVENTCODE_CONSTRAINT = DiscreteConstraint(space=event_names) - -# Skill Constraints -combat_skills = [skill.Melee, skill.Mage, skill.Range] -harvest_skills = [skill.Fishing, skill.Herbalism, skill.Prospecting, skill.Alchemy, skill.Carving] -SKILL_CONSTRAINT = DiscreteConstraint(space=combat_skills+harvest_skills, - systems=['PROFESSION_SYSTEM_ENABLED']) -COMBAT_SKILL_CONSTRAINT = 
DiscreteConstraint(space=combat_skills, - systems=['PROFESSION_SYSTEM_ENABLED']) - -# Item Constraints -armour = [item.Hat, item.Top, item.Bottom] -weapons = [item.Spear, item.Bow, item.Wand] -tools = [item.Axe, item.Gloves, item.Rod, item.Pickaxe, item.Chisel] -ammunition = [item.Runes, item.Arrow, item.Whetstone] -consumables = [item.Potion, item.Ration] -ITEM_CONSTRAINT = DiscreteConstraint(space=armour+weapons+tools+ammunition+consumables, - systems=['ITEM_SYSTEM_ENABLED']) -EQUIPABLE_CONSTRAINT = DiscreteConstraint(space=armour+weapons+tools+ammunition, - systems=['ITEM_SYSTEM_ENABLED']) -CONSUMABLE_CONSTRAINT = DiscreteConstraint(space=consumables, - systems=['ITEM_SYSTEM_ENABLED']) -HARVEST_CONSTRAINT = DiscreteConstraint(space=weapons+ammunition+consumables, - systems=['ITEM_SYSTEM_ENABLED']) - -# Config Constraints -COORDINATE_CONSTRAINT = ScalarConstraint(high = lambda c: c.MAP_CENTER) -PROGRESSION_CONSTRAINT = ScalarConstraint(high = lambda c: c.PROGRESSION_LEVEL_MAX+1) -INVENTORY_CONSTRAINT = ScalarConstraint(high=lambda c: c.ITEM_INVENTORY_CAPACITY+1) -AGENT_NUMBER_CONSTRAINT = ScalarConstraint(low = 1, high = lambda c: c.PLAYER_N+1) - -# Arbitrary Constraints -EVENT_NUMBER_CONSTRAINT = ScalarConstraint(low = 1, high = 110) -GOLD_CONSTRAINT = ScalarConstraint(low = 1, high = 1000) -AGENT_TYPE_CONSTRAINT = DiscreteConstraint(space=['npc','player']) diff --git a/nmmo/task/game_state.py b/nmmo/task/game_state.py index e5e743d05..e6b131f8d 100644 --- a/nmmo/task/game_state.py +++ b/nmmo/task/game_state.py @@ -15,7 +15,7 @@ from nmmo.task.group import Group from nmmo.entity.entity import EntityState from nmmo.lib.event_log import EventState, ATTACK_COL_MAP, ITEM_COL_MAP, LEVEL_COL_MAP -from nmmo.lib.log import EventCode +from nmmo.lib.event_code import EventCode from nmmo.systems.item import ItemState from nmmo.core.tile import TileState @@ -44,16 +44,18 @@ class GameState: event_data: np.ndarray # a copied, whole Event log table event_index: Dict[int, 
Iterable] + # status of the seize target tiles (row, col) -> (ent_id, tick) + seize_status: Dict[Tuple[int, int], Tuple[int, int]] + cache_result: MutableMapping # cache for general memoization _group_view: List[GroupView] = field(default_factory=list) # cache for GroupView # add helper functions below @functools.lru_cache def entity_or_none(self, ent_id): - flt_ent = self.entity_data[:, EntityAttr['id']] == ent_id - if np.any(flt_ent): - return EntityState.parse_array(self.entity_data[flt_ent][0]) - return None + if ent_id not in self.entity_index: + return None + return EntityState.parse_array(self.entity_data[self.entity_index[ent_id]][0]) def where_in_id(self, data_type, subject: Iterable[int]): k = (data_type, subject) @@ -162,7 +164,7 @@ def __init__(self, gs: GameState, subject: Group, arr: np.ndarray): super().__init__(TileAttr, 'tile', gs, subject, arr) def get_attribute(self, attr) -> np.ndarray: - return [o[:, self._mapping[attr]]for o in self._arr] + return [o[:, self._mapping[attr]] for o in self._arr] class EventCodeView(ArrayView): def __init__(self, @@ -275,6 +277,7 @@ def generate(self, realm: Realm, env_obs: Dict[int, Observation]) -> GameState: item_index = precompute_index(item_data, ItemAttr["owner_id"]), event_data = event_data, event_index = precompute_index(event_data, EventAttr['ent_id']), + seize_status = realm.seize_status, cache_result = {} ) diff --git a/nmmo/task/predicate_api.py b/nmmo/task/predicate_api.py index a1511a029..73b35259f 100644 --- a/nmmo/task/predicate_api.py +++ b/nmmo/task/predicate_api.py @@ -1,5 +1,5 @@ from __future__ import annotations -from typing import Callable, List, Optional, Tuple, Union, Iterable, Type, TYPE_CHECKING +from typing import Callable, List, Optional, Union, Iterable, Type, TYPE_CHECKING from types import FunctionType from abc import ABC, abstractmethod import inspect @@ -8,7 +8,6 @@ from nmmo.core.config import Config from nmmo.task.group import Group, union from nmmo.task.game_state import 
GameState -from nmmo.task.constraint import Constraint, GroupConstraint if TYPE_CHECKING: from nmmo.task.task_api import Task @@ -22,7 +21,6 @@ class Predicate(ABC): def __init__(self, subject: Group, *args, - constraints: Optional[List[Tuple[str,Optional[Constraint]]]] = None, **kwargs): self.name = self._make_name(self.__class__.__name__, args, kwargs) @@ -33,7 +31,6 @@ def __init__(self, self._args = args self._kwargs = kwargs - self._constraints = constraints # NOTE: not used self._config = None self._subject = subject @@ -54,7 +51,7 @@ def __call__(self, gs: GameState) -> float: if self.name in cache: progress = cache[self.name] else: - progress = max(min(self._evaluate(gs)*1.0,1.0),0.0) + progress = max(min(float(self._evaluate(gs)),1.0),0.0) cache[self.name] = progress return progress @@ -166,7 +163,7 @@ def __init__(self, *args, **kwargs) -> None: self._kwargs = kwargs self.name = self._make_name(fn.__name__, args, kwargs) def _evaluate(self, gs: GameState) -> float: - return fn(gs, *self._args, **self._kwargs) + return float(fn(gs, *self._args, **self._kwargs)) def get_source_code(self): return inspect.getsource(fn).strip() def get_signature(self) -> List: @@ -183,11 +180,8 @@ def __init__(self, n, *predicates: Union[Predicate, Real], subject: Group=None): predicates = list(predicates) self._subject_argument = subject if subject is None: - try: - subject = union(*[p.subject - for p in filter(lambda p: isinstance(p, Predicate), predicates)]) - except AttributeError: - subject = GroupConstraint() + subject = union(*[p.subject + for p in filter(lambda p: isinstance(p, Predicate), predicates)]) super().__init__(subject, *predicates) for i, p in enumerate(predicates): diff --git a/nmmo/task/task_api.py b/nmmo/task/task_api.py index be293c6a6..20e209e08 100644 --- a/nmmo/task/task_api.py +++ b/nmmo/task/task_api.py @@ -19,7 +19,9 @@ def __init__(self, assignee: Union[Iterable[int], int], reward_multiplier = 1.0, embedding = None, - spec_name: str = None): + 
spec_name: str = None, + reward_to = None, + tags: List[str] = None): if isinstance(assignee, int): self._assignee = (assignee,) else: @@ -28,7 +30,8 @@ def __init__(self, self._eval_fn = eval_fn self._reward_multiplier = reward_multiplier self._embedding = None if embedding is None else np.array(embedding, dtype=np.float16) - self.spec_name = spec_name # None if not created using TaskSpec + # These are None if not created using TaskSpec + self.spec_name, self.reward_to, self.tags = spec_name, reward_to, tags self.name = self._make_name(self.__class__.__name__, eval_fn=eval_fn, assignee=self._assignee) self.reset() @@ -56,6 +59,10 @@ def assignee(self) -> Tuple[int]: def completed(self) -> bool: return self._completed_tick is not None + @property + def progress(self) -> float: + return self._progress + @property def reward_multiplier(self) -> float: return self._reward_multiplier @@ -80,12 +87,11 @@ def _map_progress_to_reward(self, gs: GameState) -> float: if self.completed: return 0.0 - new_progress = max(min(self._eval_fn(gs)*1.0,1.0),0.0) + new_progress = max(min(float(self._eval_fn(gs)),1.0),0.0) diff = new_progress - self._progress self._progress = new_progress if self._progress >= 1: self._completed_tick = gs.current_tick - diff = 1.0 # give out the max reward when task is completed return diff @@ -245,5 +251,4 @@ def nmmo_default_task(agent_list: Iterable[int], test_mode=None) -> List[Task]: # pylint: disable=unused-argument return make_same_task(lambda gs, subject: True, agent_list, task_cls=OngoingTask) - # the default is to use the predicate class - return make_same_task(bp.StayAlive, agent_list, task_cls=OngoingTask) + return make_same_task(bp.TickGE, agent_list) diff --git a/nmmo/task/task_spec.py b/nmmo/task/task_spec.py index 0c33267fd..e81e9ee0c 100644 --- a/nmmo/task/task_spec.py +++ b/nmmo/task/task_spec.py @@ -3,6 +3,7 @@ from typing import Iterable, Dict, List, Union, Type from types import FunctionType from copy import deepcopy +from tqdm 
import tqdm import numpy as np @@ -31,9 +32,8 @@ """ REWARD_TO = ["agent", "team"] -VALID_TARGET = ["left_team", "left_team_leader", - "right_team", "right_team_leader", - "my_team_leader", "all_foes"] +VALID_TARGET = ["left_team", "left_team_leader", "right_team", "right_team_leader", + "my_team_leader", "all_foes", "all_foe_leaders"] @dataclass class TaskSpec: @@ -45,6 +45,7 @@ class TaskSpec: sampling_weight: float = 1.0 embedding: np.ndarray = None predicate: Predicate = None + tags: List[str] = field(default_factory=list) def __post_init__(self): if self.predicate is None: @@ -100,15 +101,18 @@ def make_task_from_spec(assign_to: Union[Iterable[int], Dict], task_kwargs = deepcopy(task_spec[idx].task_kwargs) task_kwargs["embedding"] = task_spec[idx].embedding # to pass to task_cls task_kwargs["spec_name"] = task_spec[idx].name + task_kwargs["reward_to"] = task_spec[idx].reward_to + task_kwargs["tags"] = task_spec[idx].tags predicate = task_spec[idx].predicate # reserve "target" for relative agent mapping - if "target" in pred_fn_kwargs: - target = pred_fn_kwargs.pop("target") - assert target in VALID_TARGET, "Invalid target" + target_keys = [key for key in pred_fn_kwargs.keys() if key.startswith("target")] + for key in target_keys: + target_keyword = pred_fn_kwargs.pop(key) + assert target_keyword in VALID_TARGET, "Invalid target" # translate target to specific agent ids using team_helper - target = team_helper.get_target_agent(team_id, target) - pred_fn_kwargs["target"] = target + target_ent = team_helper.get_target_agent(team_id, target_keyword) + pred_fn_kwargs[key] = target_ent # handle some special cases and instantiate the predicate first if pred_fn is not None and isinstance(pred_fn, FunctionType): @@ -119,8 +123,8 @@ def make_task_from_spec(assign_to: Union[Iterable[int], Dict], if (pred_fn in [bp.AllDead]) or \ (pred_fn in [bp.StayAlive] and "target" in pred_fn_kwargs): # use the target as the predicate subject - pred_fn_kwargs.pop("target") # remove 
target - predicate = pred_cls(Group(target), **pred_fn_kwargs) + target_ent = pred_fn_kwargs.pop("target") # remove target + predicate = pred_cls(Group(target_ent), **pred_fn_kwargs) # create the task if reward_to == "team": @@ -146,12 +150,14 @@ def make_task_from_spec(assign_to: Union[Iterable[int], Dict], return tasks # pylint: disable=bare-except,cell-var-from-loop -def check_task_spec(spec_list: List[TaskSpec]) -> List[Dict]: +def check_task_spec(spec_list: List[TaskSpec], debug=False) -> List[Dict]: teams = {0: [1, 2, 3], 3: [4, 5], 7: [6, 7], 11: [8, 9], 14: [10, 11]} config = nmmo.config.Default() + config.set("PLAYER_N", 11) + config.set("TEAMS", teams) env = nmmo.Env(config) results = [] - for single_spec in spec_list: + for single_spec in tqdm(spec_list): result = {"spec_name": single_spec.name} try: env.reset(make_task_fn=lambda: make_task_from_spec(teams, [single_spec])) @@ -160,6 +166,7 @@ def check_task_spec(spec_list: List[TaskSpec]) -> List[Dict]: result["runnable"] = True except: result["runnable"] = False - + if debug: + raise results.append(result) return results diff --git a/nmmo/version.py b/nmmo/version.py index 668c3446e..b15121b0f 100644 --- a/nmmo/version.py +++ b/nmmo/version.py @@ -1 +1 @@ -__version__ = '2.0.2' +__version__ = '2.1.0' \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 000000000..fc4abfc6a --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,2 @@ +[build-system] +requires = ["setuptools", "wheel", "cython", "numpy==1.23.3"] \ No newline at end of file diff --git a/scripted/attack.py b/scripted/attack.py index 9b62089bd..f4b06cd9e 100644 --- a/scripted/attack.py +++ b/scripted/attack.py @@ -11,7 +11,7 @@ def closestTarget(config, ob: Observation): shortestDist = np.inf closestAgent = None - agent = ob.agent() + agent = ob.agent start = (agent.row, agent.col) for target_ent in ob.entities.values: @@ -30,7 +30,7 @@ def closestTarget(config, ob: Observation): return closestAgent, 
shortestDist def attacker(config, ob: Observation): - agent = ob.agent() + agent = ob.agent attacker_id = agent.attacker_id if attacker_id == 0: diff --git a/scripted/baselines.py b/scripted/baselines.py index 860c105e0..9cd6718be 100644 --- a/scripted/baselines.py +++ b/scripted/baselines.py @@ -317,7 +317,7 @@ def __call__(self, observation: Observation): self.actions = {} self.ob = observation - self.me = observation.agent() + self.me = observation.agent # combat level self.me.level = max(self.me.melee_level, self.me.range_level, self.me.mage_level) @@ -344,10 +344,10 @@ def __call__(self, observation: Observation): # When to run from death fog in BR configs self.fog_criterion = None - if self.config.PLAYER_DEATH_FOG is not None: + if self.config.DEATH_FOG_ONSET is not None: time_alive = self.me.time_alive - start_running = time_alive > self.config.PLAYER_DEATH_FOG - 64 - run_now = time_alive % max(1, int(1 / self.config.PLAYER_DEATH_FOG_SPEED)) + start_running = time_alive > self.config.DEATH_FOG_ONSET - 64 + run_now = time_alive % max(1, int(1 / self.config.DEATH_FOG_SPEED)) self.fog_criterion = start_running and run_now diff --git a/scripted/move.py b/scripted/move.py index 0c80f9d6b..7f097749b 100644 --- a/scripted/move.py +++ b/scripted/move.py @@ -4,8 +4,7 @@ from nmmo.core import action from nmmo.core.observation import Observation -from nmmo.lib import material -from nmmo.systems.ai import utils +from nmmo.lib import material, astar def inSight(dr, dc, vision): @@ -60,7 +59,7 @@ def explore(config, ob, actions, r, c, np_random): pathfind(config, ob, actions, rr, cc, np_random) def evade(config, ob: Observation, actions, attacker, np_random): - agent = ob.agent() + agent = ob.agent rr, cc = (2*agent.row - attacker.row, 2*agent.col - attacker.col) pathfind(config, ob, actions, rr, cc, np_random) @@ -68,7 +67,7 @@ def forageDijkstra(config, ob: Observation, actions, food_max, water_max, np_random, cutoff=100): vision = config.PLAYER_VISION_RADIUS - agent = 
ob.agent() + agent = ob.agent food = agent.food water = agent.water @@ -87,7 +86,7 @@ def forageDijkstra(config, ob: Observation, actions, break cur = queue.pop(0) - for nxt in utils.adjacentPos(cur): + for nxt in astar.adjacentPos(cur): if nxt in backtrace: continue @@ -106,7 +105,7 @@ def forageDijkstra(config, ob: Observation, actions, if matl == material.Foilage.index: food = min(food+food_max//2, food_max) - for pos in utils.adjacentPos(nxt): + for pos in astar.adjacentPos(nxt): if not inSight(*pos, vision): continue @@ -171,7 +170,7 @@ def gatherBFS(config, ob: Observation, actions, resource, np_random, cutoff=100) return False cur = queue.pop(0) - for nxt in utils.adjacentPos(cur): + for nxt in astar.adjacentPos(cur): if found: break @@ -197,7 +196,7 @@ def gatherBFS(config, ob: Observation, actions, resource, np_random, cutoff=100) backtrace[nxt] = cur break - for pos in utils.adjacentPos(nxt): + for pos in astar.adjacentPos(nxt): if not inSight(*pos, vision): continue @@ -238,7 +237,7 @@ def aStar(config, ob: Observation, actions, rr, cc, cutoff=100): cost = {start: 0} closestPos = start - closestHeuristic = utils.l1(start, goal) + closestHeuristic = astar.l1(start, goal) closestCost = closestHeuristic while pq: @@ -254,7 +253,7 @@ def aStar(config, ob: Observation, actions, rr, cc, cutoff=100): if cur == goal: break - for nxt in utils.adjacentPos(cur): + for nxt in astar.adjacentPos(cur): if not inSight(*nxt, vision): continue @@ -271,7 +270,7 @@ def aStar(config, ob: Observation, actions, rr, cc, cutoff=100): newCost = cost[cur] + 1 if nxt not in cost or newCost < cost[nxt]: cost[nxt] = newCost - heuristic = utils.lInfty(goal, nxt) + heuristic = astar.l1(goal, nxt) priority = newCost + heuristic # Compute approximate solution diff --git a/setup.py b/setup.py index ff014244f..db5415352 100644 --- a/setup.py +++ b/setup.py @@ -1,6 +1,8 @@ from itertools import chain from setuptools import find_packages, setup +from Cython.Build import cythonize +import 
numpy as np REPO_URL = "https://github.com/neuralmmo/environment" @@ -35,20 +37,19 @@ 'scipy==1.10.0', 'pytest==7.3.0', 'pytest-benchmark==3.4.1', - # Deprecated renderer breaks py311 - #'autobahn==19.3.3', - #'Twisted==19.2.0', 'vec-noise==1.1.4', 'imageio==2.23.0', 'ordered-set==4.1.0', - 'pettingzoo>=1.19.0', - 'gym==0.23.0', + 'pettingzoo==1.24.1', + 'gymnasium==0.29.1', 'pylint==2.16.0', - 'psutil==5.9.3', - 'py==1.11.0', + 'psutil<6', 'tqdm<5', - 'dill==0.3.6', + 'py==1.11.0', + 'dill<0.4', ], + ext_modules = cythonize(["nmmo/lib/cython_helper.pyx"]), + include_dirs=[np.get_include()], extras_require=extra, python_requires=">=3.7", license="MIT", diff --git a/tests/action/test_ammo_use.py b/tests/action/test_ammo_use.py index 340883703..302a5e3c8 100644 --- a/tests/action/test_ammo_use.py +++ b/tests/action/test_ammo_use.py @@ -10,7 +10,7 @@ RANDOM_SEED = 284 -LOGFILE = 'tests/action/test_ammo_use.log' +LOGFILE = None # 'tests/action/test_ammo_use.log' class TestAmmoUse(ScriptedTestTemplate): # pylint: disable=protected-access,multiple-statements,no-member @@ -20,8 +20,7 @@ def setUpClass(cls): super().setUpClass() # config specific to the tests here - cls.config.LOG_VERBOSE = False - if cls.config.LOG_VERBOSE: + if LOGFILE: # for debugging logging.basicConfig(filename=LOGFILE, level=logging.INFO) def _assert_action_targets_zero(self, gym_obs): @@ -29,9 +28,10 @@ def _assert_action_targets_zero(self, gym_obs): + np.sum(gym_obs["ActionTargets"]["Buy"]["MarketItem"]) for atn in [action.Use, action.Give, action.Destroy, action.Sell]: mask += np.sum(gym_obs["ActionTargets"][atn.__name__]["InventoryItem"]) - # If MarketItem and InventoryTarget have no-action flags, these sum up to 5 + # If MarketItem and InventoryTarget have no-action flags, these sum up to 104 # To prevent entropy collapse, GiveGold/Price and Buy/MarketItem masks are tweaked - self.assertEqual(mask, 1 + 5*int(self.config.PROVIDE_NOOP_ACTION_TARGET)) + # The Price mask is all ones, so the sum is 
104 + self.assertEqual(mask, 99 + 5*int(self.config.PROVIDE_NOOP_ACTION_TARGET)) def test_spawn_immunity(self): env = self._setup_env(random_seed=RANDOM_SEED) @@ -181,7 +181,6 @@ def test_cannot_use_listed_items(self): ent_id = 3 provide_item(env.realm, ent_id, Item.Whetstone, level=5, quantity=3) provide_item(env.realm, ent_id, Item.Whetstone, level=7, quantity=3) - env.obs = env._compute_observations() # First tick actions: SELL level-0 ammo env.step({ ent_id: { action.Sell: @@ -250,9 +249,9 @@ def sig_int_tuple(sig): # level up the agent 1 (Melee) to 2 env.realm.players[1].skills.melee.level.update(2) - env.obs = env._compute_observations() # check inventory + env._compute_observations() for ent_id in self.ammo: # realm data inv_realm = { item.signature: item.quantity.val @@ -270,20 +269,20 @@ def sig_int_tuple(sig): ItemState.parse_array(inv_obs.values[inv_obs.sig(*wstone_lvl1)]).quantity) if ent_id == 1: # if the ammo has the same signature, the quantity is added to the existing stack - self.assertEqual( inv_realm[sig_int_tuple(wstone_lvl0)], + self.assertEqual(inv_realm[sig_int_tuple(wstone_lvl0)], extra_ammo + self.ammo_quantity ) - self.assertEqual( extra_ammo + self.ammo_quantity, + self.assertEqual(extra_ammo + self.ammo_quantity, ItemState.parse_array(inv_obs.values[inv_obs.sig(*wstone_lvl0)]).quantity) # so there should be 1 more space - self.assertEqual( inv_obs.len, self.config.ITEM_INVENTORY_CAPACITY - 1) + self.assertEqual(inv_obs.len, self.config.ITEM_INVENTORY_CAPACITY - 1) else: # if the signature is different, it occupies a new inventory space - self.assertEqual( inv_realm[sig_int_tuple(wstone_lvl0)], extra_ammo ) - self.assertEqual( extra_ammo, + self.assertEqual(inv_realm[sig_int_tuple(wstone_lvl0)], extra_ammo ) + self.assertEqual(extra_ammo, ItemState.parse_array(inv_obs.values[inv_obs.sig(*wstone_lvl0)]).quantity) # thus the inventory is full - self.assertEqual( inv_obs.len, self.config.ITEM_INVENTORY_CAPACITY) + 
self.assertEqual(inv_obs.len, self.config.ITEM_INVENTORY_CAPACITY) if ent_id == 1: gym_obs = env.obs[ent_id].to_gym() @@ -342,7 +341,6 @@ def test_use_ration_potion(self): env.realm.players[ent_id].resources.food.update(init_res) env.realm.players[ent_id].resources.water.update(init_res) env.realm.players[ent_id].resources.health.update(init_res) - env.obs = env._compute_observations() """First tick: try to use level-3 ration & potion""" ration_lvl3 = (Item.Ration, 3) diff --git a/tests/action/test_destroy_give_gold.py b/tests/action/test_destroy_give_gold.py index fa9f10b1a..72c051e36 100644 --- a/tests/action/test_destroy_give_gold.py +++ b/tests/action/test_destroy_give_gold.py @@ -10,7 +10,7 @@ RANDOM_SEED = 985 -LOGFILE = 'tests/action/test_destroy_give_gold.log' +LOGFILE = None # 'tests/action/test_destroy_give_gold.log' class TestDestroyGiveGold(ScriptedTestTemplate): # pylint: disable=protected-access,multiple-statements,no-member @@ -20,16 +20,15 @@ def setUpClass(cls): super().setUpClass() # config specific to the tests here - cls.config.PLAYERS = [baselines.Melee, baselines.Range] - cls.config.PLAYER_N = 6 + cls.config.set("PLAYERS", [baselines.Melee, baselines.Range]) + cls.config.set("PLAYER_N", 6) cls.policy = { 1:'Melee', 2:'Range', 3:'Melee', 4:'Range', 5:'Melee', 6:'Range' } cls.spawn_locs = { 1:(17,17), 2:(21,21), 3:(17,17), 4:(21,21), 5:(21,21), 6:(17,17) } cls.ammo = { 1:Item.Whetstone, 2:Item.Arrow, 3:Item.Whetstone, 4:Item.Arrow, 5:Item.Whetstone, 6:Item.Arrow } - cls.config.LOG_VERBOSE = False - if cls.config.LOG_VERBOSE: + if LOGFILE: # for debugging logging.basicConfig(filename=LOGFILE, level=logging.INFO) def test_destroy(self): @@ -97,7 +96,7 @@ def test_give_tile_npc(self): # teleport the npc -1 to agent 5's location change_spawn_pos(env.realm, -1, self.spawn_locs[5]) - env.obs = env._compute_observations() + env._compute_observations() """ First tick actions """ actions = {} @@ -109,9 +108,6 @@ def test_give_tile_npc(self): # agent 2: 
give ammo to agent 2 (invalid: cannot give to self) test_cond[2] = { 'tgt_id': 2, 'item_sig': self.item_sig[2][0], 'ent_mask': False, 'inv_mask': True, 'valid': False } - # agent 4: give ammo to agent 5 (invalid: other tile) - test_cond[4] = { 'tgt_id': 6, 'item_sig': self.item_sig[4][0], - 'ent_mask': False, 'inv_mask': True, 'valid': False } # agent 5: give ammo to npc -1 (invalid, should be masked) test_cond[5] = { 'tgt_id': -1, 'item_sig': self.item_sig[5][0], 'ent_mask': False, 'inv_mask': True, 'valid': False } @@ -204,8 +200,7 @@ def test_give_full_inventory(self): for item_sig in extra_items: self.item_sig[ent_id].append(item_sig) provide_item(env.realm, ent_id, item_sig[0], item_sig[1], 1) - - env.obs = env._compute_observations() + env._compute_observations() # check if the inventory is full for ent_id in [1, 2]: @@ -248,7 +243,7 @@ def test_give_gold(self): # teleport the npc -1 to agent 3's location change_spawn_pos(env.realm, -1, self.spawn_locs[3]) - env.obs = env._compute_observations() + env._compute_observations() test_cond = {} @@ -268,10 +263,6 @@ def test_give_gold(self): # tgt_gold is 0 because (2) gave all gold to (4) test_cond[4] = { 'tgt_id': 2, 'gold': -1, 'ent_mask': True, 'ent_gold': 2*self.init_gold, 'tgt_gold': 0 } - # agent 6: give gold to agent 4 (invalid: the other tile) - # tgt_gold is 2*self.init_gold because (4) got 5 gold from (2) - test_cond[6] = { 'tgt_id': 4, 'gold': 1, 'ent_mask': False, - 'ent_gold': self.init_gold, 'tgt_gold': 2*self.init_gold } actions = self._check_assert_make_action(env, action.GiveGold, test_cond) env.step(actions) diff --git a/tests/action/test_monkey_action.py b/tests/action/test_monkey_action.py index 2c2f6da92..114facdba 100644 --- a/tests/action/test_monkey_action.py +++ b/tests/action/test_monkey_action.py @@ -74,7 +74,7 @@ def setUpClass(cls): # NOTE: this can also be used for sweeping random seeds def rollout_with_seed(config, seed, use_str_key=False): env = ScriptedAgentTestEnv(config) - obs = 
env.reset(seed=seed) + obs, _ = env.reset(seed=seed) for _ in tqdm(range(TEST_HORIZON)): # sample random actions for each player @@ -82,7 +82,7 @@ def rollout_with_seed(config, seed, use_str_key=False): for ent_id in env.realm.players: ent_atns = make_random_actions(config, obs[ent_id]) actions[ent_id] = filter_item_actions(ent_atns, use_str_key) - obs, _, _, _ = env.step(actions) + obs, _, _, _, _ = env.step(actions) def test_monkey_action(self): try: diff --git a/tests/action/test_sell_buy.py b/tests/action/test_sell_buy.py index 452beb109..2795f7ea1 100644 --- a/tests/action/test_sell_buy.py +++ b/tests/action/test_sell_buy.py @@ -10,7 +10,7 @@ RANDOM_SEED = 985 -LOGFILE = 'tests/action/test_sell_buy.log' +LOGFILE = None # 'tests/action/test_sell_buy.log' class TestSellBuy(ScriptedTestTemplate): # pylint: disable=protected-access,multiple-statements,unsubscriptable-object,no-member @@ -20,18 +20,16 @@ def setUpClass(cls): super().setUpClass() # config specific to the tests here - cls.config.PLAYERS = [baselines.Melee, baselines.Range] - cls.config.PLAYER_N = 6 + cls.config.set("PLAYERS", [baselines.Melee, baselines.Range]) + cls.config.set("PLAYER_N", 6) cls.policy = { 1:'Melee', 2:'Range', 3:'Melee', 4:'Range', 5:'Melee', 6:'Range' } cls.ammo = { 1:Item.Whetstone, 2:Item.Arrow, 3:Item.Whetstone, 4:Item.Arrow, 5:Item.Whetstone, 6:Item.Arrow } - cls.config.LOG_VERBOSE = False - if cls.config.LOG_VERBOSE: + if LOGFILE: # for debugging logging.basicConfig(filename=LOGFILE, level=logging.INFO) - def test_sell_buy(self): # cannot list an item with 0 price --> impossible to do this # cannot list an equipped item for sale (should be masked) @@ -48,8 +46,7 @@ def test_sell_buy(self): for item_sig in extra_items: self.item_sig[ent_id].append(item_sig) provide_item(env.realm, ent_id, item_sig[0], item_sig[1], 1) - - env.obs = env._compute_observations() + env._compute_observations() # check if the inventory is full for ent_id in [1, 2]: diff --git 
a/tests/core/test_config.py b/tests/core/test_config.py new file mode 100644 index 000000000..8cc4c981b --- /dev/null +++ b/tests/core/test_config.py @@ -0,0 +1,37 @@ +import unittest + +import nmmo +import nmmo.core.config as cfg + + +class Config(cfg.Config, cfg.Terrain, cfg.Combat): + pass + +class TestConfig(unittest.TestCase): + def test_config_attr_set_episode(self): + config = nmmo.config.Default() + self.assertEqual(config.RESOURCE_SYSTEM_ENABLED, True) + + config.set_for_episode("RESOURCE_SYSTEM_ENABLED", False) + self.assertEqual(config.RESOURCE_SYSTEM_ENABLED, False) + + config.reset() + self.assertEqual(config.RESOURCE_SYSTEM_ENABLED, True) + + def test_cannot_change_immutable_attr(self): + config = Config() + with self.assertRaises(AssertionError): + config.set_for_episode("PLAYER_N", 100) + + def test_cannot_change_obs_attr(self): + config = Config() + with self.assertRaises(AssertionError): + config.set_for_episode("PLAYER_N_OBS", 50) + + def test_cannot_use_noninit_system(self): + config = Config() + with self.assertRaises(AssertionError): + config.set_for_episode("ITEM_SYSTEM_ENABLED", True) + +if __name__ == '__main__': + unittest.main() diff --git a/tests/core/test_cython_masks.py b/tests/core/test_cython_masks.py new file mode 100644 index 000000000..5d3eac6d5 --- /dev/null +++ b/tests/core/test_cython_masks.py @@ -0,0 +1,64 @@ +# pylint: disable=protected-access,bad-builtin +import unittest +from timeit import timeit +from copy import deepcopy +#import random +import numpy as np + +import nmmo +from tests.testhelpers import ScriptedAgentTestConfig + +RANDOM_SEED = 3333 # random.randint(0, 10000) +PERF_TEST = True + +class TestCythonMasks(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.config = ScriptedAgentTestConfig() + cls.config.set("USE_CYTHON", True) + cls.config.set("COMBAT_SPAWN_IMMUNITY", 5) + cls.env = nmmo.Env(cls.config, RANDOM_SEED) + cls.env.reset() + for _ in range(7): + cls.env.step({}) + + cls.move_mask = 
cls.env._dummy_obs["ActionTargets"]["Move"] + cls.attack_mask = cls.env._dummy_obs["ActionTargets"]["Attack"] + + def test_move_mask(self): + obs = self.env.obs + for agent_id in self.env.realm.players: + np_masks = deepcopy(self.move_mask) + cy_masks = deepcopy(self.move_mask) + obs[agent_id]._make_move_mask(np_masks, use_cython=False) + obs[agent_id]._make_move_mask(cy_masks, use_cython=True) + self.assertTrue(np.array_equal(np_masks["Direction"], cy_masks["Direction"])) + if PERF_TEST: + print('---test_move_mask---') + print('numpy:', timeit( + lambda: [obs[agent_id]._make_move_mask(np_masks, use_cython=False) + for agent_id in self.env.realm.players], number=1000, globals=globals())) + print('cython:', timeit( + lambda: [obs[agent_id]._make_move_mask(cy_masks, use_cython=True) + for agent_id in self.env.realm.players], number=1000, globals=globals())) + + def test_attack_mask(self): + obs = self.env.obs + for agent_id in self.env.realm.players: + np_masks = deepcopy(self.attack_mask) + cy_masks = deepcopy(self.attack_mask) + obs[agent_id]._make_attack_mask(np_masks, use_cython=False) + obs[agent_id]._make_attack_mask(cy_masks, use_cython=True) + self.assertTrue(np.array_equal(np_masks["Target"], cy_masks["Target"])) + if PERF_TEST: + print('---test_attack_mask---') + print('numpy:', timeit( + lambda: [obs[agent_id]._make_attack_mask(np_masks, use_cython=False) + for agent_id in self.env.realm.players], number=1000, globals=globals())) + print('cython:', timeit( + lambda: [obs[agent_id]._make_attack_mask(cy_masks, use_cython=True) + for agent_id in self.env.realm.players], number=1000, globals=globals())) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/entity/test_entity.py b/tests/core/test_entity.py similarity index 68% rename from tests/entity/test_entity.py rename to tests/core/test_entity.py index 952e10696..d9c351bcf 100644 --- a/tests/entity/test_entity.py +++ b/tests/core/test_entity.py @@ -4,6 +4,8 @@ import nmmo from 
nmmo.entity.entity import Entity, EntityState from nmmo.datastore.numpy_datastore import NumpyDatastore +from scripted.baselines import Random + class MockRealm: def __init__(self): @@ -57,6 +59,35 @@ def test_query_by_ids(self): e_row = EntityState.Query.by_id(realm.datastore, entity_id) self.assertEqual(e_row[Entity.State.attr_name_to_col["food"]], 11) + def test_recon_resurrect(self): + config = nmmo.config.Default() + config.set("PLAYERS", [Random]) + env = nmmo.Env(config) + env.reset() + + # set player 1 to be a recon + # Recons are immortal and cannot act (move) + player1 = env.realm.players[1] + player1.make_recon() + spawn_pos = player1.pos + + for _ in range(50): # long enough to starve to death + env.step({}) + self.assertEqual(player1.pos, spawn_pos) + self.assertEqual(player1.health.val, config.PLAYER_BASE_HEALTH) + + # resurrect player1 + player1.health.update(0) + self.assertEqual(player1.alive, False) + + player1.resurrect(health_prop=0.5, freeze_duration=10) + self.assertEqual(player1.health.val, 50) + self.assertEqual(player1.freeze.val, 10) + self.assertEqual(player1.message.val, 0) + self.assertEqual(player1.npc_type, -1) # immortal flag + self.assertEqual(player1.my_task.progress, 0) # task progress should be reset + # pylint:disable=protected-access + self.assertEqual(player1._make_mortal_tick, env.realm.tick + 10) if __name__ == '__main__': unittest.main() diff --git a/tests/core/test_env.py b/tests/core/test_env.py index ca7e8cc1a..02b8efdfc 100644 --- a/tests/core/test_env.py +++ b/tests/core/test_env.py @@ -39,11 +39,9 @@ def test_action_space(self): atn_str_keys) def test_observations(self): - obs = self.env.reset() - + obs, _ = self.env.reset() self.assertEqual(obs.keys(), self.env.realm.players.keys()) - dead_agents = set() for _ in tqdm(range(TEST_HORIZON)): entity_locations = [ [ev.row.val, ev.col.val, e] for e, ev in self.env.realm.players.entities.items() @@ -64,26 +62,25 @@ def test_observations(self): 
self.assertEqual(np.sum(player_obs["Entity"]), 0) self.assertEqual(np.sum(player_obs["Inventory"]), 0) self.assertEqual(np.sum(player_obs["Market"]), 0) - self.assertEqual(np.sum(player_obs["ActionTargets"]["Move"]["Direction"]), 1) - self.assertEqual(np.sum(player_obs["ActionTargets"]["Attack"]["Style"]), 3) + self.assertEqual(np.sum(player_obs["ActionTargets"]["Move"]["Direction"]), 1) # no-op + self.assertEqual(np.sum(player_obs["ActionTargets"]["Attack"]["Style"]), 3) # all ones - obs, rewards, dones, infos = self.env.step({}) + obs, rewards, terminated, truncated, infos = self.env.step({}) # make sure dead agents return proper dones=True, dummy obs, and -1 reward - self.assertEqual(len(self.env.agents), - len(self.env.realm.players) + len(self.env._dead_this_tick)) - self.assertEqual(len(self.env.possible_agents), - len(self.env.realm.players) + len(self.env._dead_agents)) + self.assertEqual(len(self.env.agents), len(self.env.realm.players)) + # NOTE: the below is no longer true when mini games resurrect dead players + # self.assertEqual(len(self.env.possible_agents), + # len(self.env.realm.players) + len(self.env._dead_agents)) for agent_id in self.env.agents: self.assertTrue(agent_id in obs) self.assertTrue(agent_id in rewards) - self.assertTrue(agent_id in dones) + self.assertTrue(agent_id in terminated) + self.assertTrue(agent_id in truncated) self.assertTrue(agent_id in infos) - if len(self.env._dead_agents) > len(dead_agents): - for dead_id in self.env._dead_agents - dead_agents: - self.assertEqual(rewards[dead_id], -1) - self.assertTrue(dones[dead_id]) - dead_agents.add(dead_id) + for dead_id in self.env._dead_this_tick: + self.assertEqual(rewards[dead_id], -1) + self.assertTrue(terminated[dead_id]) # check dead and alive entity_all = EntityState.Query.table(self.env.realm.datastore) @@ -91,7 +88,6 @@ def test_observations(self): alive_agents = set(alive_agents[alive_agents > 0]) for agent_id in alive_agents: self.assertTrue(agent_id in 
self.env.realm.players) - self.assertTrue(agent_id not in self.env._dead_agents) def _validate_tiles(self, obs, realm: Realm): for tile_obs in obs["Tile"]: @@ -149,7 +145,8 @@ def _validate_market(self, obs, realm: Realm): def _validate_items(self, items_dict, item_obs): item_obs = item_obs[item_obs[:,0] != 0] if len(items_dict) != len(item_obs): - assert len(items_dict) == len(item_obs) + assert len(items_dict) == len(item_obs),\ + f"Mismatch in number of items. Random seed: {RANDOM_SEED}" for item_ob in item_obs: item_ob = ItemState.parse_array(item_ob) item = items_dict[item_ob.id] @@ -175,5 +172,19 @@ def test_clean_item_after_reset(self): # item state table must be empty after reset self.assertTrue(ItemState.State.table(new_env.realm.datastore).is_empty()) + def test_truncated(self): + test_horizon = 25 + config = Config() + config.set("HORIZON", test_horizon) + env = nmmo.Env(config, RANDOM_SEED) + obs, _ = env.reset() + for _ in tqdm(range(test_horizon)): + obs, _, terminated, truncated, _ = env.step({}) + for agent_id in obs: + alive = agent_id in env.realm.players + self.assertEqual(terminated[agent_id], not alive) + # Test that the last step is truncated + self.assertEqual(truncated[agent_id], alive and env.realm.tick >= test_horizon) + if __name__ == '__main__': unittest.main() diff --git a/tests/core/test_game_api.py b/tests/core/test_game_api.py new file mode 100644 index 000000000..14bf07ad7 --- /dev/null +++ b/tests/core/test_game_api.py @@ -0,0 +1,146 @@ +# pylint: disable=protected-access +import unittest + +import nmmo +from nmmo.core.game_api import AgentTraining, TeamTraining, TeamBattle +from nmmo.lib.team_helper import TeamHelper + + +NUM_TEAMS = 16 +TEAM_SIZE = 8 + +class TeamConfig(nmmo.config.Small, nmmo.config.AllGameSystems): + PLAYER_N = NUM_TEAMS * TEAM_SIZE + TEAMS = {"Team" + str(i+1): [i*TEAM_SIZE+j+1 for j in range(TEAM_SIZE)] + for i in range(NUM_TEAMS)} + CURRICULUM_FILE_PATH = "tests/task/sample_curriculum.pkl" + +class 
TestGameApi(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.config = TeamConfig() + cls.env = nmmo.Env(cls.config) + + def test_num_agents_in_teams(self): + # raise error if PLAYER_N is not equal to the number of agents in TEAMS + config = TeamConfig() + config.set("PLAYER_N", 127) + env = nmmo.Env(config) + self.assertRaises(AssertionError, lambda: TeamTraining(env)) + + def test_agent_training_game(self): + game = AgentTraining(self.env) + self.env.reset(game=game) + + # this should use the DefaultGame setup + self.assertTrue(isinstance(self.env.game, AgentTraining)) + for task in self.env.tasks: + self.assertEqual(task.reward_to, "agent") # all tasks are for agents + + # every agent is assigned a task + self.assertEqual(len(self.env.possible_agents), len(self.env.tasks)) + # for the training tasks, the task assignee and subject should be the same + for task in self.env.tasks: + self.assertEqual(task.assignee, task.subject) + + # winners should be none when not determined + self.assertEqual(self.env.game.winners, None) + self.assertEqual(self.env.game.is_over, False) + + # make agent 1 a winner by destroying all other agents + for agent_id in self.env.possible_agents[1:]: + self.env.realm.players[agent_id].resources.health.update(0) + self.env.step({}) + self.assertEqual(self.env.game.winners, [1]) + + # when there are winners, the game is over + self.assertEqual(self.env.game.is_over, True) + + def test_team_training_game_spawn(self): + # when TEAMS is set, the possible agents should include all agents + team_helper = TeamHelper(self.config.TEAMS) + self.assertListEqual(self.env.possible_agents, + list(team_helper.team_and_position_for_agent.keys())) + + game = TeamTraining(self.env) + self.env.reset(game=game) + + for task in self.env.tasks: + self.assertEqual(task.reward_to, "team") # all tasks are for teams + + # agents in the same team should spawn together + team_locs = {} + for team_id, team_members in self.env.config.TEAMS.items(): + 
team_locs[team_id] = self.env.realm.players[team_members[0]].pos + for agent_id in team_members: + self.assertEqual(team_locs[team_id], self.env.realm.players[agent_id].pos) + + # teams should be apart from each other + for team_a in self.config.TEAMS.keys(): + for team_b in self.config.TEAMS.keys(): + if team_a != team_b: + self.assertNotEqual(team_locs[team_a], team_locs[team_b]) + + def test_team_battle_mode(self): + game = TeamBattle(self.env) + self.env.reset(game=game) + env = self.env + + # battle mode: all teams share the same task + task_spec_name = env.tasks[0].spec_name + for task in env.tasks: + self.assertEqual(task.reward_to, "team") # all tasks are for teams + self.assertEqual(task.spec_name, task_spec_name) # all tasks are the same in competition + + # set the first team to win + winner_team = "Team1" + for team_id, members in env.config.TEAMS.items(): + if team_id != winner_team: + for agent_id in members: + env.realm.players[agent_id].resources.health.update(0) + env.step({}) + self.assertEqual(env.game.winners, env.config.TEAMS[winner_team]) + + def test_competition_winner_task_completed(self): + game = TeamBattle(self.env) + self.env.reset(game=game) + + # The first two tasks get completed + winners = [] + for task in self.env.tasks[:2]: + task._completed_tick = 1 + self.assertEqual(task.completed, True) + winners += task.assignee + + self.env.step({}) + self.assertEqual(self.env.game.winners, winners) + + def test_game_via_config(self): + config = TeamConfig() + config.set("GAME_PACKS", [(AgentTraining, 1), + (TeamTraining, 1), + (TeamBattle, 1)]) + env = nmmo.Env(config) + env.reset() + for _ in range(3): + env.step({}) + + self.assertTrue(isinstance(env.game, game_cls) + for game_cls in [AgentTraining, TeamTraining, TeamBattle]) + + def test_game_set_next_task(self): + game = AgentTraining(self.env) + tasks = game._define_tasks() # sample tasks for testing + game.set_next_tasks(tasks) + self.env.reset(game=game) + + # The tasks are 
successfully fed into the env + for a, b in zip(tasks, self.env.tasks): + self.assertIs(a, b) + + # The next tasks is empty + self.assertIsNone(game._next_tasks) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/core/test_gym_obs_spaces.py b/tests/core/test_gym_obs_spaces.py index 50638bbfe..743e8090d 100644 --- a/tests/core/test_gym_obs_spaces.py +++ b/tests/core/test_gym_obs_spaces.py @@ -1,48 +1,98 @@ import unittest +from copy import deepcopy +import numpy as np import nmmo +from nmmo.core.game_api import DefaultGame + +RANDOM_SEED = np.random.randint(0, 100000) -class TestGymObsSpaces(unittest.TestCase): - def _test_gym_obs_space(self, env): - obs_spec = env.observation_space(1) - obs, _, _, _ = env.step({}) +class TestGymObsSpaces(unittest.TestCase): + def _is_obs_valid(self, obs_spec, obs): for agent_obs in obs.values(): for key, val in agent_obs.items(): - if key != 'ActionTargets': - self.assertTrue(obs_spec[key].contains(val), - f"Invalid obs format -- key: {key}, val: {val}") + self.assertTrue(obs_spec[key].contains(val), + f"Invalid obs format -- key: {key}, val: {val}") - if 'ActionTargets' in agent_obs: - val = agent_obs['ActionTargets'] + def _test_gym_obs_space(self, env): + obs_spec = env.observation_space(1) + obs, _, _, _, _ = env.step({}) + self._is_obs_valid(obs_spec, obs) + for agent_obs in obs.values(): + if "ActionTargets" in agent_obs: + val = agent_obs["ActionTargets"] for atn in nmmo.Action.edges(env.config): if atn.enabled(env.config): for arg in atn.edges: # pylint: disable=not-an-iterable - mask_spec = obs_spec['ActionTargets'][atn.__name__][arg.__name__] + mask_spec = obs_spec["ActionTargets"][atn.__name__][arg.__name__] mask_val = val[atn.__name__][arg.__name__] self.assertTrue(mask_spec.contains(mask_val), "Invalid obs format -- " + \ f"key: {atn.__name__}/{arg.__name__}, val: {mask_val}") + return obs def test_env_without_noop(self): config = nmmo.config.Default() - config.PROVIDE_NOOP_ACTION_TARGET = False + 
config.set("PROVIDE_NOOP_ACTION_TARGET", False) env = nmmo.Env(config) env.reset(seed=1) for _ in range(3): env.step({}) - self._test_gym_obs_space(env) def test_env_with_noop(self): config = nmmo.config.Default() - config.PROVIDE_NOOP_ACTION_TARGET = True + config.set("PROVIDE_NOOP_ACTION_TARGET", True) + env = nmmo.Env(config) + env.reset(seed=1) + for _ in range(3): + env.step({}) + self._test_gym_obs_space(env) + + def test_env_with_fogmap(self): + config = nmmo.config.Default() + config.set("PROVIDE_DEATH_FOG_OBS", True) env = nmmo.Env(config) env.reset(seed=1) for _ in range(3): env.step({}) + self._test_gym_obs_space(env) + + def test_system_disable(self): + class CustomGame(DefaultGame): + def _set_config(self): + self.config.reset() + self.config.set_for_episode("COMBAT_SYSTEM_ENABLED", False) + self.config.set_for_episode("ITEM_SYSTEM_ENABLED", False) + self.config.set_for_episode("EXCHANGE_SYSTEM_ENABLED", False) + self.config.set_for_episode("COMMUNICATION_SYSTEM_ENABLED", False) + + config = nmmo.config.Default() + env = nmmo.Env(config) + # test the default game + env.reset() + for _ in range(3): + env.step({}) self._test_gym_obs_space(env) + org_obs_spec = deepcopy(env.observation_space(1)) + + # test the custom game + game = CustomGame(env) + env.reset(game=game, seed=RANDOM_SEED) + for _ in range(3): + env.step({}) + new_obs = self._test_gym_obs_space(env) + + # obs format must match between episodes + self._is_obs_valid(org_obs_spec, new_obs) + + # check if the combat system is disabled + for agent_obs in new_obs.values(): + self.assertEqual(sum(agent_obs["ActionTargets"]["Attack"]["Target"]), + int(config.PROVIDE_NOOP_ACTION_TARGET), + f"Incorrect gym obs. 
seed: {RANDOM_SEED}") -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/tests/core/test_immutable_tile_property.py b/tests/core/test_immutable_tile_property.py deleted file mode 100644 index 6d8c56da7..000000000 --- a/tests/core/test_immutable_tile_property.py +++ /dev/null @@ -1,37 +0,0 @@ -# Test immutable invariants assumed for certain optimizations - -import unittest - -import copy -import nmmo -from scripted.baselines import Random - -def rollout(): - config = nmmo.config.Default() - config.PLAYERS = [Random] - env = nmmo.Env(config) - env.reset() - start = copy.deepcopy(env.realm) - for _ in range(64): - env.step({}) - end = copy.deepcopy(env.realm) - return (start, end) - -class TestImmutableTileProperty(unittest.TestCase): - - def test_passability_immutable(self): - # Used in optimization that caches the result of A* - start, end = rollout() - start_passable = [tile.impassible for tile in start.map.tiles.flatten()] - end_passable = [tile.impassible for tile in end.map.tiles.flatten()] - self.assertListEqual(start_passable, end_passable) - - def test_habitability_immutable(self): - # Used in optimization with habitability lookup table - start, end = rollout() - start_habitable = [tile.habitable for tile in start.map.tiles.flatten()] - end_habitable = [tile.habitable for tile in end.map.tiles.flatten()] - self.assertListEqual(start_habitable, end_habitable) - -if __name__ == '__main__': - unittest.main() diff --git a/tests/core/test_map_generation.py b/tests/core/test_map_generation.py index d7f35d9c6..36b438234 100644 --- a/tests/core/test_map_generation.py +++ b/tests/core/test_map_generation.py @@ -1,14 +1,18 @@ +# pylint: disable=protected-access import unittest import os import shutil +import numpy as np import nmmo +from nmmo.lib import material + class TestMapGeneration(unittest.TestCase): def test_insufficient_maps(self): config = nmmo.config.Small() - config.PATH_MAPS = 'maps/test_map_gen' - config.MAP_N = 20 + 
config.set("PATH_MAPS", "maps/test_map_gen") + config.set("MAP_N", 20) # clear the directory path_maps = os.path.join(config.PATH_CWD, config.PATH_MAPS) @@ -17,11 +21,13 @@ def test_insufficient_maps(self): # this generates 20 maps nmmo.Env(config) - # test if MAP_FORCE_GENERATION can be overriden - config.MAP_N = 30 - config.MAP_FORCE_GENERATION = False + # test if MAP_FORCE_GENERATION can be overriden, when the maps are insufficient + config2 = nmmo.config.Small() + config2.set("PATH_MAPS", "maps/test_map_gen") # the same map dir + config2.set("MAP_N", 30) + config2.set("MAP_FORCE_GENERATION", False) - test_env = nmmo.Env(config) + test_env = nmmo.Env(config2) test_env.reset(map_id=config.MAP_N) # this should finish without error @@ -42,7 +48,89 @@ class MapConfig( path_maps = os.path.join(config.PATH_CWD, config.PATH_MAPS) shutil.rmtree(path_maps, ignore_errors=True) - test_env = nmmo.Env(config) # pylint: disable=unused-variable + nmmo.Env(config) + + # this should finish without error + + def test_map_reset_from_fractal(self): + class MapConfig( + nmmo.config.Small, # no fractal, grass only + nmmo.config.Terrain, # water, grass, foilage, stone + nmmo.config.Item, # no additional effect on the map + nmmo.config.Profession, # add ore, tree, crystal, herb, fish + ): + PATH_MAPS = 'maps/test_fractal' + MAP_FORCE_GENERATION = True + MAP_RESET_FROM_FRACTAL = True + config = MapConfig() + self.assertEqual(config.MAP_SIZE, 64) + self.assertEqual(config.MAP_CENTER, 32) + + # clear the directory + path_maps = os.path.join(config.PATH_CWD, config.PATH_MAPS) + shutil.rmtree(path_maps, ignore_errors=True) + + test_env = nmmo.Env(config) + + # the fractals should be saved + fractal_file = os.path.join(path_maps, config.PATH_FRACTAL_SUFFIX.format(1)) + self.assertTrue(os.path.exists(fractal_file)) + + config = test_env.config + map_size = config.MAP_SIZE + np_random = test_env._np_random + + # Return the Grass map + config.set_for_episode("TERRAIN_SYSTEM_ENABLED", False) + 
map_dict = test_env._load_map_file() + map_array = test_env.realm.map._process_map(map_dict, np_random) + self.assertEqual(np.sum(map_array == material.Void.index)+\ + np.sum(map_array == material.Grass.index), map_size*map_size) + # NOTE: +1 to make the center tile, really the center + self.assertEqual((config.MAP_CENTER+1)**2, np.sum(map_array == material.Grass.index)) + + # Another way to make the grass map (which can place other tiles, if want to) + config.set_for_episode("MAP_RESET_FROM_FRACTAL", True) + config.set_for_episode("TERRAIN_RESET_TO_GRASS", True) + config.set_for_episode("PROFESSION_SYSTEM_ENABLED", False) # harvestalbe tiles + config.set_for_episode("TERRAIN_SCATTER_EXTRA_RESOURCES", False) + map_dict = test_env._load_map_file() + map_array = test_env.realm.map._process_map(map_dict, np_random) + self.assertEqual(np.sum(map_array == material.Void.index)+\ + np.sum(map_array == material.Grass.index), map_size*map_size) + # NOTE: +1 to make the center tile, really the center + self.assertEqual((config.MAP_CENTER+1)**2, np.sum(map_array == material.Grass.index)) + + # Generate from fractal, but not spawn profession tiles + config.reset() + config.set_for_episode("PROFESSION_SYSTEM_ENABLED", False) + map_dict = test_env._load_map_file() + map_array = test_env.realm.map._process_map(map_dict, np_random) + self.assertEqual(np.sum(map_array == material.Void.index)+\ + np.sum(map_array == material.Grass.index)+\ + np.sum(map_array == material.Water.index)+\ + np.sum(map_array == material.Stone.index)+\ + np.sum(map_array == material.Foilage.index), + map_size*map_size) + + # Use the saved map, but disable stone + config.reset() + config.set_for_episode("MAP_RESET_FROM_FRACTAL", False) + config.set_for_episode("TERRAIN_DISABLE_STONE", True) + map_dict = test_env._load_map_file() + org_map = map_dict["map"].copy() + self.assertTrue("fractal" not in map_dict) + map_array = test_env.realm.map._process_map(map_dict, np_random) + self.assertTrue(np.sum(org_map 
== material.Stone.index) > 0) + self.assertTrue(np.sum(map_array == material.Stone.index) == 0) + + # Generate from fractal, test add-on functions + config.reset() + config.set_for_episode("MAP_RESET_FROM_FRACTAL", True) + config.set_for_episode("PROFESSION_SYSTEM_ENABLED", True) + config.set_for_episode("TERRAIN_SCATTER_EXTRA_RESOURCES", True) + map_dict = test_env._load_map_file() + map_array = test_env.realm.map._process_map(map_dict, np_random) # this should finish without error diff --git a/tests/core/test_observation_tile.py b/tests/core/test_observation_tile.py index 00d519fb8..9514b023b 100644 --- a/tests/core/test_observation_tile.py +++ b/tests/core/test_observation_tile.py @@ -38,13 +38,14 @@ def test_action_target_consts(self): self.assertEqual(len(Action.Token.edges), self.config.COMMUNICATION_NUM_TOKENS) def test_obs_tile_correctness(self): - obs = self.env._compute_observations() center = self.config.PLAYER_VISION_RADIUS tile_dim = self.config.PLAYER_VISION_DIAMETER + self.env._compute_observations() + obs = self.env.obs # pylint: disable=inconsistent-return-statements def correct_tile(agent_obs: Observation, r_delta, c_delta): - agent = agent_obs.agent() + agent = agent_obs.agent if (0 <= agent.row + r_delta < self.config.MAP_SIZE) & \ (0 <= agent.col + c_delta < self.config.MAP_SIZE): r_cond = (agent_obs.tiles[:,TileState.State.attr_name_to_col["row"]] == agent.row+r_delta) @@ -59,7 +60,7 @@ def correct_tile(agent_obs: Observation, r_delta, c_delta): row_map = agent_obs.tiles[:,TileAttr['row']].reshape(tile_dim,tile_dim) col_map = agent_obs.tiles[:,TileAttr['col']].reshape(tile_dim,tile_dim) mat_map = agent_obs.tiles[:,TileAttr['material_id']].reshape(tile_dim,tile_dim) - agent = agent_obs.agent() + agent = agent_obs.agent self.assertEqual(agent.row, row_map[center,center]) self.assertEqual(agent.col, col_map[center,center]) self.assertEqual(agent_obs.tile(0,0).material_id, mat_map[center,center]) @@ -95,7 +96,8 @@ def visible_tiles_by_index(realm, 
agent_id, tile_map): # get tile map, to bypass the expensive tile window query tile_map = TileState.Query.get_map(self.env.realm.datastore, self.config.MAP_SIZE) - obs = self.env._compute_observations() + self.env._compute_observations() + obs = self.env.obs for agent_id in self.env.realm.players: self.assertTrue(np.array_equal(correct_visible_tile(self.env.realm, agent_id), obs[agent_id].tiles)) @@ -120,12 +122,13 @@ def simple_within_range(entities, attack_range, agent_row, agent_col): np.abs(entities[:,EntityAttr["col"]] - agent_col) ) <= attack_range - obs = self.env._compute_observations() + self.env._compute_observations() + obs = self.env.obs attack_range = self.config.COMBAT_MELEE_REACH for agent_obs in obs.values(): entities = agent_obs.entities.values - agent = agent_obs.agent() + agent = agent_obs.agent self.assertTrue(np.array_equal( correct_within_range(entities, attack_range, agent.row, agent.col), simple_within_range(entities, attack_range, agent.row, agent.col))) @@ -179,37 +182,6 @@ def sort_event_data(event_data): lambda: where_in_1d_with_index(event_data, [1, 2, 3], event_index), number=1000, globals=globals())) - def test_habitable(self): - from nmmo.systems.ai.move import habitable as habitable_impl - realm_map = self.env.realm.map - realm_tiles= self.env.realm.map.tiles - ent = self.env.realm.npcs[-1] - np_random = self.env._np_random - - def habitable_ref(tiles, ent, np_random): - r, c = ent.pos - cands = [] - if tiles[r-1, c].habitable: - cands.append(Action.North) - if tiles[r+1, c].habitable: - cands.append(Action.South) - if tiles[r, c-1].habitable: - cands.append(Action.West) - if tiles[r, c+1].habitable: - cands.append(Action.East) - - if len(cands) == 0: - return Action.North - - return np_random.choice(cands) - - print('---test_habitable---') - print('reference:', timeit( - lambda: habitable_ref(realm_tiles, ent, np_random), - number=1000, globals=globals())) - print('habitable_impl:', timeit( - lambda: habitable_impl(realm_map, ent, 
np_random), - number=1000, globals=globals())) if __name__ == '__main__': unittest.main() diff --git a/tests/core/test_tile.py b/tests/core/test_tile.py deleted file mode 100644 index f73dd3ad8..000000000 --- a/tests/core/test_tile.py +++ /dev/null @@ -1,44 +0,0 @@ -import unittest -import numpy as np - -import nmmo -from nmmo.core.tile import Tile, TileState -from nmmo.datastore.numpy_datastore import NumpyDatastore -from nmmo.lib import material - -class MockRealm: - def __init__(self): - self.datastore = NumpyDatastore() - self.datastore.register_object_type("Tile", TileState.State.num_attributes) - self.config = nmmo.config.Small() - self._np_random = np.random - -class MockEntity(): - def __init__(self, ent_id): - self.ent_id = ent_id - -class TestTile(unittest.TestCase): - # pylint: disable=no-member - def test_tile(self): - mock_realm = MockRealm() - np_random = np.random - tile = Tile(mock_realm, 10, 20, np_random) - - tile.reset(material.Foilage, nmmo.config.Small(), np_random) - - self.assertEqual(tile.row.val, 10) - self.assertEqual(tile.col.val, 20) - self.assertEqual(tile.material_id.val, material.Foilage.index) - - tile.add_entity(MockEntity(1)) - tile.add_entity(MockEntity(2)) - self.assertCountEqual(tile.entities.keys(), [1, 2]) - tile.remove_entity(1) - self.assertCountEqual(tile.entities.keys(), [2]) - - tile.harvest(True) - self.assertEqual(tile.depleted, True) - self.assertEqual(tile.material_id.val, material.Scrub.index) - -if __name__ == '__main__': - unittest.main() diff --git a/tests/core/test_tile_property.py b/tests/core/test_tile_property.py new file mode 100644 index 000000000..e85d08a7e --- /dev/null +++ b/tests/core/test_tile_property.py @@ -0,0 +1,36 @@ +import unittest + +import copy +import nmmo +from scripted.baselines import Sleeper + +HORIZON = 32 + + +class TestTileProperty(unittest.TestCase): + + @classmethod + def setUpClass(cls): + cls.config = nmmo.config.Default() + cls.config.PLAYERS = [Sleeper] + env = 
nmmo.Env(cls.config) + env.reset() + cls.start = copy.deepcopy(env.realm) + for _ in range(HORIZON): + env.step({}) + cls.end = copy.deepcopy(env.realm) + + # Test immutable invariants assumed for certain optimizations + def test_fixed_habitability_passability(self): + # Used in optimization with habitability lookup table + start_habitable = [tile.habitable for tile in self.start.map.tiles.flatten()] + end_habitable = [tile.habitable for tile in self.end.map.tiles.flatten()] + self.assertListEqual(start_habitable, end_habitable) + + # Used in optimization that caches the result of A* + start_passable = [tile.impassible for tile in self.start.map.tiles.flatten()] + end_passable = [tile.impassible for tile in self.end.map.tiles.flatten()] + self.assertListEqual(start_passable, end_passable) + +if __name__ == '__main__': + unittest.main() diff --git a/tests/core/test_tile_seize.py b/tests/core/test_tile_seize.py new file mode 100644 index 000000000..a4a7deec3 --- /dev/null +++ b/tests/core/test_tile_seize.py @@ -0,0 +1,131 @@ +# pylint: disable=protected-access +import unittest +import numpy as np + +import nmmo +import nmmo.core.map +from nmmo.core.tile import Tile, TileState +from nmmo.datastore.numpy_datastore import NumpyDatastore +from nmmo.lib import material + +class MockRealm: + def __init__(self): + self.datastore = NumpyDatastore() + self.datastore.register_object_type("Tile", TileState.State.num_attributes) + self.config = nmmo.config.Small() + self._np_random = np.random + self.tick = 0 + self.event_log = None + +class MockTask: + def __init__(self, ent_id): + self.assignee = (ent_id,) + +class MockEntity: + def __init__(self, ent_id): + self.ent_id = ent_id + self.my_task = None + if ent_id > 0: # only for players + self.my_task = MockTask(ent_id) + +class TestTileSeize(unittest.TestCase): + # pylint: disable=no-member + def test_tile(self): + mock_realm = MockRealm() + np_random = np.random + tile = Tile(mock_realm, 10, 20, np_random) + + 
tile.reset(material.Foilage, nmmo.config.Small(), np_random) + + self.assertEqual(tile.row.val, 10) + self.assertEqual(tile.col.val, 20) + self.assertEqual(tile.material_id.val, material.Foilage.index) + self.assertEqual(tile.seize_history, []) + + mock_realm.tick = 1 + tile.add_entity(MockEntity(1)) + self.assertEqual(tile.occupied, True) + tile.update_seize() + self.assertEqual(tile.seize_history[-1], (1, 1)) + + # Agent 1 stayed, so no change + mock_realm.tick = 2 + tile.update_seize() + self.assertEqual(tile.seize_history[-1], (1, 1)) + + # Two agents occupy the tile, so no change + mock_realm.tick = 3 + tile.add_entity(MockEntity(2)) + self.assertCountEqual(tile.entities.keys(), [1, 2]) + self.assertEqual(tile.occupied, True) + tile.update_seize() + self.assertEqual(tile.seize_history[-1], (1, 1)) + + mock_realm.tick = 5 + tile.remove_entity(1) + self.assertCountEqual(tile.entities.keys(), [2]) + self.assertEqual(tile.occupied, True) + tile.update_seize() + self.assertEqual(tile.seize_history[-1], (2, 5)) # new seize history + + # Two agents occupy the tile, so no change + mock_realm.tick = 7 + tile.add_entity(MockEntity(-10)) + self.assertListEqual(list(tile.entities.keys()), [2, -10]) + self.assertEqual(tile.occupied, True) + tile.update_seize() + self.assertEqual(tile.seize_history[-1], (2, 5)) + + # Should not change when occupied by an npc + mock_realm.tick = 9 + tile.remove_entity(2) + self.assertListEqual(list(tile.entities.keys()), [-10]) + self.assertEqual(tile.occupied, True) + tile.update_seize() + self.assertEqual(tile.seize_history[-1], (2, 5)) + + tile.harvest(True) + self.assertEqual(tile.depleted, True) + self.assertEqual(tile.material_id.val, material.Scrub.index) + + def test_map_seize_targets(self): + mock_realm = MockRealm() + config = mock_realm.config + np_random = mock_realm._np_random + map_dict = {"map": np.ones((config.MAP_SIZE, config.MAP_SIZE))*2} # all grass tiles + center_tile = (config.MAP_SIZE//2, config.MAP_SIZE//2) + + 
test_map = nmmo.core.map.Map(config, mock_realm, np_random) + test_map.reset(map_dict, np_random, seize_targets=["center"]) + self.assertListEqual(test_map.seize_targets, [center_tile]) + self.assertDictEqual(test_map.seize_status, {}) + + mock_realm.tick = 4 + test_map.tiles[center_tile].add_entity(MockEntity(5)) + test_map.step() + self.assertDictEqual(test_map.seize_status, {center_tile: (5, 4)}) # ent_id, tick + + mock_realm.tick = 6 + test_map.tiles[center_tile].remove_entity(5) + test_map.step() + self.assertDictEqual(test_map.seize_status, {center_tile: (5, 4)}) # should not change + + mock_realm.tick = 9 + test_map.tiles[center_tile].add_entity(MockEntity(6)) + test_map.tiles[center_tile].add_entity(MockEntity(-7)) + test_map.step() + self.assertDictEqual(test_map.seize_status, {center_tile: (5, 4)}) # should not change + + mock_realm.tick = 11 + test_map.tiles[center_tile].remove_entity(6) # so that -7 is the only entity + test_map.step() + self.assertDictEqual(test_map.seize_status, {center_tile: (5, 4)}) # should not change + + mock_realm.tick = 14 + test_map.tiles[center_tile].remove_entity(-7) + test_map.tiles[center_tile].add_entity(MockEntity(10)) + test_map.step() + self.assertDictEqual(test_map.seize_status, {center_tile: (10, 14)}) + +if __name__ == '__main__': + unittest.main() diff --git a/tests/datastore/__init__.py b/tests/datastore/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tests/lib/test_serialized.py b/tests/datastore/test_serialized.py similarity index 100% rename from tests/lib/test_serialized.py rename to tests/datastore/test_serialized.py diff --git a/tests/entity/__init__.py b/tests/entity/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tests/lib/__init__.py b/tests/lib/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tests/render/test_load_replay.py b/tests/render/test_load_replay.py index 87904cbf4..4dfd67919 100644 --- 
a/tests/render/test_load_replay.py +++ b/tests/render/test_load_replay.py @@ -4,11 +4,11 @@ import time # pylint: disable=import-error - from nmmo.render.render_client import WebsocketRenderer + from nmmo.render.render_client import DummyRenderer from nmmo.render.replay_helper import FileReplayHelper # open a client - renderer = WebsocketRenderer() + renderer = DummyRenderer() time.sleep(3) # load a replay: replace 'replay_dev.json' with your replay file diff --git a/tests/render/test_render_save.py b/tests/render/test_render_save.py index 165a1930c..dde115d59 100644 --- a/tests/render/test_render_save.py +++ b/tests/render/test_render_save.py @@ -5,7 +5,7 @@ from nmmo.core.config import (AllGameSystems, Combat, Communication, Equipment, Exchange, Item, Medium, Profession, Progression, Resource, Small, Terrain) -from nmmo.render.render_client import WebsocketRenderer +from nmmo.render.render_client import DummyRenderer from nmmo.render.replay_helper import FileReplayHelper from scripted import baselines @@ -54,7 +54,7 @@ def create_config(base, nent, *systems): replay_helper = FileReplayHelper() # the renderer is external to the env, so need to manually initiate it - renderer = WebsocketRenderer() + renderer = DummyRenderer() for conf_name, config in conf_dict.items(): env = nmmo.Env(config) diff --git a/tests/systems/test_exchange.py b/tests/systems/test_exchange.py index 599be59b4..3664c99e4 100644 --- a/tests/systems/test_exchange.py +++ b/tests/systems/test_exchange.py @@ -1,11 +1,14 @@ +# pylint: disable=unnecessary-lambda,protected-access,no-member from types import SimpleNamespace import unittest +import numpy as np + import nmmo from nmmo.datastore.numpy_datastore import NumpyDatastore from nmmo.systems.exchange import Exchange from nmmo.systems.item import ItemState -import nmmo.systems.item as item -import numpy as np +from nmmo.systems import item + class MockRealm: def __init__(self): @@ -14,6 +17,7 @@ def __init__(self): self.datastore = 
NumpyDatastore() self.items = {} self.datastore.register_object_type("Item", ItemState.State.num_attributes) + self.tick = 0 class MockEntity: def __init__(self) -> None: @@ -22,7 +26,7 @@ def __init__(self) -> None: receive = lambda item: self.items.append(item), remove = lambda item: self.items.remove(item) ) - + class TestExchange(unittest.TestCase): def test_listings(self): realm = MockRealm() @@ -36,31 +40,31 @@ def test_listings(self): entity_1.inventory.receive(hat_2) self.assertEqual(len(entity_1.items), 2) - tick = 0 + tick = realm.tick = 0 exchange._list_item(hat_1, entity_1, 10, tick) self.assertEqual(len(exchange._item_listings), 1) self.assertEqual(exchange._listings_queue[0], (hat_1.id.val, 0)) - tick = 1 + tick = realm.tick = 1 exchange._list_item(hat_2, entity_1, 20, tick) self.assertEqual(len(exchange._item_listings), 2) self.assertEqual(exchange._listings_queue[0], (hat_1.id.val, 0)) - tick = 4 - exchange.step(tick) + tick = realm.tick = 4 + exchange.step() # hat_1 should expire and not be listed self.assertEqual(len(exchange._item_listings), 1) self.assertEqual(exchange._listings_queue[0], (hat_2.id.val, 1)) - tick = 5 + tick = realm.tick = 5 exchange._list_item(hat_2, entity_1, 10, tick) - exchange.step(tick) + exchange.step() # hat_2 got re-listed, so should still be listed self.assertEqual(len(exchange._item_listings), 1) self.assertEqual(exchange._listings_queue[0], (hat_2.id.val, 5)) - tick = 10 - exchange.step(tick) + tick = realm.tick = 10 + exchange.step() self.assertEqual(len(exchange._item_listings), 0) def test_for_sale_items(self): @@ -77,14 +81,16 @@ def test_for_sale_items(self): item.Item.Query.for_sale(realm.datastore)[:,0], [hat_1.id.val, hat_2.id.val]) # first listing should expire - exchange.step(10) + realm.tick = 10 + exchange.step() np.testing.assert_array_equal( item.Item.Query.for_sale(realm.datastore)[:,0], [hat_2.id.val]) # second listing should expire - exchange.step(100) + realm.tick = 100 + exchange.step() 
np.testing.assert_array_equal( item.Item.Query.for_sale(realm.datastore)[:,0], []) if __name__ == '__main__': - unittest.main() \ No newline at end of file + unittest.main() diff --git a/tests/systems/test_item.py b/tests/systems/test_item.py index bf86d323c..d738448f8 100644 --- a/tests/systems/test_item.py +++ b/tests/systems/test_item.py @@ -4,12 +4,14 @@ import nmmo from nmmo.datastore.numpy_datastore import NumpyDatastore from nmmo.systems.item import Hat, Top, ItemState +from nmmo.systems.exchange import Exchange class MockRealm: def __init__(self): self.config = nmmo.config.Default() self.datastore = NumpyDatastore() self.items = {} + self.exchange = Exchange(self) self.datastore.register_object_type("Item", ItemState.State.num_attributes) self.players = {} @@ -22,12 +24,13 @@ def test_item(self): self.assertTrue(ItemState.Query.by_id(realm.datastore, hat_1.id.val) is not None) self.assertEqual(hat_1.type_id.val, Hat.ITEM_TYPE_ID) self.assertEqual(hat_1.level.val, 1) - self.assertEqual(hat_1.mage_defense.val, 10) + self.assertEqual(hat_1.mage_defense.val, realm.config.EQUIPMENT_ARMOR_LEVEL_DEFENSE) hat_2 = Hat(realm, 10) self.assertTrue(ItemState.Query.by_id(realm.datastore, hat_2.id.val) is not None) self.assertEqual(hat_2.level.val, 10) - self.assertEqual(hat_2.melee_defense.val, 100) + self.assertEqual(hat_2.melee_defense.val, + hat_2.level.val * realm.config.EQUIPMENT_ARMOR_LEVEL_DEFENSE) self.assertDictEqual(realm.items, {hat_1.id.val: hat_1, hat_2.id.val: hat_2}) diff --git a/tests/systems/test_skill_level.py b/tests/systems/test_skill_level.py index 496ecb7a6..626ed2c85 100644 --- a/tests/systems/test_skill_level.py +++ b/tests/systems/test_skill_level.py @@ -11,8 +11,8 @@ class TestSkillLevel(unittest.TestCase): @classmethod def setUpClass(cls): cls.config = ScriptedAgentTestConfig() - cls.config.PROGRESSION_EXP_THRESHOLD = [0, 10, 20, 30, 40, 50] - cls.config.PROGRESSION_LEVEL_MAX = len(cls.config.PROGRESSION_EXP_THRESHOLD) + 
cls.config.set("PROGRESSION_EXP_THRESHOLD", [0, 10, 20, 30, 40, 50]) + cls.config.set("PROGRESSION_LEVEL_MAX", len(cls.config.PROGRESSION_EXP_THRESHOLD)) cls.env = ScriptedAgentTestEnv(cls.config) def test_experience_calculator(self): diff --git a/tests/task/.gitignore b/tests/task/.gitignore deleted file mode 100644 index a0f7381ba..000000000 --- a/tests/task/.gitignore +++ /dev/null @@ -1 +0,0 @@ -test_held_out_tasks.py \ No newline at end of file diff --git a/tests/task/sample_curriculum.pkl b/tests/task/sample_curriculum.pkl index 2986f6d82..8226834db 100644 Binary files a/tests/task/sample_curriculum.pkl and b/tests/task/sample_curriculum.pkl differ diff --git a/tests/task/test_demo_task_creation.py b/tests/task/test_demo_task_creation.py index f022def37..9a4903dbd 100644 --- a/tests/task/test_demo_task_creation.py +++ b/tests/task/test_demo_task_creation.py @@ -3,7 +3,7 @@ from tests.testhelpers import ScriptedAgentTestConfig from nmmo.core.env import Env -from nmmo.lib.log import EventCode +from nmmo.lib.event_code import EventCode from nmmo.systems import skill from nmmo.task import predicate_api as p from nmmo.task import task_api as t @@ -84,7 +84,7 @@ def ForageSkill(gs, subject, lvl): # Test rollout config = ScriptedAgentTestConfig() - config.ALLOW_MULTI_TASKS_PER_AGENT = True + config.set("ALLOW_MULTI_TASKS_PER_AGENT", True) env = Env(config) # Creating and testing "team" tasks @@ -105,7 +105,7 @@ def ForageSkill(gs, subject, lvl): # Run the environment with these tasks # check rewards and infos for the task info - obs, rewards, dones, infos = rollout(env, team_tasks) + obs, rewards, terminated, truncated, infos = rollout(env, team_tasks) # Creating and testing the same task for all agents # i.e, each agent gets evaluated and rewarded individually @@ -118,7 +118,7 @@ def ForageSkill(gs, subject, lvl): # Run the environment with these tasks # check rewards and infos for the task info - obs, rewards, dones, infos = rollout(env, same_tasks) + obs, 
rewards, terminated, truncated, infos = rollout(env, same_tasks) # DONE @@ -164,13 +164,13 @@ def KillPredicate(gs: GameState, # Agent 1 kills 1 - reward .06 + .1 # Agent 2 kills 2 - reward .12 + .1 # Agent 3 kills 0 - reward 0 - _, rewards, _, _ = env.step({}) + _, rewards, _, _, _ = env.step({}) self.assertEqual(rewards[1], 0.16) self.assertEqual(rewards[2], 0.22) self.assertEqual(rewards[3], 0) # No reward when no changes - _, rewards, _, _ = env.step({}) + _, rewards, _, _, _ = env.step({}) self.assertEqual(rewards[1], 0) self.assertEqual(rewards[2], 0) self.assertEqual(rewards[3], 0) @@ -200,11 +200,11 @@ def PredicateMath(gs, subject): env.realm.event_log.record(code, players[1], target=players[2]) env.realm.event_log.record(code, players[1], target=players[3]) - _, rewards, _, _ = env.step({}) + _, rewards, _, _, _ = env.step({}) self.assertAlmostEqual(rewards[1], 0.8*2/7 + 1.1*1/3) for _ in range(2): - _, _, _, infos = env.step({}) + _, _, _, _, infos = env.step({}) # 0.8*2/7 + 1.1 > 1, but the progress is maxed at 1 self.assertEqual(infos[1]['task'][env.tasks[0].name]['progress'], 1.0) diff --git a/tests/task/test_manual_curriculum.py b/tests/task/test_manual_curriculum.py index c8d3563ab..0e345540c 100644 --- a/tests/task/test_manual_curriculum.py +++ b/tests/task/test_manual_curriculum.py @@ -4,9 +4,10 @@ from typing import List import nmmo.lib.material as m +import nmmo.systems.item as i +import nmmo.systems.skill as s from nmmo.task.base_predicates import * from nmmo.task.task_api import OngoingTask -from nmmo.task import constraint as c from nmmo.task.task_spec import TaskSpec, check_task_spec EVENT_NUMBER_GOAL = [3, 4, 5, 7, 9, 12, 15, 20, 30, 50] @@ -17,11 +18,11 @@ AGENT_NUM_GOAL = [1, 2, 3, 4, 5] # competition team size: 8 ITEM_NUM_GOAL = AGENT_NUM_GOAL TEAM_ITEM_GOAL = [1, 3, 5, 7, 10, 15, 20] -SKILLS = c.combat_skills + c.harvest_skills -COMBAT_STYLE = c.combat_skills -ALL_ITEM = c.armour + c.weapons + c.tools + c.ammunition + c.consumables 
-EQUIP_ITEM = c.armour + c.weapons + c.tools + c.ammunition -HARVEST_ITEM = c.weapons + c.ammunition + c.consumables +SKILLS = s.COMBAT_SKILL + s.HARVEST_SKILL +COMBAT_STYLE = s.COMBAT_SKILL +ALL_ITEM = i.ALL_ITEM +EQUIP_ITEM = i.ARMOR + i.WEAPON + i.TOOL + i.AMMUNITION +HARVEST_ITEM = i.WEAPON + i.AMMUNITION + i.CONSUMABLE task_spec: List[TaskSpec] = [] @@ -211,7 +212,7 @@ def PracticeInventoryManagement(gs, subject, space, num_tick): reward_to='team')) # consume items (ration, potion), evaluated based on the event log -for item in c.consumables: +for item in i.CONSUMABLE: for level in LEVEL_GOAL: # agent task for quantity in ITEM_NUM_GOAL: @@ -323,4 +324,4 @@ def create_pool(num_proc): # test if the task spec is pickalable with open('sample_curriculum.pkl', 'wb') as f: - dill.dump(task_spec, f) + dill.dump(task_spec, f, recurse=True) diff --git a/tests/task/test_predicates.py b/tests/task/test_predicates.py index b85715f28..fd92393ac 100644 --- a/tests/task/test_predicates.py +++ b/tests/task/test_predicates.py @@ -11,7 +11,7 @@ from nmmo.systems import item as Item from nmmo.systems import skill as Skill from nmmo.lib import material as Material -from nmmo.lib.log import EventCode +from nmmo.lib.event_code import EventCode # pylint: disable=import-error from nmmo.core.env import Env @@ -26,17 +26,17 @@ class TestBasePredicate(unittest.TestCase): - # pylint: disable=protected-access,invalid-name,no-member + # pylint: disable=protected-access,no-member,invalid-name def _get_taskenv(self, test_preds: List[Tuple[Predicate, Union[Iterable[int], int]]], grass_map=False): config = ScriptedAgentTestConfig() - config.PLAYERS = [Sleeper] - config.PLAYER_N = NUM_AGENT - config.IMMORTAL = True - config.ALLOW_MULTI_TASKS_PER_AGENT = True + config.set("PLAYERS", [Sleeper]) + config.set("PLAYER_N", NUM_AGENT) + config.set("IMMORTAL", True) + config.set("ALLOW_MULTI_TASKS_PER_AGENT", True) # OngoingTask keeps evaluating and returns progress as the reward # vs. 
Task stops evaluating once the task is completed, returns reward = delta(progress) @@ -99,7 +99,7 @@ def test_tickge_stay_alive_rip(self): env = self._get_taskenv(test_preds) for _ in range(tick_true-1): - _, _, _, infos = env.step({}) + _, _, _, _, infos = env.step({}) # TickGE_5 is false. All agents are alive, # so all StayAlive (ti in [1,2,3]) tasks are true @@ -112,17 +112,16 @@ def test_tickge_stay_alive_rip(self): # kill agents 1-3 for ent_id in death_note: env.realm.players[ent_id].resources.health.update(0) - env.obs = env._compute_observations() # 6th tick - _, _, _, infos = env.step({}) + _, _, _, _, infos = env.step({}) # those who have survived entities = EntityState.Query.table(env.realm.datastore) entities = list(entities[:, EntityState.State.attr_name_to_col['id']]) # ent_ids # make sure the dead agents are not in the realm & datastore - for ent_id in env.realm.players.spawned: + for ent_id in env.realm.players: if ent_id in death_note: # make sure that dead players not in the realm nor the datastore self.assertTrue(ent_id not in env.realm.players) @@ -168,10 +167,7 @@ def test_can_see_tile(self): # All agents to one corner for ent_id in env.realm.players: change_agent_pos(env.realm,ent_id,(BORDER,BORDER)) - - env.obs = env._compute_observations() - _, _, _, infos = env.step({}) - + _, _, _, _, infos = env.step({}) # no target tiles are found, so all are false true_task = [] self._check_result(env, test_preds, infos, true_task) @@ -179,10 +175,7 @@ def test_can_see_tile(self): # Team one to foilage, team two to water change_agent_pos(env.realm,1,(BORDER,MS-2)) # agent 1, team 0, foilage change_agent_pos(env.realm,2,(MS-2,BORDER)) # agent 2, team 1, water - env.obs = env._compute_observations() - - _, _, _, infos = env.step({}) - + _, _, _, _, infos = env.step({}) # t0, t2, t4 are true true_task = [0, 2, 4] self._check_result(env, test_preds, infos, true_task) @@ -210,29 +203,21 @@ def test_can_see_agent(self): # Teleport agent 1 to the opposite 
corner change_agent_pos(env.realm,1,(MS-2,MS-2)) - env.obs = env._compute_observations() - - _, _, _, infos = env.step({}) - + _, _, _, _, infos = env.step({}) # Only CanSeeAgent(Group([1]), search_target) is true, others are false true_task = [0] self._check_result(env, test_preds, infos, true_task) # Teleport agent 2 to agent 1's pos change_agent_pos(env.realm,2,(MS-2,MS-2)) - env.obs = env._compute_observations() - - _, _, _, infos = env.step({}) - + _, _, _, _, infos = env.step({}) # SearchAgent(Team([2]), search_target) is also true true_task = [0,1] self._check_result(env, test_preds, infos, true_task) # Teleport agent 3 to agent 1s position change_agent_pos(env.realm,3,(MS-2,MS-2)) - env.obs = env._compute_observations() - - _, _, _, infos = env.step({}) + _, _, _, _, infos = env.step({}) true_task = [0,1,2,3] self._check_result(env, test_preds, infos, true_task) @@ -255,10 +240,7 @@ def test_occupy_tile(self): BORDER = env.config.MAP_BORDER for ent_id in env.realm.players: change_agent_pos(env.realm,ent_id,(BORDER,BORDER)) - env.obs = env._compute_observations() - - _, _, _, infos = env.step({}) - + _, _, _, _, infos = env.step({}) # all tasks must be false true_task = [] self._check_result(env, test_preds, infos, true_task) @@ -266,10 +248,7 @@ def test_occupy_tile(self): # teleport agent 1 to the target tile, agent 2 to the adjacent tile change_agent_pos(env.realm,1,target_tile) change_agent_pos(env.realm,2,(target_tile[0],target_tile[1]-1)) - env.obs = env._compute_observations() - - _, _, _, infos = env.step({}) - + _, _, _, _, infos = env.step({}) # tid 0 and 1 should be true: OccupyTile(Group([1]), *target_tile) # & OccupyTile(Group([1,2,3]), *target_tile) true_task = [0, 1] @@ -292,9 +271,7 @@ def test_distance_traveled(self): # make all tiles habitable env = self._get_taskenv(test_preds, grass_map=True) - - _, _, _, infos = env.step({}) - + _, _, _, _, infos = env.step({}) # one cannot accomplish these goals in the first tick, so all false true_task 
= [] self._check_result(env, test_preds, infos, true_task) @@ -307,10 +284,7 @@ def test_distance_traveled(self): change_agent_pos(env.realm, ent_id, (spawn_pos[ent_id][0]+2, spawn_pos[ent_id][1])) ent_id = 3 # move 3, fail to reach agent_dist, but reach team_dist if add all change_agent_pos(env.realm, ent_id, (spawn_pos[ent_id][0], spawn_pos[ent_id][1]+3)) - env.obs = env._compute_observations() - - _,_,_, infos = env.step({}) - + _, _, _, _, infos = env.step({}) true_task = [0, 3] self._check_result(env, test_preds, infos, true_task) @@ -344,9 +318,8 @@ def test_all_members_within_range(self): change_agent_pos(env.realm, 2, (MS//2+1, MS//2)) # also StayCloseTo a1 = True change_agent_pos(env.realm, 4, (MS//2+5, MS//2)) change_agent_pos(env.realm, 6, (MS//2+8, MS//2)) - env.obs = env._compute_observations() - _, _, _, infos = env.step({}) + _, _, _, _, infos = env.step({}) true_task = [0, 1, 2, 5] self._check_result(env, test_preds, infos, true_task) @@ -379,9 +352,8 @@ def test_attain_skill(self): env.realm.players[2].skills.carving.level.update(goal_level) # AttainSkill(Group([2,4]), Skill.Carving, goal_level, 2) is true env.realm.players[4].skills.carving.level.update(goal_level+2) - env.obs = env._compute_observations() - _, _, _, infos = env.step({}) + _, _, _, _, infos = env.step({}) true_task = [2, 3, 5] self._check_result(env, test_preds, infos, true_task) @@ -414,9 +386,8 @@ def test_gain_experience(self): env.realm.players[2].skills.carving.exp.update(goal_exp) # AttainSkill(Group([2,4]), Skill.Carving, goal_level, 2) is true env.realm.players[4].skills.carving.exp.update(goal_exp+2) - env.obs = env._compute_observations() - _, _, _, infos = env.step({}) + _, _, _, _, infos = env.step({}) true_task = [2, 3, 5] self._check_result(env, test_preds, infos, true_task) @@ -442,20 +413,14 @@ def test_inventory_space_ge_not(self): # add one items to agent 1 within the limit capacity = env.realm.players[1].inventory.capacity provide_item(env.realm, 1, Item.Ration, 
level=1, quantity=capacity-target_space) - env.obs = env._compute_observations() - - _, _, _, infos = env.step({}) - + _, _, _, _, infos = env.step({}) self.assertTrue(env.realm.players[1].inventory.space >= target_space) true_task = [0, 1, 2, 4, 6] self._check_result(env, test_preds, infos, true_task) # add one more item to agent 1 provide_item(env.realm, 1, Item.Ration, level=1, quantity=1) - env.obs = env._compute_observations() - - _, _, _, infos = env.step({}) - + _, _, _, _, infos = env.step({}) self.assertTrue(env.realm.players[1].inventory.space < target_space) true_task = [1, 4, 5, 6] self._check_result(env, test_preds, infos, true_task) @@ -508,9 +473,8 @@ def test_own_equip_item(self): for ent_id in [4, 5, 6]: whetstone = env.realm.players[ent_id].inventory.items[0] whetstone.use(env.realm.players[ent_id]) - env.obs = env._compute_observations() - _, _, _, infos = env.step({}) + _, _, _, _, infos = env.step({}) true_task = [2, 3, 6, 7] self._check_result(env, test_preds, infos, true_task) @@ -541,9 +505,8 @@ def test_fully_armed(self): for itm in item_list: env.realm.players[ent_id].inventory.receive(itm) itm.use(env.realm.players[ent_id]) - env.obs = env._compute_observations() - _, _, _, infos = env.step({}) + _, _, _, _, infos = env.step({}) true_task = [1, 3] self._check_result(env, test_preds, infos, true_task) @@ -567,9 +530,8 @@ def test_hoard_gold_and_team(self): # HoardGold, TeamHoardGold gold_struck = [1, 2, 3] for ent_id in gold_struck: env.realm.players[ent_id].gold.update(ent_id * 10) - env.obs = env._compute_observations() - _, _, _, infos = env.step({}) + _, _, _, _, infos = env.step({}) true_task = [0, 2] self._check_result(env, test_preds, infos, true_task) @@ -601,10 +563,7 @@ def test_exchange_gold_predicates(self): # Earn Gold, Spend Gold, Make Profit env.realm.event_log.record(EventCode.EARN_GOLD, players[1], amount = 5) env.realm.event_log.record(EventCode.EARN_GOLD, players[1], amount = 3) 
env.realm.event_log.record(EventCode.EARN_GOLD, players[2], amount = 2) - - env.obs = env._compute_observations() - _, _, _, infos = env.step({}) - + _, _, _, _, infos = env.step({}) true_task = [0,4,5] self._check_result(env, test_preds, infos, true_task) self._check_progress(env.tasks[1], infos, 2 / gold_goal) @@ -612,9 +571,7 @@ def test_exchange_gold_predicates(self): # Earn Gold, Spend Gold, Make Profit env.realm.event_log.record(EventCode.BUY_ITEM, players[1], item=Item.Ration(env.realm,1), price=5) - env.obs = env._compute_observations() - _, _, _, infos = env.step({}) - + _, _, _, _, infos = env.step({}) true_task = [0,2,4] self._check_result(env, test_preds, infos, true_task) @@ -636,9 +593,7 @@ def test_count_event(self): # CountEvent players = env.realm.players env.realm.event_log.record(EventCode.EAT_FOOD, players[1]) env.realm.event_log.record(EventCode.GIVE_GOLD, players[2]) - env.obs = env._compute_observations() - _, _, _, infos = env.step({}) - + _, _, _, _, infos = env.step({}) true_task = [0,3] self._check_result(env, test_preds, infos, true_task) @@ -655,32 +610,30 @@ def test_score_hit(self): # ScoreHit players = env.realm.players env.realm.event_log.record(EventCode.SCORE_HIT, - players[1], + players[1], target=players[2], combat_style = Skill.Mage, damage=1) env.realm.event_log.record(EventCode.SCORE_HIT, - players[1], + players[1], target=players[2], combat_style = Skill.Melee, damage=1) - env.obs = env._compute_observations() - _, _, _, infos = env.step({}) + _, _, _, _, infos = env.step({}) true_task = [1] self._check_result(env, test_preds, infos, true_task) self._check_progress(env.tasks[0], infos, 0.5) env.realm.event_log.record(EventCode.SCORE_HIT, - players[1], + players[1], target=players[2], combat_style = Skill.Mage, damage=1) env.realm.event_log.record(EventCode.SCORE_HIT, - players[1], + players[1], target=players[2], combat_style = Skill.Melee, damage=1) - env.obs = env._compute_observations() - _, _, _, infos = env.step({}) + 
_, _, _, _, infos = env.step({}) true_task = [0,1] self._check_result(env, test_preds, infos, true_task) @@ -709,7 +662,7 @@ def test_defeat_entity(self): # PlayerKill # killing player 2 does not progress the both tasks env.realm.event_log.record(EventCode.PLAYER_KILL, players[1], target=players[2]) # level 1 player - _, _, _, infos = env.step({}) + _, _, _, _, infos = env.step({}) true_task = [] # all false self._check_result(env, test_preds, infos, true_task) @@ -719,7 +672,7 @@ def test_defeat_entity(self): # PlayerKill # killing npc -1 completes the first task env.realm.event_log.record(EventCode.PLAYER_KILL, players[1], target=npcs[-1]) # level 1 npc - _, _, _, infos = env.step({}) + _, _, _, _, infos = env.step({}) true_task = [0] self._check_result(env, test_preds, infos, true_task) @@ -728,13 +681,13 @@ def test_defeat_entity(self): # PlayerKill # killing player 3 makes half progress on the second task env.realm.event_log.record(EventCode.PLAYER_KILL, players[1], target=players[3]) # level 3 player - _, _, _, infos = env.step({}) + _, _, _, _, infos = env.step({}) self._check_progress(env.tasks[1], infos, .5) # killing player 4 completes the second task env.realm.event_log.record(EventCode.PLAYER_KILL, players[1], target=players[4]) # level 2 player - _, _, _, infos = env.step({}) + _, _, _, _, infos = env.step({}) true_task = [0,1] self._check_result(env, test_preds, infos, true_task) @@ -765,35 +718,25 @@ def test_item_event_predicates(self): # Consume, Harvest, List, Buy # True case: split the required items between 3 and 5 for player in (1,3): for _ in range(quantity // 2 + 1): - env.realm.event_log.record(id_, - players[player], - price=1, - item=true_item(env.realm, - lvl+random.randint(0,3))) + env.realm.event_log.record(id_, players[player], price=1, + item=true_item(env.realm, lvl+random.randint(0,3))) # False case 1: Quantity for _ in range(quantity-1): - env.realm.event_log.record(id_, - players[2], - price=1, - item=true_item(env.realm, lvl)) + 
env.realm.event_log.record(id_, players[2], price=1, + item=true_item(env.realm, lvl)) # False case 2: Type for _ in range(quantity+1): - env.realm.event_log.record(id_, - players[4], - price=1, - item=false_item(env.realm, lvl)) + env.realm.event_log.record(id_, players[4], price=1, + item=false_item(env.realm, lvl)) # False case 3: Level for _ in range(quantity+1): - env.realm.event_log.record(id_, - players[4], - price=1, - item=true_item(env.realm, - random.randint(0,lvl-1))) - env.obs = env._compute_observations() - _, _, _, infos = env.step({}) + env.realm.event_log.record(id_, players[4], price=1, + item=true_item(env.realm, random.randint(0,lvl-1))) + + _, _, _, _, infos = env.step({}) true_task = [0] self._check_result(env, test_preds, infos, true_task) diff --git a/tests/task/test_task_api.py b/tests/task/test_task_api.py index 1156c69e0..ed3527f03 100644 --- a/tests/task/test_task_api.py +++ b/tests/task/test_task_api.py @@ -9,7 +9,6 @@ from nmmo.task.task_api import Task, OngoingTask, HoldDurationTask from nmmo.task.task_spec import TaskSpec, make_task_from_spec from nmmo.task.group import Group -from nmmo.task.constraint import ScalarConstraint from nmmo.task.base_predicates import ( TickGE, AllMembersWithinRange, StayAlive, HoardGold ) @@ -137,14 +136,6 @@ def test_predicate_name(self): "(OR_(AND_(Success_(0,2))_(NOT_(OR_(Failure_(0,))_(Fake_(2,)_1_Hat_Melee))))_"+\ "(SUB_(ADD_(MUL_(Failure_(0,))_(Fake_(2,)_1_Hat_Melee))_0.3)_0.4))") - def test_constraint(self): - mock_gs = MockGameState() - scalar = ScalarConstraint(low=-10,high=10) - for _ in range(10): - self.assertTrue(scalar.sample(mock_gs.config)<10) - self.assertTrue(scalar.sample(mock_gs.config)>=-10) - - def test_task_api_with_predicate(self): # pylint: disable=no-value-for-parameter,no-member fake_pred_cls = make_predicate(Fake) @@ -218,8 +209,8 @@ def PracticeFormation(gs, subject, dist, num_tick): team_ids= list(teams.keys()) config = ScriptedAgentTestConfig() - config.PLAYERS =[Sleeper] - 
config.IMMORTAL = True + config.set("PLAYERS", [Sleeper]) + config.set("IMMORTAL", True) env = Env(config) env.reset(make_task_fn=lambda: make_task_from_spec(teams, [task_spec])) @@ -249,10 +240,10 @@ def PracticeFormation(gs, subject, dist, num_tick): env.realm.players[1].pos) for tick in range(goal_tick+2): - _, rewards, _, infos = env.step({}) + _, rewards, _, _, infos = env.step({}) if tick < 10: - target_reward = 1.0 if env.realm.tick == goal_tick else 1/goal_tick + target_reward = 1/goal_tick self.assertAlmostEqual(rewards[1], target_reward) self.assertAlmostEqual((1+tick)/goal_tick, infos[1]["task"][env.tasks[0].name]["progress"]) @@ -292,7 +283,7 @@ def PracticeFormation(gs, subject, dist, num_tick): def test_completed_tasks_in_info(self): # pylint: disable=no-value-for-parameter,no-member config = ScriptedAgentTestConfig() - config.ALLOW_MULTI_TASKS_PER_AGENT = True + config.set("ALLOW_MULTI_TASKS_PER_AGENT", True) env = Env(config) # make predicate class from function @@ -313,7 +304,7 @@ def test_completed_tasks_in_info(self): # tasks are all instantiated with the agent ids env.reset(make_task_fn=lambda: test_tasks) - _, _, _, infos = env.step({}) + _, _, _, _, infos = env.step({}) # agent 1: assigned only task 1, which is always True self.assertEqual(infos[1]["task"][env.tasks[0].name]["reward"], 1.0) diff --git a/tests/task/test_task_system_perf.py b/tests/task/test_task_system_perf.py index 978356964..36973de31 100644 --- a/tests/task/test_task_system_perf.py +++ b/tests/task/test_task_system_perf.py @@ -27,7 +27,7 @@ def create_stay_alive_eval_wo_group(agent_id: int): # check tasks for agent_id in agent_list: if test_mode is None: - self.assertTrue('StayAlive' in tasks[agent_id-1].name) # default task + self.assertTrue('TickGE' in tasks[agent_id-1].name) # default task if test_mode != 'no_task': self.assertTrue(f'assignee:({agent_id},)' in tasks[agent_id-1].name) diff --git a/tests/test_death_fog.py b/tests/test_death_fog.py new file mode 100644 index 
000000000..941a54d0c --- /dev/null +++ b/tests/test_death_fog.py @@ -0,0 +1,45 @@ +# pylint: disable=protected-access, no-member +import unittest +import nmmo + + +class TestDeathFog(unittest.TestCase): + def test_death_fog(self): + config = nmmo.config.Default() + config.set("DEATH_FOG_ONSET", 3) + config.set("DEATH_FOG_SPEED", 1/2) + config.set("DEATH_FOG_FINAL_SIZE", 16) + config.set("PROVIDE_DEATH_FOG_OBS", True) + + env = nmmo.Env(config) + env.reset() + + # check the initial fog map + border = config.MAP_BORDER + other_border = config.MAP_SIZE - config.MAP_BORDER - 1 + center = config.MAP_SIZE // 2 + safe = config.DEATH_FOG_FINAL_SIZE + self.assertEqual(env.realm.fog_map[border,border], 0) + self.assertEqual(env.realm.fog_map[other_border,other_border], 0) + self.assertEqual(env.realm.fog_map[border+1,border+1], -1) + + # Safe area should be marked with the negative map size + self.assertEqual(env.realm.fog_map[center-safe,center-safe], -config.MAP_SIZE) + self.assertEqual(env.realm.fog_map[center+safe-1,center+safe-1], -config.MAP_SIZE) + + for _ in range(config.DEATH_FOG_ONSET): + env.step({}) + + # check the fog map after the death fog onset + self.assertEqual(env.realm.fog_map[border,border], config.DEATH_FOG_SPEED) + self.assertEqual(env.realm.fog_map[border+1,border+1], -1 + config.DEATH_FOG_SPEED) + + for _ in range(3): + env.step({}) + + # check the fog map after 3 ticks after the death fog onset + self.assertEqual(env.realm.fog_map[border,border], config.DEATH_FOG_SPEED*4) + self.assertEqual(env.realm.fog_map[border+1,border+1], -1 + config.DEATH_FOG_SPEED*4) + +if __name__ == '__main__': + unittest.main() diff --git a/tests/test_determinism.py b/tests/test_determinism.py index fdfcfab90..84247a85d 100644 --- a/tests/test_determinism.py +++ b/tests/test_determinism.py @@ -23,16 +23,16 @@ def test_np_random_get_direction(self): # also test get_direction, which was added for speed optimization self.assertTrue(np.array_equal(np_random_1._dir_seq, 
np_random_2._dir_seq)) - print('---test_np_random_get_direction---') - print('np_random.integers():', timeit(lambda: np_random_1.integers(0,4), + print("---test_np_random_get_direction---") + print("np_random.integers():", timeit(lambda: np_random_1.integers(0,4), number=100000, globals=globals())) - print('np_random.get_direction():', timeit(lambda: np_random_1.get_direction(), + print("np_random.get_direction():", timeit(lambda: np_random_1.get_direction(), number=100000, globals=globals())) def test_map_determinism(self): config = nmmo.config.Default() - config.MAP_FORCE_GENERATION = True - config.TERRAIN_FLIP_SEED = False + config.set("MAP_FORCE_GENERATION", True) + config.set("TERRAIN_FLIP_SEED", False) map_generator = config.MAP_GENERATOR(config) np_random1, _ = seeding.np_random(RANDOM_SEED) @@ -46,8 +46,8 @@ def test_map_determinism(self): # test flip seed config2 = nmmo.config.Default() - config2.MAP_FORCE_GENERATION = True - config2.TERRAIN_FLIP_SEED = True + config2.set("MAP_FORCE_GENERATION", True) + config2.set("TERRAIN_FLIP_SEED", True) map_generator2 = config2.MAP_GENERATOR(config2) np_random2, _ = seeding.np_random(RANDOM_SEED) @@ -61,20 +61,20 @@ def test_env_level_rng(self): # config to always generate new maps, to test map determinism config1 = ScriptedAgentTestConfig() - setattr(config1, 'MAP_FORCE_GENERATION', True) - setattr(config1, 'PATH_MAPS', 'maps/det1') - setattr(config1, 'RESOURCE_RESILIENT_POPULATION', 0.2) # uses np_random + config1.set("MAP_FORCE_GENERATION", True) + config1.set("PATH_MAPS", "maps/det1") + config1.set("RESOURCE_RESILIENT_POPULATION", 0.2) # uses np_random config2 = ScriptedAgentTestConfig() - setattr(config2, 'MAP_FORCE_GENERATION', True) - setattr(config2, 'PATH_MAPS', 'maps/det2') - setattr(config2, 'RESOURCE_RESILIENT_POPULATION', 0.2) + config2.set("MAP_FORCE_GENERATION", True) + config2.set("PATH_MAPS", "maps/det2") + config2.set("RESOURCE_RESILIENT_POPULATION", 0.2) # to create the same maps, seed must be 
provided env1 = ScriptedAgentTestEnv(config1, seed=RANDOM_SEED) env2 = ScriptedAgentTestEnv(config2, seed=RANDOM_SEED) envs = [env1, env2] - init_obs = [env.reset(seed=RANDOM_SEED+1) for env in envs] + init_obs = [env.reset(seed=RANDOM_SEED+1)[0] for env in envs] self.assertTrue(observations_are_equal(init_obs[0], init_obs[0])) # sanity check self.assertTrue(observations_are_equal(init_obs[0], init_obs[1]), @@ -91,5 +91,5 @@ def test_env_level_rng(self): f"The multi-env determinism failed. Seed: {RANDOM_SEED}.") -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/tests/test_eventlog.py b/tests/test_eventlog.py index c6322c52f..8c9e5ce05 100644 --- a/tests/test_eventlog.py +++ b/tests/test_eventlog.py @@ -3,7 +3,7 @@ import nmmo from nmmo.datastore.numpy_datastore import NumpyDatastore from nmmo.lib.event_log import EventState, EventLogger -from nmmo.lib.log import EventCode +from nmmo.lib.event_code import EventCode from nmmo.entity.entity import Entity from nmmo.systems.item import ItemState from nmmo.systems.item import Whetstone, Ration, Hat @@ -18,7 +18,11 @@ def __init__(self): self.datastore.register_object_type("Event", EventState.State.num_attributes) self.datastore.register_object_type("Item", ItemState.State.num_attributes) self.tick = 0 + self.event_log = None + def step(self): + self.tick += 1 + self.event_log.update() class MockEntity(Entity): # pylint: disable=super-init-not-called @@ -39,27 +43,25 @@ class TestEventLog(unittest.TestCase): def test_event_logging(self): mock_realm = MockRealm() - event_log = EventLogger(mock_realm) + mock_realm.event_log = EventLogger(mock_realm) + event_log = mock_realm.event_log - mock_realm.tick = 0 # tick increase to 1 after all actions are processed event_log.record(EventCode.EAT_FOOD, MockEntity(1)) event_log.record(EventCode.DRINK_WATER, MockEntity(2)) event_log.record(EventCode.SCORE_HIT, MockEntity(2), - combat_style=Skill.Melee, damage=50) + target=MockEntity(1), 
combat_style=Skill.Melee, damage=50) event_log.record(EventCode.PLAYER_KILL, MockEntity(3), target=MockEntity(5, attack_level=5)) - event_log.update() + mock_realm.step() - mock_realm.tick = 1 event_log.record(EventCode.CONSUME_ITEM, MockEntity(4), item=Ration(mock_realm, 8)) event_log.record(EventCode.GIVE_ITEM, MockEntity(4)) event_log.record(EventCode.DESTROY_ITEM, MockEntity(5)) event_log.record(EventCode.HARVEST_ITEM, MockEntity(6), item=Whetstone(mock_realm, 3)) - event_log.update() + mock_realm.step() - mock_realm.tick = 2 event_log.record(EventCode.GIVE_GOLD, MockEntity(7)) event_log.record(EventCode.LIST_ITEM, MockEntity(8), item=Ration(mock_realm, 5), price=11) @@ -67,36 +69,34 @@ def test_event_logging(self): event_log.record(EventCode.BUY_ITEM, MockEntity(10), item=Whetstone(mock_realm, 7), price=21) #event_log.record(EventCode.SPEND_GOLD, env.realm.players[11], amount=25) - event_log.update() + mock_realm.step() - mock_realm.tick = 3 event_log.record(EventCode.LEVEL_UP, MockEntity(12), skill=Skill.Fishing, level=3) - event_log.update() + mock_realm.step() - mock_realm.tick = 4 event_log.record(EventCode.GO_FARTHEST, MockEntity(12), distance=6) event_log.record(EventCode.EQUIP_ITEM, MockEntity(12), item=Hat(mock_realm, 4)) - event_log.update() + mock_realm.step() log_data = [list(row) for row in event_log.get_data()] self.assertListEqual(log_data, [ [1, 1, 1, EventCode.EAT_FOOD, 0, 0, 0, 0, 0], [1, 2, 1, EventCode.DRINK_WATER, 0, 0, 0, 0, 0], - [1, 2, 1, EventCode.SCORE_HIT, 1, 0, 50, 0, 0], + [1, 2, 1, EventCode.SCORE_HIT, 1, 0, 50, 0, 1], [1, 3, 1, EventCode.PLAYER_KILL, 0, 5, 0, 0, 5], - [1, 4, 2, EventCode.CONSUME_ITEM, 16, 8, 1, 0, 0], + [1, 4, 2, EventCode.CONSUME_ITEM, 16, 8, 1, 0, 1], [1, 4, 2, EventCode.GIVE_ITEM, 0, 0, 0, 0, 0], [1, 5, 2, EventCode.DESTROY_ITEM, 0, 0, 0, 0, 0], - [1, 6, 2, EventCode.HARVEST_ITEM, 13, 3, 1, 0, 0], + [1, 6, 2, EventCode.HARVEST_ITEM, 13, 3, 1, 0, 2], [1, 7, 3, EventCode.GIVE_GOLD, 0, 0, 0, 0, 0], - [1, 8, 3, 
EventCode.LIST_ITEM, 16, 5, 1, 11, 0], + [1, 8, 3, EventCode.LIST_ITEM, 16, 5, 1, 11, 3], [1, 9, 3, EventCode.EARN_GOLD, 0, 0, 0, 15, 0], - [1, 10, 3, EventCode.BUY_ITEM, 13, 7, 1, 21, 0], + [1, 10, 3, EventCode.BUY_ITEM, 13, 7, 1, 21, 4], [1, 12, 4, EventCode.LEVEL_UP, 4, 3, 0, 0, 0], [1, 12, 5, EventCode.GO_FARTHEST, 0, 0, 6, 0, 0], - [1, 12, 5, EventCode.EQUIP_ITEM, 2, 4, 1, 0, 0]]) + [1, 12, 5, EventCode.EQUIP_ITEM, 2, 4, 1, 0, 5]]) log_by_tick = [list(row) for row in event_log.get_data(tick = 4)] self.assertListEqual(log_by_tick, [ @@ -104,14 +104,14 @@ def test_event_logging(self): log_by_event = [list(row) for row in event_log.get_data(event_code = EventCode.CONSUME_ITEM)] self.assertListEqual(log_by_event, [ - [1, 4, 2, EventCode.CONSUME_ITEM, 16, 8, 1, 0, 0]]) + [1, 4, 2, EventCode.CONSUME_ITEM, 16, 8, 1, 0, 1]]) log_by_tick_agent = [list(row) for row in \ event_log.get_data(tick = 5, agents = [12], event_code = EventCode.EQUIP_ITEM)] self.assertListEqual(log_by_tick_agent, [ - [1, 12, 5, EventCode.EQUIP_ITEM, 2, 4, 1, 0, 0]]) + [1, 12, 5, EventCode.EQUIP_ITEM, 2, 4, 1, 0, 5]]) empty_log = event_log.get_data(tick = 10) self.assertTrue(empty_log.shape[0] == 0) diff --git a/tests/test_mini_games.py b/tests/test_mini_games.py new file mode 100644 index 000000000..db9a13a77 --- /dev/null +++ b/tests/test_mini_games.py @@ -0,0 +1,31 @@ +# pylint: disable=protected-access +import unittest +import numpy as np +import nmmo +from nmmo import minigames as mg +from nmmo.lib import team_helper + +TEST_HORIZON = 10 + + +class TestMinigames(unittest.TestCase): + def test_mini_games(self): + config = nmmo.config.Default() + config.set("TEAMS", team_helper.make_teams(config, num_teams=16)) + env = nmmo.Env(config) + + for game_cls in mg.AVAILABLE_GAMES: + game = game_cls(env) + env.reset(game=game) + game.test(env, TEST_HORIZON) + + # Check if the gym_obs is correctly set, on alive agents + for agent_id in env.realm.players: + gym_obs = env.obs[agent_id].to_gym() + 
self.assertEqual(gym_obs["AgentId"], agent_id) + self.assertEqual(gym_obs["CurrentTick"], env.realm.tick) + self.assertTrue( + np.array_equal(gym_obs["Task"], env.agent_task_map[agent_id][0].embedding)) + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_performance.py b/tests/test_performance.py index ce9051a20..f24bf93e5 100644 --- a/tests/test_performance.py +++ b/tests/test_performance.py @@ -1,7 +1,9 @@ +# pylint: disable=no-member # import time import cProfile import io import pstats +from tqdm import tqdm import nmmo from nmmo.core.config import (NPC, AllGameSystems, Combat, Communication, @@ -25,16 +27,16 @@ def create_config(base, *systems): conf = type(name, systems, {})() - conf.TERRAIN_TRAIN_MAPS = 1 - conf.TERRAIN_EVAL_MAPS = 1 - conf.IMMORTAL = True + conf.set("TERRAIN_TRAIN_MAPS", 1) + conf.set("TERRAIN_EVAL_MAPS", 1) + conf.set("IMMORTAL", True) return conf def benchmark_config(benchmark, base, nent, *systems): conf = create_config(base, *systems) - conf.PLAYER_N = nent - conf.PLAYERS = [baselines.Random] + conf.set("PLAYER_N", nent) + conf.set("PLAYERS", [baselines.Random]) env = nmmo.Env(conf) env.reset() @@ -47,7 +49,7 @@ def test_small_env_creation(benchmark): def test_small_env_reset(benchmark): config = Small() - config.PLAYERS = [baselines.Random] + config.set("PLAYERS", [baselines.Random]) env = nmmo.Env(config) benchmark(lambda: env.reset(map_id=1)) @@ -110,18 +112,24 @@ def test_fps_all_med_100_pop(benchmark): def set_seed_test(): random_seed = 5000 - conf = create_config(Medium, Terrain, Resource, Combat, NPC) - conf.PLAYER_N = 10 - conf.PLAYERS = [baselines.Random] + # conf = create_config(Medium, Terrain, Resource, Combat, NPC, Communication) + # conf.set("PLAYER_N", 7) + # conf.set("PLAYERS", [baselines.Random]) + conf = nmmo.config.Default() + conf.set("TERRAIN_TRAIN_MAPS", 1) + conf.set("TERRAIN_EVAL_MAPS", 1) + conf.set("IMMORTAL", True) + conf.set("NPC_N", 128) + conf.set("USE_CYTHON", True) + 
conf.set("PROVIDE_DEATH_FOG_OBS", True) env = nmmo.Env(conf) - env.reset(seed=random_seed) - for _ in range(1024): + for _ in tqdm(range(1024)): env.step({}) def set_seed_test_complex(): - tasks = nmmo_default_task(range(128)) + tasks = nmmo_default_task(range(1, 129)) tasks += make_same_task(CountEvent, range(128), pred_kwargs={'event': 'EAT_FOOD', 'N': 10}) tasks += make_same_task(FullyArmed, range(128), @@ -129,14 +137,15 @@ def set_seed_test_complex(): profile_env_step(tasks=tasks) if __name__ == '__main__': + pr = cProfile.Profile() + pr.enable() + #set_seed_test_complex() + set_seed_test() + pr.disable() with open('profile.run','a', encoding="utf-8") as f: - pr = cProfile.Profile() - pr.enable() - set_seed_test_complex() - pr.disable() s = io.StringIO() ps = pstats.Stats(pr,stream=s).sort_stats('tottime') - ps.print_stats() + ps.print_stats(100) f.write(s.getvalue()) ''' diff --git a/tests/test_pettingzoo.py b/tests/test_pettingzoo.py index 3a8d78f30..3bc5a2c55 100644 --- a/tests/test_pettingzoo.py +++ b/tests/test_pettingzoo.py @@ -1,15 +1,17 @@ - +import unittest +from pettingzoo.test import parallel_api_test import nmmo from scripted import baselines -def test_pettingzoo_api(): - config = nmmo.config.Default() - config.PLAYERS = [baselines.Random] - # ensv = nmmo.Env(config) - # TODO: disabled due to Env not implementing the correct PettinZoo step() API - # parallel_api_test(env, num_cycles=1000) +class TestPettingZoo(unittest.TestCase): + def test_pettingzoo_api(self): + config = nmmo.config.Default() + config.set("PLAYERS", [baselines.Random]) + config.set("HORIZON", 290) + env = nmmo.Env(config) + parallel_api_test(env, num_cycles=300) -if __name__ == '__main__': - test_pettingzoo_api() +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_rollout.py b/tests/test_rollout.py index 35f3591f6..515237f3e 100644 --- a/tests/test_rollout.py +++ b/tests/test_rollout.py @@ -1,14 +1,20 @@ import nmmo from scripted.baselines import Random +class 
SimpleConfig(nmmo.config.Small, nmmo.config.Combat): + pass + def test_rollout(): - config = nmmo.config.Default() - config.PLAYERS = [Random] + config = nmmo.config.Default() # SimpleConfig() + config.set("PLAYERS", [Random]) + config.set("USE_CYTHON", True) env = nmmo.Env(config) env.reset() - for _ in range(128): + for _ in range(64): env.step({}) + env.reset() + if __name__ == '__main__': test_rollout() diff --git a/tests/testhelpers.py b/tests/testhelpers.py index 111bf2621..bb128dc90 100644 --- a/tests/testhelpers.py +++ b/tests/testhelpers.py @@ -111,14 +111,7 @@ class ScriptedAgentTestConfig(nmmo.config.Small, nmmo.config.AllGameSystems): __test__ = False - LOG_ENV = True - - LOG_MILESTONES = True - LOG_EVENTS = False - LOG_VERBOSE = False - - PLAYER_DEATH_FOG = 5 - + DEATH_FOG_ONSET = 5 PLAYERS = [ baselines.Fisher, baselines.Herbalist, baselines.Prospector,baselines.Carver, baselines.Alchemist, @@ -178,8 +171,7 @@ def change_spawn_pos(realm: Realm, ent_id: int, new_pos): realm.map.tiles[old_pos].remove_entity(ent_id) # set to new pos - entity.row.update(new_pos[0]) - entity.col.update(new_pos[1]) + entity.set_pos(*new_pos) entity.spawn_pos = new_pos realm.map.tiles[new_pos].add_entity(entity) @@ -197,10 +189,10 @@ class ScriptedTestTemplate(unittest.TestCase): def setUpClass(cls): # only use Combat agents cls.config = ScriptedAgentTestConfig() - cls.config.PROVIDE_ACTION_TARGETS = True + cls.config.set("PROVIDE_ACTION_TARGETS", True) - cls.config.PLAYERS = [baselines.Melee, baselines.Range, baselines.Mage] - cls.config.PLAYER_N = 3 + cls.config.set("PLAYERS", [baselines.Melee, baselines.Range, baselines.Mage]) + cls.config.set("PLAYER_N", 3) #cls.config.IMMORTAL = True # set up agents to test ammo use @@ -212,6 +204,7 @@ def setUpClass(cls): # items to provide cls.init_gold = 5 + # TODO: there should not be level 0 items cls.item_level = [0, 3] # 0 can be used, 3 cannot be used cls.item_sig = {} @@ -230,7 +223,7 @@ def _setup_env(self, random_seed, 
check_assert=True, remove_immunity=False): config = deepcopy(self.config) if remove_immunity: - config.COMBAT_SPAWN_IMMUNITY = 0 + config.set("COMBAT_SPAWN_IMMUNITY", 0) env = ScriptedAgentTestEnv(config, seed=random_seed) env.reset() @@ -261,7 +254,7 @@ def _setup_env(self, random_seed, check_assert=True, remove_immunity=False): tile.material_id.update(Material.Grass.index) tile.state = Material.Grass(env.config) - env.obs = env._compute_observations() + env._compute_observations() if check_assert: self._check_default_asserts(env) @@ -385,9 +378,9 @@ def _check_assert_make_action(self, env, atn, test_cond): # pylint: disable=unnecessary-lambda,bad-builtin def profile_env_step(action_target=True, tasks=None, condition=None): config = nmmo.config.Default() - config.PLAYERS = [baselines.Sleeper] # the scripted agents doing nothing - config.IMMORTAL = True # otherwise the agents will die - config.PROVIDE_ACTION_TARGETS = action_target + config.set("PLAYERS", [baselines.Sleeper]) # the scripted agents doing nothing + config.set("IMMORTAL", True) # otherwise the agents will die + config.set("PROVIDE_ACTION_TARGETS", action_target) env = nmmo.Env(config, seed=0) if tasks is None: tasks = [] @@ -395,7 +388,7 @@ def profile_env_step(action_target=True, tasks=None, condition=None): for _ in range(3): env.step({}) - env.obs = env._compute_observations() + env._compute_observations() obs = deepcopy(env.obs) test_func = [ diff --git a/utils/pre-git-check.sh b/utils/pre-git-check.sh index 39131a200..e8fbf5cfc 100755 --- a/utils/pre-git-check.sh +++ b/utils/pre-git-check.sh @@ -28,7 +28,7 @@ for file in $files; do fi done -if ! pylint --jobs=$cores --recursive=y nmmo tests; then +if ! pylint --recursive=y nmmo tests; then echo "Lint failed. Exiting." exit 1 fi