Skip to content

Commit

Permalink
Improve Scalability
Browse files Browse the repository at this point in the history
  • Loading branch information
hanwen-pcluste committed Aug 28, 2024
1 parent e8e7874 commit 6f75427
Showing 1 changed file with 7 additions and 2 deletions.
9 changes: 7 additions & 2 deletions src/common/schedulers/slurm_commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -355,6 +355,7 @@ def resume_powering_down_nodes():
# TODO: This function was added due to Slurm ticket 12915. The bug is not reproducible and the ticket was then
# closed. This operation may now be useless: we need to check this.
log.info("Resuming powering down nodes.")
log.info("new helllllllllllllllllll")
powering_down_nodes = _get_slurm_nodes(states="powering_down")
update_nodes(nodes=powering_down_nodes, state="resume", raise_on_error=False)

Expand All @@ -374,7 +375,7 @@ def _get_all_partition_nodes(partition_name, command_timeout=DEFAULT_GET_INFO_CO


def _get_slurm_nodes(states=None, partition_name=None, command_timeout=DEFAULT_GET_INFO_COMMAND_TIMEOUT):
sinfo_command = f"{SINFO} -h -N -o %N"
sinfo_command = f"{SINFO} -h -o %N"
partition_name = partition_name or ",".join(PartitionNodelistMapping.instance().get_partitions())
validate_subprocess_argument(partition_name)
sinfo_command += f" -p {partition_name}"
Expand All @@ -383,7 +384,11 @@ def _get_slurm_nodes(states=None, partition_name=None, command_timeout=DEFAULT_G
sinfo_command += f" -t {states}"
# Every node is printed on a separate line
# It's safe to use the function affected by B604 since the command is fully built in this code
return check_command_output(sinfo_command, timeout=command_timeout, shell=True).splitlines() # nosec B604
sinfo_output = check_command_output(sinfo_command, timeout=command_timeout, shell=True).splitlines()
nodes=[]
for line in sinfo_output:
nodes.extend(check_command_output(f"{SCONTROL} show hostnames {line}", timeout=command_timeout, shell=True).splitlines())
return nodes # nosec B604


def _parse_nodes_info(slurm_node_info: str) -> List[SlurmNode]:
Expand Down

0 comments on commit 6f75427

Please sign in to comment.