From 2a13ae6eee7382b1137c9af9d6c884215444d8bc Mon Sep 17 00:00:00 2001 From: Chris Makin Date: Mon, 1 Jul 2024 12:45:56 -0400 Subject: [PATCH 1/3] [LoginNodes] Modify cli to add custom action configuration parameters to login nodes (#6319) * Add CustomActions configuration for login nodes - Added schema for login nodes custom actions - Added CustomAction to LoginNodesPool resource in cluster_config.py Signed-off-by: Chris Makin * Add login node Pool Name to dna_json * Add DescribeStack action to LoginNodesIamResources This change allows custom actions OnNodeUpdated to be performed for login nodes, which requires a check of the cluster stack status in custom_action_executor.py. * Modify integration test for custom actions on Login Nodes - Modified test_essential_features integration test to account for custom actions on login nodes. * Update unit tests to include login node custom action changes - Modified login node dna_json unit test to include pool_name - Ran tox autoformatter * Merge head and login nodes custom action schema unit test - Update changelog - Fix DescribeStacks resource error in unit test --------- Signed-off-by: Chris Makin --- CHANGELOG.md | 7 +++++ cli/src/pcluster/config/cluster_config.py | 2 ++ cli/src/pcluster/schemas/cluster_schema.py | 14 ++++++++++ .../pcluster/templates/cdk_builder_utils.py | 11 +++++++- .../pcluster/templates/login_nodes_stack.py | 1 + .../pcluster/example_configs/slurm.full.yaml | 16 ++++++++++++ .../pcluster/schemas/test_cluster_schema.py | 11 +++++--- .../pcluster/templates/test_cluster_stack.py | 22 +++++++++++++--- .../slurm.full.all_resources.yaml | 20 ++++++++++++++ .../slurm.full_config.snapshot.yaml | 13 ++++++++++ .../test_login_nodes_dna_json/dna-1.json | 1 + .../test_login_nodes_dna_json/dna-2.json | 1 + .../tests/basic/test_essential_features.py | 8 +++++- .../pcluster.config.yaml | 26 ++++++++++++++++++- tests/integration-tests/utils.py | 8 ++++-- 15 files changed, 149 insertions(+), 12 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a4530cc5fa..0163ad6820 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,13 @@ CHANGELOG ========= +3.11.0 +------ + +**ENHANCEMENTS** + +- Add support for custom actions on login nodes. + 3.10.0 ------ diff --git a/cli/src/pcluster/config/cluster_config.py b/cli/src/pcluster/config/cluster_config.py index 796c22d44d..20cf64f282 100644 --- a/cli/src/pcluster/config/cluster_config.py +++ b/cli/src/pcluster/config/cluster_config.py @@ -1346,6 +1346,7 @@ def __init__( networking: LoginNodesNetworking = None, count: int = None, ssh: LoginNodesSsh = None, + custom_actions: CustomActions = None, iam: LoginNodesIam = None, gracetime_period: int = None, **kwargs, @@ -1357,6 +1358,7 @@ def __init__( self.networking = networking self.count = Resource.init_param(count, default=1) self.ssh = ssh + self.custom_actions = custom_actions self.iam = iam or LoginNodesIam(implied=True) self.gracetime_period = Resource.init_param(gracetime_period, default=10) diff --git a/cli/src/pcluster/schemas/cluster_schema.py b/cli/src/pcluster/schemas/cluster_schema.py index d2425fe110..8f3eec5930 100644 --- a/cli/src/pcluster/schemas/cluster_schema.py +++ b/cli/src/pcluster/schemas/cluster_schema.py @@ -1315,6 +1315,19 @@ def make_resource(self, data, **kwargs): return CustomActions(**data) +class LoginNodesCustomActionsSchema(BaseSchema): + """Represent the schema for all available custom actions in a login node pool.""" + + on_node_start = OneOrManyCustomActionField(metadata={"update_policy": UpdatePolicy.UNSUPPORTED}) + on_node_configured = OneOrManyCustomActionField(metadata={"update_policy": UpdatePolicy.UNSUPPORTED}) + on_node_updated = OneOrManyCustomActionField(metadata={"update_policy": UpdatePolicy.UNSUPPORTED}) + + @post_load + def make_resource(self, data, **kwargs): + """Generate resource.""" + return CustomActions(**data) + + class InstanceTypeSchema(BaseSchema): """Schema of a compute resource that supports a pool of instance types.""" @@ -1422,6 +1435,7 @@ class LoginNodesPoolSchema(BaseSchema): metadata={"update_policy": UpdatePolicy.SUPPORTED}, ) ssh = fields.Nested(LoginNodesSshSchema, metadata={"update_policy": UpdatePolicy.LOGIN_NODES_STOP}) + custom_actions = fields.Nested(LoginNodesCustomActionsSchema, metadata={"update_policy": UpdatePolicy.IGNORED}) iam = fields.Nested(LoginNodesIamSchema, metadata={"update_policy": UpdatePolicy.LOGIN_NODES_STOP}) gracetime_period = fields.Int( validate=validate.Range( diff --git a/cli/src/pcluster/templates/cdk_builder_utils.py b/cli/src/pcluster/templates/cdk_builder_utils.py index 9b65f9ac05..3d1484433d 100644 --- a/cli/src/pcluster/templates/cdk_builder_utils.py +++ b/cli/src/pcluster/templates/cdk_builder_utils.py @@ -934,9 +934,18 @@ def _build_policy(self) -> List[iam.PolicyStatement]: sid="CloudFormation", actions=[ "cloudformation:DescribeStackResource", + "cloudformation:DescribeStacks", ], effect=iam.Effect.ALLOW, - resources=[core.Aws.STACK_ID], + resources=[ + self._format_arn( + service="cloudformation", + resource=f"stack/{Stack.of(self).stack_name}/*", + region=Stack.of(self).region, + account=Stack.of(self).account, + ), + core.Aws.STACK_ID, + ], ), iam.PolicyStatement( sid="DynamoDBTable", diff --git a/cli/src/pcluster/templates/login_nodes_stack.py b/cli/src/pcluster/templates/login_nodes_stack.py index 3dff7126ff..41ffab59a9 100644 --- a/cli/src/pcluster/templates/login_nodes_stack.py +++ b/cli/src/pcluster/templates/login_nodes_stack.py @@ -258,6 +258,7 @@ def _add_login_nodes_pool_launch_template(self): "hosted_zone": (str(self._cluster_hosted_zone.ref) if self._cluster_hosted_zone else ""), "log_group_name": self._log_group.log_group_name, "log_rotation_enabled": "true" if self._config.is_log_rotation_enabled else "false", + "pool_name": self._pool.name, "node_type": "LoginNode", "proxy": self._pool.networking.proxy.http_proxy_address if self._pool.networking.proxy else "NONE", "raid_shared_dir": to_comma_separated_string( diff --git a/cli/tests/pcluster/example_configs/slurm.full.yaml b/cli/tests/pcluster/example_configs/slurm.full.yaml index 3fdcd1249d..b45b142d33 100644 --- a/cli/tests/pcluster/example_configs/slurm.full.yaml +++ b/cli/tests/pcluster/example_configs/slurm.full.yaml @@ -17,6 +17,22 @@ LoginNodes: - subnet-12345678 Ssh: KeyName: ec2-key-name + CustomActions: + OnNodeStart: + Script: https://test.tgz # s3:// | https:// + Args: + - arg1 + - arg2 + OnNodeConfigured: + Script: https://test.tgz # s3:// | https:// + Args: + - arg1 + - arg2 + OnNodeUpdated: + Script: https://test.tgz # s3:// | https:// + Args: + - arg1 + - arg2 Iam: InstanceRole: arn:aws:iam::aws:role/LoginNodeRole HeadNode: diff --git a/cli/tests/pcluster/schemas/test_cluster_schema.py b/cli/tests/pcluster/schemas/test_cluster_schema.py index b54ac7c89c..f42311c344 100644 --- a/cli/tests/pcluster/schemas/test_cluster_schema.py +++ b/cli/tests/pcluster/schemas/test_cluster_schema.py @@ -25,6 +25,7 @@ HeadNodeIamSchema, HeadNodeRootVolumeSchema, ImageSchema, + LoginNodesCustomActionsSchema, QueueCustomActionsSchema, QueueIamSchema, QueueTagSchema, @@ -259,14 +260,18 @@ def test_head_node_root_volume_schema(mocker, config_dict, failure_message): ), ], ) -def test_head_node_custom_actions_schema(mocker, config_dict, failure_message): +def test_head_login_node_custom_actions_schema(mocker, config_dict, failure_message): mock_aws_api(mocker) if failure_message: with pytest.raises(ValidationError, match=failure_message): HeadNodeCustomActionsSchema().load(config_dict) + LoginNodesCustomActionsSchema().load(config_dict) else: - conf = HeadNodeCustomActionsSchema().load(config_dict) - HeadNodeCustomActionsSchema().dump(conf) + head_conf = HeadNodeCustomActionsSchema().load(config_dict) + login_conf = LoginNodesCustomActionsSchema().load(config_dict) + + HeadNodeCustomActionsSchema().dump(head_conf) + LoginNodesCustomActionsSchema().dump(login_conf) @pytest.mark.parametrize( diff --git a/cli/tests/pcluster/templates/test_cluster_stack.py b/cli/tests/pcluster/templates/test_cluster_stack.py index 13f2fb8ecb..52b5b83fc7 100644 --- a/cli/tests/pcluster/templates/test_cluster_stack.py +++ b/cli/tests/pcluster/templates/test_cluster_stack.py @@ -756,11 +756,25 @@ def assert_iam_policy_properties(self, template, resource_name: str): "Sid": "Autoscaling", }, { - "Action": "cloudformation:DescribeStackResource", + "Action": ["cloudformation:DescribeStackResource", "cloudformation:DescribeStacks"], "Effect": "Allow", - "Resource": { - "Ref": "AWS::StackId", - }, + "Resource": [ + { + "Fn::Join": [ + "", + [ + "arn:", + {"Ref": "AWS::Partition"}, + ":cloudformation:", + {"Ref": "AWS::Region"}, + ":", + {"Ref": "AWS::AccountId"}, + ":stack/clustername/*", + ], + ] + }, + {"Ref": "AWS::StackId"}, + ], "Sid": "CloudFormation", }, { diff --git a/cli/tests/pcluster/templates/test_cluster_stack/test_cluster_config_limits/slurm.full.all_resources.yaml b/cli/tests/pcluster/templates/test_cluster_stack/test_cluster_config_limits/slurm.full.all_resources.yaml index c9515dd97c..6afe37bd6c 100644 --- a/cli/tests/pcluster/templates/test_cluster_stack/test_cluster_config_limits/slurm.full.all_resources.yaml +++ b/cli/tests/pcluster/templates/test_cluster_stack/test_cluster_config_limits/slurm.full.all_resources.yaml @@ -20,6 +20,26 @@ LoginNodes: HttpProxyAddress: https://proxy-address:port Ssh: KeyName: validate_key_name + CustomActions: + OnNodeStart: + Script: https://test.tgz + Args: + # Number of args doesn't impact number of resources, just the size of the template + {% for i in range(number_of_script_args) %} + - arg{{ i }} + {% endfor %} + OnNodeConfigured: + Script: https://test.tgz + Args: + {% for i in range(number_of_script_args) %} + - arg{{ i }} + {% endfor %} + OnNodeUpdated: + Script: https://test.tgz + Args: + {% for i in range(number_of_script_args) %} + - arg{{ i }} + {% endfor %} Iam: AdditionalIamPolicies: - Policy: arn:aws:iam::aws:policy/AdministratorAccess diff --git a/cli/tests/pcluster/templates/test_cluster_stack/test_cluster_config_limits/slurm.full_config.snapshot.yaml b/cli/tests/pcluster/templates/test_cluster_stack/test_cluster_config_limits/slurm.full_config.snapshot.yaml index 93b58d081d..bf39a224b5 100644 --- a/cli/tests/pcluster/templates/test_cluster_stack/test_cluster_config_limits/slurm.full_config.snapshot.yaml +++ b/cli/tests/pcluster/templates/test_cluster_stack/test_cluster_config_limits/slurm.full_config.snapshot.yaml @@ -87,6 +87,19 @@ Imds: LoginNodes: Pools: - Count: 1 + CustomActions: + OnNodeConfigured: + Args: + - arg0 + Script: https://test.tgz + OnNodeStart: + Args: + - arg0 + Script: https://test.tgz + OnNodeUpdated: + Args: + - arg0 + Script: https://test.tgz GracetimePeriod: 10 Iam: AdditionalIamPolicies: diff --git a/cli/tests/pcluster/templates/test_login_nodes_stack/test_login_nodes_dna_json/dna-1.json b/cli/tests/pcluster/templates/test_login_nodes_stack/test_login_nodes_dna_json/dna-1.json index 08a4950198..f1b89346fd 100644 --- a/cli/tests/pcluster/templates/test_login_nodes_stack/test_login_nodes_dna_json/dna-1.json +++ b/cli/tests/pcluster/templates/test_login_nodes_stack/test_login_nodes_dna_json/dna-1.json @@ -34,6 +34,7 @@ "hosted_zone": "{\"Ref\": \"referencetoclusternameRoute53HostedZone2388733DRef\"}", "log_group_name": "/aws/parallelcluster/clustername-202401151530", "log_rotation_enabled": "true", + "pool_name": "login", "node_type": "LoginNode", "proxy": "NONE", "raid_shared_dir": "", diff --git a/cli/tests/pcluster/templates/test_login_nodes_stack/test_login_nodes_dna_json/dna-2.json b/cli/tests/pcluster/templates/test_login_nodes_stack/test_login_nodes_dna_json/dna-2.json index 0c752eb2f5..3899f16910 100644 --- a/cli/tests/pcluster/templates/test_login_nodes_stack/test_login_nodes_dna_json/dna-2.json +++ b/cli/tests/pcluster/templates/test_login_nodes_stack/test_login_nodes_dna_json/dna-2.json @@ -34,6 +34,7 @@ "hosted_zone": "{\"Ref\": \"referencetoclusternameRoute53HostedZone2388733DRef\"}", "log_group_name": "/aws/parallelcluster/clustername-202401151530", "log_rotation_enabled": "true", + "pool_name": "login", "node_type": "LoginNode", "proxy": "NONE", "raid_shared_dir": "", diff --git a/tests/integration-tests/tests/basic/test_essential_features.py b/tests/integration-tests/tests/basic/test_essential_features.py index ff48075a36..11069e7a4c 100644 --- a/tests/integration-tests/tests/basic/test_essential_features.py +++ b/tests/integration-tests/tests/basic/test_essential_features.py @@ -321,7 +321,13 @@ def _test_custom_bootstrap_scripts_args_quotes(cluster): The cluster should be created and running. """ # Check head node and compute node status - check_status(cluster, "CREATE_COMPLETE", head_node_status="running", compute_fleet_status="RUNNING") + check_status( + cluster, + "CREATE_COMPLETE", + head_node_status="running", + compute_fleet_status="RUNNING", + login_nodes_status="active", + ) def _test_disable_hyperthreading( diff --git a/tests/integration-tests/tests/basic/test_essential_features/test_essential_features/pcluster.config.yaml b/tests/integration-tests/tests/basic/test_essential_features/test_essential_features/pcluster.config.yaml index 1f76b1d2db..9823f2f9da 100644 --- a/tests/integration-tests/tests/basic/test_essential_features/test_essential_features/pcluster.config.yaml +++ b/tests/integration-tests/tests/basic/test_essential_features/test_essential_features/pcluster.config.yaml @@ -28,6 +28,30 @@ HeadNode: Dcv: Enabled: True {% endif %} +LoginNodes: + Pools: + - Name: pool + InstanceType: {{ instance }} + Count: 3 + Networking: + SubnetIds: + - {{ private_subnet_id }} + CustomActions: + OnNodeStart: + Script: s3://{{ bucket_name }}/scripts/pre_install.sh + Args: + - "R curl wget" + - arg2 + - 'arg3 arg3' + OnNodeConfigured: + Script: s3://{{ bucket_name }}/scripts/post_install.sh + Args: + - "R curl wget" + - arg2 + - 'arg3 arg3' + Iam: + AdditionalIamPolicies: + - Policy: "arn:aws:iam::aws:policy/AmazonS3ReadOnlyAccess" Scheduling: Scheduler: {{ scheduler }} {% if scheduler == "awsbatch" %}AwsBatchQueues:{% else %}SlurmQueues:{% endif %} @@ -130,4 +154,4 @@ Monitoring: SharedStorage: - MountDir: /shared Name: name1 - StorageType: Ebs \ No newline at end of file + StorageType: Ebs diff --git a/tests/integration-tests/utils.py b/tests/integration-tests/utils.py index 5bf5210539..1b567caca4 100644 --- a/tests/integration-tests/utils.py +++ b/tests/integration-tests/utils.py @@ -563,8 +563,10 @@ def check_head_node_security_group(region, cluster, port, expected_cidr): assert_that(target["IpRanges"][0]["CidrIp"]).is_equal_to(expected_cidr) -def check_status(cluster, cluster_status=None, head_node_status=None, compute_fleet_status=None): - """Check the cluster's status and its head and compute status is as expected.""" +def check_status( + cluster, cluster_status=None, head_node_status=None, compute_fleet_status=None, login_nodes_status=None +): + """Check the cluster's status and its head, compute, and login nodes statuses are as expected.""" cluster_info = cluster.describe_cluster() if cluster_status: assert_that(cluster_info["clusterStatus"]).is_equal_to(cluster_status) @@ -572,6 +574,8 @@ def check_status(cluster, cluster_status=None, head_node_status=None, compute_fl assert_that(cluster_info["headNode"]["state"]).is_equal_to(head_node_status) if compute_fleet_status: assert_that(cluster_info["computeFleetStatus"]).is_equal_to(compute_fleet_status) + if login_nodes_status: + assert_that(cluster_info["loginNodes"]["status"]).is_equal_to(login_nodes_status) @retry(wait_fixed=seconds(20), stop_max_delay=minutes(5)) From 1468430964e2ac2019a3a7245d398f51b8bb7182 Mon Sep 17 00:00:00 2001 From: Hanwen Date: Mon, 1 Jul 2024 07:13:09 -0700 Subject: [PATCH 2/3] Add Github Manual workflow to bump version Signed-off-by: Hanwen --- .github/workflows/bump_version.yml | 44 ++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 .github/workflows/bump_version.yml diff --git a/.github/workflows/bump_version.yml b/.github/workflows/bump_version.yml new file mode 100644 index 0000000000..98a581a668 --- /dev/null +++ b/.github/workflows/bump_version.yml @@ -0,0 +1,44 @@ +# Bump Version workflow that is triggered manually +name: Bump Version + +on: + workflow_dispatch: + # Inputs the workflow accepts. + inputs: + pcluster-version: + description: 'The target version of ParallelCluster CLI' + required: true + type: string + branch: + description: 'The Github branch name' + required: true + type: string + +jobs: + create-pull-requests: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + with: + fetch-depth: 0 + ref: ${{ inputs.branch }} + - uses: actions/setup-java@v1 + with: + java-version: 11 + - run: | + sudo npm install -g redoc-cli + sudo snap install yq + - name: Modifiy Code to Change version + run: ./util/bump-version.sh --version ${{ inputs.pcluster-version }} + + - name: Create a Pull Request + uses: peter-evans/create-pull-request@v6 + with: + commit-message: 'Bump version to ${{ inputs.pcluster-version }}' + title: 'Bump version to ${{ inputs.pcluster-version }}' + body: | + This PR contains version bump. + Auto-generated by Github Action + branch: versionbump${{ inputs.branch }}${{ inputs.pcluster-version }} + delete-branch: true + labels: skip-changelog-update From 35899aa38aae36e44da45522de41a9b2acc80557 Mon Sep 17 00:00:00 2001 From: Hanwen Date: Mon, 1 Jul 2024 07:22:05 -0700 Subject: [PATCH 3/3] Add Github Manual workflow to bump version for AWS Batch CLI Signed-off-by: Hanwen --- .../workflows/bump_version_awsbatch_cli.yml | 38 +++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 .github/workflows/bump_version_awsbatch_cli.yml diff --git a/.github/workflows/bump_version_awsbatch_cli.yml b/.github/workflows/bump_version_awsbatch_cli.yml new file mode 100644 index 0000000000..526c3cdde5 --- /dev/null +++ b/.github/workflows/bump_version_awsbatch_cli.yml @@ -0,0 +1,38 @@ +# Bump Version workflow that is triggered manually +name: Bump Version for AWSBatch CLI + +on: + workflow_dispatch: + # Inputs the workflow accepts. + inputs: + awsbatch-cli-version: + description: 'The target version of AWSBatch CLI' + required: true + type: string + branch: + description: 'The Github branch name' + required: true + type: string + +jobs: + create-pull-requests: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + with: + fetch-depth: 0 + ref: ${{ inputs.branch }} + - name: Modifiy Code to Change version + run: ./util/bump-awsbatch-cli-version.sh ${{ inputs.awsbatch-cli-version }} + + - name: Create a Pull Request + uses: peter-evans/create-pull-request@v6 + with: + commit-message: 'Bump version for awsbatch-cli to ${{ inputs.awsbatch-cli-version }}' + title: 'Bump version for awsbatch-cli to ${{ inputs.awsbatch-cli-version }}' + body: | + This PR contains version bump for awsbatch-cli. + Auto-generated by Github Action + branch: versionbumpbatch${{ inputs.branch }}${{ inputs.awsbatch-cli-version }} + delete-branch: true + labels: skip-changelog-update