Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[develop] [POC] Add new DebugLevel DevSetting parameter #5590

Draft
wants to merge 2 commits into
base: develop
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions cli/src/pcluster/config/cluster_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -1121,6 +1121,7 @@ def __init__(
instance_types_data: str = None,
timeouts: Timeouts = None,
compute_startup_time_metric_enabled: bool = None,
debug_level: str = None,
**kwargs,
):
super().__init__(**kwargs)
Expand All @@ -1131,6 +1132,7 @@ def __init__(
self.compute_startup_time_metric_enabled = Resource.init_param(
compute_startup_time_metric_enabled, default=False
)
self.debug_level = Resource.init_param(debug_level, default="info")

def _register_validators(self, context: ValidatorContext = None):
super()._register_validators(context)
Expand Down
22 changes: 13 additions & 9 deletions cli/src/pcluster/resources/compute_node/user_data.sh
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,8 @@ write_files:
"head_node_private_ip": "${HeadNodePrivateIp}",
"directory_service": {
"enabled": "${DirectoryServiceEnabled}"
}
},
"debug_level": "${DebugLevel}"
}
}
- path: /etc/chef/client.rb
Expand All @@ -125,11 +126,14 @@ write_files:
function error_exit
{
echo "Bootstrap failed with error: $1"
# wait logs flush before signaling the failure
sleep 10
# TODO: add possibility to override this behavior and keep the instance for debugging
shutdown -h now
exit 1
if [ "${DebugLevel}" != "info" ]; then
echo "Skipping termination because debug_level is set to ${DebugLevel}"
else
# wait logs flush before signaling the failure
sleep 10
shutdown -h now
exit 1
fi
}
function vendor_cookbook
{
Expand Down Expand Up @@ -214,11 +218,11 @@ write_files:
jq --argfile f1 /tmp/dna.json --argfile f2 /tmp/extra.json -n '$f1 * $f2' > /etc/chef/dna.json || ( echo "jq not installed or invalid extra_json"; cp /tmp/dna.json /etc/chef/dna.json)
{
pushd /etc/chef &&
cinc-client --local-mode --config /etc/chef/client.rb --log_level info --force-formatter --no-color --chef-zero-port 8889 --json-attributes /etc/chef/dna.json --override-runlist aws-parallelcluster-entrypoints::init &&
cinc-client --local-mode --config /etc/chef/client.rb --log_level ${DebugLevel} --force-formatter --no-color --chef-zero-port 8889 --json-attributes /etc/chef/dna.json --override-runlist aws-parallelcluster-entrypoints::init &&
/opt/parallelcluster/scripts/fetch_and_run -preinstall &&
cinc-client --local-mode --config /etc/chef/client.rb --log_level info --force-formatter --no-color --chef-zero-port 8889 --json-attributes /etc/chef/dna.json --override-runlist aws-parallelcluster-entrypoints::config &&
cinc-client --local-mode --config /etc/chef/client.rb --log_level ${DebugLevel} --force-formatter --no-color --chef-zero-port 8889 --json-attributes /etc/chef/dna.json --override-runlist aws-parallelcluster-entrypoints::config &&
/opt/parallelcluster/scripts/fetch_and_run -postinstall &&
cinc-client --local-mode --config /etc/chef/client.rb --log_level info --force-formatter --no-color --chef-zero-port 8889 --json-attributes /etc/chef/dna.json --override-runlist aws-parallelcluster-entrypoints::finalize &&
cinc-client --local-mode --config /etc/chef/client.rb --log_level ${DebugLevel} --force-formatter --no-color --chef-zero-port 8889 --json-attributes /etc/chef/dna.json --override-runlist aws-parallelcluster-entrypoints::finalize &&
popd
} || error_exit 'Failed to run bootstrap recipes. If --norollback was specified, check /var/log/cfn-init.log and /var/log/cloud-init-output.log.'

Expand Down
1 change: 1 addition & 0 deletions cli/src/pcluster/schemas/cluster_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -1070,6 +1070,7 @@ class ClusterDevSettingsSchema(BaseDevSettingsSchema):
instance_types_data = fields.Str(metadata={"update_policy": UpdatePolicy.SUPPORTED})
timeouts = fields.Nested(TimeoutsSchema, metadata={"update_policy": UpdatePolicy.SUPPORTED})
compute_startup_time_metric_enabled = fields.Bool(metadata={"update_policy": UpdatePolicy.SUPPORTED})
debug_level = fields.Str(metadata={"update_policy": UpdatePolicy.SUPPORTED})

@post_load
def make_resource(self, data, **kwargs):
Expand Down
1 change: 1 addition & 0 deletions cli/src/pcluster/templates/cdk_builder_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ def get_common_user_data_env(node: Union[HeadNode, SlurmQueue, LoginNodesPool],
"CookbookVersion": COOKBOOK_PACKAGES_VERSIONS["cookbook"],
"ChefVersion": COOKBOOK_PACKAGES_VERSIONS["chef"],
"BerkshelfVersion": COOKBOOK_PACKAGES_VERSIONS["berkshelf"],
"DebugLevel": config.dev_settings.debug_level,
}


Expand Down
12 changes: 8 additions & 4 deletions cli/src/pcluster/templates/cluster_stack.py
Original file line number Diff line number Diff line change
Expand Up @@ -1362,7 +1362,8 @@ def _add_head_node(self):
"commands": {
"chef": {
"command": (
"cinc-client --local-mode --config /etc/chef/client.rb --log_level info "
"cinc-client --local-mode --config /etc/chef/client.rb "
f"--log_level {self.config.dev_settings.debug_level} "
"--logfile /var/log/chef-client.log --force-formatter --no-color "
"--chef-zero-port 8889 --json-attributes /etc/chef/dna.json "
"--override-runlist aws-parallelcluster-entrypoints::init"
Expand All @@ -1378,7 +1379,8 @@ def _add_head_node(self):
"commands": {
"chef": {
"command": (
"cinc-client --local-mode --config /etc/chef/client.rb --log_level info "
"cinc-client --local-mode --config /etc/chef/client.rb "
f"--log_level {self.config.dev_settings.debug_level} "
"--logfile /var/log/chef-client.log --force-formatter --no-color "
"--chef-zero-port 8889 --json-attributes /etc/chef/dna.json "
"--override-runlist aws-parallelcluster-entrypoints::config"
Expand All @@ -1394,7 +1396,8 @@ def _add_head_node(self):
"commands": {
"chef": {
"command": (
"cinc-client --local-mode --config /etc/chef/client.rb --log_level info "
"cinc-client --local-mode --config /etc/chef/client.rb "
f"--log_level {self.config.dev_settings.debug_level} "
"--logfile /var/log/chef-client.log --force-formatter --no-color "
"--chef-zero-port 8889 --json-attributes /etc/chef/dna.json "
"--override-runlist aws-parallelcluster-entrypoints::finalize"
Expand All @@ -1414,7 +1417,8 @@ def _add_head_node(self):
"chef": {
"command": (
". /etc/profile.d/pcluster.sh; "
"cinc-client --local-mode --config /etc/chef/client.rb --log_level info"
"cinc-client --local-mode --config /etc/chef/client.rb "
f"--log_level {self.config.dev_settings.debug_level}"
" --logfile /var/log/chef-client.log --force-formatter --no-color"
" --chef-zero-port 8889 --json-attributes /etc/chef/dna.json"
" --override-runlist aws-parallelcluster-entrypoints::update &&"
Expand Down
4 changes: 3 additions & 1 deletion cli/src/pcluster/templates/queues_stack.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,9 @@ def _add_compute_resource_launch_template(
instance_market_options=self._launch_template_builder.get_instance_market_options(
queue, compute_resource
),
instance_initiated_shutdown_behavior="terminate",
instance_initiated_shutdown_behavior="stop"
if self._config.dev_settings.debug_level != "info"
else "terminate",
capacity_reservation_specification=self._launch_template_builder.get_capacity_reservation(
queue,
compute_resource,
Expand Down
Loading