Skip to content

Commit

Permalink
Merge branch 'andrew/add-verbose-icos-flag' into 'master'
Browse files Browse the repository at this point in the history
feat(ICSUP-3837): Add verbose flag to help debug NP support issues and add logrotation to host

In cases where networking fails and we don't have logs, node provider support becomes very difficult.

For HostOS networking issues, we can almost always debug the issue from a screenshot of the console of the error message.

And for most GuestOS networking issues, the issue is almost always accompanied by a HostOS networking issue, so we can use that to aid debugging.

Recently though, we've had a [NP support issue](https://matrix.to/#/!jUsknEqNdJobySpXuN:matrix.org/$G3zb444IxJW8rmAKGgkXjhqNBpxb7hYi15cNH60KgG8?via=matrix.org&via=greensteps.cn&via=rudd-o.com) where HostOS networking and logs were fine, but GuestOS networking failed and we couldn't see why without GuestOS logs. But because we do not give console access to node providers, we could not see the GuestOS logs.

This motivated us to add a 'verbose' flag that pipes the GuestOS console to the Host terminal. We don't believe this to be a security risk because we are already giving 3rd party logging access to NPs soon, and access to the GuestOS console itself is still restricted.

This MR also adds logrotation to the HostOS (note: logrotation was already on HostOS, but now it's being explicitly included in the base image and patched to run properly) 

See merge request dfinity-lab/public/ic!20045
  • Loading branch information
andrewbattat committed Jul 3, 2024
2 parents aab2230 + 78e491e commit 6cb282e
Show file tree
Hide file tree
Showing 18 changed files with 113 additions and 17 deletions.
3 changes: 2 additions & 1 deletion ic-os/components/hostos-scripts/guestos/guestos.xml.template
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
<memory unit='GiB'>{{ resources_memory }}</memory>
<currentMemory unit='GiB'>{{ resources_memory }}</currentMemory>
<vcpu placement='static'>64</vcpu>
{{ cpu_spec }}
{{ cpu_spec }}
<resource>
<partition>/machine</partition>
</resource>
Expand Down Expand Up @@ -135,6 +135,7 @@
<model name='isa-serial'/>
</target>
<alias name='serial0'/>
<log file='/var/log/libvirt/qemu/guestos-serial.log' append='off'/>
</serial>
<console type='pty' tty='/dev/pts/3'>
<source path='/dev/pts/3'/>
Expand Down
10 changes: 5 additions & 5 deletions ic-os/components/hostos-scripts/guestos/kvm-cpu.xml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
<cpu mode='host-passthrough' migratable='off'>
<cache mode='passthrough'/>
<topology sockets='2' cores='16' threads='2'/>
<feature policy="require" name="topoext"/>
</cpu>
<cpu mode='host-passthrough' migratable='off'>
<cache mode='passthrough'/>
<topology sockets='2' cores='16' threads='2'/>
<feature policy="require" name="topoext"/>
</cpu>
2 changes: 1 addition & 1 deletion ic-os/components/hostos-scripts/guestos/qemu-cpu.xml
Original file line number Diff line number Diff line change
@@ -1 +1 @@
<cpu mode='host-model' migratable='off'/>
<cpu mode='host-model' migratable='off'/>
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
/var/log/libvirt/qemu/guestos-serial.log {
daily
rotate 7
compress
missingok
notifempty
copytruncate
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
[Unit]
Description=If verbose flag enabled, pipe GuestOS console to the Host terminal
Requires=guestos.service
After=guestos.service

[Service]
ExecStart=/opt/ic/bin/verbose-logging.sh
Restart=always

[Install]
WantedBy=multi-user.target
27 changes: 27 additions & 0 deletions ic-os/components/hostos-scripts/verbose-logging/verbose-logging.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#!/bin/bash

CONFIG="${CONFIG:=/boot/config/config.ini}"

function read_variables() {
# Read limited set of keys. Be extra-careful quoting values as it could
# otherwise lead to executing arbitrary shell code!
while IFS="=" read -r key value; do
case "$key" in
"verbose") verbose="${value}" ;;
esac
done <"${CONFIG}"
}

read_variables

if [[ "${verbose,,}" == "true" ]]; then
echo "##########################################" >/dev/tty1
echo "### STARTING GUESTOS CONSOLE LOGS... ###" >/dev/tty1
echo "##########################################" >/dev/tty1

# log slowly so as not to overwhelm the host terminal
tail -f /var/log/libvirt/qemu/guestos-serial.log | while read -r line; do
echo "$line" >/dev/tty1
sleep 0.075
done
fi
4 changes: 4 additions & 0 deletions ic-os/components/hostos.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@ component_files = {
Label("hostos-scripts/monitoring/monitor-power.service"): "/etc/systemd/system/monitor-power.service",
Label("hostos-scripts/monitoring/monitor-power.timer"): "/etc/systemd/system/monitor-power.timer",
Label("hostos-scripts/build-bootstrap-config-image.sh"): "/opt/ic/bin/build-bootstrap-config-image.sh",
Label("hostos-scripts/verbose-logging/verbose-logging.sh"): "/opt/ic/bin/verbose-logging.sh",
Label("hostos-scripts/verbose-logging/verbose-logging.service"): "/etc/systemd/system/verbose-logging.service",
Label("hostos-scripts/verbose-logging/logrotate.d/verbose-logging"): "/etc/logrotate.d/verbose-logging",

# early-boot
Label("early-boot/relabel-machine-id/relabel-machine-id.sh"): "/opt/ic/bin/relabel-machine-id.sh",
Expand Down Expand Up @@ -65,6 +68,7 @@ component_files = {
Label("monitoring/metrics-proxy/hostos/metrics-proxy.yaml"): "/etc/metrics-proxy.yaml",
Label("monitoring/metrics-proxy/metrics-proxy.service"): "/etc/systemd/system/metrics-proxy.service",
Label("monitoring/journald.conf"): "/etc/systemd/journald.conf",
Label("monitoring/logrotate/override.conf"): "/etc/systemd/system/logrotate.service.d/override.conf",

# networking
Label("networking/generate-network-config/hostos/generate-network-config.service"): "/etc/systemd/system/generate-network-config.service",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,8 @@ RequiredBy=setup-ssh-account-keys.service
Type=oneshot
RemainAfterExit=true
ExecStart=/opt/ic/bin/bootstrap-ic-node.sh

# All services that networking depends on log their outputs to the console
# and are piped to the host terminal if the verbose flag is enabled.
StandardOutput=journal+console
StandardError=journal+console
2 changes: 2 additions & 0 deletions ic-os/components/monitoring/logrotate/override.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[Service]
StateDirectory=logrotate
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,8 @@ WantedBy=multi-user.target
Type=oneshot
RemainAfterExit=true
ExecStart=/opt/ic/bin/guestos_tool generate-network-config

# All services that networking depends on log their outputs to the console
# and are piped to the host terminal if the verbose flag is enabled.
StandardOutput=journal+console
StandardError=journal+console
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,8 @@ WantedBy=multi-user.target
Type=oneshot
RemainAfterExit=true
ExecStart=/opt/ic/bin/setup-ssh-account-keys.sh

# All services that networking depends on log their outputs to the console
# and are piped to the host terminal if the verbose flag is enabled.
StandardOutput=journal+console
StandardError=journal+console
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,7 @@ Type=oneshot
RemainAfterExit=true
ExecStart=/opt/ic/bin/upgrade-shared-data-store.sh

# All services that networking depends on log their outputs to the console
# and are piped to the host terminal if the verbose flag is enabled.
StandardOutput=journal+console
StandardError=journal+console
16 changes: 13 additions & 3 deletions ic-os/dev-tools/bare_metal_deployment/deploy.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,9 @@ class Args:
# If present - decompress `upload_img` and inject this into config.ini
inject_image_domain: Optional[str] = None

# If present - decompress `upload_img` and inject this into config.ini
inject_image_verbose: Optional[str] = None

# Path to the setupos-inject-configuration tool. Necessary if any inject* args are present
inject_configuration_tool: Optional[str] = None

Expand Down Expand Up @@ -489,7 +492,8 @@ def inject_config_into_image(setupos_inject_configuration_path: Path,
compressed_image_path: Path,
ipv6_prefix: str,
ipv6_gateway: str,
ipv4_args: Optional[Ipv4Args]) -> Path:
ipv4_args: Optional[Ipv4Args],
verbose: Optional[str]) -> Path:
"""
Transform the compressed image.
* Decompress image into working_dir
Expand Down Expand Up @@ -520,7 +524,11 @@ def is_executable(p: Path) -> bool:
ipv4_part += f"--ipv4-prefix-length {ipv4_args.prefix_length} "
ipv4_part += f"--domain {ipv4_args.domain} "

invoke.run(f"{setupos_inject_configuration_path} {image_part} {prefix_part} {gateway_part} {ipv4_part}", echo=True)
verbose_part = ""
if verbose:
verbose_part = f"--verbose {verbose} "

invoke.run(f"{setupos_inject_configuration_path} {image_part} {prefix_part} {gateway_part} {ipv4_part} {verbose_part}", echo=True)

# Reuse the name of the compressed image path in the working directory
result_filename = compressed_image_path.name
Expand Down Expand Up @@ -576,7 +584,9 @@ def main():
Path(args.upload_img),
args.inject_image_ipv6_prefix,
args.inject_image_ipv6_gateway,
ipv4_args)
ipv4_args,
args.inject_image_verbose
)

upload_to_file_share(
modified_image_path,
Expand Down
1 change: 1 addition & 0 deletions ic-os/dev-tools/bare_metal_deployment/zh2-dll01.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,4 @@ inject_image_ipv4_address: 212.71.126.49
inject_image_ipv4_gateway: 212.71.126.54
inject_image_ipv4_prefix_length: 29
inject_image_domain: bare-metal-deployment.icp5.io
inject_image_verbose: false
2 changes: 0 additions & 2 deletions ic-os/hostos/context/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -101,8 +101,6 @@ RUN systemctl disable \
apt-daily.timer \
apt-daily-upgrade.service \
apt-daily-upgrade.timer \
logrotate.service \
logrotate.timer \
motd-news.service \
motd-news.timer

Expand Down
1 change: 1 addition & 0 deletions ic-os/hostos/context/packages.common
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ sudo
udev
usbutils
zstd
logrotate

# SELinux support
selinux-policy-default
Expand Down
6 changes: 6 additions & 0 deletions ic-os/setupos/config/config.ini
Original file line number Diff line number Diff line change
Expand Up @@ -36,3 +36,9 @@ ipv6_gateway=2a00:fb01:400:200::1
# ----------------------------------
# Define the domain name for your node. Ensure proper configuration of A and AAAA records for this domain.
# domain=node1.example.com

# ----------------------------------
# Verbose flag
# ----------------------------------
# Verbose flag is used to log GuestOS console logs to the Host terminal. Only uncomment if directed to by the Troubleshooting Node Deployment Errors wiki page
# verbose=true
18 changes: 13 additions & 5 deletions rs/ic_os/setupos-inject-configuration/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ struct Cli {
image_path: PathBuf,

#[command(flatten)]
network: NetworkConfig,
config_ini: ConfigIni,

#[arg(long)]
private_key_path: Option<PathBuf>,
Expand All @@ -37,7 +37,7 @@ struct Cli {
}

#[derive(Args)]
struct NetworkConfig {
struct ConfigIni {
#[arg(long)]
ipv6_prefix: Option<String>,

Expand All @@ -56,6 +56,9 @@ struct NetworkConfig {
#[arg(long)]
domain: Option<String>,

#[arg(long)]
verbose: Option<String>,

#[arg(long)]
mgmt_mac: Option<String>,
}
Expand Down Expand Up @@ -92,7 +95,7 @@ async fn main() -> Result<(), Error> {

// Update config.ini
let config_ini = NamedTempFile::new()?;
write_config(config_ini.path(), &cli.network)
write_config(config_ini.path(), &cli.config_ini)
.await
.context("failed to write config file")?;
config
Expand Down Expand Up @@ -174,17 +177,18 @@ async fn main() -> Result<(), Error> {
Ok(())
}

async fn write_config(path: &Path, cfg: &NetworkConfig) -> Result<(), Error> {
async fn write_config(path: &Path, cfg: &ConfigIni) -> Result<(), Error> {
let mut f = File::create(path).context("failed to create config file")?;

let NetworkConfig {
let ConfigIni {
ipv6_prefix,
ipv6_gateway,
mgmt_mac,
ipv4_address,
ipv4_gateway,
ipv4_prefix_length,
domain,
verbose,
} = cfg;

if let (Some(ipv6_prefix), Some(ipv6_gateway)) = (ipv6_prefix, ipv6_gateway) {
Expand All @@ -207,6 +211,10 @@ async fn write_config(path: &Path, cfg: &NetworkConfig) -> Result<(), Error> {
writeln!(&mut f, "mgmt_mac={}", mgmt_mac)?;
}

if let Some(verbose) = verbose {
writeln!(&mut f, "verbose={}", verbose)?;
}

Ok(())
}

Expand Down

0 comments on commit 6cb282e

Please sign in to comment.