Added support for per CPU core monitoring #301

Merged 2 commits on Apr 9, 2024
15 changes: 10 additions & 5 deletions flent/runners.py
@@ -2326,9 +2326,12 @@ class CpuStatsRunner(ProcessRunner):
separated by '\n---\n' and a timestamp to be present in the form 'Time:
xxxxxx.xxx' (e.g. the output of `date '+Time: %s.%N'`).

The first line is the total CPU load, and the following lines are the load of
each core.
"""

time_re = re.compile(r"^Time: (?P<timestamp>\d+\.\d+)", re.MULTILINE)
value_re = re.compile(r"^\d+ \d+ (?P<load>\d+\.\d+)$", re.MULTILINE)
value_re = re.compile(r"^cpu(?P<core_nr>\d+)?: (?P<load>\d+\.\d+)", re.MULTILINE)

def __init__(self, interval, length, host='localhost', **kwargs):
self.interval = interval
@@ -2341,8 +2344,6 @@ def parse(self, output, error):
raw_values = []
metadata = {}
for part in self.split_stream(output):
# Split out individual qdisc entries (in case there are more than
# one). If so, discard the root qdisc and sum the rest.
timestamp = self.time_re.search(part)
if timestamp is None:
continue
@@ -2351,10 +2352,14 @@

if value is None:
continue

matches = {}

for k, v in list(value.groupdict().items()):
v = float(v)
for m in self.value_re.finditer(part):
core_nr = m.group("core_nr")
load = m.group("load")
k = f'cpu{core_nr}' if core_nr is not None else 'load'
v = float(load)
if k not in matches:
matches[k] = v
else:
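To make the new parsing concrete, here is a small self-contained sketch of how the updated `value_re` handles the per-core output format. The regex is copied from the diff above; the sample chunk is invented, but follows the `Time:` / `cpu:` / `cpuN:` layout that the updated stat_iterate.sh emits.

```python
import re

# Same pattern as the new value_re in CpuStatsRunner
value_re = re.compile(r"^cpu(?P<core_nr>\d+)?: (?P<load>\d+\.\d+)", re.MULTILINE)

# Invented sample for one iteration of stat_iterate.sh output (two cores)
part = """Time: 1712345678.123456789
cpu: 0.420000
cpu0: 0.370000
cpu1: 0.470000
"""

matches = {}
for m in value_re.finditer(part):
    core_nr = m.group("core_nr")
    # The aggregate 'cpu:' line has no core number and is stored as 'load';
    # per-core lines become 'cpu0', 'cpu1', ...
    key = f"cpu{core_nr}" if core_nr is not None else "load"
    matches[key] = float(m.group("load"))

print(matches)  # {'load': 0.42, 'cpu0': 0.37, 'cpu1': 0.47}
```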
49 changes: 41 additions & 8 deletions flent/scripts/stat_iterate.sh
@@ -14,17 +14,50 @@ done

# $5 is IDLE, $6 is IOWAIT; we count both as idle time
command_string=$(cat <<EOF
(for x in \$(seq $count); do date '+Time: %s.%N'; cat /proc/stat; sleep $interval ;done ) | awk 'BEGIN {idle=0; total=0}
\$1 == "cpu" { sum=0; for (i=2;i<=NF;i++) { sum+=\$i };
if(total>0) {print \$5+\$6-idle " " sum-total " " 1-(\$5+\$6-idle)/(sum-total);}
idle=\$5+\$6; total=sum
}
\$1 == "Time:" { print "---\n" \$0 }'
set -o noglob
awk -v COUNT=$count -v INTERVAL=$interval '
function get_cpu_usage(count) {
FS = " ";
IDLE_FIELD = 5;
IOWAIT_FIELD = 6;
PROC_CPU = "/proc/stat";
while ((getline < PROC_CPU) > 0) {
if (\$0 !~ /^cpu/)
break;
cpu_idle_prev[\$1] = cpu_idle[\$1];
cpu_total_prev[\$1] = cpu_total[\$1];
cpu_idle[\$1] = 0;
cpu_total[\$1] = 0;
for (i = 2; i <= NF; i++) {
if (i == IDLE_FIELD || i == IOWAIT_FIELD)
cpu_idle[\$1] += \$i;
cpu_total[\$1] += \$i;
}
idle = cpu_idle[\$1] - cpu_idle_prev[\$1];
total = cpu_total[\$1] - cpu_total_prev[\$1];
cpu_usage = (total != 0) ? (1 - (idle / total)) : 0
if (count)
printf("%s: %f\n", \$1, cpu_usage);
}
close(PROC_CPU);
}

BEGIN {
date_cmd = "date \"+Time: %s.%N\""
for (loop = 0; loop < COUNT; loop++) {
print("---");
(date_cmd) | getline date;
print(date);
close(date_cmd);
get_cpu_usage(loop);
system("sleep " INTERVAL);
}
}'
EOF
)

if [ "$host" == "localhost" ]; then
eval $command_string
eval "$command_string"
else
echo $command_string | ssh $host sh
echo "$command_string" | ssh "$host" sh
fi
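The awk above is compact but dense. Conceptually it samples /proc/stat repeatedly, sums the jiffy columns of each `cpu*` line, counts idle + iowait as idle time, and reports 1 - Δidle/Δtotal between consecutive samples. A rough Python equivalent is sketched below, purely for illustration; the function names and the exact output ordering (including how the first sample is primed) are simplifications and not part of the PR.

```python
import time

IDLE, IOWAIT = 3, 4  # zero-based positions of idle and iowait among the value columns

def read_cpu_counters():
    """Return {line_name: (idle_jiffies, total_jiffies)} for the 'cpu' and 'cpuN' lines."""
    counters = {}
    with open("/proc/stat") as f:
        for line in f:
            if not line.startswith("cpu"):
                break  # the cpu lines come first; ignore the rest of /proc/stat
            name, *fields = line.split()
            values = [int(v) for v in fields]
            idle = values[IDLE] + values[IOWAIT]
            counters[name] = (idle, sum(values))
    return counters

def monitor(interval=1.0, count=5):
    prev = read_cpu_counters()
    for _ in range(count):
        time.sleep(interval)
        print("---")
        print(f"Time: {time.time():.9f}")
        cur = read_cpu_counters()
        for name, (idle, total) in cur.items():
            d_idle = idle - prev[name][0]
            d_total = total - prev[name][1]
            usage = (1 - d_idle / d_total) if d_total else 0.0
            print(f"{name}: {usage:f}")
        prev = cur

if __name__ == "__main__":
    monitor()
```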
21 changes: 21 additions & 0 deletions flent/tests/cpu_stats.inc
@@ -14,6 +14,7 @@ for host in CPU_STATS_HOSTS:

if CPU_STATS_HOSTS:

# Average CPU load
PLOTS['cpu'] = {'description': 'CPU stats',
'type': 'timeseries',
'axis_labels': ['Load'],
@@ -26,6 +27,7 @@ if CPU_STATS_HOSTS:
PLOTS['cpu_box'] = {'description': 'CPU stats (box plot)',
'type': 'box',
'parent': 'cpu'}

PLOTS['cpu_bar'] = {'description': 'CPU stats (bar plot)',
'type': 'bar',
'parent': 'cpu'}
@@ -42,3 +44,22 @@
PLOTS['cpu_bar_combine'] = {'description': 'CPU stats (bar combine plot)',
'type': 'bar_combine',
'parent': 'cpu_box_combine'}


# Per core CPU load
PLOTS['cpu_core'] = {'description': 'Per core CPU stats',
'type': 'timeseries',
'axis_labels': ['Load'],
'series': [
{'data': glob('cpu_stats_*'),
'raw_key': glob('cpu*'),
'label': 'CPU core load'},
]}

PLOTS['cpu_core_box'] = {'description': 'Per core CPU stats (box plot)',
'type': 'box',
'parent': 'cpu_core'}

PLOTS['cpu_core_bar'] = {'description': 'Per core CPU stats (bar plot)',
'type': 'bar',
'parent': 'cpu_core'}
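The per-core plots rely on the raw keys emitted by the updated parser: an aggregate 'load' plus one 'cpuN' key per core, which the 'cpu*' raw_key glob then selects. A tiny illustration is shown below, using fnmatch to stand in for flent's own glob helper; the shape of the raw entry is assumed here, not taken from the PR.

```python
from fnmatch import fnmatch

# Assumed raw entry for a 2-core host, matching the parser change above
raw_entry = {'load': 0.42, 'cpu0': 0.37, 'cpu1': 0.47}

# The 'cpu*' glob in the cpu_core plot picks up only the per-core keys,
# leaving the aggregate 'load' key to the existing 'cpu' plot
per_core = {k: v for k, v in raw_entry.items() if fnmatch(k, 'cpu*')}
print(per_core)  # {'cpu0': 0.37, 'cpu1': 0.47}
```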
Binary file not shown.
108 changes: 102 additions & 6 deletions unittests/test_plotters.py
@@ -68,10 +68,97 @@
'ytick.minor.size': 0,
}

# Plots that may fail validation
PLOTS_MAY_FAIL = set(('tcp_cwnd', 'tcp_rtt', 'tcp_rtt_cdf',
'tcp_rtt_box_combine', 'tcp_rtt_bar_combine', 'tcp_pacing',
'all_scaled_delivery', 'tcp_delivery_rate', 'tcp_delivery_with_rtt'))
# Some flent test files intentionally lack data for certain plots. This mapping
# lists, per test file, the plots that are expected to be empty so that their
# verification is skipped rather than counted as a failure. The data is missing
# either because the test was not run with the required flag or because the
# file simply predates the feature.
MISSING_PLOTS = {
'test-http-1up.flent.gz': set((
'tcp_cwnd',
'tcp_delivery_rate',
'tcp_pacing',
'tcp_rtt',
'tcp_rtt_bar_combine',
'tcp_rtt_box_combine',
'tcp_rtt_cdf',
)),
'test-http.flent.gz': set((
'tcp_cwnd',
'tcp_delivery_rate',
'tcp_pacing',
'tcp_rtt',
'tcp_rtt_bar_combine',
'tcp_rtt_box_combine',
'tcp_rtt_cdf',
)),
'test-rrul-icmp.flent.gz': set((
'tcp_cwnd',
'tcp_delivery_rate',
'tcp_pacing',
'tcp_rtt',
'tcp_rtt_bar_combine',
'tcp_rtt_box_combine',
'tcp_rtt_cdf',
)),
'test-rrul.flent.gz': set((
'cpu_core',
'cpu_core_bar',
'cpu_core_box',
'tcp_delivery_rate',
'tcp_pacing',
'tcp_rtt_bar_combine',
'tcp_rtt_box_combine',
'tcp_rtt_cdf',
)),
'test-rrul_be-socket_stats.flent.gz': set((
'tcp_rtt_bar_combine',
'tcp_rtt_box_combine',
'tcp_rtt_cdf',
)),
'test-rtt-fair.flent.gz': set((
'tcp_cwnd',
'tcp_delivery_rate',
'tcp_pacing',
'tcp_rtt',
'tcp_rtt_bar_combine',
'tcp_rtt_box_combine',
'tcp_rtt_cdf',
)),
'test-tcp_nup.flent.gz': set((
'tcp_delivery_rate',
'tcp_pacing',
'tcp_rtt_bar_combine',
'tcp_rtt_box_combine',
'tcp_rtt_cdf',
)),
'test-tcp_1up_noping-cpu_stats.flent.gz': set((
'tcp_cwnd',
'tcp_pacing',
'tcp_rtt',
'tcp_rtt_cdf',
'tcp_rtt_box_combine',
'tcp_rtt_bar_combine',
)),
'test-voip-1up.flent.gz': set((
'tcp_cwnd',
'tcp_delivery_rate',
'tcp_pacing',
'tcp_rtt',
'tcp_rtt_bar_combine',
'tcp_rtt_box_combine',
'tcp_rtt_cdf',
)),
'test-voip-rrul.flent.gz': set((
'tcp_cwnd',
'tcp_delivery_rate',
'tcp_pacing',
'tcp_rtt',
'tcp_rtt_bar_combine',
'tcp_rtt_box_combine',
'tcp_rtt_cdf',
)),
}


class PlottersTestCase(ForkingTestCase):

@@ -235,7 +322,11 @@ def runTest(self):
formatter = formatters.new(self.settings)
formatter.format([r])
res, plen = formatter.verify()
if not res and p not in PLOTS_MAY_FAIL:
filename = os.path.basename(self.filename)
if filename in MISSING_PLOTS and p in MISSING_PLOTS[filename]:
continue

if not res:
raise self.failureException(
"Verification of plot '%s' failed: %s" % (p, plen))
except self.failureException:
@@ -279,7 +370,12 @@ def runTest(self):
for p in self.settings.PLOTS.keys():
plot = pool.apply(plot_one, (self.settings, p, results))
res, plen = plot.verify()
if not res and p not in PLOTS_MAY_FAIL:

filename = os.path.basename(self.filename)
if filename in MISSING_PLOTS and p in MISSING_PLOTS[filename]:
continue

if not res:
raise self.failureException(
"Verification of plot '%s' failed: %s" % (p, plen))
