Resource Utilization¶
CPU, memory, disk I/O, and network throughput analysis for PQ Devnet clients.
This notebook examines container-level resource usage using cAdvisor metrics:
- CPU usage (cores) per client
- Memory working set and RSS per client
- Disk read/write throughput and usage
- Network receive/transmit throughput
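For reference, these series are typically derived from standard cAdvisor counters and gauges; the mapping below is a sketch of the queries that usually back such parquet files, not the pipeline's confirmed queries.

# Hypothetical mapping from the parquet files loaded below to the cAdvisor
# series that typically back them (the pipeline's actual queries may differ).
CADVISOR_QUERIES = {
    "container_cpu.parquet": 'rate(container_cpu_usage_seconds_total{name!=""}[5m])',
    "container_memory.parquet": 'container_memory_working_set_bytes{name!=""}',  # plus container_memory_rss
    "container_disk_io.parquet": 'rate(container_fs_reads_bytes_total{name!=""}[5m])',  # plus writes
    "container_network.parquet": 'rate(container_network_receive_bytes_total{name!=""}[5m])',  # plus transmit
}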
import json
from pathlib import Path
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from IPython.display import HTML, display
# Set default renderer for static HTML output
import plotly.io as pio
pio.renderers.default = "notebook"
# Resolve devnet_id (may be injected as a notebook parameter, e.g. via papermill)
DATA_DIR = Path("../data")
devnet_id = globals().get("devnet_id", None)

if devnet_id is None:
    # Fall back to the latest devnet in the manifest
    devnets_path = DATA_DIR / "devnets.json"
    if devnets_path.exists():
        with open(devnets_path) as f:
            devnets = json.load(f).get("devnets", [])
        if devnets:
            devnet_id = devnets[-1]["id"]  # Latest entry
            print(f"Using latest devnet: {devnet_id}")
    else:
        raise ValueError("No devnets.json found. Run 'just detect-devnets' first.")

if devnet_id is None:
    raise ValueError("devnets.json contains no devnets.")

DEVNET_DIR = DATA_DIR / devnet_id
print(f"Loading data from: {DEVNET_DIR}")
# Load devnet metadata
with open(DATA_DIR / "devnets.json") as f:
    devnets_data = json.load(f)

devnet_info = next((d for d in devnets_data["devnets"] if d["id"] == devnet_id), None)
if devnet_info is None:
    raise ValueError(f"Devnet '{devnet_id}' not found in devnets.json")

print(f"Devnet: {devnet_info['id']}")
print(f"Duration: {devnet_info['duration_hours']:.1f} hours")
print(f"Time: {devnet_info['start_time']} to {devnet_info['end_time']}")
print(f"Slots: {devnet_info['start_slot']} \u2192 {devnet_info['end_slot']}")
print(f"Clients: {', '.join(devnet_info['clients'])}")
def format_bytes(val: float) -> str:
"""Format bytes to human-readable units."""
for unit in ["B", "KB", "MB", "GB", "TB"]:
if abs(val) < 1024:
return f"{val:.1f} {unit}"
val /= 1024
return f"{val:.1f} PB"
def format_bytes_per_sec(val: float) -> str:
"""Format bytes/s to human-readable units."""
return format_bytes(val) + "/s"
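A quick sanity check of these helpers (hypothetical values, not pipeline data):

# Hypothetical usage of the formatting helpers above
assert format_bytes(512) == "512.0 B"
assert format_bytes(1536) == "1.5 KB"  # 1536 / 1024
assert format_bytes(3 * 1024**3) == "3.0 GB"
assert format_bytes_per_sec(2 * 1024**2) == "2.0 MB/s"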
Load Data¶
# Load container resource data
data_files = {
"cpu": "container_cpu.parquet",
"memory": "container_memory.parquet",
"disk_io": "container_disk_io.parquet",
"network": "container_network.parquet",
}
# Infrastructure containers irrelevant to devnet client analysis
EXCLUDED_CONTAINERS = {"unknown", "cadvisor", "prometheus", "promtail", "node-exporter", "node_exporter", "grafana"}
# Aggregation strategy per data type:
# - cpu/memory: max (gauge-like, take the active container's value)
# - disk_io/network: sum (per-device/interface rates should be summed)
AGG_STRATEGY = {"cpu": "max", "memory": "max", "disk_io": "sum", "network": "sum"}
# Group-by columns per data type (all have container+timestamp, some have metric)
GROUP_COLS = {
"cpu": ["container", "timestamp"],
"memory": ["container", "metric", "timestamp"],
"disk_io": ["container", "metric", "timestamp"],
"network": ["container", "metric", "timestamp"],
}
dfs = {}
for key, filename in data_files.items():
path = DEVNET_DIR / filename
if path.exists():
df = pd.read_parquet(path)
df = df[~df["container"].isin(EXCLUDED_CONTAINERS)]
# Deduplicate: multiple Prometheus series (interfaces, devices, container
# IDs after restarts) can produce duplicate rows per container+timestamp.
df = df.groupby(GROUP_COLS[key], as_index=False)["value"].agg(AGG_STRATEGY[key])
dfs[key] = df
print(f"{key}: {len(df)} records, containers: {df['container'].nunique()}")
else:
dfs[key] = pd.DataFrame()
print(f"{key}: no data (file not found)")
# Expected container names, derived from devnet metadata (each client runs as "<client>_0")
all_containers = sorted(f"{c}_0" for c in devnet_info["clients"])
n_cols = min(len(all_containers), 2) or 1
n_rows = -(-len(all_containers) // n_cols)  # Ceiling division
print(f"\nAll containers ({len(all_containers)}): {all_containers}")
CPU Usage¶
CPU cores used per container over time, derived from rate(container_cpu_usage_seconds_total[5m]).
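The underlying metric is a cumulative counter of CPU seconds consumed, so rate() divides the counter's increase by the window length to get average cores in use. The same arithmetic in pandas, with hypothetical samples:

# Hypothetical counter samples: 150 CPU-seconds consumed over 300 wall-seconds
raw = pd.DataFrame({
    "timestamp": pd.to_datetime(["2025-01-01 00:00:00", "2025-01-01 00:05:00"]),
    "cpu_seconds_total": [100.0, 250.0],  # cumulative CPU time
})
dt = raw["timestamp"].diff().dt.total_seconds()
cores = raw["cpu_seconds_total"].diff() / dt
print(cores.iloc[1])  # 0.5 cores on average over the window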
cpu_df = dfs["cpu"]
if cpu_df.empty:
print("No CPU data available")
else:
fig = make_subplots(
rows=n_rows, cols=n_cols,
subplot_titles=all_containers,
vertical_spacing=0.12 / max(n_rows - 1, 1) * 2,
horizontal_spacing=0.08,
)
for i, container in enumerate(all_containers):
row = i // n_cols + 1
col = i % n_cols + 1
cdf = cpu_df[cpu_df["container"] == container].sort_values("timestamp")
if not cdf.empty:
fig.add_trace(
go.Scatter(
x=cdf["timestamp"], y=cdf["value"],
name=container, showlegend=False,
line=dict(color="#636EFA"),
),
row=row, col=col,
)
else:
fig.add_trace(
go.Scatter(x=[None], y=[None], showlegend=False, hoverinfo='skip'),
row=row, col=col,
)
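            # Subplot axes are numbered row-major; the first axis has no numeric suffix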
_n = (row - 1) * n_cols + col
_s = "" if _n == 1 else str(_n)
fig.add_annotation(
text="No data available",
xref=f"x{_s} domain", yref=f"y{_s} domain",
x=0.5, y=0.5,
showarrow=False,
font=dict(size=12, color="#999"),
)
fig.update_yaxes(title_text="CPU (cores)", row=row, col=col)
fig.update_layout(
title="CPU Usage per Container",
height=270 * n_rows,
)
fig.show()
# CPU summary statistics
if not cpu_df.empty:
cpu_summary = cpu_df.groupby("container")["value"].agg(
["mean", "max", "min", "std"]
).round(3)
cpu_summary.columns = ["Mean (cores)", "Max (cores)", "Min (cores)", "Std Dev"]
cpu_summary = cpu_summary.sort_index()
display(cpu_summary)
Memory Usage¶
Memory consumption per container, including working set (total usage minus inactive file cache) and RSS (resident set size: anonymous memory only, excluding file-backed pages). The gap between the two approximates each container's active file cache.
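As a worked example of that relationship, using the cgroup definitions above with hypothetical numbers (the gap also includes some kernel memory, so treat it as an upper bound on active cache):

# Hypothetical cgroup accounting for one container, in MB
usage = 900.0          # total memory charged to the container
inactive_file = 200.0  # reclaimable page cache
rss = 550.0            # anonymous memory only

working_set = usage - inactive_file  # 700.0 MB: the working-set series plotted below
cache_gap = working_set - rss        # 150.0 MB: mostly active file cache
print(working_set, cache_gap)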
mem_df = dfs["memory"]
if mem_df.empty:
print("No memory data available")
else:
# Combine working_set and rss for per-container comparison
mem_plot_df = mem_df[mem_df["metric"].isin(["working_set", "rss"])].copy()
if not mem_plot_df.empty:
mem_plot_df["value_mb"] = mem_plot_df["value"] / (1024 * 1024)
fig = make_subplots(
rows=n_rows, cols=n_cols,
subplot_titles=all_containers,
vertical_spacing=0.12 / max(n_rows - 1, 1) * 2,
horizontal_spacing=0.08,
)
colors = {"working_set": "#636EFA", "rss": "#EF553B"}
legend_added = set()
for i, container in enumerate(all_containers):
row = i // n_cols + 1
col = i % n_cols + 1
cdf = mem_plot_df[mem_plot_df["container"] == container]
if not cdf.empty:
for metric in ["working_set", "rss"]:
mdf = cdf[cdf["metric"] == metric].sort_values("timestamp")
if mdf.empty:
continue
show_legend = metric not in legend_added
legend_added.add(metric)
fig.add_trace(
go.Scatter(
x=mdf["timestamp"], y=mdf["value_mb"],
name=metric, legendgroup=metric,
showlegend=show_legend,
line=dict(color=colors[metric]),
),
row=row, col=col,
)
else:
fig.add_trace(
go.Scatter(x=[None], y=[None], showlegend=False, hoverinfo='skip'),
row=row, col=col,
)
_n = (row - 1) * n_cols + col
_s = "" if _n == 1 else str(_n)
fig.add_annotation(
text="No data available",
xref=f"x{_s} domain", yref=f"y{_s} domain",
x=0.5, y=0.5,
showarrow=False,
font=dict(size=12, color="#999"),
)
fig.update_yaxes(title_text="MB", row=row, col=col)
fig.update_layout(
title="Memory Usage per Container (Working Set vs RSS)",
height=270 * n_rows,
)
fig.show()
# Memory summary
if not mem_df.empty:
ws_df = mem_df[mem_df["metric"] == "working_set"]
if not ws_df.empty:
mem_summary = ws_df.groupby("container")["value"].agg(["mean", "max"]).reset_index()
mem_summary["Mean"] = mem_summary["mean"].apply(format_bytes)
mem_summary["Peak"] = mem_summary["max"].apply(format_bytes)
mem_summary = mem_summary.rename(columns={"container": "Container"})[["Container", "Mean", "Peak"]]
mem_summary = mem_summary.sort_values("Container")
display(mem_summary.set_index("Container"))
Disk I/O¶
Disk read/write throughput and total disk usage per container.
disk_df = dfs["disk_io"]
if disk_df.empty:
print("No disk I/O data available")
else:
# Read/write throughput per container
throughput_df = disk_df[disk_df["metric"].isin(["read_throughput", "write_throughput"])].copy()
if not throughput_df.empty:
throughput_df["value_mb"] = throughput_df["value"] / (1024 * 1024)
fig = make_subplots(
rows=n_rows, cols=n_cols,
subplot_titles=all_containers,
vertical_spacing=0.12 / max(n_rows - 1, 1) * 2,
horizontal_spacing=0.08,
)
colors = {"read_throughput": "#636EFA", "write_throughput": "#EF553B"}
legend_added = set()
for i, container in enumerate(all_containers):
row = i // n_cols + 1
col = i % n_cols + 1
cdf = throughput_df[throughput_df["container"] == container]
if not cdf.empty:
for metric in ["read_throughput", "write_throughput"]:
mdf = cdf[cdf["metric"] == metric].sort_values("timestamp")
if mdf.empty:
continue
label = metric.replace("_throughput", "")
show_legend = metric not in legend_added
legend_added.add(metric)
fig.add_trace(
go.Scatter(
x=mdf["timestamp"], y=mdf["value_mb"],
name=label, legendgroup=metric,
showlegend=show_legend,
line=dict(color=colors[metric]),
),
row=row, col=col,
)
else:
fig.add_trace(
go.Scatter(x=[None], y=[None], showlegend=False, hoverinfo='skip'),
row=row, col=col,
)
_n = (row - 1) * n_cols + col
_s = "" if _n == 1 else str(_n)
fig.add_annotation(
text="No data available",
xref=f"x{_s} domain", yref=f"y{_s} domain",
x=0.5, y=0.5,
showarrow=False,
font=dict(size=12, color="#999"),
)
fig.update_yaxes(title_text="MB/s", row=row, col=col)
fig.update_layout(
title="Disk I/O Throughput per Container (Read vs Write)",
height=270 * n_rows,
)
fig.show()
# Disk usage over time per container
if not disk_df.empty:
usage_df = disk_df[disk_df["metric"] == "disk_usage"].copy()
if not usage_df.empty:
usage_df["value_gb"] = usage_df["value"] / (1024 * 1024 * 1024)
fig = make_subplots(
rows=n_rows, cols=n_cols,
subplot_titles=all_containers,
vertical_spacing=0.12 / max(n_rows - 1, 1) * 2,
horizontal_spacing=0.08,
)
for i, container in enumerate(all_containers):
row = i // n_cols + 1
col = i % n_cols + 1
cdf = usage_df[usage_df["container"] == container].sort_values("timestamp")
if not cdf.empty:
fig.add_trace(
go.Scatter(
x=cdf["timestamp"], y=cdf["value_gb"],
name=container, showlegend=False,
line=dict(color="#636EFA"),
),
row=row, col=col,
)
else:
fig.add_trace(
go.Scatter(x=[None], y=[None], showlegend=False, hoverinfo='skip'),
row=row, col=col,
)
_n = (row - 1) * n_cols + col
_s = "" if _n == 1 else str(_n)
fig.add_annotation(
text="No data available",
xref=f"x{_s} domain", yref=f"y{_s} domain",
x=0.5, y=0.5,
showarrow=False,
font=dict(size=12, color="#999"),
)
fig.update_yaxes(title_text="GB", row=row, col=col)
fig.update_layout(
title="Disk Usage per Container",
height=270 * n_rows,
)
fig.show()
Network Throughput¶
Network receive (rx) and transmit (tx) throughput per container.
net_df = dfs["network"]
if net_df.empty:
    print("No network data available")
else:
    net_df = net_df.copy()  # Match the other cells: avoid mutating the shared frame
    net_df["value_mb"] = net_df["value"] / (1024 * 1024)
fig = make_subplots(
rows=n_rows, cols=n_cols,
subplot_titles=all_containers,
vertical_spacing=0.12 / max(n_rows - 1, 1) * 2,
horizontal_spacing=0.08,
)
colors = {"rx": "#636EFA", "tx": "#EF553B"}
legend_added = set()
for i, container in enumerate(all_containers):
row = i // n_cols + 1
col = i % n_cols + 1
cdf = net_df[net_df["container"] == container]
if not cdf.empty:
for metric in ["rx", "tx"]:
mdf = cdf[cdf["metric"] == metric].sort_values("timestamp")
if mdf.empty:
continue
show_legend = metric not in legend_added
legend_added.add(metric)
fig.add_trace(
go.Scatter(
x=mdf["timestamp"], y=mdf["value_mb"],
name=metric, legendgroup=metric,
showlegend=show_legend,
line=dict(color=colors[metric]),
),
row=row, col=col,
)
else:
fig.add_trace(
go.Scatter(x=[None], y=[None], showlegend=False, hoverinfo='skip'),
row=row, col=col,
)
_n = (row - 1) * n_cols + col
_s = "" if _n == 1 else str(_n)
fig.add_annotation(
text="No data available",
xref=f"x{_s} domain", yref=f"y{_s} domain",
x=0.5, y=0.5,
showarrow=False,
font=dict(size=12, color="#999"),
)
fig.update_yaxes(title_text="MB/s", row=row, col=col)
fig.update_layout(
title="Network Throughput per Container (RX vs TX)",
height=270 * n_rows,
)
fig.show()
Summary¶
Peak and average resource usage per container across the devnet.
# Build summary table across all resource types
summary_rows = []
# CPU
if not cpu_df.empty:
for container, group in cpu_df.groupby("container"):
summary_rows.append({
"Container": container,
"Avg CPU (cores)": f"{group['value'].mean():.3f}",
"Peak CPU (cores)": f"{group['value'].max():.3f}",
})
# Memory
if not mem_df.empty:
ws_df = mem_df[mem_df["metric"] == "working_set"]
for container, group in ws_df.groupby("container"):
existing = next((r for r in summary_rows if r["Container"] == container), None)
if existing is None:
existing = {"Container": container}
summary_rows.append(existing)
existing["Avg Memory"] = format_bytes(group["value"].mean())
existing["Peak Memory"] = format_bytes(group["value"].max())
# Network
if not net_df.empty:
for container, group in net_df.groupby("container"):
existing = next((r for r in summary_rows if r["Container"] == container), None)
if existing is None:
existing = {"Container": container}
summary_rows.append(existing)
rx = group[group["metric"] == "rx"]["value"]
tx = group[group["metric"] == "tx"]["value"]
if not rx.empty:
existing["Avg RX"] = format_bytes_per_sec(rx.mean())
if not tx.empty:
existing["Avg TX"] = format_bytes_per_sec(tx.mean())
if summary_rows:
summary_df = pd.DataFrame(summary_rows).set_index("Container").sort_index().fillna("-")
display(summary_df)
else:
print("No resource data available for summary.")
print(f"Devnet: {devnet_id}")
if devnet_info:
print(f"Duration: {devnet_info['duration_hours']:.1f} hours")
print(f"Containers analyzed: {cpu_df['container'].nunique() if not cpu_df.empty else 0}")