Resource Utilization

Resource Utilization

CPU, memory, disk I/O, and network throughput analysis for PQ Devnet clients.

This notebook examines container-level resource usage using cAdvisor metrics:

  • CPU usage (cores) per client
  • Memory working set and RSS per client
  • Disk read/write throughput and usage
  • Network receive/transmit throughput
Show code
# Standard library.
import json
from pathlib import Path

# Third-party: data wrangling and plotting.
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from IPython.display import HTML, display

# Set default renderer for static HTML output
import plotly.io as pio
pio.renderers.default = "notebook"
Show code
# Resolve devnet_id (injected as a parameter in an earlier cell; may be None).
DATA_DIR = Path("../data")

if devnet_id is None:
    # Fall back to the most recent devnet listed in the manifest.
    devnets_path = DATA_DIR / "devnets.json"
    if not devnets_path.exists():
        raise ValueError("No devnets.json found. Run 'just detect-devnets' first.")
    with open(devnets_path) as f:
        devnets = json.load(f).get("devnets", [])
    if not devnets:
        # Previously this case fell through silently, leaving devnet_id as
        # None and producing a confusing TypeError on DATA_DIR / None below.
        raise ValueError("devnets.json contains no devnets. Run 'just detect-devnets' first.")
    devnet_id = devnets[-1]["id"]  # Manifest order is chronological; last is latest.
    print(f"Using latest devnet: {devnet_id}")

DEVNET_DIR = DATA_DIR / devnet_id
print(f"Loading data from: {DEVNET_DIR}")
Loading data from: ../data/pqdevnet-20260203T1634Z
Show code
# Load devnet metadata and locate the entry matching the resolved devnet_id.
with open(DATA_DIR / "devnets.json") as f:
    devnets_data = json.load(f)

devnet_info = None
for entry in devnets_data["devnets"]:
    if entry["id"] == devnet_id:
        devnet_info = entry
        break

if devnet_info:
    print(f"Devnet: {devnet_info['id']}")
    print(f"Duration: {devnet_info['duration_hours']:.1f} hours")
    print(f"Time: {devnet_info['start_time']} to {devnet_info['end_time']}")
    print(f"Slots: {devnet_info['start_slot']} \u2192 {devnet_info['end_slot']}")
    print(f"Clients: {', '.join(devnet_info['clients'])}")
Devnet: pqdevnet-20260203T1634Z
Duration: 22.8 hours
Time: 2026-02-03T16:34:46+00:00 to 2026-02-04T15:21:20+00:00
Slots: 0 → 20389
Clients: ethlambda, grandine, lantern, qlean, ream, zeam
Show code
def format_bytes(val: float) -> str:
    """Format a byte count as a human-readable string, e.g. ``1.5 GB``.

    Scales through B/KB/MB/GB/TB (1024 steps); anything larger is PB.
    Negative values keep their sign and scale by magnitude.
    """
    magnitude = abs(val)
    for unit in ("B", "KB", "MB", "GB", "TB"):
        if magnitude < 1024:
            return f"{val:.1f} {unit}"
        val /= 1024
        magnitude /= 1024
    return f"{val:.1f} PB"


def format_bytes_per_sec(val: float) -> str:
    """Format a bytes-per-second rate as a human-readable string, e.g. ``1.5 MB/s``."""
    return f"{format_bytes(val)}/s"

Load Data

Show code
# Load container resource data exported from cAdvisor, one parquet per type.
data_files = {
    "cpu": "container_cpu.parquet",
    "memory": "container_memory.parquet",
    "disk_io": "container_disk_io.parquet",
    "network": "container_network.parquet",
}

# Infrastructure containers irrelevant to devnet client analysis.
# "loki" added: it is part of the logging stack (promtail, already excluded,
# ships logs to loki) and previously leaked into the memory/summary tables.
EXCLUDED_CONTAINERS = {
    "unknown", "cadvisor", "prometheus", "promtail", "loki",
    "node-exporter", "node_exporter", "grafana",
}

# Aggregation strategy per data type:
# - cpu/memory: max (gauge-like, take the active container's value)
# - disk_io/network: sum (per-device/interface rates should be summed)
AGG_STRATEGY = {"cpu": "max", "memory": "max", "disk_io": "sum", "network": "sum"}

# Group-by columns per data type (all have container+timestamp, some have metric)
GROUP_COLS = {
    "cpu": ["container", "timestamp"],
    "memory": ["container", "metric", "timestamp"],
    "disk_io": ["container", "metric", "timestamp"],
    "network": ["container", "metric", "timestamp"],
}

dfs = {}
for key, filename in data_files.items():
    path = DEVNET_DIR / filename
    if path.exists():
        df = pd.read_parquet(path)
        df = df[~df["container"].isin(EXCLUDED_CONTAINERS)]
        # Deduplicate: multiple Prometheus series (interfaces, devices, container
        # IDs after restarts) can produce duplicate rows per container+timestamp.
        df = df.groupby(GROUP_COLS[key], as_index=False)["value"].agg(AGG_STRATEGY[key])
        dfs[key] = df
        print(f"{key}: {len(df)} records, containers: {df['container'].nunique()}")
    else:
        # Missing files are tolerated: downstream cells check for .empty.
        dfs[key] = pd.DataFrame()
        print(f"{key}: no data (file not found)")

# Unified container list from devnet metadata (includes all containers via cAdvisor)
all_containers = sorted(f"{c}_0" for c in devnet_info["clients"])
# Subplot grid: up to 2 columns; guard against an empty client list so the
# ceiling division below cannot divide by zero.
n_cols = max(min(len(all_containers), 2), 1)
n_rows = -(-len(all_containers) // n_cols)  # ceiling division
print(f"\nAll containers ({len(all_containers)}): {all_containers}")
cpu: 1639 records, containers: 6
memory: 16498 records, containers: 7
disk_io: 2732 records, containers: 5
network: 3278 records, containers: 6

All containers (6): ['ethlambda_0', 'grandine_0', 'lantern_0', 'qlean_0', 'ream_0', 'zeam_0']

CPU Usage

CPU cores used per container over time, derived from rate(container_cpu_usage_seconds_total[5m]).

Show code
# Per-container CPU usage (cores) over time, one subplot per container.
cpu_df = dfs["cpu"]

if cpu_df.empty:
    print("No CPU data available")
else:
    fig = make_subplots(
        rows=n_rows, cols=n_cols,
        subplot_titles=all_containers,
        # Normalized per-gap spacing; scale by row count so the absolute gap
        # stays roughly constant as the grid grows.
        vertical_spacing=0.12 / max(n_rows - 1, 1) * 2,
        horizontal_spacing=0.08,
    )

    for i, container in enumerate(all_containers):
        # Map the flat container index to 1-based (row, col) grid coordinates.
        row = i // n_cols + 1
        col = i % n_cols + 1
        cdf = cpu_df[cpu_df["container"] == container].sort_values("timestamp")
        if not cdf.empty:
            fig.add_trace(
                go.Scatter(
                    x=cdf["timestamp"], y=cdf["value"],
                    name=container, showlegend=False,
                    line=dict(color="#636EFA"),
                ),
                row=row, col=col,
            )
        else:
            # Add an empty trace so the subplot axes still render, then
            # annotate the panel as having no data.
            fig.add_trace(
                go.Scatter(x=[None], y=[None], showlegend=False, hoverinfo='skip'),
                row=row, col=col,
            )
            # Plotly names subplot axes "x", "x2", "x3", ... — the first
            # subplot's axis has no numeric suffix.
            _n = (row - 1) * n_cols + col
            _s = "" if _n == 1 else str(_n)
            fig.add_annotation(
                text="No data available",
                xref=f"x{_s} domain", yref=f"y{_s} domain",
                x=0.5, y=0.5,
                showarrow=False,
                font=dict(size=12, color="#999"),
            )
        fig.update_yaxes(title_text="CPU (cores)", row=row, col=col)

    fig.update_layout(
        title="CPU Usage per Container",
        height=270 * n_rows,
    )
    fig.show()
Show code
# Summary statistics for per-container CPU usage (cores).
if not cpu_df.empty:
    stats = cpu_df.groupby("container")["value"].agg(["mean", "max", "min", "std"])
    cpu_summary = (
        stats.round(3)
        .rename(columns={
            "mean": "Mean (cores)",
            "max": "Max (cores)",
            "min": "Min (cores)",
            "std": "Std Dev",
        })
        .sort_index()
    )
    display(cpu_summary)
Mean (cores) Max (cores) Min (cores) Std Dev
container
ethlambda_0 0.210 0.835 0.000 0.285
grandine_0 0.064 0.077 0.017 0.005
lantern_0 1.168 3.452 0.342 0.408
qlean_0 0.284 0.456 0.096 0.087
ream_0 0.788 1.162 0.164 0.278
zeam_0 0.121 0.396 0.065 0.053

Memory Usage

Memory consumption per container, including working set (total usage minus inactive file cache) and RSS (Resident Set Size -- anonymous memory only, excluding file-backed pages). The gap between the two shows active file cache usage.

Show code
# Per-container memory usage over time: working set vs RSS, one subplot each.
mem_df = dfs["memory"]

if mem_df.empty:
    print("No memory data available")
else:
    # Combine working_set and rss for per-container comparison
    # (.copy() so the derived value_mb column does not mutate dfs["memory"]).
    mem_plot_df = mem_df[mem_df["metric"].isin(["working_set", "rss"])].copy()
    if not mem_plot_df.empty:
        # Bytes -> MB for plotting.
        mem_plot_df["value_mb"] = mem_plot_df["value"] / (1024 * 1024)

        fig = make_subplots(
            rows=n_rows, cols=n_cols,
            subplot_titles=all_containers,
            vertical_spacing=0.12 / max(n_rows - 1, 1) * 2,
            horizontal_spacing=0.08,
        )

        colors = {"working_set": "#636EFA", "rss": "#EF553B"}
        # Show each metric only once in the legend; legendgroup links the
        # per-subplot traces so one legend entry toggles all of them.
        legend_added = set()

        for i, container in enumerate(all_containers):
            # 1-based (row, col) grid coordinates for this container.
            row = i // n_cols + 1
            col = i % n_cols + 1
            cdf = mem_plot_df[mem_plot_df["container"] == container]
            if not cdf.empty:
                for metric in ["working_set", "rss"]:
                    mdf = cdf[cdf["metric"] == metric].sort_values("timestamp")
                    if mdf.empty:
                        continue
                    show_legend = metric not in legend_added
                    legend_added.add(metric)
                    fig.add_trace(
                        go.Scatter(
                            x=mdf["timestamp"], y=mdf["value_mb"],
                            name=metric, legendgroup=metric,
                            showlegend=show_legend,
                            line=dict(color=colors[metric]),
                        ),
                        row=row, col=col,
                    )
            else:
                # Empty trace keeps the subplot axes rendered; annotate as empty.
                fig.add_trace(
                    go.Scatter(x=[None], y=[None], showlegend=False, hoverinfo='skip'),
                    row=row, col=col,
                )
                # Plotly subplot axes are named "x", "x2", ... (first has no suffix).
                _n = (row - 1) * n_cols + col
                _s = "" if _n == 1 else str(_n)
                fig.add_annotation(
                    text="No data available",
                    xref=f"x{_s} domain", yref=f"y{_s} domain",
                    x=0.5, y=0.5,
                    showarrow=False,
                    font=dict(size=12, color="#999"),
                )
            fig.update_yaxes(title_text="MB", row=row, col=col)

        fig.update_layout(
            title="Memory Usage per Container (Working Set vs RSS)",
            height=270 * n_rows,
        )
        fig.show()
Show code
# Working-set memory summary (mean and peak) per container.
if not mem_df.empty:
    ws_df = mem_df[mem_df["metric"] == "working_set"]
    if not ws_df.empty:
        agg = ws_df.groupby("container")["value"].agg(["mean", "max"]).reset_index()
        mem_summary = pd.DataFrame({
            "Container": agg["container"],
            "Mean": agg["mean"].apply(format_bytes),
            "Peak": agg["max"].apply(format_bytes),
        }).sort_values("Container")
        display(mem_summary.set_index("Container"))
Mean Peak
Container
ethlambda_0 4.0 GB 14.3 GB
grandine_0 3.1 GB 7.9 GB
lantern_0 7.8 GB 8.3 GB
loki 11.6 MB 24.0 MB
qlean_0 1.6 GB 2.4 GB
ream_0 4.4 GB 11.2 GB
zeam_0 5.4 GB 8.6 GB

Disk I/O

Disk read/write throughput and total disk usage per container.

Show code
# Per-container disk read/write throughput over time, one subplot each.
disk_df = dfs["disk_io"]

if disk_df.empty:
    print("No disk I/O data available")
else:
    # Read/write throughput per container
    # (.copy() so the derived value_mb column does not mutate dfs["disk_io"]).
    throughput_df = disk_df[disk_df["metric"].isin(["read_throughput", "write_throughput"])].copy()
    if not throughput_df.empty:
        # Bytes/s -> MB/s for plotting.
        throughput_df["value_mb"] = throughput_df["value"] / (1024 * 1024)

        fig = make_subplots(
            rows=n_rows, cols=n_cols,
            subplot_titles=all_containers,
            vertical_spacing=0.12 / max(n_rows - 1, 1) * 2,
            horizontal_spacing=0.08,
        )

        colors = {"read_throughput": "#636EFA", "write_throughput": "#EF553B"}
        # Show each metric once in the legend; legendgroup links traces across subplots.
        legend_added = set()

        for i, container in enumerate(all_containers):
            # 1-based (row, col) grid coordinates for this container.
            row = i // n_cols + 1
            col = i % n_cols + 1
            cdf = throughput_df[throughput_df["container"] == container]
            if not cdf.empty:
                for metric in ["read_throughput", "write_throughput"]:
                    mdf = cdf[cdf["metric"] == metric].sort_values("timestamp")
                    if mdf.empty:
                        continue
                    # Legend/trace label: "read" / "write".
                    label = metric.replace("_throughput", "")
                    show_legend = metric not in legend_added
                    legend_added.add(metric)
                    fig.add_trace(
                        go.Scatter(
                            x=mdf["timestamp"], y=mdf["value_mb"],
                            name=label, legendgroup=metric,
                            showlegend=show_legend,
                            line=dict(color=colors[metric]),
                        ),
                        row=row, col=col,
                    )
            else:
                # Empty trace keeps the subplot axes rendered; annotate as empty.
                fig.add_trace(
                    go.Scatter(x=[None], y=[None], showlegend=False, hoverinfo='skip'),
                    row=row, col=col,
                )
                # Plotly subplot axes are named "x", "x2", ... (first has no suffix).
                _n = (row - 1) * n_cols + col
                _s = "" if _n == 1 else str(_n)
                fig.add_annotation(
                    text="No data available",
                    xref=f"x{_s} domain", yref=f"y{_s} domain",
                    x=0.5, y=0.5,
                    showarrow=False,
                    font=dict(size=12, color="#999"),
                )
            fig.update_yaxes(title_text="MB/s", row=row, col=col)

        fig.update_layout(
            title="Disk I/O Throughput per Container (Read vs Write)",
            height=270 * n_rows,
        )
        fig.show()
Show code
# Disk usage over time per container
# (.copy() so the derived value_gb column does not mutate dfs["disk_io"]).
if not disk_df.empty:
    usage_df = disk_df[disk_df["metric"] == "disk_usage"].copy()
    if not usage_df.empty:
        # Bytes -> GB for plotting.
        usage_df["value_gb"] = usage_df["value"] / (1024 * 1024 * 1024)

        fig = make_subplots(
            rows=n_rows, cols=n_cols,
            subplot_titles=all_containers,
            vertical_spacing=0.12 / max(n_rows - 1, 1) * 2,
            horizontal_spacing=0.08,
        )

        for i, container in enumerate(all_containers):
            # 1-based (row, col) grid coordinates for this container.
            row = i // n_cols + 1
            col = i % n_cols + 1
            cdf = usage_df[usage_df["container"] == container].sort_values("timestamp")
            if not cdf.empty:
                fig.add_trace(
                    go.Scatter(
                        x=cdf["timestamp"], y=cdf["value_gb"],
                        name=container, showlegend=False,
                        line=dict(color="#636EFA"),
                    ),
                    row=row, col=col,
                )
            else:
                # Empty trace keeps the subplot axes rendered; annotate as empty.
                fig.add_trace(
                    go.Scatter(x=[None], y=[None], showlegend=False, hoverinfo='skip'),
                    row=row, col=col,
                )
                # Plotly subplot axes are named "x", "x2", ... (first has no suffix).
                _n = (row - 1) * n_cols + col
                _s = "" if _n == 1 else str(_n)
                fig.add_annotation(
                    text="No data available",
                    xref=f"x{_s} domain", yref=f"y{_s} domain",
                    x=0.5, y=0.5,
                    showarrow=False,
                    font=dict(size=12, color="#999"),
                )
            fig.update_yaxes(title_text="GB", row=row, col=col)

        fig.update_layout(
            title="Disk Usage per Container",
            height=270 * n_rows,
        )
        fig.show()

Network Throughput

Network receive (rx) and transmit (tx) throughput per container.

Show code
# Per-container network RX/TX throughput over time, one subplot each.
# .copy() so the derived value_mb column does not mutate the shared
# dfs["network"] frame — matches the pattern used by the memory/disk cells.
net_df = dfs["network"].copy()

if net_df.empty:
    print("No network data available")
else:
    # Bytes/s -> MB/s for plotting.
    net_df["value_mb"] = net_df["value"] / (1024 * 1024)

    fig = make_subplots(
        rows=n_rows, cols=n_cols,
        subplot_titles=all_containers,
        vertical_spacing=0.12 / max(n_rows - 1, 1) * 2,
        horizontal_spacing=0.08,
    )

    colors = {"rx": "#636EFA", "tx": "#EF553B"}
    # Show each metric once in the legend; legendgroup links traces across subplots.
    legend_added = set()

    for i, container in enumerate(all_containers):
        # 1-based (row, col) grid coordinates for this container.
        row = i // n_cols + 1
        col = i % n_cols + 1
        cdf = net_df[net_df["container"] == container]
        if not cdf.empty:
            for metric in ["rx", "tx"]:
                mdf = cdf[cdf["metric"] == metric].sort_values("timestamp")
                if mdf.empty:
                    continue
                show_legend = metric not in legend_added
                legend_added.add(metric)
                fig.add_trace(
                    go.Scatter(
                        x=mdf["timestamp"], y=mdf["value_mb"],
                        name=metric, legendgroup=metric,
                        showlegend=show_legend,
                        line=dict(color=colors[metric]),
                    ),
                    row=row, col=col,
                )
        else:
            # Empty trace keeps the subplot axes rendered; annotate as empty.
            fig.add_trace(
                go.Scatter(x=[None], y=[None], showlegend=False, hoverinfo='skip'),
                row=row, col=col,
            )
            # Plotly subplot axes are named "x", "x2", ... (first has no suffix).
            _n = (row - 1) * n_cols + col
            _s = "" if _n == 1 else str(_n)
            fig.add_annotation(
                text="No data available",
                xref=f"x{_s} domain", yref=f"y{_s} domain",
                x=0.5, y=0.5,
                showarrow=False,
                font=dict(size=12, color="#999"),
            )
        fig.update_yaxes(title_text="MB/s", row=row, col=col)

    fig.update_layout(
        title="Network Throughput per Container (RX vs TX)",
        height=270 * n_rows,
    )
    fig.show()

Summary

Peak and average resource usage per container across the devnet.

Show code
# Build summary table across all resource types.
# Rows are keyed by container in an insertion-ordered dict (replaces the
# previous O(n^2) linear search through a list per container); each resource
# section fills in its own columns, so partially-covered containers show "-".
summary_by_container: dict[str, dict] = {}


def _row(container: str) -> dict:
    """Return the summary row for *container*, creating it on first use."""
    return summary_by_container.setdefault(container, {"Container": container})


# CPU: average and peak cores.
if not cpu_df.empty:
    for container, group in cpu_df.groupby("container"):
        row = _row(container)
        row["Avg CPU (cores)"] = f"{group['value'].mean():.3f}"
        row["Peak CPU (cores)"] = f"{group['value'].max():.3f}"

# Memory: working-set average and peak.
if not mem_df.empty:
    ws_df = mem_df[mem_df["metric"] == "working_set"]
    for container, group in ws_df.groupby("container"):
        row = _row(container)
        row["Avg Memory"] = format_bytes(group["value"].mean())
        row["Peak Memory"] = format_bytes(group["value"].max())

# Network: average receive/transmit rates.
if not net_df.empty:
    for container, group in net_df.groupby("container"):
        row = _row(container)
        rx = group[group["metric"] == "rx"]["value"]
        tx = group[group["metric"] == "tx"]["value"]
        if not rx.empty:
            row["Avg RX"] = format_bytes_per_sec(rx.mean())
        if not tx.empty:
            row["Avg TX"] = format_bytes_per_sec(tx.mean())

if summary_by_container:
    summary_df = pd.DataFrame(list(summary_by_container.values())).set_index("Container").sort_index().fillna("-")
    display(summary_df)
else:
    print("No resource data available for summary.")
Avg CPU (cores) Peak CPU (cores) Avg Memory Peak Memory Avg RX Avg TX
Container
ethlambda_0 0.210 0.835 4.0 GB 14.3 GB 22.7 KB/s 59.7 KB/s
grandine_0 0.064 0.077 3.1 GB 7.9 GB 16.2 KB/s 9.3 KB/s
lantern_0 1.168 3.452 7.8 GB 8.3 GB 33.7 KB/s 34.3 KB/s
loki - - 11.6 MB 24.0 MB - -
qlean_0 0.284 0.456 1.6 GB 2.4 GB 50.1 KB/s 33.7 KB/s
ream_0 0.788 1.162 4.4 GB 11.2 GB 32.8 KB/s 57.5 KB/s
zeam_0 0.121 0.396 5.4 GB 8.6 GB 22.8 KB/s 44.0 KB/s
Show code
# Closing recap of the analyzed devnet.
print(f"Devnet: {devnet_id}")
if devnet_info:
    print(f"Duration: {devnet_info['duration_hours']:.1f} hours")
n_analyzed = 0 if cpu_df.empty else cpu_df["container"].nunique()
print(f"Containers analyzed: {n_analyzed}")
Devnet: pqdevnet-20260203T1634Z
Duration: 22.8 hours
Containers analyzed: 6