diff --git a/README.md b/README.md
index 922c479d..7fac6641 100644
--- a/README.md
+++ b/README.md
@@ -88,6 +88,7 @@ Reference runners live under `runners/` (see each folder’s `meta.json`). The t
 | Hardware | Runner folder | Framework | A | B | C | D | E | F | G |
 |---|---|---|:-:|:-:|:-:|:-:|:-:|:-:|:-:|
 | NVIDIA GPU | `nvidia_sglang_c43a8309` | SGLang | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ |
+| NVIDIA GPU | `nvidia_vllm020_0f6c56e4` | vLLM | ⋯ | ⋯ | ⋯ | ⋯ | ⋯ | ⋯ | ⋯ |
 | NVIDIA GPU | `nvidia_vllm_47f5d58e` | vLLM | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ |
 | NVIDIA V100 (SM70) | `nvidia_onecat_vllm_12a253c2` | 1Cat-vLLM | ⋯ | ⋯ | ⋯ | ⋯ | ⋯ | — | ⋯ |
 | AMD GPU | `amd_vllm_rocm_6c18cd8f` | vLLM-ROCm | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ |
diff --git a/configs/runner_configs/runner_nvidia_vllm020_0f6c56e4.yaml.example b/configs/runner_configs/runner_nvidia_vllm020_0f6c56e4.yaml.example
new file mode 100644
index 00000000..906f7d7c
--- /dev/null
+++ b/configs/runner_configs/runner_nvidia_vllm020_0f6c56e4.yaml.example
@@ -0,0 +1,82 @@
+# AccelMark runner config — nvidia_vllm020_0f6c56e4 (vLLM 0.20 on NVIDIA)
+#
+# Copy this file to runner_nvidia_vllm020_0f6c56e4.yaml (remove .example suffix)
+# and edit as needed for your hardware. The actual .yaml is gitignored.
+#
+# These settings adapt the runner to your hardware environment.
+# They are recorded in result.json task.extra_config for transparency
+# but are NOT part of the benchmark identity (not hashed into run_id).
+#
+# Merge priority: CLI flags > suite-specific > global defaults > runner defaults
+
+# ── Global defaults (apply to all suites) ─────────────────────────────────────
+
+# Tensor parallel size — number of GPUs to use (default: 1)
+tensor_parallel_size: 1
+
+# Disable CUDAGraph/compilation. Required for pre-Ampere GPUs (V100, T4).
+# Set to true if you encounter CUDA graph errors on older hardware.
+enforce_eager: false
+
+# Maximum number of sequences in a batch (default: 512).
+# Reduce on low-memory GPUs: 128 for 16 GB, 64 for 12 GB or less.
+max_num_seqs: 512
+
+# Fraction of each GPU's memory that vLLM may use in total — model weights,
+# activations, and KV cache (default: 0.90).
+# Reduce if you get OOM errors: try 0.80 for tighter memory budgets.
+gpu_memory_utilization: 0.90
+
+# Pass-through kwargs forwarded directly to vLLM LLM() / AsyncEngineArgs().
+# Use for any vLLM setting not listed above. See vLLM docs for valid keys:
+# https://docs.vllm.ai/en/latest/api/vllm/engine/arg_utils.html
+#
+# 0.20-specific knobs you may want to set (uncomment as needed):
+# engine_kwargs:
+#   # FlashAttention 4 is the 0.20 default for MLA prefill; uncomment to pin
+#   # for reproducibility or to force back to FA3 / Triton fallback.
+#   # attention_backend: FLASH_ATTN_4
+#
+#   # Model Runner V2 + new CUDA-graph paths:
+#   # compilation_config:
+#   #   cudagraph_mode: full_and_piecewise
+#
+#   # TurboQuant 2-bit KV cache (suite_C, --precision turboquant):
+#   # kv_cache_dtype: turboquant
+#
+#   swap_space: 8
+#   max_seq_len_to_capture: 4096
+
+# ── Suite-specific overrides ───────────────────────────────────────────────────
+# Keys here override the global defaults above for a specific suite only.
+# Only the section matching the current suite is used — other suite sections
+# are never loaded or recorded.
+
+suites:
+  suite_C:
+    # Quantization suite (FP8/W8A8/W8A16 via compressed-tensors).
+    # enforce_eager disables CUDA graphs — required for W8A8/W8A16 accuracy on vLLM 0.20.
+    # Note: FP8 still fails on Ampere (A100, sm < 8.9) because vLLM 0.20 falls back to a
+    # broken Marlin weight-only FP8 path. Use H100+ for Suite C FP8, or the vLLM 0.7.3 runner on A100.
+    enforce_eager: true
+
+  suite_D:
+    # Long-context suite — reduce batch size and lower gpu_memory_utilization
+    # (0.85 vs. the 0.90 global default) to leave more GPU memory headroom.
+    max_num_seqs: 64
+    gpu_memory_utilization: 0.85
+
+  suite_F:
+    # Consumer/edge GPU — enforce_eager often needed for pre-Ampere chips
+    # enforce_eager: true
+    max_num_seqs: 128
+
+# ── Speculative decoding (suite_A extra scenario) ─────────────────────────────
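+# To run the speculative scenario, uncomment the suite_A entry below and nest it
+# under the existing `suites:` key above. Do not add a second top-level `suites:`
+# key: duplicate YAML keys are either rejected or silently replace the earlier
+# suite overrides, depending on the loader.
+# The draft model runs on the same GPU as the target model.
+# Speculative decoding is configured via vLLM engine_kwargs.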
+#
+# suites:
+#   suite_A:
+#     engine_kwargs:
+#       speculative_model: "meta-llama/Llama-3.2-1B-Instruct"
+#       num_speculative_tokens: 4
+#       speculative_draft_tensor_parallel_size: 1
diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_vllm020_0f6c56e4_8f83bfab/accuracy/accuracy.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_vllm020_0f6c56e4_8f83bfab/accuracy/accuracy.json
new file mode 100644
index 00000000..d837d1a7
--- /dev/null
+++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_vllm020_0f6c56e4_8f83bfab/accuracy/accuracy.json
@@ -0,0 +1,8 @@
+{
+  "subset_score": 0.61,
+  "baseline_delta": 0.01,
+  "valid": true,
+  "framework": "vLLM",
+  "precision": "BF16",
+  "notes": "Integrated accuracy check \u2014 used same vLLM instance as benchmark."
+}
\ No newline at end of file
diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_vllm020_0f6c56e4_8f83bfab/burst/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_vllm020_0f6c56e4_8f83bfab/burst/result.json
new file mode 100644
index 00000000..133a65b6
--- /dev/null
+++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_vllm020_0f6c56e4_8f83bfab/burst/result.json
@@ -0,0 +1,160 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_A",
+  "implementation_id": "nvidia_vllm020_0f6c56e4",
+  "chip": {
+    "name": "NVIDIA A100-SXM4-80GB",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 80.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-18T04:31:01.283634+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A100-SXM4-80GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 80.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_platform": "nvidia",
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tPXB\tSYS\tSYS\t0-63,128-191\t0\t\tN/A\nNIC0\tPXB\t X \tSYS\tSYS\t\t\t\t\nNIC1\tSYS\tSYS\t X \tPIX\t\t\t\t\nNIC2\tSYS\tSYS\tPIX\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "AMD EPYC 7742 64-Core Processor",
+      "physical_cores": 128,
+      "logical_cores": 255,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.7,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.12.0",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 13.0",
+    "pytorch_version": "2.11.0+cu130"
+  },
+  "software": {
+    "framework": "vLLM",
+    "framework_version": "0.20.1+transformers-5.8.1",
+    "driver_version": "580.65.06",
+    "runtime_version": "CUDA 13.0",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.12.0"
+  },
+  "model": {
+    "model_id": "meta-llama/Meta-Llama-3-8B-Instruct",
+    "model_revision": "8afb486c1db24fe5011ec46dfbe5b5dccdb575c2",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": null,
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "burst",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "burst": {
+      "sla_ttft_ms": 500,
+      "burst_steady_qps": 5,
+      "burst_peak_qps": 25,
+      "burst_duration_seconds": 30,
+      "burst_interval_seconds": 120,
+      "steady_requests_total": 1812,
+      "burst_requests_total": 2245,
+      "steady_ttft_p50_ms": 39.39,
+      "steady_ttft_p99_ms": 79.1,
+      "burst_ttft_p50_ms": 7082.87,
+      "burst_ttft_p99_ms": 17212.99,
+      "sla_met_during_burst": false,
+      "burst_degradation_ratio": 217.605,
+      "results_by_cycle": [
+        {
+          "cycle": 1,
+          "steady_requests": 581,
+          "burst_requests": 760,
+          "steady_ttft_p99_ms": 89.81,
+          "burst_ttft_p99_ms": 17855.37
+        },
+        {
+          "cycle": 2,
+          "steady_requests": 595,
+          "burst_requests": 734,
+          "steady_ttft_p99_ms": 47.72,
+          "burst_ttft_p99_ms": 16592.12
+        },
+        {
+          "cycle": 3,
+          "steady_requests": 636,
+          "burst_requests": 751,
+          "steady_ttft_p99_ms": 48.05,
+          "burst_ttft_p99_ms": 16579.9
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "JuhaoLiang1997",
+    "submission_type": "individual",
+    "date": "2026-05-18",
+    "time": "05:55:58",
+    "run_id": "8f83bfab",
+    "run_name": "nvidia_a100_sxm4_80gbx1_suite_A_nvidia_vllm020_0f6c56e4_8f83bfab",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_vllm020_0f6c56e4/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-18T05:46:54.960197+00:00",
+    "benchmark_end_time": "2026-05-18T05:55:58.450157+00:00",
+    "benchmark_elapsed_minutes": 9.1,
+    "model_load_seconds": 39.5
+  }
+}
diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_vllm020_0f6c56e4_8f83bfab/env_info.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_vllm020_0f6c56e4_8f83bfab/env_info.json
new file mode 100644
index 00000000..ccee9205
--- /dev/null
+++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_vllm020_0f6c56e4_8f83bfab/env_info.json
@@ -0,0 +1,49 @@
+{
+  "collected_at": "2026-05-18T04:31:01.283634+00:00",
+  "accelerators": [
+    {
+      "index": 0,
+      "name": "NVIDIA A100-SXM4-80GB",
+      "vendor": "NVIDIA",
+      "memory_gb": 80.0,
+      "driver_version": "580.65.06",
+      "firmware_version": null,
+      "compute_capability": "8.0",
+      "supports_bf16": true
+    }
+  ],
+  "accelerator_platform": "nvidia",
+  "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tPXB\tSYS\tSYS\t0-63,128-191\t0\t\tN/A\nNIC0\tPXB\t X \tSYS\tSYS\t\t\t\t\nNIC1\tSYS\tSYS\t X \tPIX\t\t\t\t\nNIC2\tSYS\tSYS\tPIX\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n\n",
+  "intra_node_interconnect": null,
+  "cpu": {
+    "model": "AMD EPYC 7742 64-Core Processor",
+    "physical_cores": 128,
+    "logical_cores": 255,
+    "numa_nodes": 2
+  },
+  "system_memory_gb": 1007.7,
+  "pcie_generation": "PCIe Gen 4",
+  "cpu_accelerator_bandwidth_gbs": null,
+  "network_interfaces": [
+    {
+      "name": "mlx5_0",
+      "type": "InfiniBand/RoCE",
+      "bandwidth_gbps": null
+    },
+    {
+      "name": "mlx5_1",
+      "type": "InfiniBand/RoCE",
+      "bandwidth_gbps": null
+    },
+    {
+      "name": "mlx5_2",
+      "type": "InfiniBand/RoCE",
+      "bandwidth_gbps": null
+    }
+  ],
+  "os": "Ubuntu 22.04.4 LTS",
+  "python_version": "3.12.0",
+  "kernel_version": "5.15.0-60-generic",
+  "runtime_version": "CUDA 13.0",
+  "pytorch_version": "2.11.0+cu130"
+}
\ No newline at end of file
diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_vllm020_0f6c56e4_8f83bfab/interactive/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_vllm020_0f6c56e4_8f83bfab/interactive/result.json
new file mode 100644
index 00000000..5232cac0
--- /dev/null
+++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_vllm020_0f6c56e4_8f83bfab/interactive/result.json
@@ -0,0 +1,132 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_A",
+  "implementation_id": "nvidia_vllm020_0f6c56e4",
+  "chip": {
+    "name": "NVIDIA A100-SXM4-80GB",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 80.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-18T04:31:01.283634+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A100-SXM4-80GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 80.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_platform": "nvidia",
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tPXB\tSYS\tSYS\t0-63,128-191\t0\t\tN/A\nNIC0\tPXB\t X \tSYS\tSYS\t\t\t\t\nNIC1\tSYS\tSYS\t X \tPIX\t\t\t\t\nNIC2\tSYS\tSYS\tPIX\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "AMD EPYC 7742 64-Core Processor",
+      "physical_cores": 128,
+      "logical_cores": 255,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.7,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.12.0",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 13.0",
+    "pytorch_version": "2.11.0+cu130"
+  },
+  "software": {
+    "framework": "vLLM",
+    "framework_version": "0.20.1+transformers-5.8.1",
+    "driver_version": "580.65.06",
+    "runtime_version": "CUDA 13.0",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.12.0"
+  },
+  "model": {
+    "model_id": "meta-llama/Meta-Llama-3-8B-Instruct",
+    "model_revision": "8afb486c1db24fe5011ec46dfbe5b5dccdb575c2",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": null,
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "interactive",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "interactive": {
+      "ttft_ms_p50": 27.78,
+      "ttft_ms_p90": 42.95,
+      "ttft_ms_p99": 59.38,
+      "tpot_ms_p50": 10.77,
+      "tpot_ms_p90": 10.84,
+      "tpot_ms_p99": 10.86,
+      "peak_memory_gb": null,
+      "elapsed_seconds_median": 570.0
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "JuhaoLiang1997",
+    "submission_type": "individual",
+    "date": "2026-05-18",
+    "time": "05:14:04",
+    "run_id": "8f83bfab",
+    "run_name": "nvidia_a100_sxm4_80gbx1_suite_A_nvidia_vllm020_0f6c56e4_8f83bfab",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_vllm020_0f6c56e4/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-18T04:45:25.062974+00:00",
+    "benchmark_end_time": "2026-05-18T05:14:04.982045+00:00",
+    "benchmark_elapsed_minutes": 28.7,
+    "model_load_seconds": 40.4
+  }
+}
diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_vllm020_0f6c56e4_8f83bfab/offline/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_vllm020_0f6c56e4_8f83bfab/offline/result.json
new file mode 100644
index 00000000..89283ef2
--- /dev/null
+++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_vllm020_0f6c56e4_8f83bfab/offline/result.json
@@ -0,0 +1,165 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_A",
+  "implementation_id": "nvidia_vllm020_0f6c56e4",
+  "chip": {
+    "name": "NVIDIA A100-SXM4-80GB",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 80.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-18T04:31:01.283634+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A100-SXM4-80GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 80.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_platform": "nvidia",
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tPXB\tSYS\tSYS\t0-63,128-191\t0\t\tN/A\nNIC0\tPXB\t X \tSYS\tSYS\t\t\t\t\nNIC1\tSYS\tSYS\t X \tPIX\t\t\t\t\nNIC2\tSYS\tSYS\tPIX\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "AMD EPYC 7742 64-Core Processor",
+      "physical_cores": 128,
+      "logical_cores": 255,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.7,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.12.0",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 13.0",
+    "pytorch_version": "2.11.0+cu130"
+  },
+  "software": {
+    "framework": "vLLM",
+    "framework_version": "0.20.1+transformers-5.8.1",
+    "driver_version": "580.65.06",
+    "runtime_version": "CUDA 13.0",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.12.0"
+  },
+  "model": {
+    "model_id": "meta-llama/Meta-Llama-3-8B-Instruct",
+    "model_revision": "8afb486c1db24fe5011ec46dfbe5b5dccdb575c2",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "offline",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "offline": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 8,
+          "throughput_tokens_per_sec": 3871.19,
+          "throughput_tokens_per_sec_per_chip": 3871.19,
+          "throughput_tokens_per_sec_total": 6746.91,
+          "elapsed_seconds_median": 8.9,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 32,
+          "throughput_tokens_per_sec": 3916.69,
+          "throughput_tokens_per_sec_per_chip": 3916.69,
+          "throughput_tokens_per_sec_total": 6785.8,
+          "elapsed_seconds_median": 8.9,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 128,
+          "throughput_tokens_per_sec": 3908.22,
+          "throughput_tokens_per_sec_per_chip": 3908.22,
+          "throughput_tokens_per_sec_total": 6779.65,
+          "elapsed_seconds_median": 8.9,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "JuhaoLiang1997",
+    "submission_type": "individual",
+    "date": "2026-05-18",
+    "time": "04:35:38",
+    "run_id": "8f83bfab",
+    "run_name": "nvidia_a100_sxm4_80gbx1_suite_A_nvidia_vllm020_0f6c56e4_8f83bfab",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_vllm020_0f6c56e4/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-18T04:33:49.881300+00:00",
+    "benchmark_end_time": "2026-05-18T04:35:38.648555+00:00",
+    "benchmark_elapsed_minutes": 1.8,
+    "model_load_seconds": 58.5
+  }
+}
diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_vllm020_0f6c56e4_8f83bfab/online/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_vllm020_0f6c56e4_8f83bfab/online/result.json
new file mode 100644
index 00000000..8fe97d2f
--- /dev/null
+++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_vllm020_0f6c56e4_8f83bfab/online/result.json
@@ -0,0 +1,164 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_A",
+  "implementation_id": "nvidia_vllm020_0f6c56e4",
+  "chip": {
+    "name": "NVIDIA A100-SXM4-80GB",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 80.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-18T04:31:01.283634+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A100-SXM4-80GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 80.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_platform": "nvidia",
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tPXB\tSYS\tSYS\t0-63,128-191\t0\t\tN/A\nNIC0\tPXB\t X \tSYS\tSYS\t\t\t\t\nNIC1\tSYS\tSYS\t X \tPIX\t\t\t\t\nNIC2\tSYS\tSYS\tPIX\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "AMD EPYC 7742 64-Core Processor",
+      "physical_cores": 128,
+      "logical_cores": 255,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.7,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.12.0",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 13.0",
+    "pytorch_version": "2.11.0+cu130"
+  },
+  "software": {
+    "framework": "vLLM",
+    "framework_version": "0.20.1+transformers-5.8.1",
+    "driver_version": "580.65.06",
+    "runtime_version": "CUDA 13.0",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.12.0"
+  },
+  "model": {
+    "model_id": "meta-llama/Meta-Llama-3-8B-Instruct",
+    "model_revision": "8afb486c1db24fe5011ec46dfbe5b5dccdb575c2",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": null,
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "online",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "online": {
+      "sla_ttft_ms": 500,
+      "max_valid_qps": 5,
+      "results_by_qps": [
+        {
+          "target_qps": 5,
+          "achieved_qps": 5.0,
+          "ttft_ms_p50": 40.2,
+          "ttft_ms_p90": 60.2,
+          "ttft_ms_p99": 92.29,
+          "tpot_ms_p50": 13.21,
+          "tpot_ms_p90": 14.2,
+          "tpot_ms_p99": 14.71,
+          "elapsed_seconds_median": 69.1,
+          "sla_met": true
+        },
+        {
+          "target_qps": 25,
+          "achieved_qps": 25.0,
+          "ttft_ms_p50": 75.24,
+          "ttft_ms_p90": 4335.91,
+          "ttft_ms_p99": 5305.98,
+          "tpot_ms_p50": 22.28,
+          "tpot_ms_p90": 24.58,
+          "tpot_ms_p99": 26.25,
+          "elapsed_seconds_median": 25.0,
+          "sla_met": false
+        },
+        {
+          "target_qps": 100,
+          "achieved_qps": 100.0,
+          "ttft_ms_p50": 1710.17,
+          "ttft_ms_p90": 10195.6,
+          "ttft_ms_p99": 10706.9,
+          "tpot_ms_p50": 22.18,
+          "tpot_ms_p90": 24.52,
+          "tpot_ms_p99": 28.04,
+          "elapsed_seconds_median": 22.3,
+          "sla_met": false
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "JuhaoLiang1997",
+    "submission_type": "individual",
+    "date": "2026-05-18",
+    "time": "04:42:53",
+    "run_id": "8f83bfab",
+    "run_name": "nvidia_a100_sxm4_80gbx1_suite_A_nvidia_vllm020_0f6c56e4_8f83bfab",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_vllm020_0f6c56e4/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-18T04:37:07.775120+00:00",
+    "benchmark_end_time": "2026-05-18T04:42:53.648821+00:00",
+    "benchmark_elapsed_minutes": 5.8,
+    "model_load_seconds": 60.9
+  }
+}
diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_vllm020_0f6c56e4_8f83bfab/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_vllm020_0f6c56e4_8f83bfab/result.json
new file mode 100644
index 00000000..ca26d93b
--- /dev/null
+++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_vllm020_0f6c56e4_8f83bfab/result.json
@@ -0,0 +1,572 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_A",
+  "implementation_id": "nvidia_vllm020_0f6c56e4",
+  "chip": {
+    "name": "NVIDIA A100-SXM4-80GB",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 80.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-18T04:31:01.283634+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A100-SXM4-80GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 80.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_platform": "nvidia",
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tPXB\tSYS\tSYS\t0-63,128-191\t0\t\tN/A\nNIC0\tPXB\t X \tSYS\tSYS\t\t\t\t\nNIC1\tSYS\tSYS\t X \tPIX\t\t\t\t\nNIC2\tSYS\tSYS\tPIX\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "AMD EPYC 7742 64-Core Processor",
+      "physical_cores": 128,
+      "logical_cores": 255,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.7,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.12.0",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 13.0",
+    "pytorch_version": "2.11.0+cu130"
+  },
+  "software": {
+    "framework": "vLLM",
+    "framework_version": "0.20.1+transformers-5.8.1",
+    "driver_version": "580.65.06",
+    "runtime_version": "CUDA 13.0",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.12.0"
+  },
+  "model": {
+    "model_id": "meta-llama/Meta-Llama-3-8B-Instruct",
+    "model_revision": "8afb486c1db24fe5011ec46dfbe5b5dccdb575c2",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenarios_run": [
+      "offline",
+      "online",
+      "interactive",
+      "sustained",
+      "burst"
+    ],
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "num_runs": 3,
+    "extra_config": null
+  },
+  "metrics": {
+    "derived": {},
+    "offline": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 8,
+          "throughput_tokens_per_sec": 3871.19,
+          "throughput_tokens_per_sec_per_chip": 3871.19,
+          "throughput_tokens_per_sec_total": 6746.91,
+          "elapsed_seconds_median": 8.9,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 32,
+          "throughput_tokens_per_sec": 3916.69,
+          "throughput_tokens_per_sec_per_chip": 3916.69,
+          "throughput_tokens_per_sec_total": 6785.8,
+          "elapsed_seconds_median": 8.9,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 128,
+          "throughput_tokens_per_sec": 3908.22,
+          "throughput_tokens_per_sec_per_chip": 3908.22,
+          "throughput_tokens_per_sec_total": 6779.65,
+          "elapsed_seconds_median": 8.9,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    },
+    "online": {
+      "sla_ttft_ms": 500,
+      "max_valid_qps": 5,
+      "results_by_qps": [
+        {
+          "target_qps": 5,
+          "achieved_qps": 5.0,
+          "ttft_ms_p50": 40.2,
+          "ttft_ms_p90": 60.2,
+          "ttft_ms_p99": 92.29,
+          "tpot_ms_p50": 13.21,
+          "tpot_ms_p90": 14.2,
+          "tpot_ms_p99": 14.71,
+          "elapsed_seconds_median": 69.1,
+          "sla_met": true
+        },
+        {
+          "target_qps": 25,
+          "achieved_qps": 25.0,
+          "ttft_ms_p50": 75.24,
+          "ttft_ms_p90": 4335.91,
+          "ttft_ms_p99": 5305.98,
+          "tpot_ms_p50": 22.28,
+          "tpot_ms_p90": 24.58,
+          "tpot_ms_p99": 26.25,
+          "elapsed_seconds_median": 25.0,
+          "sla_met": false
+        },
+        {
+          "target_qps": 100,
+          "achieved_qps": 100.0,
+          "ttft_ms_p50": 1710.17,
+          "ttft_ms_p90": 10195.6,
+          "ttft_ms_p99": 10706.9,
+          "tpot_ms_p50": 22.18,
+          "tpot_ms_p90": 24.52,
+          "tpot_ms_p99": 28.04,
+          "elapsed_seconds_median": 22.3,
+          "sla_met": false
+        }
+      ]
+    },
+    "interactive": {
+      "ttft_ms_p50": 27.78,
+      "ttft_ms_p90": 42.95,
+      "ttft_ms_p99": 59.38,
+      "tpot_ms_p50": 10.77,
+      "tpot_ms_p90": 10.84,
+      "tpot_ms_p99": 10.86,
+      "peak_memory_gb": null,
+      "elapsed_seconds_median": 570.0
+    },
+    "sustained": {
+      "sustained_concurrency": 8,
+      "duration_minutes": 30,
+      "warmup_minutes": 2,
+      "sample_interval_seconds": 60,
+      "samples": [
+        {
+          "minute": 1.0,
+          "is_warmup": true,
+          "throughput_tokens_per_sec": 666.3,
+          "tokens_out": 39988,
+          "tokens_in": 0,
+          "requests_completed": 115,
+          "ttft_ms_p50": 43.8,
+          "ttft_ms_p99": 345.6
+        },
+        {
+          "minute": 2.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 707.4,
+          "tokens_out": 42459,
+          "tokens_in": 0,
+          "requests_completed": 126,
+          "ttft_ms_p50": 34.5,
+          "ttft_ms_p99": 40.9
+        },
+        {
+          "minute": 3.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 710.2,
+          "tokens_out": 42613,
+          "tokens_in": 0,
+          "requests_completed": 122,
+          "ttft_ms_p50": 34.4,
+          "ttft_ms_p99": 35.9
+        },
+        {
+          "minute": 4.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 728.3,
+          "tokens_out": 43675,
+          "tokens_in": 0,
+          "requests_completed": 126,
+          "ttft_ms_p50": 34.5,
+          "ttft_ms_p99": 36.1
+        },
+        {
+          "minute": 5.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 707.4,
+          "tokens_out": 42459,
+          "tokens_in": 0,
+          "requests_completed": 121,
+          "ttft_ms_p50": 34.3,
+          "ttft_ms_p99": 41.4
+        },
+        {
+          "minute": 6.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 711.2,
+          "tokens_out": 42670,
+          "tokens_in": 0,
+          "requests_completed": 128,
+          "ttft_ms_p50": 34.5,
+          "ttft_ms_p99": 40.3
+        },
+        {
+          "minute": 7.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 708.9,
+          "tokens_out": 42534,
+          "tokens_in": 0,
+          "requests_completed": 122,
+          "ttft_ms_p50": 34.6,
+          "ttft_ms_p99": 35.8
+        },
+        {
+          "minute": 8.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 701.6,
+          "tokens_out": 42083,
+          "tokens_in": 0,
+          "requests_completed": 121,
+          "ttft_ms_p50": 34.7,
+          "ttft_ms_p99": 40.0
+        },
+        {
+          "minute": 9.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 723.0,
+          "tokens_out": 43388,
+          "tokens_in": 0,
+          "requests_completed": 123,
+          "ttft_ms_p50": 34.4,
+          "ttft_ms_p99": 39.8
+        },
+        {
+          "minute": 10.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 702.0,
+          "tokens_out": 42108,
+          "tokens_in": 0,
+          "requests_completed": 124,
+          "ttft_ms_p50": 34.5,
+          "ttft_ms_p99": 39.2
+        },
+        {
+          "minute": 11.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 716.4,
+          "tokens_out": 42995,
+          "tokens_in": 0,
+          "requests_completed": 125,
+          "ttft_ms_p50": 34.7,
+          "ttft_ms_p99": 39.5
+        },
+        {
+          "minute": 12.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 718.5,
+          "tokens_out": 43125,
+          "tokens_in": 0,
+          "requests_completed": 124,
+          "ttft_ms_p50": 34.5,
+          "ttft_ms_p99": 40.7
+        },
+        {
+          "minute": 13.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 716.5,
+          "tokens_out": 42976,
+          "tokens_in": 0,
+          "requests_completed": 123,
+          "ttft_ms_p50": 34.5,
+          "ttft_ms_p99": 36.0
+        },
+        {
+          "minute": 14.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 708.7,
+          "tokens_out": 42543,
+          "tokens_in": 0,
+          "requests_completed": 125,
+          "ttft_ms_p50": 34.6,
+          "ttft_ms_p99": 41.1
+        },
+        {
+          "minute": 15.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 711.1,
+          "tokens_out": 42666,
+          "tokens_in": 0,
+          "requests_completed": 125,
+          "ttft_ms_p50": 34.6,
+          "ttft_ms_p99": 36.9
+        },
+        {
+          "minute": 16.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 715.6,
+          "tokens_out": 42902,
+          "tokens_in": 0,
+          "requests_completed": 120,
+          "ttft_ms_p50": 34.3,
+          "ttft_ms_p99": 36.2
+        },
+        {
+          "minute": 17.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 699.2,
+          "tokens_out": 41971,
+          "tokens_in": 0,
+          "requests_completed": 122,
+          "ttft_ms_p50": 34.7,
+          "ttft_ms_p99": 37.1
+        },
+        {
+          "minute": 18.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 721.0,
+          "tokens_out": 43276,
+          "tokens_in": 0,
+          "requests_completed": 123,
+          "ttft_ms_p50": 34.5,
+          "ttft_ms_p99": 39.7
+        },
+        {
+          "minute": 19.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 689.9,
+          "tokens_out": 41386,
+          "tokens_in": 0,
+          "requests_completed": 124,
+          "ttft_ms_p50": 34.6,
+          "ttft_ms_p99": 36.0
+        },
+        {
+          "minute": 20.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 718.4,
+          "tokens_out": 43086,
+          "tokens_in": 0,
+          "requests_completed": 123,
+          "ttft_ms_p50": 34.4,
+          "ttft_ms_p99": 36.1
+        },
+        {
+          "minute": 21.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 720.0,
+          "tokens_out": 43224,
+          "tokens_in": 0,
+          "requests_completed": 125,
+          "ttft_ms_p50": 34.6,
+          "ttft_ms_p99": 36.0
+        },
+        {
+          "minute": 22.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 726.1,
+          "tokens_out": 43543,
+          "tokens_in": 0,
+          "requests_completed": 123,
+          "ttft_ms_p50": 34.5,
+          "ttft_ms_p99": 40.6
+        },
+        {
+          "minute": 23.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 713.8,
+          "tokens_out": 42835,
+          "tokens_in": 0,
+          "requests_completed": 128,
+          "ttft_ms_p50": 34.7,
+          "ttft_ms_p99": 36.2
+        },
+        {
+          "minute": 24.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 694.3,
+          "tokens_out": 41650,
+          "tokens_in": 0,
+          "requests_completed": 119,
+          "ttft_ms_p50": 34.5,
+          "ttft_ms_p99": 38.5
+        },
+        {
+          "minute": 25.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 709.4,
+          "tokens_out": 42580,
+          "tokens_in": 0,
+          "requests_completed": 124,
+          "ttft_ms_p50": 34.7,
+          "ttft_ms_p99": 40.6
+        },
+        {
+          "minute": 26.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 720.1,
+          "tokens_out": 43188,
+          "tokens_in": 0,
+          "requests_completed": 123,
+          "ttft_ms_p50": 34.4,
+          "ttft_ms_p99": 36.1
+        },
+        {
+          "minute": 27.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 714.8,
+          "tokens_out": 42892,
+          "tokens_in": 0,
+          "requests_completed": 126,
+          "ttft_ms_p50": 34.6,
+          "ttft_ms_p99": 40.1
+        },
+        {
+          "minute": 28.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 705.6,
+          "tokens_out": 42347,
+          "tokens_in": 0,
+          "requests_completed": 122,
+          "ttft_ms_p50": 34.4,
+          "ttft_ms_p99": 40.7
+        },
+        {
+          "minute": 29.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 725.1,
+          "tokens_out": 43505,
+          "tokens_in": 0,
+          "requests_completed": 125,
+          "ttft_ms_p50": 34.6,
+          "ttft_ms_p99": 40.7
+        }
+      ],
+      "sustained_throughput_tokens_per_sec": 712.3,
+      "throttle_ratio": 0.947,
+      "throttle_onset_minute": null,
+      "ttft_p99_drift_ms": -0.2
+    },
+    "burst": {
+      "sla_ttft_ms": 500,
+      "burst_steady_qps": 5,
+      "burst_peak_qps": 25,
+      "burst_duration_seconds": 30,
+      "burst_interval_seconds": 120,
+      "steady_requests_total": 1812,
+      "burst_requests_total": 2245,
+      "steady_ttft_p50_ms": 39.39,
+      "steady_ttft_p99_ms": 79.1,
+      "burst_ttft_p50_ms": 7082.87,
+      "burst_ttft_p99_ms": 17212.99,
+      "sla_met_during_burst": false,
+      "burst_degradation_ratio": 217.605,
+      "results_by_cycle": [
+        {
+          "cycle": 1,
+          "steady_requests": 581,
+          "burst_requests": 760,
+          "steady_ttft_p99_ms": 89.81,
+          "burst_ttft_p99_ms": 17855.37
+        },
+        {
+          "cycle": 2,
+          "steady_requests": 595,
+          "burst_requests": 734,
+          "steady_ttft_p99_ms": 47.72,
+          "burst_ttft_p99_ms": 16592.12
+        },
+        {
+          "cycle": 3,
+          "steady_requests": 636,
+          "burst_requests": 751,
+          "steady_ttft_p99_ms": 48.05,
+          "burst_ttft_p99_ms": 16579.9
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": 0.61,
+    "baseline_delta": 0.01,
+    "valid": true,
+    "framework": "vLLM",
+    "precision": "BF16",
+    "notes": "Integrated accuracy check \u2014 used same vLLM instance as benchmark."
+  },
+  "meta": {
+    "submitted_by": "JuhaoLiang1997",
+    "submission_type": "individual",
+    "date": "2026-05-18",
+    "time": "04:35:38",
+    "run_id": "8f83bfab",
+    "run_name": "nvidia_a100_sxm4_80gbx1_suite_A_nvidia_vllm020_0f6c56e4_8f83bfab",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_vllm020_0f6c56e4/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": "Partial run: ['offline', 'online', 'interactive', 'sustained', 'burst'] succeeded, ['speculative'] failed.",
+    "benchmark_start_time": "2026-05-18T04:33:49.881300+00:00",
+    "benchmark_end_time": "2026-05-18T04:35:38.648555+00:00",
+    "benchmark_elapsed_minutes": 75.5,
+    "model_load_seconds": 58.5,
+    "benchmark_elapsed_minutes_note": "Total across ['offline', 'online', 'interactive', 'sustained', 'burst'] scenarios.",
+    "scenario_dirs": {
+      "offline": "results/verified/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_vllm020_0f6c56e4_8f83bfab/offline",
+      "online": "results/verified/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_vllm020_0f6c56e4_8f83bfab/online",
+      "interactive": "results/verified/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_vllm020_0f6c56e4_8f83bfab/interactive",
+      "sustained": "results/verified/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_vllm020_0f6c56e4_8f83bfab/sustained",
+      "burst": "results/verified/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_vllm020_0f6c56e4_8f83bfab/burst"
+    }
+  }
+}
\ No newline at end of file
diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_vllm020_0f6c56e4_8f83bfab/sustained/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_vllm020_0f6c56e4_8f83bfab/sustained/result.json
new file mode 100644
index 00000000..1fc95fb6
--- /dev/null
+++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_A_nvidia_vllm020_0f6c56e4_8f83bfab/sustained/result.json
@@ -0,0 +1,424 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_A",
+  "implementation_id": "nvidia_vllm020_0f6c56e4",
+  "chip": {
+    "name": "NVIDIA A100-SXM4-80GB",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 80.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-18T04:31:01.283634+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A100-SXM4-80GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 80.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_platform": "nvidia",
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tPXB\tSYS\tSYS\t0-63,128-191\t0\t\tN/A\nNIC0\tPXB\t X \tSYS\tSYS\t\t\t\t\nNIC1\tSYS\tSYS\t X \tPIX\t\t\t\t\nNIC2\tSYS\tSYS\tPIX\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "AMD EPYC 7742 64-Core Processor",
+      "physical_cores": 128,
+      "logical_cores": 255,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.7,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.12.0",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 13.0",
+    "pytorch_version": "2.11.0+cu130"
+  },
+  "software": {
+    "framework": "vLLM",
+    "framework_version": "0.20.1+transformers-5.8.1",
+    "driver_version": "580.65.06",
+    "runtime_version": "CUDA 13.0",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.12.0"
+  },
+  "model": {
+    "model_id": "meta-llama/Meta-Llama-3-8B-Instruct",
+    "model_revision": "8afb486c1db24fe5011ec46dfbe5b5dccdb575c2",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": null,
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "sustained",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "sustained": {
+      "sustained_concurrency": 8,
+      "duration_minutes": 30,
+      "warmup_minutes": 2,
+      "sample_interval_seconds": 60,
+      "samples": [
+        {
+          "minute": 1.0,
+          "is_warmup": true,
+          "throughput_tokens_per_sec": 666.3,
+          "tokens_out": 39988,
+          "tokens_in": 0,
+          "requests_completed": 115,
+          "ttft_ms_p50": 43.8,
+          "ttft_ms_p99": 345.6
+        },
+        {
+          "minute": 2.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 707.4,
+          "tokens_out": 42459,
+          "tokens_in": 0,
+          "requests_completed": 126,
+          "ttft_ms_p50": 34.5,
+          "ttft_ms_p99": 40.9
+        },
+        {
+          "minute": 3.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 710.2,
+          "tokens_out": 42613,
+          "tokens_in": 0,
+          "requests_completed": 122,
+          "ttft_ms_p50": 34.4,
+          "ttft_ms_p99": 35.9
+        },
+        {
+          "minute": 4.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 728.3,
+          "tokens_out": 43675,
+          "tokens_in": 0,
+          "requests_completed": 126,
+          "ttft_ms_p50": 34.5,
+          "ttft_ms_p99": 36.1
+        },
+        {
+          "minute": 5.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 707.4,
+          "tokens_out": 42459,
+          "tokens_in": 0,
+          "requests_completed": 121,
+          "ttft_ms_p50": 34.3,
+          "ttft_ms_p99": 41.4
+        },
+        {
+          "minute": 6.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 711.2,
+          "tokens_out": 42670,
+          "tokens_in": 0,
+          "requests_completed": 128,
+          "ttft_ms_p50": 34.5,
+          "ttft_ms_p99": 40.3
+        },
+        {
+          "minute": 7.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 708.9,
+          "tokens_out": 42534,
+          "tokens_in": 0,
+          "requests_completed": 122,
+          "ttft_ms_p50": 34.6,
+          "ttft_ms_p99": 35.8
+        },
+        {
+          "minute": 8.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 701.6,
+          "tokens_out": 42083,
+          "tokens_in": 0,
+          "requests_completed": 121,
+          "ttft_ms_p50": 34.7,
+          "ttft_ms_p99": 40.0
+        },
+        {
+          "minute": 9.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 723.0,
+          "tokens_out": 43388,
+          "tokens_in": 0,
+          "requests_completed": 123,
+          "ttft_ms_p50": 34.4,
+          "ttft_ms_p99": 39.8
+        },
+        {
+          "minute": 10.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 702.0,
+          "tokens_out": 42108,
+          "tokens_in": 0,
+          "requests_completed": 124,
+          "ttft_ms_p50": 34.5,
+          "ttft_ms_p99": 39.2
+        },
+        {
+          "minute": 11.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 716.4,
+          "tokens_out": 42995,
+          "tokens_in": 0,
+          "requests_completed": 125,
+          "ttft_ms_p50": 34.7,
+          "ttft_ms_p99": 39.5
+        },
+        {
+          "minute": 12.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 718.5,
+          "tokens_out": 43125,
+          "tokens_in": 0,
+          "requests_completed": 124,
+          "ttft_ms_p50": 34.5,
+          "ttft_ms_p99": 40.7
+        },
+        {
+          "minute": 13.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 716.5,
+          "tokens_out": 42976,
+          "tokens_in": 0,
+          "requests_completed": 123,
+          "ttft_ms_p50": 34.5,
+          "ttft_ms_p99": 36.0
+        },
+        {
+          "minute": 14.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 708.7,
+          "tokens_out": 42543,
+          "tokens_in": 0,
+          "requests_completed": 125,
+          "ttft_ms_p50": 34.6,
+          "ttft_ms_p99": 41.1
+        },
+        {
+          "minute": 15.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 711.1,
+          "tokens_out": 42666,
+          "tokens_in": 0,
+          "requests_completed": 125,
+          "ttft_ms_p50": 34.6,
+          "ttft_ms_p99": 36.9
+        },
+        {
+          "minute": 16.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 715.6,
+          "tokens_out": 42902,
+          "tokens_in": 0,
+          "requests_completed": 120,
+          "ttft_ms_p50": 34.3,
+          "ttft_ms_p99": 36.2
+        },
+        {
+          "minute": 17.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 699.2,
+          "tokens_out": 41971,
+          "tokens_in": 0,
+          "requests_completed": 122,
+          "ttft_ms_p50": 34.7,
+          "ttft_ms_p99": 37.1
+        },
+        {
+          "minute": 18.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 721.0,
+          "tokens_out": 43276,
+          "tokens_in": 0,
+          "requests_completed": 123,
+          "ttft_ms_p50": 34.5,
+          "ttft_ms_p99": 39.7
+        },
+        {
+          "minute": 19.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 689.9,
+          "tokens_out": 41386,
+          "tokens_in": 0,
+          "requests_completed": 124,
+          "ttft_ms_p50": 34.6,
+          "ttft_ms_p99": 36.0
+        },
+        {
+          "minute": 20.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 718.4,
+          "tokens_out": 43086,
+          "tokens_in": 0,
+          "requests_completed": 123,
+          "ttft_ms_p50": 34.4,
+          "ttft_ms_p99": 36.1
+        },
+        {
+          "minute": 21.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 720.0,
+          "tokens_out": 43224,
+          "tokens_in": 0,
+          "requests_completed": 125,
+          "ttft_ms_p50": 34.6,
+          "ttft_ms_p99": 36.0
+        },
+        {
+          "minute": 22.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 726.1,
+          "tokens_out": 43543,
+          "tokens_in": 0,
+          "requests_completed": 123,
+          "ttft_ms_p50": 34.5,
+          "ttft_ms_p99": 40.6
+        },
+        {
+          "minute": 23.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 713.8,
+          "tokens_out": 42835,
+          "tokens_in": 0,
+          "requests_completed": 128,
+          "ttft_ms_p50": 34.7,
+          "ttft_ms_p99": 36.2
+        },
+        {
+          "minute": 24.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 694.3,
+          "tokens_out": 41650,
+          "tokens_in": 0,
+          "requests_completed": 119,
+          "ttft_ms_p50": 34.5,
+          "ttft_ms_p99": 38.5
+        },
+        {
+          "minute": 25.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 709.4,
+          "tokens_out": 42580,
+          "tokens_in": 0,
+          "requests_completed": 124,
+          "ttft_ms_p50": 34.7,
+          "ttft_ms_p99": 40.6
+        },
+        {
+          "minute": 26.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 720.1,
+          "tokens_out": 43188,
+          "tokens_in": 0,
+          "requests_completed": 123,
+          "ttft_ms_p50": 34.4,
+          "ttft_ms_p99": 36.1
+        },
+        {
+          "minute": 27.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 714.8,
+          "tokens_out": 42892,
+          "tokens_in": 0,
+          "requests_completed": 126,
+          "ttft_ms_p50": 34.6,
+          "ttft_ms_p99": 40.1
+        },
+        {
+          "minute": 28.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 705.6,
+          "tokens_out": 42347,
+          "tokens_in": 0,
+          "requests_completed": 122,
+          "ttft_ms_p50": 34.4,
+          "ttft_ms_p99": 40.7
+        },
+        {
+          "minute": 29.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 725.1,
+          "tokens_out": 43505,
+          "tokens_in": 0,
+          "requests_completed": 125,
+          "ttft_ms_p50": 34.6,
+          "ttft_ms_p99": 40.7
+        }
+      ],
+      "sustained_throughput_tokens_per_sec": 712.3,
+      "throttle_ratio": 0.947,
+      "throttle_onset_minute": null,
+      "ttft_p99_drift_ms": -0.2
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "JuhaoLiang1997",
+    "submission_type": "individual",
+    "date": "2026-05-18",
+    "time": "05:45:22",
+    "run_id": "8f83bfab",
+    "run_name": "nvidia_a100_sxm4_80gbx1_suite_A_nvidia_vllm020_0f6c56e4_8f83bfab",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_vllm020_0f6c56e4/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-18T05:15:13.541588+00:00",
+    "benchmark_end_time": "2026-05-18T05:45:22.333860+00:00",
+    "benchmark_elapsed_minutes": 30.1,
+    "model_load_seconds": 41.7
+  }
+}
diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/bf16/accuracy/accuracy.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/bf16/accuracy/accuracy.json
new file mode 100644
index 00000000..95fced50
--- /dev/null
+++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/bf16/accuracy/accuracy.json
@@ -0,0 +1,8 @@
+{
+  "subset_score": 0.56,
+  "baseline_delta": 0.0,
+  "valid": true,
+  "framework": "vLLM",
+  "precision": "BF16",
+  "notes": "Integrated accuracy check \u2014 used same vLLM instance as benchmark."
+}
\ No newline at end of file
diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/bf16/offline/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/bf16/offline/result.json
new file mode 100644
index 00000000..b2759764
--- /dev/null
+++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/bf16/offline/result.json
@@ -0,0 +1,178 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_C",
+  "implementation_id": "nvidia_vllm020_0f6c56e4",
+  "chip": {
+    "name": "NVIDIA A100-SXM4-80GB",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 80.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-18T05:56:25.789998+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A100-SXM4-80GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 80.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_platform": "nvidia",
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tPXB\tSYS\tSYS\t0-63,128-191\t0\t\tN/A\nNIC0\tPXB\t X \tSYS\tSYS\t\t\t\t\nNIC1\tSYS\tSYS\t X \tPIX\t\t\t\t\nNIC2\tSYS\tSYS\tPIX\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "AMD EPYC 7742 64-Core Processor",
+      "physical_cores": 128,
+      "logical_cores": 255,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.7,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.12.0",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 13.0",
+    "pytorch_version": "2.11.0+cu130"
+  },
+  "software": {
+    "framework": "vLLM",
+    "framework_version": "0.20.1+transformers-5.8.1",
+    "driver_version": "580.65.06",
+    "runtime_version": "CUDA 13.0",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.12.0"
+  },
+  "model": {
+    "model_id": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_revision": "0e9e39f249a16976918f6564b8830bc894c89659",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "offline",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "offline": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 1,
+          "throughput_tokens_per_sec": 3888.91,
+          "throughput_tokens_per_sec_per_chip": 3888.91,
+          "throughput_tokens_per_sec_total": 6956.79,
+          "elapsed_seconds_median": 9.2,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 4,
+          "throughput_tokens_per_sec": 3886.06,
+          "throughput_tokens_per_sec_per_chip": 3886.06,
+          "throughput_tokens_per_sec_total": 6943.56,
+          "elapsed_seconds_median": 9.2,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 16,
+          "throughput_tokens_per_sec": 3885.21,
+          "throughput_tokens_per_sec_per_chip": 3885.21,
+          "throughput_tokens_per_sec_total": 6935.62,
+          "elapsed_seconds_median": 9.2,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 64,
+          "throughput_tokens_per_sec": 3887.73,
+          "throughput_tokens_per_sec_per_chip": 3887.73,
+          "throughput_tokens_per_sec_total": 6949.35,
+          "elapsed_seconds_median": 9.2,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "JuhaoLiang1997",
+    "submission_type": "individual",
+    "date": "2026-05-18",
+    "time": "06:01:31",
+    "run_id": "ffd81462",
+    "run_name": "nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_vllm020_0f6c56e4/runner.py",
+    "env_info_file": "../../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-18T05:59:03.115858+00:00",
+    "benchmark_end_time": "2026-05-18T06:01:31.820089+00:00",
+    "benchmark_elapsed_minutes": 2.5,
+    "model_load_seconds": 35.2
+  }
+}
diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/bf16/online/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/bf16/online/result.json
new file mode 100644
index 00000000..fcd1a857
--- /dev/null
+++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/bf16/online/result.json
@@ -0,0 +1,176 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_C",
+  "implementation_id": "nvidia_vllm020_0f6c56e4",
+  "chip": {
+    "name": "NVIDIA A100-SXM4-80GB",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 80.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-18T05:56:25.789998+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A100-SXM4-80GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 80.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_platform": "nvidia",
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tPXB\tSYS\tSYS\t0-63,128-191\t0\t\tN/A\nNIC0\tPXB\t X \tSYS\tSYS\t\t\t\t\nNIC1\tSYS\tSYS\t X \tPIX\t\t\t\t\nNIC2\tSYS\tSYS\tPIX\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "AMD EPYC 7742 64-Core Processor",
+      "physical_cores": 128,
+      "logical_cores": 255,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.7,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.12.0",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 13.0",
+    "pytorch_version": "2.11.0+cu130"
+  },
+  "software": {
+    "framework": "vLLM",
+    "framework_version": "0.20.1+transformers-5.8.1",
+    "driver_version": "580.65.06",
+    "runtime_version": "CUDA 13.0",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.12.0"
+  },
+  "model": {
+    "model_id": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_revision": "0e9e39f249a16976918f6564b8830bc894c89659",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": null,
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "online",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "online": {
+      "sla_ttft_ms": 500,
+      "max_valid_qps": 10,
+      "results_by_qps": [
+        {
+          "target_qps": 5,
+          "achieved_qps": 5.0,
+          "ttft_ms_p50": 41.15,
+          "ttft_ms_p90": 61.6,
+          "ttft_ms_p99": 96.42,
+          "tpot_ms_p50": 13.32,
+          "tpot_ms_p90": 14.41,
+          "tpot_ms_p99": 14.81,
+          "elapsed_seconds_median": 68.9,
+          "sla_met": true
+        },
+        {
+          "target_qps": 10,
+          "achieved_qps": 10.0,
+          "ttft_ms_p50": 52.06,
+          "ttft_ms_p90": 63.35,
+          "ttft_ms_p99": 69.96,
+          "tpot_ms_p50": 17.62,
+          "tpot_ms_p90": 18.95,
+          "tpot_ms_p99": 19.52,
+          "elapsed_seconds_median": 36.5,
+          "sla_met": true
+        },
+        {
+          "target_qps": 25,
+          "achieved_qps": 25.0,
+          "ttft_ms_p50": 86.32,
+          "ttft_ms_p90": 5056.12,
+          "ttft_ms_p99": 5979.32,
+          "tpot_ms_p50": 22.47,
+          "tpot_ms_p90": 25.06,
+          "tpot_ms_p99": 26.76,
+          "elapsed_seconds_median": 26.1,
+          "sla_met": false
+        },
+        {
+          "target_qps": 50,
+          "achieved_qps": 50.0,
+          "ttft_ms_p50": 891.06,
+          "ttft_ms_p90": 8527.64,
+          "ttft_ms_p99": 9213.57,
+          "tpot_ms_p50": 22.37,
+          "tpot_ms_p90": 24.86,
+          "tpot_ms_p99": 30.84,
+          "elapsed_seconds_median": 23.9,
+          "sla_met": false
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "JuhaoLiang1997",
+    "submission_type": "individual",
+    "date": "2026-05-18",
+    "time": "06:10:41",
+    "run_id": "ffd81462",
+    "run_name": "nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_vllm020_0f6c56e4/runner.py",
+    "env_info_file": "../../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-18T06:02:58.126932+00:00",
+    "benchmark_end_time": "2026-05-18T06:10:41.342285+00:00",
+    "benchmark_elapsed_minutes": 7.7,
+    "model_load_seconds": 60.4
+  }
+}
diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/bf16/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/bf16/result.json
new file mode 100644
index 00000000..75e68ff6
--- /dev/null
+++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/bf16/result.json
@@ -0,0 +1,395 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_C",
+  "implementation_id": "nvidia_vllm020_0f6c56e4",
+  "chip": {
+    "name": "NVIDIA A100-SXM4-80GB",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 80.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-18T05:56:25.789998+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A100-SXM4-80GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 80.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_platform": "nvidia",
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tPXB\tSYS\tSYS\t0-63,128-191\t0\t\tN/A\nNIC0\tPXB\t X \tSYS\tSYS\t\t\t\t\nNIC1\tSYS\tSYS\t X \tPIX\t\t\t\t\nNIC2\tSYS\tSYS\tPIX\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "AMD EPYC 7742 64-Core Processor",
+      "physical_cores": 128,
+      "logical_cores": 255,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.7,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.12.0",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 13.0",
+    "pytorch_version": "2.11.0+cu130"
+  },
+  "software": {
+    "framework": "vLLM",
+    "framework_version": "0.20.1+transformers-5.8.1",
+    "driver_version": "580.65.06",
+    "runtime_version": "CUDA 13.0",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.12.0"
+  },
+  "model": {
+    "model_id": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_revision": "0e9e39f249a16976918f6564b8830bc894c89659",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenarios_run": [
+      "offline",
+      "online",
+      "sustained"
+    ],
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "num_runs": 3,
+    "extra_config": null
+  },
+  "metrics": {
+    "derived": {},
+    "offline": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 1,
+          "throughput_tokens_per_sec": 3888.91,
+          "throughput_tokens_per_sec_per_chip": 3888.91,
+          "throughput_tokens_per_sec_total": 6956.79,
+          "elapsed_seconds_median": 9.2,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 4,
+          "throughput_tokens_per_sec": 3886.06,
+          "throughput_tokens_per_sec_per_chip": 3886.06,
+          "throughput_tokens_per_sec_total": 6943.56,
+          "elapsed_seconds_median": 9.2,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 16,
+          "throughput_tokens_per_sec": 3885.21,
+          "throughput_tokens_per_sec_per_chip": 3885.21,
+          "throughput_tokens_per_sec_total": 6935.62,
+          "elapsed_seconds_median": 9.2,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 64,
+          "throughput_tokens_per_sec": 3887.73,
+          "throughput_tokens_per_sec_per_chip": 3887.73,
+          "throughput_tokens_per_sec_total": 6949.35,
+          "elapsed_seconds_median": 9.2,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    },
+    "online": {
+      "sla_ttft_ms": 500,
+      "max_valid_qps": 10,
+      "results_by_qps": [
+        {
+          "target_qps": 5,
+          "achieved_qps": 5.0,
+          "ttft_ms_p50": 41.15,
+          "ttft_ms_p90": 61.6,
+          "ttft_ms_p99": 96.42,
+          "tpot_ms_p50": 13.32,
+          "tpot_ms_p90": 14.41,
+          "tpot_ms_p99": 14.81,
+          "elapsed_seconds_median": 68.9,
+          "sla_met": true
+        },
+        {
+          "target_qps": 10,
+          "achieved_qps": 10.0,
+          "ttft_ms_p50": 52.06,
+          "ttft_ms_p90": 63.35,
+          "ttft_ms_p99": 69.96,
+          "tpot_ms_p50": 17.62,
+          "tpot_ms_p90": 18.95,
+          "tpot_ms_p99": 19.52,
+          "elapsed_seconds_median": 36.5,
+          "sla_met": true
+        },
+        {
+          "target_qps": 25,
+          "achieved_qps": 25.0,
+          "ttft_ms_p50": 86.32,
+          "ttft_ms_p90": 5056.12,
+          "ttft_ms_p99": 5979.32,
+          "tpot_ms_p50": 22.47,
+          "tpot_ms_p90": 25.06,
+          "tpot_ms_p99": 26.76,
+          "elapsed_seconds_median": 26.1,
+          "sla_met": false
+        },
+        {
+          "target_qps": 50,
+          "achieved_qps": 50.0,
+          "ttft_ms_p50": 891.06,
+          "ttft_ms_p90": 8527.64,
+          "ttft_ms_p99": 9213.57,
+          "tpot_ms_p50": 22.37,
+          "tpot_ms_p90": 24.86,
+          "tpot_ms_p99": 30.84,
+          "elapsed_seconds_median": 23.9,
+          "sla_met": false
+        }
+      ]
+    },
+    "sustained": {
+      "sustained_concurrency": 8,
+      "duration_minutes": 15,
+      "warmup_minutes": 1,
+      "sample_interval_seconds": 60,
+      "samples": [
+        {
+          "minute": 1.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 655.0,
+          "tokens_out": 39334,
+          "tokens_in": 0,
+          "requests_completed": 109,
+          "ttft_ms_p50": 44.4,
+          "ttft_ms_p99": 401.8
+        },
+        {
+          "minute": 2.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 711.0,
+          "tokens_out": 42652,
+          "tokens_in": 0,
+          "requests_completed": 124,
+          "ttft_ms_p50": 34.5,
+          "ttft_ms_p99": 42.5
+        },
+        {
+          "minute": 3.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 698.3,
+          "tokens_out": 41902,
+          "tokens_in": 0,
+          "requests_completed": 116,
+          "ttft_ms_p50": 34.6,
+          "ttft_ms_p99": 41.3
+        },
+        {
+          "minute": 4.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 718.8,
+          "tokens_out": 43114,
+          "tokens_in": 0,
+          "requests_completed": 116,
+          "ttft_ms_p50": 34.6,
+          "ttft_ms_p99": 36.5
+        },
+        {
+          "minute": 5.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 724.4,
+          "tokens_out": 43451,
+          "tokens_in": 0,
+          "requests_completed": 124,
+          "ttft_ms_p50": 34.5,
+          "ttft_ms_p99": 41.7
+        },
+        {
+          "minute": 6.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 701.8,
+          "tokens_out": 42133,
+          "tokens_in": 0,
+          "requests_completed": 115,
+          "ttft_ms_p50": 34.5,
+          "ttft_ms_p99": 36.1
+        },
+        {
+          "minute": 7.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 706.9,
+          "tokens_out": 42401,
+          "tokens_in": 0,
+          "requests_completed": 122,
+          "ttft_ms_p50": 34.5,
+          "ttft_ms_p99": 42.9
+        },
+        {
+          "minute": 8.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 720.6,
+          "tokens_out": 43232,
+          "tokens_in": 0,
+          "requests_completed": 120,
+          "ttft_ms_p50": 34.5,
+          "ttft_ms_p99": 41.5
+        },
+        {
+          "minute": 9.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 697.1,
+          "tokens_out": 41830,
+          "tokens_in": 0,
+          "requests_completed": 116,
+          "ttft_ms_p50": 34.6,
+          "ttft_ms_p99": 43.9
+        },
+        {
+          "minute": 10.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 726.2,
+          "tokens_out": 43597,
+          "tokens_in": 0,
+          "requests_completed": 123,
+          "ttft_ms_p50": 34.4,
+          "ttft_ms_p99": 35.9
+        },
+        {
+          "minute": 11.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 701.9,
+          "tokens_out": 42083,
+          "tokens_in": 0,
+          "requests_completed": 116,
+          "ttft_ms_p50": 34.5,
+          "ttft_ms_p99": 36.1
+        },
+        {
+          "minute": 12.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 728.3,
+          "tokens_out": 43715,
+          "tokens_in": 0,
+          "requests_completed": 121,
+          "ttft_ms_p50": 34.5,
+          "ttft_ms_p99": 43.1
+        },
+        {
+          "minute": 13.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 688.9,
+          "tokens_out": 41331,
+          "tokens_in": 0,
+          "requests_completed": 119,
+          "ttft_ms_p50": 34.6,
+          "ttft_ms_p99": 41.5
+        },
+        {
+          "minute": 14.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 717.5,
+          "tokens_out": 43059,
+          "tokens_in": 0,
+          "requests_completed": 119,
+          "ttft_ms_p50": 34.6,
+          "ttft_ms_p99": 41.7
+        }
+      ],
+      "sustained_throughput_tokens_per_sec": 706.9,
+      "throttle_ratio": 0.899,
+      "throttle_onset_minute": 1.0,
+      "ttft_p99_drift_ms": -360.1
+    }
+  },
+  "accuracy": {
+    "subset_score": 0.56,
+    "baseline_delta": 0.0,
+    "valid": true,
+    "framework": "vLLM",
+    "precision": "BF16",
+    "notes": "Integrated accuracy check \u2014 used same vLLM instance as benchmark."
+  },
+  "meta": {
+    "submitted_by": "JuhaoLiang1997",
+    "submission_type": "individual",
+    "date": "2026-05-18",
+    "time": "06:01:31",
+    "run_id": "ffd81462",
+    "run_name": "nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_vllm020_0f6c56e4/runner.py",
+    "env_info_file": "../../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-18T05:59:03.115858+00:00",
+    "benchmark_end_time": "2026-05-18T06:01:31.820089+00:00",
+    "benchmark_elapsed_minutes": 25.3,
+    "model_load_seconds": 35.2,
+    "benchmark_elapsed_minutes_note": "Total across ['offline', 'online', 'sustained'] scenarios.",
+    "scenario_dirs": {
+      "offline": "results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/bf16/offline",
+      "online": "results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/bf16/online",
+      "sustained": "results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/bf16/sustained"
+    }
+  }
+}
diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/bf16/sustained/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/bf16/sustained/result.json
new file mode 100644
index 00000000..ee4ebabf
--- /dev/null
+++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/bf16/sustained/result.json
@@ -0,0 +1,274 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_C",
+  "implementation_id": "nvidia_vllm020_0f6c56e4",
+  "chip": {
+    "name": "NVIDIA A100-SXM4-80GB",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 80.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-18T05:56:25.789998+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A100-SXM4-80GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 80.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_platform": "nvidia",
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tPXB\tSYS\tSYS\t0-63,128-191\t0\t\tN/A\nNIC0\tPXB\t X \tSYS\tSYS\t\t\t\t\nNIC1\tSYS\tSYS\t X \tPIX\t\t\t\t\nNIC2\tSYS\tSYS\tPIX\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "AMD EPYC 7742 64-Core Processor",
+      "physical_cores": 128,
+      "logical_cores": 255,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.7,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.12.0",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 13.0",
+    "pytorch_version": "2.11.0+cu130"
+  },
+  "software": {
+    "framework": "vLLM",
+    "framework_version": "0.20.1+transformers-5.8.1",
+    "driver_version": "580.65.06",
+    "runtime_version": "CUDA 13.0",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.12.0"
+  },
+  "model": {
+    "model_id": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_revision": "0e9e39f249a16976918f6564b8830bc894c89659",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": null,
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "sustained",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "sustained": {
+      "sustained_concurrency": 8,
+      "duration_minutes": 15,
+      "warmup_minutes": 1,
+      "sample_interval_seconds": 60,
+      "samples": [
+        {
+          "minute": 1.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 655.0,
+          "tokens_out": 39334,
+          "tokens_in": 0,
+          "requests_completed": 109,
+          "ttft_ms_p50": 44.4,
+          "ttft_ms_p99": 401.8
+        },
+        {
+          "minute": 2.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 711.0,
+          "tokens_out": 42652,
+          "tokens_in": 0,
+          "requests_completed": 124,
+          "ttft_ms_p50": 34.5,
+          "ttft_ms_p99": 42.5
+        },
+        {
+          "minute": 3.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 698.3,
+          "tokens_out": 41902,
+          "tokens_in": 0,
+          "requests_completed": 116,
+          "ttft_ms_p50": 34.6,
+          "ttft_ms_p99": 41.3
+        },
+        {
+          "minute": 4.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 718.8,
+          "tokens_out": 43114,
+          "tokens_in": 0,
+          "requests_completed": 116,
+          "ttft_ms_p50": 34.6,
+          "ttft_ms_p99": 36.5
+        },
+        {
+          "minute": 5.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 724.4,
+          "tokens_out": 43451,
+          "tokens_in": 0,
+          "requests_completed": 124,
+          "ttft_ms_p50": 34.5,
+          "ttft_ms_p99": 41.7
+        },
+        {
+          "minute": 6.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 701.8,
+          "tokens_out": 42133,
+          "tokens_in": 0,
+          "requests_completed": 115,
+          "ttft_ms_p50": 34.5,
+          "ttft_ms_p99": 36.1
+        },
+        {
+          "minute": 7.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 706.9,
+          "tokens_out": 42401,
+          "tokens_in": 0,
+          "requests_completed": 122,
+          "ttft_ms_p50": 34.5,
+          "ttft_ms_p99": 42.9
+        },
+        {
+          "minute": 8.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 720.6,
+          "tokens_out": 43232,
+          "tokens_in": 0,
+          "requests_completed": 120,
+          "ttft_ms_p50": 34.5,
+          "ttft_ms_p99": 41.5
+        },
+        {
+          "minute": 9.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 697.1,
+          "tokens_out": 41830,
+          "tokens_in": 0,
+          "requests_completed": 116,
+          "ttft_ms_p50": 34.6,
+          "ttft_ms_p99": 43.9
+        },
+        {
+          "minute": 10.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 726.2,
+          "tokens_out": 43597,
+          "tokens_in": 0,
+          "requests_completed": 123,
+          "ttft_ms_p50": 34.4,
+          "ttft_ms_p99": 35.9
+        },
+        {
+          "minute": 11.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 701.9,
+          "tokens_out": 42083,
+          "tokens_in": 0,
+          "requests_completed": 116,
+          "ttft_ms_p50": 34.5,
+          "ttft_ms_p99": 36.1
+        },
+        {
+          "minute": 12.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 728.3,
+          "tokens_out": 43715,
+          "tokens_in": 0,
+          "requests_completed": 121,
+          "ttft_ms_p50": 34.5,
+          "ttft_ms_p99": 43.1
+        },
+        {
+          "minute": 13.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 688.9,
+          "tokens_out": 41331,
+          "tokens_in": 0,
+          "requests_completed": 119,
+          "ttft_ms_p50": 34.6,
+          "ttft_ms_p99": 41.5
+        },
+        {
+          "minute": 14.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 717.5,
+          "tokens_out": 43059,
+          "tokens_in": 0,
+          "requests_completed": 119,
+          "ttft_ms_p50": 34.6,
+          "ttft_ms_p99": 41.7
+        }
+      ],
+      "sustained_throughput_tokens_per_sec": 706.9,
+      "throttle_ratio": 0.899,
+      "throttle_onset_minute": 1.0,
+      "ttft_p99_drift_ms": -360.1
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "JuhaoLiang1997",
+    "submission_type": "individual",
+    "date": "2026-05-18",
+    "time": "06:26:58",
+    "run_id": "ffd81462",
+    "run_name": "nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_vllm020_0f6c56e4/runner.py",
+    "env_info_file": "../../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-18T06:11:50.049074+00:00",
+    "benchmark_end_time": "2026-05-18T06:26:58.575027+00:00",
+    "benchmark_elapsed_minutes": 15.1,
+    "model_load_seconds": 41.2
+  }
+}
diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/env_info.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/env_info.json
new file mode 100644
index 00000000..a73d4175
--- /dev/null
+++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/env_info.json
@@ -0,0 +1,49 @@
+{
+  "collected_at": "2026-05-18T05:56:25.789998+00:00",
+  "accelerators": [
+    {
+      "index": 0,
+      "name": "NVIDIA A100-SXM4-80GB",
+      "vendor": "NVIDIA",
+      "memory_gb": 80.0,
+      "driver_version": "580.65.06",
+      "firmware_version": null,
+      "compute_capability": "8.0",
+      "supports_bf16": true
+    }
+  ],
+  "accelerator_platform": "nvidia",
+  "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tPXB\tSYS\tSYS\t0-63,128-191\t0\t\tN/A\nNIC0\tPXB\t X \tSYS\tSYS\t\t\t\t\nNIC1\tSYS\tSYS\t X \tPIX\t\t\t\t\nNIC2\tSYS\tSYS\tPIX\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n\n",
+  "intra_node_interconnect": null,
+  "cpu": {
+    "model": "AMD EPYC 7742 64-Core Processor",
+    "physical_cores": 128,
+    "logical_cores": 255,
+    "numa_nodes": 2
+  },
+  "system_memory_gb": 1007.7,
+  "pcie_generation": "PCIe Gen 4",
+  "cpu_accelerator_bandwidth_gbs": null,
+  "network_interfaces": [
+    {
+      "name": "mlx5_0",
+      "type": "InfiniBand/RoCE",
+      "bandwidth_gbps": null
+    },
+    {
+      "name": "mlx5_1",
+      "type": "InfiniBand/RoCE",
+      "bandwidth_gbps": null
+    },
+    {
+      "name": "mlx5_2",
+      "type": "InfiniBand/RoCE",
+      "bandwidth_gbps": null
+    }
+  ],
+  "os": "Ubuntu 22.04.4 LTS",
+  "python_version": "3.12.0",
+  "kernel_version": "5.15.0-60-generic",
+  "runtime_version": "CUDA 13.0",
+  "pytorch_version": "2.11.0+cu130"
+}
\ No newline at end of file
diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/result.json
new file mode 100644
index 00000000..32d0a7b7
--- /dev/null
+++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/result.json
@@ -0,0 +1,1519 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_C",
+  "implementation_id": "nvidia_vllm020_0f6c56e4",
+  "chip": {
+    "name": "NVIDIA A100-SXM4-80GB",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 80.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "software": {
+    "framework": "vLLM",
+    "framework_version": "0.20.1+transformers-5.8.1",
+    "driver_version": "580.65.06",
+    "runtime_version": "CUDA 13.0",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.12.0"
+  },
+  "model": {
+    "model_id": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_revision": "0e9e39f249a16976918f6564b8830bc894c89659",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original",
+    "_note": "suite model_id. Each precision level uses its own quantized checkpoint."
+  },
+  "task": {
+    "scenarios_run": [
+      "accuracy",
+      "offline",
+      "online",
+      "sustained"
+    ],
+    "precision_levels_run": [
+      "BF16",
+      "FP8",
+      "W8A8",
+      "W8A16",
+      "W4A16"
+    ],
+    "precision_levels_skipped": [
+      "FP16"
+    ],
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "num_runs": 3,
+    "extra_config": null
+  },
+  "metrics": {
+    "quantization": {
+      "results_by_precision": [
+        {
+          "precision": "BF16",
+          "model_id": "meta-llama/Llama-3.1-8B-Instruct",
+          "best_throughput_tokens_per_sec": 3888.91,
+          "accuracy_score": 0.56,
+          "accuracy_baseline_delta": 0.0,
+          "accuracy_valid": true,
+          "quality_efficiency": 2177.8,
+          "speedup_vs_bf16": 1.0,
+          "results_by_concurrency": [
+            {
+              "client_concurrency": 1,
+              "throughput_tokens_per_sec": 3888.91,
+              "throughput_tokens_per_sec_per_chip": 3888.91,
+              "throughput_tokens_per_sec_total": 6956.79,
+              "elapsed_seconds_median": 9.2,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            },
+            {
+              "client_concurrency": 4,
+              "throughput_tokens_per_sec": 3886.06,
+              "throughput_tokens_per_sec_per_chip": 3886.06,
+              "throughput_tokens_per_sec_total": 6943.56,
+              "elapsed_seconds_median": 9.2,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            },
+            {
+              "client_concurrency": 16,
+              "throughput_tokens_per_sec": 3885.21,
+              "throughput_tokens_per_sec_per_chip": 3885.21,
+              "throughput_tokens_per_sec_total": 6935.62,
+              "elapsed_seconds_median": 9.2,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            },
+            {
+              "client_concurrency": 64,
+              "throughput_tokens_per_sec": 3887.73,
+              "throughput_tokens_per_sec_per_chip": 3887.73,
+              "throughput_tokens_per_sec_total": 6949.35,
+              "elapsed_seconds_median": 9.2,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            }
+          ],
+          "result_dir": "bf16",
+          "effective_dtype": "bfloat16",
+          "quantization_method": null
+        },
+        {
+          "precision": "FP8",
+          "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-FP8",
+          "best_throughput_tokens_per_sec": 4141.71,
+          "accuracy_score": 0.0,
+          "accuracy_baseline_delta": -0.58,
+          "accuracy_valid": false,
+          "quality_efficiency": null,
+          "speedup_vs_bf16": 1.065,
+          "results_by_concurrency": [
+            {
+              "client_concurrency": 1,
+              "throughput_tokens_per_sec": 4141.71,
+              "throughput_tokens_per_sec_per_chip": 4141.71,
+              "throughput_tokens_per_sec_total": 6418.35,
+              "elapsed_seconds_median": 12.4,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            },
+            {
+              "client_concurrency": 4,
+              "throughput_tokens_per_sec": 4130.72,
+              "throughput_tokens_per_sec_per_chip": 4130.72,
+              "throughput_tokens_per_sec_total": 6401.32,
+              "elapsed_seconds_median": 12.4,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            },
+            {
+              "client_concurrency": 16,
+              "throughput_tokens_per_sec": 4124.42,
+              "throughput_tokens_per_sec_per_chip": 4124.42,
+              "throughput_tokens_per_sec_total": 6391.57,
+              "elapsed_seconds_median": 12.4,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            },
+            {
+              "client_concurrency": 64,
+              "throughput_tokens_per_sec": 4131.44,
+              "throughput_tokens_per_sec_per_chip": 4131.44,
+              "throughput_tokens_per_sec_total": 6402.45,
+              "elapsed_seconds_median": 12.4,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            }
+          ],
+          "result_dir": "fp8",
+          "effective_dtype": "bfloat16",
+          "quantization_method": "compressed-tensors"
+        },
+        {
+          "precision": "W8A8",
+          "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w8a8",
+          "best_throughput_tokens_per_sec": 3208.11,
+          "accuracy_score": 0.59,
+          "accuracy_baseline_delta": 0.0,
+          "accuracy_valid": true,
+          "quality_efficiency": 1892.8,
+          "speedup_vs_bf16": 0.825,
+          "results_by_concurrency": [
+            {
+              "client_concurrency": 1,
+              "throughput_tokens_per_sec": 3208.11,
+              "throughput_tokens_per_sec_per_chip": 3208.11,
+              "throughput_tokens_per_sec_total": 5840.36,
+              "elapsed_seconds_median": 10.7,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            },
+            {
+              "client_concurrency": 4,
+              "throughput_tokens_per_sec": 3140.16,
+              "throughput_tokens_per_sec_per_chip": 3140.16,
+              "throughput_tokens_per_sec_total": 5706.63,
+              "elapsed_seconds_median": 11.0,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            },
+            {
+              "client_concurrency": 16,
+              "throughput_tokens_per_sec": 3193.23,
+              "throughput_tokens_per_sec_per_chip": 3193.23,
+              "throughput_tokens_per_sec_total": 5813.28,
+              "elapsed_seconds_median": 10.7,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            },
+            {
+              "client_concurrency": 64,
+              "throughput_tokens_per_sec": 3175.58,
+              "throughput_tokens_per_sec_per_chip": 3175.58,
+              "throughput_tokens_per_sec_total": 5786.77,
+              "elapsed_seconds_median": 10.8,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            }
+          ],
+          "result_dir": "w8a8",
+          "effective_dtype": "bfloat16",
+          "quantization_method": "compressed-tensors"
+        },
+        {
+          "precision": "W8A16",
+          "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w8a16",
+          "best_throughput_tokens_per_sec": 3547.44,
+          "accuracy_score": 0.58,
+          "accuracy_baseline_delta": -0.01,
+          "accuracy_valid": true,
+          "quality_efficiency": 2057.5,
+          "speedup_vs_bf16": 0.912,
+          "results_by_concurrency": [
+            {
+              "client_concurrency": 1,
+              "throughput_tokens_per_sec": 3533.68,
+              "throughput_tokens_per_sec_per_chip": 3533.68,
+              "throughput_tokens_per_sec_total": 6328.84,
+              "elapsed_seconds_median": 10.1,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            },
+            {
+              "client_concurrency": 4,
+              "throughput_tokens_per_sec": 3510.7,
+              "throughput_tokens_per_sec_per_chip": 3510.7,
+              "throughput_tokens_per_sec_total": 6292.5,
+              "elapsed_seconds_median": 10.1,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            },
+            {
+              "client_concurrency": 16,
+              "throughput_tokens_per_sec": 3535.13,
+              "throughput_tokens_per_sec_per_chip": 3535.13,
+              "throughput_tokens_per_sec_total": 6324.07,
+              "elapsed_seconds_median": 10.1,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            },
+            {
+              "client_concurrency": 64,
+              "throughput_tokens_per_sec": 3547.44,
+              "throughput_tokens_per_sec_per_chip": 3547.44,
+              "throughput_tokens_per_sec_total": 6336.33,
+              "elapsed_seconds_median": 10.1,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            }
+          ],
+          "result_dir": "w8a16",
+          "effective_dtype": "bfloat16",
+          "quantization_method": "compressed-tensors"
+        },
+        {
+          "precision": "W4A16",
+          "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w4a16",
+          "best_throughput_tokens_per_sec": 1889.19,
+          "accuracy_score": 0.56,
+          "accuracy_baseline_delta": -0.01,
+          "accuracy_valid": true,
+          "quality_efficiency": 1057.9,
+          "speedup_vs_bf16": 0.486,
+          "results_by_concurrency": [
+            {
+              "client_concurrency": 1,
+              "throughput_tokens_per_sec": 1889.19,
+              "throughput_tokens_per_sec_per_chip": 1889.19,
+              "throughput_tokens_per_sec_total": 3433.47,
+              "elapsed_seconds_median": 18.2,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            },
+            {
+              "client_concurrency": 4,
+              "throughput_tokens_per_sec": 1862.45,
+              "throughput_tokens_per_sec_per_chip": 1862.45,
+              "throughput_tokens_per_sec_total": 3376.95,
+              "elapsed_seconds_median": 18.6,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            },
+            {
+              "client_concurrency": 16,
+              "throughput_tokens_per_sec": 1861.34,
+              "throughput_tokens_per_sec_per_chip": 1861.34,
+              "throughput_tokens_per_sec_total": 3375.2,
+              "elapsed_seconds_median": 18.6,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            },
+            {
+              "client_concurrency": 64,
+              "throughput_tokens_per_sec": 1851.04,
+              "throughput_tokens_per_sec_per_chip": 1851.04,
+              "throughput_tokens_per_sec_total": 3367.3,
+              "elapsed_seconds_median": 18.6,
+              "peak_memory_gb": null,
+              "power_watts_avg": null,
+              "power_watts_peak": null,
+              "oom": false,
+              "_throughput_note": "output_only",
+              "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+            }
+          ],
+          "result_dir": "w4a16",
+          "effective_dtype": "float16",
+          "quantization_method": "gptq"
+        }
+      ]
+    },
+    "derived": {},
+    "quantization_online": {
+      "results_by_precision": [
+        {
+          "precision": "BF16",
+          "max_valid_qps": 10,
+          "results_by_qps": [
+            {
+              "target_qps": 5,
+              "achieved_qps": 5.0,
+              "ttft_ms_p50": 41.15,
+              "ttft_ms_p90": 61.6,
+              "ttft_ms_p99": 96.42,
+              "tpot_ms_p50": 13.32,
+              "tpot_ms_p90": 14.41,
+              "tpot_ms_p99": 14.81,
+              "elapsed_seconds_median": 68.9,
+              "sla_met": true
+            },
+            {
+              "target_qps": 10,
+              "achieved_qps": 10.0,
+              "ttft_ms_p50": 52.06,
+              "ttft_ms_p90": 63.35,
+              "ttft_ms_p99": 69.96,
+              "tpot_ms_p50": 17.62,
+              "tpot_ms_p90": 18.95,
+              "tpot_ms_p99": 19.52,
+              "elapsed_seconds_median": 36.5,
+              "sla_met": true
+            },
+            {
+              "target_qps": 25,
+              "achieved_qps": 25.0,
+              "ttft_ms_p50": 86.32,
+              "ttft_ms_p90": 5056.12,
+              "ttft_ms_p99": 5979.32,
+              "tpot_ms_p50": 22.47,
+              "tpot_ms_p90": 25.06,
+              "tpot_ms_p99": 26.76,
+              "elapsed_seconds_median": 26.1,
+              "sla_met": false
+            },
+            {
+              "target_qps": 50,
+              "achieved_qps": 50.0,
+              "ttft_ms_p50": 891.06,
+              "ttft_ms_p90": 8527.64,
+              "ttft_ms_p99": 9213.57,
+              "tpot_ms_p50": 22.37,
+              "tpot_ms_p90": 24.86,
+              "tpot_ms_p99": 30.84,
+              "elapsed_seconds_median": 23.9,
+              "sla_met": false
+            }
+          ]
+        },
+        {
+          "precision": "FP8",
+          "max_valid_qps": 5,
+          "results_by_qps": [
+            {
+              "target_qps": 5,
+              "achieved_qps": 5.0,
+              "ttft_ms_p50": 53.12,
+              "ttft_ms_p90": 73.69,
+              "ttft_ms_p99": 115.17,
+              "tpot_ms_p50": 18.68,
+              "tpot_ms_p90": 20.12,
+              "tpot_ms_p99": 20.68,
+              "elapsed_seconds_median": 72.2,
+              "sla_met": true
+            },
+            {
+              "target_qps": 10,
+              "achieved_qps": 10.0,
+              "ttft_ms_p50": 84.99,
+              "ttft_ms_p90": 1892.71,
+              "ttft_ms_p99": 3191.04,
+              "tpot_ms_p50": 26.19,
+              "tpot_ms_p90": 27.78,
+              "tpot_ms_p99": 28.06,
+              "elapsed_seconds_median": 43.0,
+              "sla_met": false
+            },
+            {
+              "target_qps": 25,
+              "achieved_qps": 25.0,
+              "ttft_ms_p50": 6847.07,
+              "ttft_ms_p90": 15210.93,
+              "ttft_ms_p99": 16362.21,
+              "tpot_ms_p50": 25.97,
+              "tpot_ms_p90": 26.93,
+              "tpot_ms_p99": 27.06,
+              "elapsed_seconds_median": 38.8,
+              "sla_met": false
+            },
+            {
+              "target_qps": 50,
+              "achieved_qps": 50.0,
+              "ttft_ms_p50": 10056.63,
+              "ttft_ms_p90": 20353.69,
+              "ttft_ms_p99": 21149.06,
+              "tpot_ms_p50": 25.43,
+              "tpot_ms_p90": 26.15,
+              "tpot_ms_p99": 26.2,
+              "elapsed_seconds_median": 37.3,
+              "sla_met": false
+            }
+          ]
+        },
+        {
+          "precision": "W8A8",
+          "max_valid_qps": 10,
+          "results_by_qps": [
+            {
+              "target_qps": 5,
+              "achieved_qps": 5.0,
+              "ttft_ms_p50": 55.34,
+              "ttft_ms_p90": 63.29,
+              "ttft_ms_p99": 69.75,
+              "tpot_ms_p50": 20.67,
+              "tpot_ms_p90": 20.88,
+              "tpot_ms_p99": 21.3,
+              "elapsed_seconds_median": 72.9,
+              "sla_met": true
+            },
+            {
+              "target_qps": 10,
+              "achieved_qps": 10.0,
+              "ttft_ms_p50": 57.42,
+              "ttft_ms_p90": 66.6,
+              "ttft_ms_p99": 69.92,
+              "tpot_ms_p50": 21.28,
+              "tpot_ms_p90": 22.19,
+              "tpot_ms_p99": 22.28,
+              "elapsed_seconds_median": 39.6,
+              "sla_met": true
+            },
+            {
+              "target_qps": 25,
+              "achieved_qps": 25.0,
+              "ttft_ms_p50": 74.55,
+              "ttft_ms_p90": 4438.81,
+              "ttft_ms_p99": 5421.82,
+              "tpot_ms_p50": 22.53,
+              "tpot_ms_p90": 23.69,
+              "tpot_ms_p99": 25.14,
+              "elapsed_seconds_median": 27.7,
+              "sla_met": false
+            },
+            {
+              "target_qps": 50,
+              "achieved_qps": 50.0,
+              "ttft_ms_p50": 985.11,
+              "ttft_ms_p90": 8331.22,
+              "ttft_ms_p99": 8868.55,
+              "tpot_ms_p50": 23.38,
+              "tpot_ms_p90": 24.38,
+              "tpot_ms_p99": 26.79,
+              "elapsed_seconds_median": 25.6,
+              "sla_met": false
+            }
+          ]
+        },
+        {
+          "precision": "W8A16",
+          "max_valid_qps": 10,
+          "results_by_qps": [
+            {
+              "target_qps": 5,
+              "achieved_qps": 5.0,
+              "ttft_ms_p50": 46.32,
+              "ttft_ms_p90": 62.37,
+              "ttft_ms_p99": 104.49,
+              "tpot_ms_p50": 16.66,
+              "tpot_ms_p90": 17.7,
+              "tpot_ms_p99": 18.28,
+              "elapsed_seconds_median": 70.3,
+              "sla_met": true
+            },
+            {
+              "target_qps": 10,
+              "achieved_qps": 10.0,
+              "ttft_ms_p50": 57.25,
+              "ttft_ms_p90": 72.45,
+              "ttft_ms_p99": 81.09,
+              "tpot_ms_p50": 20.79,
+              "tpot_ms_p90": 22.4,
+              "tpot_ms_p99": 23.2,
+              "elapsed_seconds_median": 37.9,
+              "sla_met": true
+            },
+            {
+              "target_qps": 25,
+              "achieved_qps": 25.0,
+              "ttft_ms_p50": 93.4,
+              "ttft_ms_p90": 6429.1,
+              "ttft_ms_p99": 7429.34,
+              "tpot_ms_p50": 25.13,
+              "tpot_ms_p90": 28.01,
+              "tpot_ms_p99": 30.79,
+              "elapsed_seconds_median": 28.8,
+              "sla_met": false
+            },
+            {
+              "target_qps": 50,
+              "achieved_qps": 50.0,
+              "ttft_ms_p50": 1306.21,
+              "ttft_ms_p90": 9937.77,
+              "ttft_ms_p99": 10640.43,
+              "tpot_ms_p50": 25.06,
+              "tpot_ms_p90": 27.42,
+              "tpot_ms_p99": 34.24,
+              "elapsed_seconds_median": 26.4,
+              "sla_met": false
+            }
+          ]
+        },
+        {
+          "precision": "W4A16",
+          "max_valid_qps": 10,
+          "results_by_qps": [
+            {
+              "target_qps": 5,
+              "achieved_qps": 5.0,
+              "ttft_ms_p50": 50.42,
+              "ttft_ms_p90": 64.62,
+              "ttft_ms_p99": 104.73,
+              "tpot_ms_p50": 18.15,
+              "tpot_ms_p90": 19.16,
+              "tpot_ms_p99": 19.62,
+              "elapsed_seconds_median": 71.1,
+              "sla_met": true
+            },
+            {
+              "target_qps": 10,
+              "achieved_qps": 10.0,
+              "ttft_ms_p50": 57.89,
+              "ttft_ms_p90": 72.82,
+              "ttft_ms_p99": 84.96,
+              "tpot_ms_p50": 21.11,
+              "tpot_ms_p90": 23.03,
+              "tpot_ms_p99": 24.31,
+              "elapsed_seconds_median": 38.5,
+              "sla_met": true
+            },
+            {
+              "target_qps": 25,
+              "achieved_qps": 25.0,
+              "ttft_ms_p50": 97.24,
+              "ttft_ms_p90": 6365.08,
+              "ttft_ms_p99": 7073.94,
+              "tpot_ms_p50": 25.46,
+              "tpot_ms_p90": 27.98,
+              "tpot_ms_p99": 31.21,
+              "elapsed_seconds_median": 29.2,
+              "sla_met": false
+            },
+            {
+              "target_qps": 50,
+              "achieved_qps": 50.0,
+              "ttft_ms_p50": 918.12,
+              "ttft_ms_p90": 9805.4,
+              "ttft_ms_p99": 10437.5,
+              "tpot_ms_p50": 25.2,
+              "tpot_ms_p90": 27.67,
+              "tpot_ms_p99": 32.45,
+              "elapsed_seconds_median": 26.7,
+              "sla_met": false
+            }
+          ]
+        }
+      ]
+    },
+    "quantization_sustained": {
+      "results_by_precision": [
+        {
+          "precision": "BF16",
+          "sustained_throughput_tokens_per_sec": 706.9,
+          "throttle_ratio": 0.899,
+          "throttle_onset_minute": 1.0,
+          "ttft_p99_drift_ms": -360.1,
+          "sustained_concurrency": 8,
+          "duration_minutes": 15,
+          "samples": [
+            {
+              "minute": 1.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 655.0,
+              "tokens_out": 39334,
+              "tokens_in": 0,
+              "requests_completed": 109,
+              "ttft_ms_p50": 44.4,
+              "ttft_ms_p99": 401.8
+            },
+            {
+              "minute": 2.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 711.0,
+              "tokens_out": 42652,
+              "tokens_in": 0,
+              "requests_completed": 124,
+              "ttft_ms_p50": 34.5,
+              "ttft_ms_p99": 42.5
+            },
+            {
+              "minute": 3.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 698.3,
+              "tokens_out": 41902,
+              "tokens_in": 0,
+              "requests_completed": 116,
+              "ttft_ms_p50": 34.6,
+              "ttft_ms_p99": 41.3
+            },
+            {
+              "minute": 4.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 718.8,
+              "tokens_out": 43114,
+              "tokens_in": 0,
+              "requests_completed": 116,
+              "ttft_ms_p50": 34.6,
+              "ttft_ms_p99": 36.5
+            },
+            {
+              "minute": 5.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 724.4,
+              "tokens_out": 43451,
+              "tokens_in": 0,
+              "requests_completed": 124,
+              "ttft_ms_p50": 34.5,
+              "ttft_ms_p99": 41.7
+            },
+            {
+              "minute": 6.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 701.8,
+              "tokens_out": 42133,
+              "tokens_in": 0,
+              "requests_completed": 115,
+              "ttft_ms_p50": 34.5,
+              "ttft_ms_p99": 36.1
+            },
+            {
+              "minute": 7.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 706.9,
+              "tokens_out": 42401,
+              "tokens_in": 0,
+              "requests_completed": 122,
+              "ttft_ms_p50": 34.5,
+              "ttft_ms_p99": 42.9
+            },
+            {
+              "minute": 8.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 720.6,
+              "tokens_out": 43232,
+              "tokens_in": 0,
+              "requests_completed": 120,
+              "ttft_ms_p50": 34.5,
+              "ttft_ms_p99": 41.5
+            },
+            {
+              "minute": 9.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 697.1,
+              "tokens_out": 41830,
+              "tokens_in": 0,
+              "requests_completed": 116,
+              "ttft_ms_p50": 34.6,
+              "ttft_ms_p99": 43.9
+            },
+            {
+              "minute": 10.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 726.2,
+              "tokens_out": 43597,
+              "tokens_in": 0,
+              "requests_completed": 123,
+              "ttft_ms_p50": 34.4,
+              "ttft_ms_p99": 35.9
+            },
+            {
+              "minute": 11.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 701.9,
+              "tokens_out": 42083,
+              "tokens_in": 0,
+              "requests_completed": 116,
+              "ttft_ms_p50": 34.5,
+              "ttft_ms_p99": 36.1
+            },
+            {
+              "minute": 12.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 728.3,
+              "tokens_out": 43715,
+              "tokens_in": 0,
+              "requests_completed": 121,
+              "ttft_ms_p50": 34.5,
+              "ttft_ms_p99": 43.1
+            },
+            {
+              "minute": 13.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 688.9,
+              "tokens_out": 41331,
+              "tokens_in": 0,
+              "requests_completed": 119,
+              "ttft_ms_p50": 34.6,
+              "ttft_ms_p99": 41.5
+            },
+            {
+              "minute": 14.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 717.5,
+              "tokens_out": 43059,
+              "tokens_in": 0,
+              "requests_completed": 119,
+              "ttft_ms_p50": 34.6,
+              "ttft_ms_p99": 41.7
+            }
+          ]
+        },
+        {
+          "precision": "FP8",
+          "sustained_throughput_tokens_per_sec": 438.9,
+          "throttle_ratio": 0.856,
+          "throttle_onset_minute": 1.0,
+          "ttft_p99_drift_ms": -644.7,
+          "sustained_concurrency": 8,
+          "duration_minutes": 15,
+          "samples": [
+            {
+              "minute": 1.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 409.6,
+              "tokens_out": 24576,
+              "tokens_in": 0,
+              "requests_completed": 48,
+              "ttft_ms_p50": 178.1,
+              "ttft_ms_p99": 701.6
+            },
+            {
+              "minute": 2.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 409.6,
+              "tokens_out": 24576,
+              "tokens_in": 0,
+              "requests_completed": 48,
+              "ttft_ms_p50": 186.0,
+              "ttft_ms_p99": 236.5
+            },
+            {
+              "minute": 3.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 477.8,
+              "tokens_out": 28672,
+              "tokens_in": 0,
+              "requests_completed": 56,
+              "ttft_ms_p50": 51.0,
+              "ttft_ms_p99": 125.3
+            },
+            {
+              "minute": 4.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 409.5,
+              "tokens_out": 24576,
+              "tokens_in": 0,
+              "requests_completed": 48,
+              "ttft_ms_p50": 50.3,
+              "ttft_ms_p99": 55.6
+            },
+            {
+              "minute": 5.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 478.0,
+              "tokens_out": 28672,
+              "tokens_in": 0,
+              "requests_completed": 56,
+              "ttft_ms_p50": 50.6,
+              "ttft_ms_p99": 54.6
+            },
+            {
+              "minute": 6.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 409.3,
+              "tokens_out": 24576,
+              "tokens_in": 0,
+              "requests_completed": 48,
+              "ttft_ms_p50": 51.2,
+              "ttft_ms_p99": 57.3
+            },
+            {
+              "minute": 7.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 478.2,
+              "tokens_out": 28672,
+              "tokens_in": 0,
+              "requests_completed": 56,
+              "ttft_ms_p50": 50.5,
+              "ttft_ms_p99": 56.4
+            },
+            {
+              "minute": 8.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 409.4,
+              "tokens_out": 24576,
+              "tokens_in": 0,
+              "requests_completed": 48,
+              "ttft_ms_p50": 51.3,
+              "ttft_ms_p99": 62.9
+            },
+            {
+              "minute": 9.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 478.0,
+              "tokens_out": 28672,
+              "tokens_in": 0,
+              "requests_completed": 56,
+              "ttft_ms_p50": 51.1,
+              "ttft_ms_p99": 59.5
+            },
+            {
+              "minute": 10.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 409.4,
+              "tokens_out": 24576,
+              "tokens_in": 0,
+              "requests_completed": 48,
+              "ttft_ms_p50": 50.5,
+              "ttft_ms_p99": 55.7
+            },
+            {
+              "minute": 11.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 478.2,
+              "tokens_out": 28672,
+              "tokens_in": 0,
+              "requests_completed": 56,
+              "ttft_ms_p50": 51.4,
+              "ttft_ms_p99": 58.1
+            },
+            {
+              "minute": 12.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 409.6,
+              "tokens_out": 24576,
+              "tokens_in": 0,
+              "requests_completed": 48,
+              "ttft_ms_p50": 51.6,
+              "ttft_ms_p99": 59.1
+            },
+            {
+              "minute": 13.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 477.6,
+              "tokens_out": 28672,
+              "tokens_in": 0,
+              "requests_completed": 56,
+              "ttft_ms_p50": 51.4,
+              "ttft_ms_p99": 58.2
+            },
+            {
+              "minute": 14.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 409.8,
+              "tokens_out": 24576,
+              "tokens_in": 0,
+              "requests_completed": 48,
+              "ttft_ms_p50": 51.3,
+              "ttft_ms_p99": 56.9
+            }
+          ]
+        },
+        {
+          "precision": "W8A8",
+          "sustained_throughput_tokens_per_sec": 399.4,
+          "throttle_ratio": 0.879,
+          "throttle_onset_minute": 1.0,
+          "ttft_p99_drift_ms": -331.5,
+          "sustained_concurrency": 8,
+          "duration_minutes": 15,
+          "samples": [
+            {
+              "minute": 1.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 366.9,
+              "tokens_out": 22031,
+              "tokens_in": 0,
+              "requests_completed": 63,
+              "ttft_ms_p50": 59.1,
+              "ttft_ms_p99": 396.4
+            },
+            {
+              "minute": 2.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 402.7,
+              "tokens_out": 24148,
+              "tokens_in": 0,
+              "requests_completed": 71,
+              "ttft_ms_p50": 58.8,
+              "ttft_ms_p99": 63.4
+            },
+            {
+              "minute": 3.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 400.8,
+              "tokens_out": 24050,
+              "tokens_in": 0,
+              "requests_completed": 66,
+              "ttft_ms_p50": 58.5,
+              "ttft_ms_p99": 61.0
+            },
+            {
+              "minute": 4.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 402.1,
+              "tokens_out": 24127,
+              "tokens_in": 0,
+              "requests_completed": 66,
+              "ttft_ms_p50": 58.0,
+              "ttft_ms_p99": 61.2
+            },
+            {
+              "minute": 5.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 395.3,
+              "tokens_out": 23722,
+              "tokens_in": 0,
+              "requests_completed": 71,
+              "ttft_ms_p50": 58.7,
+              "ttft_ms_p99": 64.5
+            },
+            {
+              "minute": 6.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 398.4,
+              "tokens_out": 23893,
+              "tokens_in": 0,
+              "requests_completed": 64,
+              "ttft_ms_p50": 58.7,
+              "ttft_ms_p99": 63.0
+            },
+            {
+              "minute": 7.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 410.0,
+              "tokens_out": 24605,
+              "tokens_in": 0,
+              "requests_completed": 66,
+              "ttft_ms_p50": 58.3,
+              "ttft_ms_p99": 62.1
+            },
+            {
+              "minute": 8.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 398.6,
+              "tokens_out": 23918,
+              "tokens_in": 0,
+              "requests_completed": 71,
+              "ttft_ms_p50": 59.5,
+              "ttft_ms_p99": 65.7
+            },
+            {
+              "minute": 9.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 390.3,
+              "tokens_out": 23418,
+              "tokens_in": 0,
+              "requests_completed": 64,
+              "ttft_ms_p50": 58.3,
+              "ttft_ms_p99": 62.8
+            },
+            {
+              "minute": 10.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 417.2,
+              "tokens_out": 25045,
+              "tokens_in": 0,
+              "requests_completed": 69,
+              "ttft_ms_p50": 58.2,
+              "ttft_ms_p99": 61.0
+            },
+            {
+              "minute": 11.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 391.1,
+              "tokens_out": 23462,
+              "tokens_in": 0,
+              "requests_completed": 70,
+              "ttft_ms_p50": 57.8,
+              "ttft_ms_p99": 62.4
+            },
+            {
+              "minute": 12.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 408.7,
+              "tokens_out": 24514,
+              "tokens_in": 0,
+              "requests_completed": 66,
+              "ttft_ms_p50": 58.5,
+              "ttft_ms_p99": 65.6
+            },
+            {
+              "minute": 13.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 415.2,
+              "tokens_out": 24925,
+              "tokens_in": 0,
+              "requests_completed": 71,
+              "ttft_ms_p50": 58.2,
+              "ttft_ms_p99": 61.3
+            },
+            {
+              "minute": 14.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 395.0,
+              "tokens_out": 23687,
+              "tokens_in": 0,
+              "requests_completed": 67,
+              "ttft_ms_p50": 58.0,
+              "ttft_ms_p99": 64.9
+            }
+          ]
+        },
+        {
+          "precision": "W8A16",
+          "sustained_throughput_tokens_per_sec": 494.1,
+          "throttle_ratio": 0.905,
+          "throttle_onset_minute": null,
+          "ttft_p99_drift_ms": -320.5,
+          "sustained_concurrency": 8,
+          "duration_minutes": 15,
+          "samples": [
+            {
+              "minute": 1.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 456.8,
+              "tokens_out": 27416,
+              "tokens_in": 0,
+              "requests_completed": 81,
+              "ttft_ms_p50": 50.0,
+              "ttft_ms_p99": 372.0
+            },
+            {
+              "minute": 2.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 504.0,
+              "tokens_out": 30242,
+              "tokens_in": 0,
+              "requests_completed": 82,
+              "ttft_ms_p50": 47.1,
+              "ttft_ms_p99": 71.8
+            },
+            {
+              "minute": 3.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 486.6,
+              "tokens_out": 29207,
+              "tokens_in": 0,
+              "requests_completed": 83,
+              "ttft_ms_p50": 47.1,
+              "ttft_ms_p99": 54.4
+            },
+            {
+              "minute": 4.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 499.0,
+              "tokens_out": 29921,
+              "tokens_in": 0,
+              "requests_completed": 85,
+              "ttft_ms_p50": 46.8,
+              "ttft_ms_p99": 52.6
+            },
+            {
+              "minute": 5.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 496.0,
+              "tokens_out": 29768,
+              "tokens_in": 0,
+              "requests_completed": 79,
+              "ttft_ms_p50": 46.9,
+              "ttft_ms_p99": 49.5
+            },
+            {
+              "minute": 6.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 498.3,
+              "tokens_out": 29901,
+              "tokens_in": 0,
+              "requests_completed": 84,
+              "ttft_ms_p50": 47.0,
+              "ttft_ms_p99": 52.3
+            },
+            {
+              "minute": 7.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 495.4,
+              "tokens_out": 29715,
+              "tokens_in": 0,
+              "requests_completed": 85,
+              "ttft_ms_p50": 46.7,
+              "ttft_ms_p99": 50.2
+            },
+            {
+              "minute": 8.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 496.1,
+              "tokens_out": 29779,
+              "tokens_in": 0,
+              "requests_completed": 81,
+              "ttft_ms_p50": 46.9,
+              "ttft_ms_p99": 53.9
+            },
+            {
+              "minute": 9.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 503.2,
+              "tokens_out": 30195,
+              "tokens_in": 0,
+              "requests_completed": 85,
+              "ttft_ms_p50": 47.3,
+              "ttft_ms_p99": 54.0
+            },
+            {
+              "minute": 10.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 489.6,
+              "tokens_out": 29369,
+              "tokens_in": 0,
+              "requests_completed": 83,
+              "ttft_ms_p50": 46.9,
+              "ttft_ms_p99": 52.3
+            },
+            {
+              "minute": 11.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 504.8,
+              "tokens_out": 30299,
+              "tokens_in": 0,
+              "requests_completed": 81,
+              "ttft_ms_p50": 46.8,
+              "ttft_ms_p99": 52.2
+            },
+            {
+              "minute": 12.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 500.4,
+              "tokens_out": 30017,
+              "tokens_in": 0,
+              "requests_completed": 85,
+              "ttft_ms_p50": 46.7,
+              "ttft_ms_p99": 50.1
+            },
+            {
+              "minute": 13.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 494.6,
+              "tokens_out": 29670,
+              "tokens_in": 0,
+              "requests_completed": 84,
+              "ttft_ms_p50": 46.8,
+              "ttft_ms_p99": 51.2
+            },
+            {
+              "minute": 14.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 492.1,
+              "tokens_out": 29528,
+              "tokens_in": 0,
+              "requests_completed": 81,
+              "ttft_ms_p50": 47.0,
+              "ttft_ms_p99": 51.5
+            }
+          ]
+        },
+        {
+          "precision": "W4A16",
+          "sustained_throughput_tokens_per_sec": 437.3,
+          "throttle_ratio": 0.897,
+          "throttle_onset_minute": 1.0,
+          "ttft_p99_drift_ms": -632.2,
+          "sustained_concurrency": 8,
+          "duration_minutes": 15,
+          "samples": [
+            {
+              "minute": 1.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 409.4,
+              "tokens_out": 24574,
+              "tokens_in": 0,
+              "requests_completed": 73,
+              "ttft_ms_p50": 55.4,
+              "ttft_ms_p99": 690.1
+            },
+            {
+              "minute": 2.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 431.5,
+              "tokens_out": 25899,
+              "tokens_in": 0,
+              "requests_completed": 75,
+              "ttft_ms_p50": 53.4,
+              "ttft_ms_p99": 74.1
+            },
+            {
+              "minute": 3.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 456.2,
+              "tokens_out": 27364,
+              "tokens_in": 0,
+              "requests_completed": 77,
+              "ttft_ms_p50": 53.1,
+              "ttft_ms_p99": 56.8
+            },
+            {
+              "minute": 4.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 439.7,
+              "tokens_out": 26374,
+              "tokens_in": 0,
+              "requests_completed": 75,
+              "ttft_ms_p50": 53.2,
+              "ttft_ms_p99": 58.1
+            },
+            {
+              "minute": 5.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 430.2,
+              "tokens_out": 25819,
+              "tokens_in": 0,
+              "requests_completed": 75,
+              "ttft_ms_p50": 53.5,
+              "ttft_ms_p99": 56.1
+            },
+            {
+              "minute": 6.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 432.7,
+              "tokens_out": 25962,
+              "tokens_in": 0,
+              "requests_completed": 74,
+              "ttft_ms_p50": 53.9,
+              "ttft_ms_p99": 57.4
+            },
+            {
+              "minute": 7.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 452.2,
+              "tokens_out": 27140,
+              "tokens_in": 0,
+              "requests_completed": 77,
+              "ttft_ms_p50": 53.4,
+              "ttft_ms_p99": 57.2
+            },
+            {
+              "minute": 8.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 436.1,
+              "tokens_out": 26169,
+              "tokens_in": 0,
+              "requests_completed": 74,
+              "ttft_ms_p50": 53.6,
+              "ttft_ms_p99": 57.7
+            },
+            {
+              "minute": 9.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 432.3,
+              "tokens_out": 25934,
+              "tokens_in": 0,
+              "requests_completed": 76,
+              "ttft_ms_p50": 53.2,
+              "ttft_ms_p99": 57.8
+            },
+            {
+              "minute": 10.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 431.9,
+              "tokens_out": 25908,
+              "tokens_in": 0,
+              "requests_completed": 73,
+              "ttft_ms_p50": 53.4,
+              "ttft_ms_p99": 56.8
+            },
+            {
+              "minute": 11.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 449.0,
+              "tokens_out": 26936,
+              "tokens_in": 0,
+              "requests_completed": 77,
+              "ttft_ms_p50": 53.4,
+              "ttft_ms_p99": 57.9
+            },
+            {
+              "minute": 12.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 445.3,
+              "tokens_out": 26739,
+              "tokens_in": 0,
+              "requests_completed": 75,
+              "ttft_ms_p50": 53.1,
+              "ttft_ms_p99": 59.4
+            },
+            {
+              "minute": 13.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 441.9,
+              "tokens_out": 26490,
+              "tokens_in": 0,
+              "requests_completed": 78,
+              "ttft_ms_p50": 53.3,
+              "ttft_ms_p99": 55.8
+            },
+            {
+              "minute": 14.0,
+              "is_warmup": false,
+              "throughput_tokens_per_sec": 433.6,
+              "tokens_out": 26022,
+              "tokens_in": 0,
+              "requests_completed": 73,
+              "ttft_ms_p50": 53.0,
+              "ttft_ms_p99": 57.9
+            }
+          ]
+        }
+      ]
+    }
+  },
+  "accuracy": null,
+  "meta": {
+    "submitted_by": "JuhaoLiang1997",
+    "submission_type": "individual",
+    "date": "2026-05-18",
+    "time": "06:01:31",
+    "run_id": "ffd81462",
+    "run_name": "nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_vllm020_0f6c56e4/runner.py",
+    "env_info_file": "../../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-18T05:59:03.115858+00:00",
+    "benchmark_end_time": "2026-05-18T06:01:31.820089+00:00",
+    "benchmark_elapsed_minutes": 134.3,
+    "model_load_seconds": 35.2,
+    "benchmark_elapsed_minutes_note": "Sum of per-precision benchmark_elapsed_minutes (excludes sleep gaps and orchestrator overhead).",
+    "scenario_dirs": {
+      "bf16/offline": "results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/bf16/offline",
+      "bf16/online": "results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/bf16/online",
+      "bf16/sustained": "results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/bf16/sustained",
+      "fp8/offline": "results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/fp8/offline",
+      "fp8/online": "results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/fp8/online",
+      "fp8/sustained": "results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/fp8/sustained",
+      "w8a8/offline": "results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w8a8/offline",
+      "w8a8/online": "results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w8a8/online",
+      "w8a8/sustained": "results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w8a8/sustained",
+      "w8a16/offline": "results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w8a16/offline",
+      "w8a16/online": "results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w8a16/online",
+      "w8a16/sustained": "results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w8a16/sustained",
+      "w4a16/offline": "results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w4a16/offline",
+      "w4a16/online": "results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w4a16/online",
+      "w4a16/sustained": "results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w4a16/sustained"
+    },
+    "precision_dirs": {
+      "BF16": "bf16",
+      "FP8": "fp8",
+      "W8A8": "w8a8",
+      "W8A16": "w8a16",
+      "W4A16": "w4a16"
+    },
+    "precision_model_map": {
+      "BF16": {
+        "model_id": "meta-llama/Llama-3.1-8B-Instruct",
+        "model_revision": "0e9e39f249a16976918f6564b8830bc894c89659",
+        "dtype_override": "bfloat16"
+      },
+      "FP8": {
+        "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-FP8",
+        "model_revision": "12fd6884d2585dd4d020373e7f39f74507b31866",
+        "engine_kwargs": {
+          "quantization": "compressed-tensors"
+        },
+        "_note": "Static per-tensor FP8 (weights + activations). Requires Ampere+ (A100, A800, H20). Skipped automatically on FP16-only hardware."
+      },
+      "W8A8": {
+        "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w8a8",
+        "model_revision": "e2bfb7d92784ad7d1b606c2f9644d3cefb2ec708",
+        "engine_kwargs": {
+          "quantization": "compressed-tensors"
+        },
+        "_note": "INT8 weights + INT8 activations via compressed-tensors. Exercises native int8 tensor cores."
+      },
+      "W8A16": {
+        "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w8a16",
+        "model_revision": "38e03ba250017bf8ed3eeecd3a744e21f6b994a9",
+        "engine_kwargs": {
+          "quantization": "compressed-tensors"
+        },
+        "_note": "INT8 weights, FP16 activations. Weight-only quantization \u2014 reduces memory bandwidth, not compute dtype."
+      },
+      "W4A16": {
+        "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w4a16",
+        "model_revision": "70371b1b0ea0d4eacfe1ee9056ee805629921c6e",
+        "engine_kwargs": {
+          "quantization": "gptq"
+        },
+        "_note": "INT4 weights, FP16 activations via GPTQ Marlin kernels. Weight-only quantization \u2014 larger memory saving than W8A16."
+      }
+    }
+  }
+}
\ No newline at end of file
diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w4a16/accuracy/accuracy.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w4a16/accuracy/accuracy.json
new file mode 100644
index 00000000..b3311bd9
--- /dev/null
+++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w4a16/accuracy/accuracy.json
@@ -0,0 +1,8 @@
+{
+  "subset_score": 0.56,
+  "baseline_delta": -0.01,
+  "valid": true,
+  "framework": "vLLM",
+  "precision": "W4A16",
+  "notes": "Integrated accuracy check \u2014 used same vLLM instance as benchmark."
+}
\ No newline at end of file
diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w4a16/offline/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w4a16/offline/result.json
new file mode 100644
index 00000000..cc0424b3
--- /dev/null
+++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w4a16/offline/result.json
@@ -0,0 +1,183 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_C",
+  "implementation_id": "nvidia_vllm020_0f6c56e4",
+  "chip": {
+    "name": "NVIDIA A100-SXM4-80GB",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 80.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-18T05:56:25.789998+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A100-SXM4-80GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 80.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_platform": "nvidia",
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tPXB\tSYS\tSYS\t0-63,128-191\t0\t\tN/A\nNIC0\tPXB\t X \tSYS\tSYS\t\t\t\t\nNIC1\tSYS\tSYS\t X \tPIX\t\t\t\t\nNIC2\tSYS\tSYS\tPIX\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "AMD EPYC 7742 64-Core Processor",
+      "physical_cores": 128,
+      "logical_cores": 255,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.7,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.12.0",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 13.0",
+    "pytorch_version": "2.11.0+cu130"
+  },
+  "software": {
+    "framework": "vLLM",
+    "framework_version": "0.20.1+transformers-5.8.1",
+    "driver_version": "580.65.06",
+    "runtime_version": "CUDA 13.0",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.12.0"
+  },
+  "model": {
+    "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w4a16",
+    "model_revision": "70371b1b0ea0d4eacfe1ee9056ee805629921c6e",
+    "model_name": null,
+    "model_note": "INT4 weight-only quantization by RedHatAI using AWQ. Weights INT4, activations FP16.",
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "W4A16",
+    "effective_dtype": "float16",
+    "quantization_method": "gptq",
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "offline",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": {
+      "tensor_parallel_size": 1,
+      "enforce_eager": true,
+      "max_num_seqs": 512,
+      "gpu_memory_utilization": 0.9
+    },
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "offline": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 1,
+          "throughput_tokens_per_sec": 1889.19,
+          "throughput_tokens_per_sec_per_chip": 1889.19,
+          "throughput_tokens_per_sec_total": 3433.47,
+          "elapsed_seconds_median": 18.2,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 4,
+          "throughput_tokens_per_sec": 1862.45,
+          "throughput_tokens_per_sec_per_chip": 1862.45,
+          "throughput_tokens_per_sec_total": 3376.95,
+          "elapsed_seconds_median": 18.6,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 16,
+          "throughput_tokens_per_sec": 1861.34,
+          "throughput_tokens_per_sec_per_chip": 1861.34,
+          "throughput_tokens_per_sec_total": 3375.2,
+          "elapsed_seconds_median": 18.6,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 64,
+          "throughput_tokens_per_sec": 1851.04,
+          "throughput_tokens_per_sec_per_chip": 1851.04,
+          "throughput_tokens_per_sec_total": 3367.3,
+          "elapsed_seconds_median": 18.6,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "JuhaoLiang1997",
+    "submission_type": "individual",
+    "date": "2026-05-18",
+    "time": "14:44:34",
+    "run_id": "b1eb2d96",
+    "run_name": "nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_b1eb2d96",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_vllm020_0f6c56e4/runner.py",
+    "env_info_file": "../../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-18T14:39:39.920688+00:00",
+    "benchmark_end_time": "2026-05-18T14:44:34.781477+00:00",
+    "benchmark_elapsed_minutes": 4.9,
+    "model_load_seconds": 18.5
+  }
+}
\ No newline at end of file
diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w4a16/online/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w4a16/online/result.json
new file mode 100644
index 00000000..72619342
--- /dev/null
+++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w4a16/online/result.json
@@ -0,0 +1,181 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_C",
+  "implementation_id": "nvidia_vllm020_0f6c56e4",
+  "chip": {
+    "name": "NVIDIA A100-SXM4-80GB",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 80.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-18T05:56:25.789998+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A100-SXM4-80GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 80.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_platform": "nvidia",
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tPXB\tSYS\tSYS\t0-63,128-191\t0\t\tN/A\nNIC0\tPXB\t X \tSYS\tSYS\t\t\t\t\nNIC1\tSYS\tSYS\t X \tPIX\t\t\t\t\nNIC2\tSYS\tSYS\tPIX\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "AMD EPYC 7742 64-Core Processor",
+      "physical_cores": 128,
+      "logical_cores": 255,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.7,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.12.0",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 13.0",
+    "pytorch_version": "2.11.0+cu130"
+  },
+  "software": {
+    "framework": "vLLM",
+    "framework_version": "0.20.1+transformers-5.8.1",
+    "driver_version": "580.65.06",
+    "runtime_version": "CUDA 13.0",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.12.0"
+  },
+  "model": {
+    "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w4a16",
+    "model_revision": "70371b1b0ea0d4eacfe1ee9056ee805629921c6e",
+    "model_name": null,
+    "model_note": "INT4 weight-only quantization by RedHatAI using AWQ. Weights INT4, activations FP16.",
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "W4A16",
+    "effective_dtype": null,
+    "quantization_method": "gptq",
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "online",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": {
+      "tensor_parallel_size": 1,
+      "enforce_eager": true,
+      "max_num_seqs": 512,
+      "gpu_memory_utilization": 0.9
+    },
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "online": {
+      "sla_ttft_ms": 500,
+      "max_valid_qps": 10,
+      "results_by_qps": [
+        {
+          "target_qps": 5,
+          "achieved_qps": 5.0,
+          "ttft_ms_p50": 50.42,
+          "ttft_ms_p90": 64.62,
+          "ttft_ms_p99": 104.73,
+          "tpot_ms_p50": 18.15,
+          "tpot_ms_p90": 19.16,
+          "tpot_ms_p99": 19.62,
+          "elapsed_seconds_median": 71.1,
+          "sla_met": true
+        },
+        {
+          "target_qps": 10,
+          "achieved_qps": 10.0,
+          "ttft_ms_p50": 57.89,
+          "ttft_ms_p90": 72.82,
+          "ttft_ms_p99": 84.96,
+          "tpot_ms_p50": 21.11,
+          "tpot_ms_p90": 23.03,
+          "tpot_ms_p99": 24.31,
+          "elapsed_seconds_median": 38.5,
+          "sla_met": true
+        },
+        {
+          "target_qps": 25,
+          "achieved_qps": 25.0,
+          "ttft_ms_p50": 97.24,
+          "ttft_ms_p90": 6365.08,
+          "ttft_ms_p99": 7073.94,
+          "tpot_ms_p50": 25.46,
+          "tpot_ms_p90": 27.98,
+          "tpot_ms_p99": 31.21,
+          "elapsed_seconds_median": 29.2,
+          "sla_met": false
+        },
+        {
+          "target_qps": 50,
+          "achieved_qps": 50.0,
+          "ttft_ms_p50": 918.12,
+          "ttft_ms_p90": 9805.4,
+          "ttft_ms_p99": 10437.5,
+          "tpot_ms_p50": 25.2,
+          "tpot_ms_p90": 27.67,
+          "tpot_ms_p99": 32.45,
+          "elapsed_seconds_median": 26.7,
+          "sla_met": false
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "JuhaoLiang1997",
+    "submission_type": "individual",
+    "date": "2026-05-18",
+    "time": "14:53:48",
+    "run_id": "b1eb2d96",
+    "run_name": "nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_b1eb2d96",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_vllm020_0f6c56e4/runner.py",
+    "env_info_file": "../../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-18T14:45:34.287656+00:00",
+    "benchmark_end_time": "2026-05-18T14:53:48.716951+00:00",
+    "benchmark_elapsed_minutes": 8.2,
+    "model_load_seconds": 29.3
+  }
+}
\ No newline at end of file
diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w4a16/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w4a16/result.json
new file mode 100644
index 00000000..27e0744a
--- /dev/null
+++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w4a16/result.json
@@ -0,0 +1,400 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_C",
+  "implementation_id": "nvidia_vllm020_0f6c56e4",
+  "chip": {
+    "name": "NVIDIA A100-SXM4-80GB",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 80.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-18T05:56:25.789998+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A100-SXM4-80GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 80.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_platform": "nvidia",
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tPXB\tSYS\tSYS\t0-63,128-191\t0\t\tN/A\nNIC0\tPXB\t X \tSYS\tSYS\t\t\t\t\nNIC1\tSYS\tSYS\t X \tPIX\t\t\t\t\nNIC2\tSYS\tSYS\tPIX\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "AMD EPYC 7742 64-Core Processor",
+      "physical_cores": 128,
+      "logical_cores": 255,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.7,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.12.0",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 13.0",
+    "pytorch_version": "2.11.0+cu130"
+  },
+  "software": {
+    "framework": "vLLM",
+    "framework_version": "0.20.1+transformers-5.8.1",
+    "driver_version": "580.65.06",
+    "runtime_version": "CUDA 13.0",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.12.0"
+  },
+  "model": {
+    "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w4a16",
+    "model_revision": "70371b1b0ea0d4eacfe1ee9056ee805629921c6e",
+    "model_name": null,
+    "model_note": "INT4 weight-only quantization by RedHatAI using AWQ. Weights INT4, activations FP16.",
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "W4A16",
+    "effective_dtype": "float16",
+    "quantization_method": "gptq",
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenarios_run": [
+      "offline",
+      "online",
+      "sustained"
+    ],
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "num_runs": 3,
+    "extra_config": {
+      "tensor_parallel_size": 1,
+      "enforce_eager": true,
+      "max_num_seqs": 512,
+      "gpu_memory_utilization": 0.9
+    }
+  },
+  "metrics": {
+    "derived": {},
+    "offline": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 1,
+          "throughput_tokens_per_sec": 1889.19,
+          "throughput_tokens_per_sec_per_chip": 1889.19,
+          "throughput_tokens_per_sec_total": 3433.47,
+          "elapsed_seconds_median": 18.2,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 4,
+          "throughput_tokens_per_sec": 1862.45,
+          "throughput_tokens_per_sec_per_chip": 1862.45,
+          "throughput_tokens_per_sec_total": 3376.95,
+          "elapsed_seconds_median": 18.6,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 16,
+          "throughput_tokens_per_sec": 1861.34,
+          "throughput_tokens_per_sec_per_chip": 1861.34,
+          "throughput_tokens_per_sec_total": 3375.2,
+          "elapsed_seconds_median": 18.6,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 64,
+          "throughput_tokens_per_sec": 1851.04,
+          "throughput_tokens_per_sec_per_chip": 1851.04,
+          "throughput_tokens_per_sec_total": 3367.3,
+          "elapsed_seconds_median": 18.6,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    },
+    "online": {
+      "sla_ttft_ms": 500,
+      "max_valid_qps": 10,
+      "results_by_qps": [
+        {
+          "target_qps": 5,
+          "achieved_qps": 5.0,
+          "ttft_ms_p50": 50.42,
+          "ttft_ms_p90": 64.62,
+          "ttft_ms_p99": 104.73,
+          "tpot_ms_p50": 18.15,
+          "tpot_ms_p90": 19.16,
+          "tpot_ms_p99": 19.62,
+          "elapsed_seconds_median": 71.1,
+          "sla_met": true
+        },
+        {
+          "target_qps": 10,
+          "achieved_qps": 10.0,
+          "ttft_ms_p50": 57.89,
+          "ttft_ms_p90": 72.82,
+          "ttft_ms_p99": 84.96,
+          "tpot_ms_p50": 21.11,
+          "tpot_ms_p90": 23.03,
+          "tpot_ms_p99": 24.31,
+          "elapsed_seconds_median": 38.5,
+          "sla_met": true
+        },
+        {
+          "target_qps": 25,
+          "achieved_qps": 25.0,
+          "ttft_ms_p50": 97.24,
+          "ttft_ms_p90": 6365.08,
+          "ttft_ms_p99": 7073.94,
+          "tpot_ms_p50": 25.46,
+          "tpot_ms_p90": 27.98,
+          "tpot_ms_p99": 31.21,
+          "elapsed_seconds_median": 29.2,
+          "sla_met": false
+        },
+        {
+          "target_qps": 50,
+          "achieved_qps": 50.0,
+          "ttft_ms_p50": 918.12,
+          "ttft_ms_p90": 9805.4,
+          "ttft_ms_p99": 10437.5,
+          "tpot_ms_p50": 25.2,
+          "tpot_ms_p90": 27.67,
+          "tpot_ms_p99": 32.45,
+          "elapsed_seconds_median": 26.7,
+          "sla_met": false
+        }
+      ]
+    },
+    "sustained": {
+      "sustained_concurrency": 8,
+      "duration_minutes": 15,
+      "warmup_minutes": 1,
+      "sample_interval_seconds": 60,
+      "samples": [
+        {
+          "minute": 1.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 409.4,
+          "tokens_out": 24574,
+          "tokens_in": 0,
+          "requests_completed": 73,
+          "ttft_ms_p50": 55.4,
+          "ttft_ms_p99": 690.1
+        },
+        {
+          "minute": 2.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 431.5,
+          "tokens_out": 25899,
+          "tokens_in": 0,
+          "requests_completed": 75,
+          "ttft_ms_p50": 53.4,
+          "ttft_ms_p99": 74.1
+        },
+        {
+          "minute": 3.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 456.2,
+          "tokens_out": 27364,
+          "tokens_in": 0,
+          "requests_completed": 77,
+          "ttft_ms_p50": 53.1,
+          "ttft_ms_p99": 56.8
+        },
+        {
+          "minute": 4.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 439.7,
+          "tokens_out": 26374,
+          "tokens_in": 0,
+          "requests_completed": 75,
+          "ttft_ms_p50": 53.2,
+          "ttft_ms_p99": 58.1
+        },
+        {
+          "minute": 5.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 430.2,
+          "tokens_out": 25819,
+          "tokens_in": 0,
+          "requests_completed": 75,
+          "ttft_ms_p50": 53.5,
+          "ttft_ms_p99": 56.1
+        },
+        {
+          "minute": 6.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 432.7,
+          "tokens_out": 25962,
+          "tokens_in": 0,
+          "requests_completed": 74,
+          "ttft_ms_p50": 53.9,
+          "ttft_ms_p99": 57.4
+        },
+        {
+          "minute": 7.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 452.2,
+          "tokens_out": 27140,
+          "tokens_in": 0,
+          "requests_completed": 77,
+          "ttft_ms_p50": 53.4,
+          "ttft_ms_p99": 57.2
+        },
+        {
+          "minute": 8.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 436.1,
+          "tokens_out": 26169,
+          "tokens_in": 0,
+          "requests_completed": 74,
+          "ttft_ms_p50": 53.6,
+          "ttft_ms_p99": 57.7
+        },
+        {
+          "minute": 9.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 432.3,
+          "tokens_out": 25934,
+          "tokens_in": 0,
+          "requests_completed": 76,
+          "ttft_ms_p50": 53.2,
+          "ttft_ms_p99": 57.8
+        },
+        {
+          "minute": 10.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 431.9,
+          "tokens_out": 25908,
+          "tokens_in": 0,
+          "requests_completed": 73,
+          "ttft_ms_p50": 53.4,
+          "ttft_ms_p99": 56.8
+        },
+        {
+          "minute": 11.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 449.0,
+          "tokens_out": 26936,
+          "tokens_in": 0,
+          "requests_completed": 77,
+          "ttft_ms_p50": 53.4,
+          "ttft_ms_p99": 57.9
+        },
+        {
+          "minute": 12.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 445.3,
+          "tokens_out": 26739,
+          "tokens_in": 0,
+          "requests_completed": 75,
+          "ttft_ms_p50": 53.1,
+          "ttft_ms_p99": 59.4
+        },
+        {
+          "minute": 13.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 441.9,
+          "tokens_out": 26490,
+          "tokens_in": 0,
+          "requests_completed": 78,
+          "ttft_ms_p50": 53.3,
+          "ttft_ms_p99": 55.8
+        },
+        {
+          "minute": 14.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 433.6,
+          "tokens_out": 26022,
+          "tokens_in": 0,
+          "requests_completed": 73,
+          "ttft_ms_p50": 53.0,
+          "ttft_ms_p99": 57.9
+        }
+      ],
+      "sustained_throughput_tokens_per_sec": 437.3,
+      "throttle_ratio": 0.897,
+      "throttle_onset_minute": 1.0,
+      "ttft_p99_drift_ms": -632.2
+    }
+  },
+  "accuracy": {
+    "subset_score": 0.56,
+    "baseline_delta": -0.01,
+    "valid": true,
+    "framework": "vLLM",
+    "precision": "W4A16",
+    "notes": "Integrated accuracy check \u2014 used same vLLM instance as benchmark."
+  },
+  "meta": {
+    "submitted_by": "JuhaoLiang1997",
+    "submission_type": "individual",
+    "date": "2026-05-18",
+    "time": "14:44:34",
+    "run_id": "b1eb2d96",
+    "run_name": "nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_b1eb2d96",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_vllm020_0f6c56e4/runner.py",
+    "env_info_file": "../../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-18T14:39:39.920688+00:00",
+    "benchmark_end_time": "2026-05-18T14:44:34.781477+00:00",
+    "benchmark_elapsed_minutes": 28.3,
+    "model_load_seconds": 18.5,
+    "benchmark_elapsed_minutes_note": "Total across ['offline', 'online', 'sustained'] scenarios.",
+    "scenario_dirs": {
+      "offline": "results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w4a16/offline",
+      "online": "results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w4a16/online",
+      "sustained": "results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w4a16/sustained"
+    }
+  }
+}
\ No newline at end of file
diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w4a16/sustained/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w4a16/sustained/result.json
new file mode 100644
index 00000000..9982bb06
--- /dev/null
+++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w4a16/sustained/result.json
@@ -0,0 +1,279 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_C",
+  "implementation_id": "nvidia_vllm020_0f6c56e4",
+  "chip": {
+    "name": "NVIDIA A100-SXM4-80GB",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 80.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-18T05:56:25.789998+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A100-SXM4-80GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 80.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_platform": "nvidia",
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tPXB\tSYS\tSYS\t0-63,128-191\t0\t\tN/A\nNIC0\tPXB\t X \tSYS\tSYS\t\t\t\t\nNIC1\tSYS\tSYS\t X \tPIX\t\t\t\t\nNIC2\tSYS\tSYS\tPIX\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "AMD EPYC 7742 64-Core Processor",
+      "physical_cores": 128,
+      "logical_cores": 255,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.7,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.12.0",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 13.0",
+    "pytorch_version": "2.11.0+cu130"
+  },
+  "software": {
+    "framework": "vLLM",
+    "framework_version": "0.20.1+transformers-5.8.1",
+    "driver_version": "580.65.06",
+    "runtime_version": "CUDA 13.0",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.12.0"
+  },
+  "model": {
+    "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w4a16",
+    "model_revision": "70371b1b0ea0d4eacfe1ee9056ee805629921c6e",
+    "model_name": null,
+    "model_note": "INT4 weight-only quantization by RedHatAI using AWQ. Weights INT4, activations FP16.",
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "W4A16",
+    "effective_dtype": null,
+    "quantization_method": "gptq",
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "sustained",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": {
+      "tensor_parallel_size": 1,
+      "enforce_eager": true,
+      "max_num_seqs": 512,
+      "gpu_memory_utilization": 0.9
+    },
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "sustained": {
+      "sustained_concurrency": 8,
+      "duration_minutes": 15,
+      "warmup_minutes": 1,
+      "sample_interval_seconds": 60,
+      "samples": [
+        {
+          "minute": 1.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 409.4,
+          "tokens_out": 24574,
+          "tokens_in": 0,
+          "requests_completed": 73,
+          "ttft_ms_p50": 55.4,
+          "ttft_ms_p99": 690.1
+        },
+        {
+          "minute": 2.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 431.5,
+          "tokens_out": 25899,
+          "tokens_in": 0,
+          "requests_completed": 75,
+          "ttft_ms_p50": 53.4,
+          "ttft_ms_p99": 74.1
+        },
+        {
+          "minute": 3.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 456.2,
+          "tokens_out": 27364,
+          "tokens_in": 0,
+          "requests_completed": 77,
+          "ttft_ms_p50": 53.1,
+          "ttft_ms_p99": 56.8
+        },
+        {
+          "minute": 4.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 439.7,
+          "tokens_out": 26374,
+          "tokens_in": 0,
+          "requests_completed": 75,
+          "ttft_ms_p50": 53.2,
+          "ttft_ms_p99": 58.1
+        },
+        {
+          "minute": 5.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 430.2,
+          "tokens_out": 25819,
+          "tokens_in": 0,
+          "requests_completed": 75,
+          "ttft_ms_p50": 53.5,
+          "ttft_ms_p99": 56.1
+        },
+        {
+          "minute": 6.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 432.7,
+          "tokens_out": 25962,
+          "tokens_in": 0,
+          "requests_completed": 74,
+          "ttft_ms_p50": 53.9,
+          "ttft_ms_p99": 57.4
+        },
+        {
+          "minute": 7.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 452.2,
+          "tokens_out": 27140,
+          "tokens_in": 0,
+          "requests_completed": 77,
+          "ttft_ms_p50": 53.4,
+          "ttft_ms_p99": 57.2
+        },
+        {
+          "minute": 8.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 436.1,
+          "tokens_out": 26169,
+          "tokens_in": 0,
+          "requests_completed": 74,
+          "ttft_ms_p50": 53.6,
+          "ttft_ms_p99": 57.7
+        },
+        {
+          "minute": 9.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 432.3,
+          "tokens_out": 25934,
+          "tokens_in": 0,
+          "requests_completed": 76,
+          "ttft_ms_p50": 53.2,
+          "ttft_ms_p99": 57.8
+        },
+        {
+          "minute": 10.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 431.9,
+          "tokens_out": 25908,
+          "tokens_in": 0,
+          "requests_completed": 73,
+          "ttft_ms_p50": 53.4,
+          "ttft_ms_p99": 56.8
+        },
+        {
+          "minute": 11.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 449.0,
+          "tokens_out": 26936,
+          "tokens_in": 0,
+          "requests_completed": 77,
+          "ttft_ms_p50": 53.4,
+          "ttft_ms_p99": 57.9
+        },
+        {
+          "minute": 12.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 445.3,
+          "tokens_out": 26739,
+          "tokens_in": 0,
+          "requests_completed": 75,
+          "ttft_ms_p50": 53.1,
+          "ttft_ms_p99": 59.4
+        },
+        {
+          "minute": 13.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 441.9,
+          "tokens_out": 26490,
+          "tokens_in": 0,
+          "requests_completed": 78,
+          "ttft_ms_p50": 53.3,
+          "ttft_ms_p99": 55.8
+        },
+        {
+          "minute": 14.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 433.6,
+          "tokens_out": 26022,
+          "tokens_in": 0,
+          "requests_completed": 73,
+          "ttft_ms_p50": 53.0,
+          "ttft_ms_p99": 57.9
+        }
+      ],
+      "sustained_throughput_tokens_per_sec": 437.3,
+      "throttle_ratio": 0.897,
+      "throttle_onset_minute": 1.0,
+      "ttft_p99_drift_ms": -632.2
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "JuhaoLiang1997",
+    "submission_type": "individual",
+    "date": "2026-05-18",
+    "time": "15:09:58",
+    "run_id": "b1eb2d96",
+    "run_name": "nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_b1eb2d96",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_vllm020_0f6c56e4/runner.py",
+    "env_info_file": "../../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-18T14:54:46.184556+00:00",
+    "benchmark_end_time": "2026-05-18T15:09:58.042851+00:00",
+    "benchmark_elapsed_minutes": 15.2,
+    "model_load_seconds": 25.5
+  }
+}
\ No newline at end of file
diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w8a16/accuracy/accuracy.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w8a16/accuracy/accuracy.json
new file mode 100644
index 00000000..a6505b13
--- /dev/null
+++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w8a16/accuracy/accuracy.json
@@ -0,0 +1,8 @@
+{
+  "subset_score": 0.58,
+  "baseline_delta": -0.01,
+  "valid": true,
+  "framework": "vLLM",
+  "precision": "W8A16",
+  "notes": "Integrated accuracy check \u2014 used same vLLM instance as benchmark."
+}
\ No newline at end of file
diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w8a16/offline/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w8a16/offline/result.json
new file mode 100644
index 00000000..6cb0ddaa
--- /dev/null
+++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w8a16/offline/result.json
@@ -0,0 +1,183 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_C",
+  "implementation_id": "nvidia_vllm020_0f6c56e4",
+  "chip": {
+    "name": "NVIDIA A100-SXM4-80GB",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 80.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-18T05:56:25.789998+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A100-SXM4-80GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 80.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_platform": "nvidia",
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tPXB\tSYS\tSYS\t0-63,128-191\t0\t\tN/A\nNIC0\tPXB\t X \tSYS\tSYS\t\t\t\t\nNIC1\tSYS\tSYS\t X \tPIX\t\t\t\t\nNIC2\tSYS\tSYS\tPIX\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "AMD EPYC 7742 64-Core Processor",
+      "physical_cores": 128,
+      "logical_cores": 255,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.7,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.12.0",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 13.0",
+    "pytorch_version": "2.11.0+cu130"
+  },
+  "software": {
+    "framework": "vLLM",
+    "framework_version": "0.20.1+transformers-5.8.1",
+    "driver_version": "580.65.06",
+    "runtime_version": "CUDA 13.0",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.12.0"
+  },
+  "model": {
+    "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w8a16",
+    "model_revision": "38e03ba250017bf8ed3eeecd3a744e21f6b994a9",
+    "model_name": null,
+    "model_note": "INT8 weight-only quantization by RedHatAI using llm-compressor. Weights INT8, activations FP16.",
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "W8A16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": "compressed-tensors",
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "offline",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": {
+      "tensor_parallel_size": 1,
+      "enforce_eager": true,
+      "max_num_seqs": 512,
+      "gpu_memory_utilization": 0.9
+    },
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "offline": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 1,
+          "throughput_tokens_per_sec": 3533.68,
+          "throughput_tokens_per_sec_per_chip": 3533.68,
+          "throughput_tokens_per_sec_total": 6328.84,
+          "elapsed_seconds_median": 10.1,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 4,
+          "throughput_tokens_per_sec": 3510.7,
+          "throughput_tokens_per_sec_per_chip": 3510.7,
+          "throughput_tokens_per_sec_total": 6292.5,
+          "elapsed_seconds_median": 10.1,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 16,
+          "throughput_tokens_per_sec": 3535.13,
+          "throughput_tokens_per_sec_per_chip": 3535.13,
+          "throughput_tokens_per_sec_total": 6324.07,
+          "elapsed_seconds_median": 10.1,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 64,
+          "throughput_tokens_per_sec": 3547.44,
+          "throughput_tokens_per_sec_per_chip": 3547.44,
+          "throughput_tokens_per_sec_total": 6336.33,
+          "elapsed_seconds_median": 10.1,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "JuhaoLiang1997",
+    "submission_type": "individual",
+    "date": "2026-05-18",
+    "time": "14:10:48",
+    "run_id": "5b72ecb7",
+    "run_name": "nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_5b72ecb7",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_vllm020_0f6c56e4/runner.py",
+    "env_info_file": "../../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-18T14:08:05.948198+00:00",
+    "benchmark_end_time": "2026-05-18T14:10:48.869711+00:00",
+    "benchmark_elapsed_minutes": 2.7,
+    "model_load_seconds": 25.3
+  }
+}
\ No newline at end of file
diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w8a16/online/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w8a16/online/result.json
new file mode 100644
index 00000000..8f59b189
--- /dev/null
+++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w8a16/online/result.json
@@ -0,0 +1,181 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_C",
+  "implementation_id": "nvidia_vllm020_0f6c56e4",
+  "chip": {
+    "name": "NVIDIA A100-SXM4-80GB",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 80.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-18T05:56:25.789998+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A100-SXM4-80GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 80.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_platform": "nvidia",
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tPXB\tSYS\tSYS\t0-63,128-191\t0\t\tN/A\nNIC0\tPXB\t X \tSYS\tSYS\t\t\t\t\nNIC1\tSYS\tSYS\t X \tPIX\t\t\t\t\nNIC2\tSYS\tSYS\tPIX\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "AMD EPYC 7742 64-Core Processor",
+      "physical_cores": 128,
+      "logical_cores": 255,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.7,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.12.0",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 13.0",
+    "pytorch_version": "2.11.0+cu130"
+  },
+  "software": {
+    "framework": "vLLM",
+    "framework_version": "0.20.1+transformers-5.8.1",
+    "driver_version": "580.65.06",
+    "runtime_version": "CUDA 13.0",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.12.0"
+  },
+  "model": {
+    "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w8a16",
+    "model_revision": "38e03ba250017bf8ed3eeecd3a744e21f6b994a9",
+    "model_name": null,
+    "model_note": "INT8 weight-only quantization by RedHatAI using llm-compressor. Weights INT8, activations FP16.",
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "W8A16",
+    "effective_dtype": null,
+    "quantization_method": "compressed-tensors",
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "online",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": {
+      "tensor_parallel_size": 1,
+      "enforce_eager": true,
+      "max_num_seqs": 512,
+      "gpu_memory_utilization": 0.9
+    },
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "online": {
+      "sla_ttft_ms": 500,
+      "max_valid_qps": 10,
+      "results_by_qps": [
+        {
+          "target_qps": 5,
+          "achieved_qps": 5.0,
+          "ttft_ms_p50": 46.32,
+          "ttft_ms_p90": 62.37,
+          "ttft_ms_p99": 104.49,
+          "tpot_ms_p50": 16.66,
+          "tpot_ms_p90": 17.7,
+          "tpot_ms_p99": 18.28,
+          "elapsed_seconds_median": 70.3,
+          "sla_met": true
+        },
+        {
+          "target_qps": 10,
+          "achieved_qps": 10.0,
+          "ttft_ms_p50": 57.25,
+          "ttft_ms_p90": 72.45,
+          "ttft_ms_p99": 81.09,
+          "tpot_ms_p50": 20.79,
+          "tpot_ms_p90": 22.4,
+          "tpot_ms_p99": 23.2,
+          "elapsed_seconds_median": 37.9,
+          "sla_met": true
+        },
+        {
+          "target_qps": 25,
+          "achieved_qps": 25.0,
+          "ttft_ms_p50": 93.4,
+          "ttft_ms_p90": 6429.1,
+          "ttft_ms_p99": 7429.34,
+          "tpot_ms_p50": 25.13,
+          "tpot_ms_p90": 28.01,
+          "tpot_ms_p99": 30.79,
+          "elapsed_seconds_median": 28.8,
+          "sla_met": false
+        },
+        {
+          "target_qps": 50,
+          "achieved_qps": 50.0,
+          "ttft_ms_p50": 1306.21,
+          "ttft_ms_p90": 9937.77,
+          "ttft_ms_p99": 10640.43,
+          "tpot_ms_p50": 25.06,
+          "tpot_ms_p90": 27.42,
+          "tpot_ms_p99": 34.24,
+          "elapsed_seconds_median": 26.4,
+          "sla_met": false
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "JuhaoLiang1997",
+    "submission_type": "individual",
+    "date": "2026-05-18",
+    "time": "14:20:00",
+    "run_id": "5b72ecb7",
+    "run_name": "nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_5b72ecb7",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_vllm020_0f6c56e4/runner.py",
+    "env_info_file": "../../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-18T14:11:50.787859+00:00",
+    "benchmark_end_time": "2026-05-18T14:20:00.278335+00:00",
+    "benchmark_elapsed_minutes": 8.2,
+    "model_load_seconds": 34.3
+  }
+}
\ No newline at end of file
diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w8a16/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w8a16/result.json
new file mode 100644
index 00000000..485a0fb3
--- /dev/null
+++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w8a16/result.json
@@ -0,0 +1,400 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_C",
+  "implementation_id": "nvidia_vllm020_0f6c56e4",
+  "chip": {
+    "name": "NVIDIA A100-SXM4-80GB",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 80.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-18T05:56:25.789998+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A100-SXM4-80GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 80.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_platform": "nvidia",
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tPXB\tSYS\tSYS\t0-63,128-191\t0\t\tN/A\nNIC0\tPXB\t X \tSYS\tSYS\t\t\t\t\nNIC1\tSYS\tSYS\t X \tPIX\t\t\t\t\nNIC2\tSYS\tSYS\tPIX\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "AMD EPYC 7742 64-Core Processor",
+      "physical_cores": 128,
+      "logical_cores": 255,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.7,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.12.0",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 13.0",
+    "pytorch_version": "2.11.0+cu130"
+  },
+  "software": {
+    "framework": "vLLM",
+    "framework_version": "0.20.1+transformers-5.8.1",
+    "driver_version": "580.65.06",
+    "runtime_version": "CUDA 13.0",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.12.0"
+  },
+  "model": {
+    "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w8a16",
+    "model_revision": "38e03ba250017bf8ed3eeecd3a744e21f6b994a9",
+    "model_name": null,
+    "model_note": "INT8 weight-only quantization by RedHatAI using llm-compressor. Weights INT8, activations FP16.",
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "W8A16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": "compressed-tensors",
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenarios_run": [
+      "offline",
+      "online",
+      "sustained"
+    ],
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "num_runs": 3,
+    "extra_config": {
+      "tensor_parallel_size": 1,
+      "enforce_eager": true,
+      "max_num_seqs": 512,
+      "gpu_memory_utilization": 0.9
+    }
+  },
+  "metrics": {
+    "derived": {},
+    "offline": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 1,
+          "throughput_tokens_per_sec": 3533.68,
+          "throughput_tokens_per_sec_per_chip": 3533.68,
+          "throughput_tokens_per_sec_total": 6328.84,
+          "elapsed_seconds_median": 10.1,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 4,
+          "throughput_tokens_per_sec": 3510.7,
+          "throughput_tokens_per_sec_per_chip": 3510.7,
+          "throughput_tokens_per_sec_total": 6292.5,
+          "elapsed_seconds_median": 10.1,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 16,
+          "throughput_tokens_per_sec": 3535.13,
+          "throughput_tokens_per_sec_per_chip": 3535.13,
+          "throughput_tokens_per_sec_total": 6324.07,
+          "elapsed_seconds_median": 10.1,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 64,
+          "throughput_tokens_per_sec": 3547.44,
+          "throughput_tokens_per_sec_per_chip": 3547.44,
+          "throughput_tokens_per_sec_total": 6336.33,
+          "elapsed_seconds_median": 10.1,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    },
+    "online": {
+      "sla_ttft_ms": 500,
+      "max_valid_qps": 10,
+      "results_by_qps": [
+        {
+          "target_qps": 5,
+          "achieved_qps": 5.0,
+          "ttft_ms_p50": 46.32,
+          "ttft_ms_p90": 62.37,
+          "ttft_ms_p99": 104.49,
+          "tpot_ms_p50": 16.66,
+          "tpot_ms_p90": 17.7,
+          "tpot_ms_p99": 18.28,
+          "elapsed_seconds_median": 70.3,
+          "sla_met": true
+        },
+        {
+          "target_qps": 10,
+          "achieved_qps": 10.0,
+          "ttft_ms_p50": 57.25,
+          "ttft_ms_p90": 72.45,
+          "ttft_ms_p99": 81.09,
+          "tpot_ms_p50": 20.79,
+          "tpot_ms_p90": 22.4,
+          "tpot_ms_p99": 23.2,
+          "elapsed_seconds_median": 37.9,
+          "sla_met": true
+        },
+        {
+          "target_qps": 25,
+          "achieved_qps": 25.0,
+          "ttft_ms_p50": 93.4,
+          "ttft_ms_p90": 6429.1,
+          "ttft_ms_p99": 7429.34,
+          "tpot_ms_p50": 25.13,
+          "tpot_ms_p90": 28.01,
+          "tpot_ms_p99": 30.79,
+          "elapsed_seconds_median": 28.8,
+          "sla_met": false
+        },
+        {
+          "target_qps": 50,
+          "achieved_qps": 50.0,
+          "ttft_ms_p50": 1306.21,
+          "ttft_ms_p90": 9937.77,
+          "ttft_ms_p99": 10640.43,
+          "tpot_ms_p50": 25.06,
+          "tpot_ms_p90": 27.42,
+          "tpot_ms_p99": 34.24,
+          "elapsed_seconds_median": 26.4,
+          "sla_met": false
+        }
+      ]
+    },
+    "sustained": {
+      "sustained_concurrency": 8,
+      "duration_minutes": 15,
+      "warmup_minutes": 1,
+      "sample_interval_seconds": 60,
+      "samples": [
+        {
+          "minute": 1.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 456.8,
+          "tokens_out": 27416,
+          "tokens_in": 0,
+          "requests_completed": 81,
+          "ttft_ms_p50": 50.0,
+          "ttft_ms_p99": 372.0
+        },
+        {
+          "minute": 2.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 504.0,
+          "tokens_out": 30242,
+          "tokens_in": 0,
+          "requests_completed": 82,
+          "ttft_ms_p50": 47.1,
+          "ttft_ms_p99": 71.8
+        },
+        {
+          "minute": 3.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 486.6,
+          "tokens_out": 29207,
+          "tokens_in": 0,
+          "requests_completed": 83,
+          "ttft_ms_p50": 47.1,
+          "ttft_ms_p99": 54.4
+        },
+        {
+          "minute": 4.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 499.0,
+          "tokens_out": 29921,
+          "tokens_in": 0,
+          "requests_completed": 85,
+          "ttft_ms_p50": 46.8,
+          "ttft_ms_p99": 52.6
+        },
+        {
+          "minute": 5.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 496.0,
+          "tokens_out": 29768,
+          "tokens_in": 0,
+          "requests_completed": 79,
+          "ttft_ms_p50": 46.9,
+          "ttft_ms_p99": 49.5
+        },
+        {
+          "minute": 6.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 498.3,
+          "tokens_out": 29901,
+          "tokens_in": 0,
+          "requests_completed": 84,
+          "ttft_ms_p50": 47.0,
+          "ttft_ms_p99": 52.3
+        },
+        {
+          "minute": 7.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 495.4,
+          "tokens_out": 29715,
+          "tokens_in": 0,
+          "requests_completed": 85,
+          "ttft_ms_p50": 46.7,
+          "ttft_ms_p99": 50.2
+        },
+        {
+          "minute": 8.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 496.1,
+          "tokens_out": 29779,
+          "tokens_in": 0,
+          "requests_completed": 81,
+          "ttft_ms_p50": 46.9,
+          "ttft_ms_p99": 53.9
+        },
+        {
+          "minute": 9.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 503.2,
+          "tokens_out": 30195,
+          "tokens_in": 0,
+          "requests_completed": 85,
+          "ttft_ms_p50": 47.3,
+          "ttft_ms_p99": 54.0
+        },
+        {
+          "minute": 10.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 489.6,
+          "tokens_out": 29369,
+          "tokens_in": 0,
+          "requests_completed": 83,
+          "ttft_ms_p50": 46.9,
+          "ttft_ms_p99": 52.3
+        },
+        {
+          "minute": 11.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 504.8,
+          "tokens_out": 30299,
+          "tokens_in": 0,
+          "requests_completed": 81,
+          "ttft_ms_p50": 46.8,
+          "ttft_ms_p99": 52.2
+        },
+        {
+          "minute": 12.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 500.4,
+          "tokens_out": 30017,
+          "tokens_in": 0,
+          "requests_completed": 85,
+          "ttft_ms_p50": 46.7,
+          "ttft_ms_p99": 50.1
+        },
+        {
+          "minute": 13.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 494.6,
+          "tokens_out": 29670,
+          "tokens_in": 0,
+          "requests_completed": 84,
+          "ttft_ms_p50": 46.8,
+          "ttft_ms_p99": 51.2
+        },
+        {
+          "minute": 14.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 492.1,
+          "tokens_out": 29528,
+          "tokens_in": 0,
+          "requests_completed": 81,
+          "ttft_ms_p50": 47.0,
+          "ttft_ms_p99": 51.5
+        }
+      ],
+      "sustained_throughput_tokens_per_sec": 494.1,
+      "throttle_ratio": 0.905,
+      "throttle_onset_minute": null,
+      "ttft_p99_drift_ms": -320.5
+    }
+  },
+  "accuracy": {
+    "subset_score": 0.58,
+    "baseline_delta": -0.01,
+    "valid": true,
+    "framework": "vLLM",
+    "precision": "W8A16",
+    "notes": "Integrated accuracy check \u2014 used same vLLM instance as benchmark."
+  },
+  "meta": {
+    "submitted_by": "JuhaoLiang1997",
+    "submission_type": "individual",
+    "date": "2026-05-18",
+    "time": "14:10:48",
+    "run_id": "5b72ecb7",
+    "run_name": "nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_5b72ecb7",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_vllm020_0f6c56e4/runner.py",
+    "env_info_file": "../../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-18T14:08:05.948198+00:00",
+    "benchmark_end_time": "2026-05-18T14:10:48.869711+00:00",
+    "benchmark_elapsed_minutes": 26.1,
+    "model_load_seconds": 25.3,
+    "benchmark_elapsed_minutes_note": "Total across ['offline', 'online', 'sustained'] scenarios.",
+    "scenario_dirs": {
+      "offline": "results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w8a16/offline",
+      "online": "results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w8a16/online",
+      "sustained": "results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w8a16/sustained"
+    }
+  }
+}
\ No newline at end of file
diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w8a16/sustained/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w8a16/sustained/result.json
new file mode 100644
index 00000000..0fa36d8f
--- /dev/null
+++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w8a16/sustained/result.json
@@ -0,0 +1,279 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_C",
+  "implementation_id": "nvidia_vllm020_0f6c56e4",
+  "chip": {
+    "name": "NVIDIA A100-SXM4-80GB",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 80.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-18T05:56:25.789998+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A100-SXM4-80GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 80.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_platform": "nvidia",
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tPXB\tSYS\tSYS\t0-63,128-191\t0\t\tN/A\nNIC0\tPXB\t X \tSYS\tSYS\t\t\t\t\nNIC1\tSYS\tSYS\t X \tPIX\t\t\t\t\nNIC2\tSYS\tSYS\tPIX\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "AMD EPYC 7742 64-Core Processor",
+      "physical_cores": 128,
+      "logical_cores": 255,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.7,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.12.0",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 13.0",
+    "pytorch_version": "2.11.0+cu130"
+  },
+  "software": {
+    "framework": "vLLM",
+    "framework_version": "0.20.1+transformers-5.8.1",
+    "driver_version": "580.65.06",
+    "runtime_version": "CUDA 13.0",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.12.0"
+  },
+  "model": {
+    "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w8a16",
+    "model_revision": "38e03ba250017bf8ed3eeecd3a744e21f6b994a9",
+    "model_name": null,
+    "model_note": "INT8 weight-only quantization by RedHatAI using llm-compressor. Weights INT8, activations FP16.",
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "W8A16",
+    "effective_dtype": null,
+    "quantization_method": "compressed-tensors",
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "sustained",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": {
+      "tensor_parallel_size": 1,
+      "enforce_eager": true,
+      "max_num_seqs": 512,
+      "gpu_memory_utilization": 0.9
+    },
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "sustained": {
+      "sustained_concurrency": 8,
+      "duration_minutes": 15,
+      "warmup_minutes": 1,
+      "sample_interval_seconds": 60,
+      "samples": [
+        {
+          "minute": 1.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 456.8,
+          "tokens_out": 27416,
+          "tokens_in": 0,
+          "requests_completed": 81,
+          "ttft_ms_p50": 50.0,
+          "ttft_ms_p99": 372.0
+        },
+        {
+          "minute": 2.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 504.0,
+          "tokens_out": 30242,
+          "tokens_in": 0,
+          "requests_completed": 82,
+          "ttft_ms_p50": 47.1,
+          "ttft_ms_p99": 71.8
+        },
+        {
+          "minute": 3.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 486.6,
+          "tokens_out": 29207,
+          "tokens_in": 0,
+          "requests_completed": 83,
+          "ttft_ms_p50": 47.1,
+          "ttft_ms_p99": 54.4
+        },
+        {
+          "minute": 4.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 499.0,
+          "tokens_out": 29921,
+          "tokens_in": 0,
+          "requests_completed": 85,
+          "ttft_ms_p50": 46.8,
+          "ttft_ms_p99": 52.6
+        },
+        {
+          "minute": 5.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 496.0,
+          "tokens_out": 29768,
+          "tokens_in": 0,
+          "requests_completed": 79,
+          "ttft_ms_p50": 46.9,
+          "ttft_ms_p99": 49.5
+        },
+        {
+          "minute": 6.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 498.3,
+          "tokens_out": 29901,
+          "tokens_in": 0,
+          "requests_completed": 84,
+          "ttft_ms_p50": 47.0,
+          "ttft_ms_p99": 52.3
+        },
+        {
+          "minute": 7.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 495.4,
+          "tokens_out": 29715,
+          "tokens_in": 0,
+          "requests_completed": 85,
+          "ttft_ms_p50": 46.7,
+          "ttft_ms_p99": 50.2
+        },
+        {
+          "minute": 8.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 496.1,
+          "tokens_out": 29779,
+          "tokens_in": 0,
+          "requests_completed": 81,
+          "ttft_ms_p50": 46.9,
+          "ttft_ms_p99": 53.9
+        },
+        {
+          "minute": 9.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 503.2,
+          "tokens_out": 30195,
+          "tokens_in": 0,
+          "requests_completed": 85,
+          "ttft_ms_p50": 47.3,
+          "ttft_ms_p99": 54.0
+        },
+        {
+          "minute": 10.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 489.6,
+          "tokens_out": 29369,
+          "tokens_in": 0,
+          "requests_completed": 83,
+          "ttft_ms_p50": 46.9,
+          "ttft_ms_p99": 52.3
+        },
+        {
+          "minute": 11.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 504.8,
+          "tokens_out": 30299,
+          "tokens_in": 0,
+          "requests_completed": 81,
+          "ttft_ms_p50": 46.8,
+          "ttft_ms_p99": 52.2
+        },
+        {
+          "minute": 12.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 500.4,
+          "tokens_out": 30017,
+          "tokens_in": 0,
+          "requests_completed": 85,
+          "ttft_ms_p50": 46.7,
+          "ttft_ms_p99": 50.1
+        },
+        {
+          "minute": 13.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 494.6,
+          "tokens_out": 29670,
+          "tokens_in": 0,
+          "requests_completed": 84,
+          "ttft_ms_p50": 46.8,
+          "ttft_ms_p99": 51.2
+        },
+        {
+          "minute": 14.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 492.1,
+          "tokens_out": 29528,
+          "tokens_in": 0,
+          "requests_completed": 81,
+          "ttft_ms_p50": 47.0,
+          "ttft_ms_p99": 51.5
+        }
+      ],
+      "sustained_throughput_tokens_per_sec": 494.1,
+      "throttle_ratio": 0.905,
+      "throttle_onset_minute": null,
+      "ttft_p99_drift_ms": -320.5
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "JuhaoLiang1997",
+    "submission_type": "individual",
+    "date": "2026-05-18",
+    "time": "14:36:58",
+    "run_id": "5b72ecb7",
+    "run_name": "nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_5b72ecb7",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_vllm020_0f6c56e4/runner.py",
+    "env_info_file": "../../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-18T14:21:44.687162+00:00",
+    "benchmark_end_time": "2026-05-18T14:36:58.078243+00:00",
+    "benchmark_elapsed_minutes": 15.2,
+    "model_load_seconds": 75.8
+  }
+}
\ No newline at end of file
diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w8a8/accuracy/accuracy.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w8a8/accuracy/accuracy.json
new file mode 100644
index 00000000..a4847b6d
--- /dev/null
+++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w8a8/accuracy/accuracy.json
@@ -0,0 +1,8 @@
+{
+  "subset_score": 0.59,
+  "baseline_delta": 0.0,
+  "valid": true,
+  "framework": "vLLM",
+  "precision": "W8A8",
+  "notes": "Integrated accuracy check \u2014 used same vLLM instance as benchmark."
+}
\ No newline at end of file
diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w8a8/offline/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w8a8/offline/result.json
new file mode 100644
index 00000000..01e7a3d7
--- /dev/null
+++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w8a8/offline/result.json
@@ -0,0 +1,183 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_C",
+  "implementation_id": "nvidia_vllm020_0f6c56e4",
+  "chip": {
+    "name": "NVIDIA A100-SXM4-80GB",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 80.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-18T05:56:25.789998+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A100-SXM4-80GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 80.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_platform": "nvidia",
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tPXB\tSYS\tSYS\t0-63,128-191\t0\t\tN/A\nNIC0\tPXB\t X \tSYS\tSYS\t\t\t\t\nNIC1\tSYS\tSYS\t X \tPIX\t\t\t\t\nNIC2\tSYS\tSYS\tPIX\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "AMD EPYC 7742 64-Core Processor",
+      "physical_cores": 128,
+      "logical_cores": 255,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.7,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.12.0",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 13.0",
+    "pytorch_version": "2.11.0+cu130"
+  },
+  "software": {
+    "framework": "vLLM",
+    "framework_version": "0.20.1+transformers-5.8.1",
+    "driver_version": "580.65.06",
+    "runtime_version": "CUDA 13.0",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.12.0"
+  },
+  "model": {
+    "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w8a8",
+    "model_revision": "e2bfb7d92784ad7d1b606c2f9644d3cefb2ec708",
+    "model_name": null,
+    "model_note": "INT8 quantized by RedHatAI using llm-compressor (compressed-tensors). Both weights and activations quantized to INT8.",
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "W8A8",
+    "effective_dtype": "bfloat16",
+    "quantization_method": "compressed-tensors",
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "offline",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": {
+      "tensor_parallel_size": 1,
+      "enforce_eager": true,
+      "max_num_seqs": 512,
+      "gpu_memory_utilization": 0.9
+    },
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "offline": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 1,
+          "throughput_tokens_per_sec": 3208.11,
+          "throughput_tokens_per_sec_per_chip": 3208.11,
+          "throughput_tokens_per_sec_total": 5840.36,
+          "elapsed_seconds_median": 10.7,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 4,
+          "throughput_tokens_per_sec": 3140.16,
+          "throughput_tokens_per_sec_per_chip": 3140.16,
+          "throughput_tokens_per_sec_total": 5706.63,
+          "elapsed_seconds_median": 11.0,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 16,
+          "throughput_tokens_per_sec": 3193.23,
+          "throughput_tokens_per_sec_per_chip": 3193.23,
+          "throughput_tokens_per_sec_total": 5813.28,
+          "elapsed_seconds_median": 10.7,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 64,
+          "throughput_tokens_per_sec": 3175.58,
+          "throughput_tokens_per_sec_per_chip": 3175.58,
+          "throughput_tokens_per_sec_total": 5786.77,
+          "elapsed_seconds_median": 10.8,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "JuhaoLiang1997",
+    "submission_type": "individual",
+    "date": "2026-05-18",
+    "time": "13:39:44",
+    "run_id": "1b79437b",
+    "run_name": "nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_1b79437b",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_vllm020_0f6c56e4/runner.py",
+    "env_info_file": "../../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-18T13:36:50.535504+00:00",
+    "benchmark_end_time": "2026-05-18T13:39:44.822889+00:00",
+    "benchmark_elapsed_minutes": 2.9,
+    "model_load_seconds": 18.2
+  }
+}
\ No newline at end of file
diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w8a8/online/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w8a8/online/result.json
new file mode 100644
index 00000000..5e5bd009
--- /dev/null
+++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w8a8/online/result.json
@@ -0,0 +1,181 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_C",
+  "implementation_id": "nvidia_vllm020_0f6c56e4",
+  "chip": {
+    "name": "NVIDIA A100-SXM4-80GB",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 80.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-18T05:56:25.789998+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A100-SXM4-80GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 80.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_platform": "nvidia",
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tPXB\tSYS\tSYS\t0-63,128-191\t0\t\tN/A\nNIC0\tPXB\t X \tSYS\tSYS\t\t\t\t\nNIC1\tSYS\tSYS\t X \tPIX\t\t\t\t\nNIC2\tSYS\tSYS\tPIX\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "AMD EPYC 7742 64-Core Processor",
+      "physical_cores": 128,
+      "logical_cores": 255,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.7,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.12.0",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 13.0",
+    "pytorch_version": "2.11.0+cu130"
+  },
+  "software": {
+    "framework": "vLLM",
+    "framework_version": "0.20.1+transformers-5.8.1",
+    "driver_version": "580.65.06",
+    "runtime_version": "CUDA 13.0",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.12.0"
+  },
+  "model": {
+    "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w8a8",
+    "model_revision": "e2bfb7d92784ad7d1b606c2f9644d3cefb2ec708",
+    "model_name": null,
+    "model_note": "INT8 quantized by RedHatAI using llm-compressor (compressed-tensors). Both weights and activations quantized to INT8.",
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "W8A8",
+    "effective_dtype": null,
+    "quantization_method": "compressed-tensors",
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "online",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": {
+      "tensor_parallel_size": 1,
+      "enforce_eager": true,
+      "max_num_seqs": 512,
+      "gpu_memory_utilization": 0.9
+    },
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "online": {
+      "sla_ttft_ms": 500,
+      "max_valid_qps": 10,
+      "results_by_qps": [
+        {
+          "target_qps": 5,
+          "achieved_qps": 5.0,
+          "ttft_ms_p50": 55.34,
+          "ttft_ms_p90": 63.29,
+          "ttft_ms_p99": 69.75,
+          "tpot_ms_p50": 20.67,
+          "tpot_ms_p90": 20.88,
+          "tpot_ms_p99": 21.3,
+          "elapsed_seconds_median": 72.9,
+          "sla_met": true
+        },
+        {
+          "target_qps": 10,
+          "achieved_qps": 10.0,
+          "ttft_ms_p50": 57.42,
+          "ttft_ms_p90": 66.6,
+          "ttft_ms_p99": 69.92,
+          "tpot_ms_p50": 21.28,
+          "tpot_ms_p90": 22.19,
+          "tpot_ms_p99": 22.28,
+          "elapsed_seconds_median": 39.6,
+          "sla_met": true
+        },
+        {
+          "target_qps": 25,
+          "achieved_qps": 25.0,
+          "ttft_ms_p50": 74.55,
+          "ttft_ms_p90": 4438.81,
+          "ttft_ms_p99": 5421.82,
+          "tpot_ms_p50": 22.53,
+          "tpot_ms_p90": 23.69,
+          "tpot_ms_p99": 25.14,
+          "elapsed_seconds_median": 27.7,
+          "sla_met": false
+        },
+        {
+          "target_qps": 50,
+          "achieved_qps": 50.0,
+          "ttft_ms_p50": 985.11,
+          "ttft_ms_p90": 8331.22,
+          "ttft_ms_p99": 8868.55,
+          "tpot_ms_p50": 23.38,
+          "tpot_ms_p90": 24.38,
+          "tpot_ms_p99": 26.79,
+          "elapsed_seconds_median": 25.6,
+          "sla_met": false
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "JuhaoLiang1997",
+    "submission_type": "individual",
+    "date": "2026-05-18",
+    "time": "13:48:53",
+    "run_id": "1b79437b",
+    "run_name": "nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_1b79437b",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_vllm020_0f6c56e4/runner.py",
+    "env_info_file": "../../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-18T13:40:36.652448+00:00",
+    "benchmark_end_time": "2026-05-18T13:48:53.173908+00:00",
+    "benchmark_elapsed_minutes": 8.3,
+    "model_load_seconds": 23.6
+  }
+}
\ No newline at end of file
diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w8a8/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w8a8/result.json
new file mode 100644
index 00000000..ff6e59c1
--- /dev/null
+++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w8a8/result.json
@@ -0,0 +1,400 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_C",
+  "implementation_id": "nvidia_vllm020_0f6c56e4",
+  "chip": {
+    "name": "NVIDIA A100-SXM4-80GB",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 80.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-18T05:56:25.789998+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A100-SXM4-80GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 80.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_platform": "nvidia",
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tPXB\tSYS\tSYS\t0-63,128-191\t0\t\tN/A\nNIC0\tPXB\t X \tSYS\tSYS\t\t\t\t\nNIC1\tSYS\tSYS\t X \tPIX\t\t\t\t\nNIC2\tSYS\tSYS\tPIX\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "AMD EPYC 7742 64-Core Processor",
+      "physical_cores": 128,
+      "logical_cores": 255,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.7,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.12.0",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 13.0",
+    "pytorch_version": "2.11.0+cu130"
+  },
+  "software": {
+    "framework": "vLLM",
+    "framework_version": "0.20.1+transformers-5.8.1",
+    "driver_version": "580.65.06",
+    "runtime_version": "CUDA 13.0",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.12.0"
+  },
+  "model": {
+    "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w8a8",
+    "model_revision": "e2bfb7d92784ad7d1b606c2f9644d3cefb2ec708",
+    "model_name": null,
+    "model_note": "INT8 quantized by RedHatAI using llm-compressor (compressed-tensors). Both weights and activations quantized to INT8.",
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "W8A8",
+    "effective_dtype": "bfloat16",
+    "quantization_method": "compressed-tensors",
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenarios_run": [
+      "offline",
+      "online",
+      "sustained"
+    ],
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "num_runs": 3,
+    "extra_config": {
+      "tensor_parallel_size": 1,
+      "enforce_eager": true,
+      "max_num_seqs": 512,
+      "gpu_memory_utilization": 0.9
+    }
+  },
+  "metrics": {
+    "derived": {},
+    "offline": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 1,
+          "throughput_tokens_per_sec": 3208.11,
+          "throughput_tokens_per_sec_per_chip": 3208.11,
+          "throughput_tokens_per_sec_total": 5840.36,
+          "elapsed_seconds_median": 10.7,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 4,
+          "throughput_tokens_per_sec": 3140.16,
+          "throughput_tokens_per_sec_per_chip": 3140.16,
+          "throughput_tokens_per_sec_total": 5706.63,
+          "elapsed_seconds_median": 11.0,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 16,
+          "throughput_tokens_per_sec": 3193.23,
+          "throughput_tokens_per_sec_per_chip": 3193.23,
+          "throughput_tokens_per_sec_total": 5813.28,
+          "elapsed_seconds_median": 10.7,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 64,
+          "throughput_tokens_per_sec": 3175.58,
+          "throughput_tokens_per_sec_per_chip": 3175.58,
+          "throughput_tokens_per_sec_total": 5786.77,
+          "elapsed_seconds_median": 10.8,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    },
+    "online": {
+      "sla_ttft_ms": 500,
+      "max_valid_qps": 10,
+      "results_by_qps": [
+        {
+          "target_qps": 5,
+          "achieved_qps": 5.0,
+          "ttft_ms_p50": 55.34,
+          "ttft_ms_p90": 63.29,
+          "ttft_ms_p99": 69.75,
+          "tpot_ms_p50": 20.67,
+          "tpot_ms_p90": 20.88,
+          "tpot_ms_p99": 21.3,
+          "elapsed_seconds_median": 72.9,
+          "sla_met": true
+        },
+        {
+          "target_qps": 10,
+          "achieved_qps": 10.0,
+          "ttft_ms_p50": 57.42,
+          "ttft_ms_p90": 66.6,
+          "ttft_ms_p99": 69.92,
+          "tpot_ms_p50": 21.28,
+          "tpot_ms_p90": 22.19,
+          "tpot_ms_p99": 22.28,
+          "elapsed_seconds_median": 39.6,
+          "sla_met": true
+        },
+        {
+          "target_qps": 25,
+          "achieved_qps": 25.0,
+          "ttft_ms_p50": 74.55,
+          "ttft_ms_p90": 4438.81,
+          "ttft_ms_p99": 5421.82,
+          "tpot_ms_p50": 22.53,
+          "tpot_ms_p90": 23.69,
+          "tpot_ms_p99": 25.14,
+          "elapsed_seconds_median": 27.7,
+          "sla_met": false
+        },
+        {
+          "target_qps": 50,
+          "achieved_qps": 50.0,
+          "ttft_ms_p50": 985.11,
+          "ttft_ms_p90": 8331.22,
+          "ttft_ms_p99": 8868.55,
+          "tpot_ms_p50": 23.38,
+          "tpot_ms_p90": 24.38,
+          "tpot_ms_p99": 26.79,
+          "elapsed_seconds_median": 25.6,
+          "sla_met": false
+        }
+      ]
+    },
+    "sustained": {
+      "sustained_concurrency": 8,
+      "duration_minutes": 15,
+      "warmup_minutes": 1,
+      "sample_interval_seconds": 60,
+      "samples": [
+        {
+          "minute": 1.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 366.9,
+          "tokens_out": 22031,
+          "tokens_in": 0,
+          "requests_completed": 63,
+          "ttft_ms_p50": 59.1,
+          "ttft_ms_p99": 396.4
+        },
+        {
+          "minute": 2.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 402.7,
+          "tokens_out": 24148,
+          "tokens_in": 0,
+          "requests_completed": 71,
+          "ttft_ms_p50": 58.8,
+          "ttft_ms_p99": 63.4
+        },
+        {
+          "minute": 3.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 400.8,
+          "tokens_out": 24050,
+          "tokens_in": 0,
+          "requests_completed": 66,
+          "ttft_ms_p50": 58.5,
+          "ttft_ms_p99": 61.0
+        },
+        {
+          "minute": 4.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 402.1,
+          "tokens_out": 24127,
+          "tokens_in": 0,
+          "requests_completed": 66,
+          "ttft_ms_p50": 58.0,
+          "ttft_ms_p99": 61.2
+        },
+        {
+          "minute": 5.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 395.3,
+          "tokens_out": 23722,
+          "tokens_in": 0,
+          "requests_completed": 71,
+          "ttft_ms_p50": 58.7,
+          "ttft_ms_p99": 64.5
+        },
+        {
+          "minute": 6.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 398.4,
+          "tokens_out": 23893,
+          "tokens_in": 0,
+          "requests_completed": 64,
+          "ttft_ms_p50": 58.7,
+          "ttft_ms_p99": 63.0
+        },
+        {
+          "minute": 7.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 410.0,
+          "tokens_out": 24605,
+          "tokens_in": 0,
+          "requests_completed": 66,
+          "ttft_ms_p50": 58.3,
+          "ttft_ms_p99": 62.1
+        },
+        {
+          "minute": 8.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 398.6,
+          "tokens_out": 23918,
+          "tokens_in": 0,
+          "requests_completed": 71,
+          "ttft_ms_p50": 59.5,
+          "ttft_ms_p99": 65.7
+        },
+        {
+          "minute": 9.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 390.3,
+          "tokens_out": 23418,
+          "tokens_in": 0,
+          "requests_completed": 64,
+          "ttft_ms_p50": 58.3,
+          "ttft_ms_p99": 62.8
+        },
+        {
+          "minute": 10.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 417.2,
+          "tokens_out": 25045,
+          "tokens_in": 0,
+          "requests_completed": 69,
+          "ttft_ms_p50": 58.2,
+          "ttft_ms_p99": 61.0
+        },
+        {
+          "minute": 11.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 391.1,
+          "tokens_out": 23462,
+          "tokens_in": 0,
+          "requests_completed": 70,
+          "ttft_ms_p50": 57.8,
+          "ttft_ms_p99": 62.4
+        },
+        {
+          "minute": 12.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 408.7,
+          "tokens_out": 24514,
+          "tokens_in": 0,
+          "requests_completed": 66,
+          "ttft_ms_p50": 58.5,
+          "ttft_ms_p99": 65.6
+        },
+        {
+          "minute": 13.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 415.2,
+          "tokens_out": 24925,
+          "tokens_in": 0,
+          "requests_completed": 71,
+          "ttft_ms_p50": 58.2,
+          "ttft_ms_p99": 61.3
+        },
+        {
+          "minute": 14.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 395.0,
+          "tokens_out": 23687,
+          "tokens_in": 0,
+          "requests_completed": 67,
+          "ttft_ms_p50": 58.0,
+          "ttft_ms_p99": 64.9
+        }
+      ],
+      "sustained_throughput_tokens_per_sec": 399.4,
+      "throttle_ratio": 0.879,
+      "throttle_onset_minute": 1.0,
+      "ttft_p99_drift_ms": -331.5
+    }
+  },
+  "accuracy": {
+    "subset_score": 0.59,
+    "baseline_delta": 0.0,
+    "valid": true,
+    "framework": "vLLM",
+    "precision": "W8A8",
+    "notes": "Integrated accuracy check \u2014 used same vLLM instance as benchmark."
+  },
+  "meta": {
+    "submitted_by": "JuhaoLiang1997",
+    "submission_type": "individual",
+    "date": "2026-05-18",
+    "time": "13:39:44",
+    "run_id": "1b79437b",
+    "run_name": "nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_1b79437b",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_vllm020_0f6c56e4/runner.py",
+    "env_info_file": "../../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-18T13:36:50.535504+00:00",
+    "benchmark_end_time": "2026-05-18T13:39:44.822889+00:00",
+    "benchmark_elapsed_minutes": 26.5,
+    "model_load_seconds": 18.2,
+    "benchmark_elapsed_minutes_note": "Total across ['offline', 'online', 'sustained'] scenarios.",
+    "scenario_dirs": {
+      "offline": "results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w8a8/offline",
+      "online": "results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w8a8/online",
+      "sustained": "results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w8a8/sustained"
+    }
+  }
+}
\ No newline at end of file
diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w8a8/sustained/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w8a8/sustained/result.json
new file mode 100644
index 00000000..eaea3a87
--- /dev/null
+++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_ffd81462/w8a8/sustained/result.json
@@ -0,0 +1,279 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_C",
+  "implementation_id": "nvidia_vllm020_0f6c56e4",
+  "chip": {
+    "name": "NVIDIA A100-SXM4-80GB",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 80.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-18T05:56:25.789998+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A100-SXM4-80GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 80.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_platform": "nvidia",
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tPXB\tSYS\tSYS\t0-63,128-191\t0\t\tN/A\nNIC0\tPXB\t X \tSYS\tSYS\t\t\t\t\nNIC1\tSYS\tSYS\t X \tPIX\t\t\t\t\nNIC2\tSYS\tSYS\tPIX\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "AMD EPYC 7742 64-Core Processor",
+      "physical_cores": 128,
+      "logical_cores": 255,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.7,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.12.0",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 13.0",
+    "pytorch_version": "2.11.0+cu130"
+  },
+  "software": {
+    "framework": "vLLM",
+    "framework_version": "0.20.1+transformers-5.8.1",
+    "driver_version": "580.65.06",
+    "runtime_version": "CUDA 13.0",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.12.0"
+  },
+  "model": {
+    "model_id": "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w8a8",
+    "model_revision": "e2bfb7d92784ad7d1b606c2f9644d3cefb2ec708",
+    "model_name": null,
+    "model_note": "INT8 quantized by RedHatAI using llm-compressor (compressed-tensors). Both weights and activations quantized to INT8.",
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "W8A8",
+    "effective_dtype": null,
+    "quantization_method": "compressed-tensors",
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "sustained",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": {
+      "tensor_parallel_size": 1,
+      "enforce_eager": true,
+      "max_num_seqs": 512,
+      "gpu_memory_utilization": 0.9
+    },
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "sustained": {
+      "sustained_concurrency": 8,
+      "duration_minutes": 15,
+      "warmup_minutes": 1,
+      "sample_interval_seconds": 60,
+      "samples": [
+        {
+          "minute": 1.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 366.9,
+          "tokens_out": 22031,
+          "tokens_in": 0,
+          "requests_completed": 63,
+          "ttft_ms_p50": 59.1,
+          "ttft_ms_p99": 396.4
+        },
+        {
+          "minute": 2.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 402.7,
+          "tokens_out": 24148,
+          "tokens_in": 0,
+          "requests_completed": 71,
+          "ttft_ms_p50": 58.8,
+          "ttft_ms_p99": 63.4
+        },
+        {
+          "minute": 3.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 400.8,
+          "tokens_out": 24050,
+          "tokens_in": 0,
+          "requests_completed": 66,
+          "ttft_ms_p50": 58.5,
+          "ttft_ms_p99": 61.0
+        },
+        {
+          "minute": 4.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 402.1,
+          "tokens_out": 24127,
+          "tokens_in": 0,
+          "requests_completed": 66,
+          "ttft_ms_p50": 58.0,
+          "ttft_ms_p99": 61.2
+        },
+        {
+          "minute": 5.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 395.3,
+          "tokens_out": 23722,
+          "tokens_in": 0,
+          "requests_completed": 71,
+          "ttft_ms_p50": 58.7,
+          "ttft_ms_p99": 64.5
+        },
+        {
+          "minute": 6.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 398.4,
+          "tokens_out": 23893,
+          "tokens_in": 0,
+          "requests_completed": 64,
+          "ttft_ms_p50": 58.7,
+          "ttft_ms_p99": 63.0
+        },
+        {
+          "minute": 7.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 410.0,
+          "tokens_out": 24605,
+          "tokens_in": 0,
+          "requests_completed": 66,
+          "ttft_ms_p50": 58.3,
+          "ttft_ms_p99": 62.1
+        },
+        {
+          "minute": 8.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 398.6,
+          "tokens_out": 23918,
+          "tokens_in": 0,
+          "requests_completed": 71,
+          "ttft_ms_p50": 59.5,
+          "ttft_ms_p99": 65.7
+        },
+        {
+          "minute": 9.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 390.3,
+          "tokens_out": 23418,
+          "tokens_in": 0,
+          "requests_completed": 64,
+          "ttft_ms_p50": 58.3,
+          "ttft_ms_p99": 62.8
+        },
+        {
+          "minute": 10.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 417.2,
+          "tokens_out": 25045,
+          "tokens_in": 0,
+          "requests_completed": 69,
+          "ttft_ms_p50": 58.2,
+          "ttft_ms_p99": 61.0
+        },
+        {
+          "minute": 11.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 391.1,
+          "tokens_out": 23462,
+          "tokens_in": 0,
+          "requests_completed": 70,
+          "ttft_ms_p50": 57.8,
+          "ttft_ms_p99": 62.4
+        },
+        {
+          "minute": 12.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 408.7,
+          "tokens_out": 24514,
+          "tokens_in": 0,
+          "requests_completed": 66,
+          "ttft_ms_p50": 58.5,
+          "ttft_ms_p99": 65.6
+        },
+        {
+          "minute": 13.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 415.2,
+          "tokens_out": 24925,
+          "tokens_in": 0,
+          "requests_completed": 71,
+          "ttft_ms_p50": 58.2,
+          "ttft_ms_p99": 61.3
+        },
+        {
+          "minute": 14.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 395.0,
+          "tokens_out": 23687,
+          "tokens_in": 0,
+          "requests_completed": 67,
+          "ttft_ms_p50": 58.0,
+          "ttft_ms_p99": 64.9
+        }
+      ],
+      "sustained_throughput_tokens_per_sec": 399.4,
+      "throttle_ratio": 0.879,
+      "throttle_onset_minute": 1.0,
+      "ttft_p99_drift_ms": -331.5
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "JuhaoLiang1997",
+    "submission_type": "individual",
+    "date": "2026-05-18",
+    "time": "14:04:58",
+    "run_id": "1b79437b",
+    "run_name": "nvidia_a100_sxm4_80gbx1_suite_C_nvidia_vllm020_0f6c56e4_1b79437b",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_vllm020_0f6c56e4/runner.py",
+    "env_info_file": "../../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-18T13:49:43.407380+00:00",
+    "benchmark_end_time": "2026-05-18T14:04:58.852879+00:00",
+    "benchmark_elapsed_minutes": 15.3,
+    "model_load_seconds": 22.2
+  }
+}
\ No newline at end of file
diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_vllm020_0f6c56e4_43e96189/accuracy/accuracy.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_vllm020_0f6c56e4_43e96189/accuracy/accuracy.json
new file mode 100644
index 00000000..95fced50
--- /dev/null
+++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_vllm020_0f6c56e4_43e96189/accuracy/accuracy.json
@@ -0,0 +1,8 @@
+{
+  "subset_score": 0.56,
+  "baseline_delta": 0.0,
+  "valid": true,
+  "framework": "vLLM",
+  "precision": "BF16",
+  "notes": "Integrated accuracy check \u2014 used same vLLM instance as benchmark."
+}
\ No newline at end of file
diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_vllm020_0f6c56e4_43e96189/env_info.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_vllm020_0f6c56e4_43e96189/env_info.json
new file mode 100644
index 00000000..327ddbe4
--- /dev/null
+++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_vllm020_0f6c56e4_43e96189/env_info.json
@@ -0,0 +1,49 @@
+{
+  "collected_at": "2026-05-18T07:00:53.162228+00:00",
+  "accelerators": [
+    {
+      "index": 0,
+      "name": "NVIDIA A100-SXM4-80GB",
+      "vendor": "NVIDIA",
+      "memory_gb": 80.0,
+      "driver_version": "580.65.06",
+      "firmware_version": null,
+      "compute_capability": "8.0",
+      "supports_bf16": true
+    }
+  ],
+  "accelerator_platform": "nvidia",
+  "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tPXB\tSYS\tSYS\t0-63,128-191\t0\t\tN/A\nNIC0\tPXB\t X \tSYS\tSYS\t\t\t\t\nNIC1\tSYS\tSYS\t X \tPIX\t\t\t\t\nNIC2\tSYS\tSYS\tPIX\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n\n",
+  "intra_node_interconnect": null,
+  "cpu": {
+    "model": "AMD EPYC 7742 64-Core Processor",
+    "physical_cores": 128,
+    "logical_cores": 255,
+    "numa_nodes": 2
+  },
+  "system_memory_gb": 1007.7,
+  "pcie_generation": "PCIe Gen 4",
+  "cpu_accelerator_bandwidth_gbs": null,
+  "network_interfaces": [
+    {
+      "name": "mlx5_0",
+      "type": "InfiniBand/RoCE",
+      "bandwidth_gbps": null
+    },
+    {
+      "name": "mlx5_1",
+      "type": "InfiniBand/RoCE",
+      "bandwidth_gbps": null
+    },
+    {
+      "name": "mlx5_2",
+      "type": "InfiniBand/RoCE",
+      "bandwidth_gbps": null
+    }
+  ],
+  "os": "Ubuntu 22.04.4 LTS",
+  "python_version": "3.12.0",
+  "kernel_version": "5.15.0-60-generic",
+  "runtime_version": "CUDA 13.0",
+  "pytorch_version": "2.11.0+cu130"
+}
\ No newline at end of file
diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_vllm020_0f6c56e4_43e96189/interactive/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_vllm020_0f6c56e4_43e96189/interactive/result.json
new file mode 100644
index 00000000..40a91c48
--- /dev/null
+++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_vllm020_0f6c56e4_43e96189/interactive/result.json
@@ -0,0 +1,132 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_D",
+  "implementation_id": "nvidia_vllm020_0f6c56e4",
+  "chip": {
+    "name": "NVIDIA A100-SXM4-80GB",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 80.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-18T07:00:53.162228+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A100-SXM4-80GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 80.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_platform": "nvidia",
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tPXB\tSYS\tSYS\t0-63,128-191\t0\t\tN/A\nNIC0\tPXB\t X \tSYS\tSYS\t\t\t\t\nNIC1\tSYS\tSYS\t X \tPIX\t\t\t\t\nNIC2\tSYS\tSYS\tPIX\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "AMD EPYC 7742 64-Core Processor",
+      "physical_cores": 128,
+      "logical_cores": 255,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.7,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.12.0",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 13.0",
+    "pytorch_version": "2.11.0+cu130"
+  },
+  "software": {
+    "framework": "vLLM",
+    "framework_version": "0.20.1+transformers-5.8.1",
+    "driver_version": "580.65.06",
+    "runtime_version": "CUDA 13.0",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.12.0"
+  },
+  "model": {
+    "model_id": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_revision": "0e9e39f249a16976918f6564b8830bc894c89659",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": null,
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "interactive",
+    "num_runs": 2,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "interactive": {
+      "ttft_ms_p50": 3178.55,
+      "ttft_ms_p90": 3335.27,
+      "ttft_ms_p99": 3376.37,
+      "tpot_ms_p50": 13.1,
+      "tpot_ms_p90": 13.17,
+      "tpot_ms_p99": 13.2,
+      "peak_memory_gb": null,
+      "elapsed_seconds_median": 651.9
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "JuhaoLiang1997",
+    "submission_type": "individual",
+    "date": "2026-05-18",
+    "time": "07:46:17",
+    "run_id": "43e96189",
+    "run_name": "nvidia_a100_sxm4_80gbx1_suite_D_nvidia_vllm020_0f6c56e4_43e96189",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_vllm020_0f6c56e4/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-18T07:24:33.202740+00:00",
+    "benchmark_end_time": "2026-05-18T07:46:17.004385+00:00",
+    "benchmark_elapsed_minutes": 21.7,
+    "model_load_seconds": 65.2
+  }
+}
diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_vllm020_0f6c56e4_43e96189/offline/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_vllm020_0f6c56e4_43e96189/offline/result.json
new file mode 100644
index 00000000..0778b9c8
--- /dev/null
+++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_vllm020_0f6c56e4_43e96189/offline/result.json
@@ -0,0 +1,152 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_D",
+  "implementation_id": "nvidia_vllm020_0f6c56e4",
+  "chip": {
+    "name": "NVIDIA A100-SXM4-80GB",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 80.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-18T07:00:53.162228+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A100-SXM4-80GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 80.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_platform": "nvidia",
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tPXB\tSYS\tSYS\t0-63,128-191\t0\t\tN/A\nNIC0\tPXB\t X \tSYS\tSYS\t\t\t\t\nNIC1\tSYS\tSYS\t X \tPIX\t\t\t\t\nNIC2\tSYS\tSYS\tPIX\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "AMD EPYC 7742 64-Core Processor",
+      "physical_cores": 128,
+      "logical_cores": 255,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.7,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.12.0",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 13.0",
+    "pytorch_version": "2.11.0+cu130"
+  },
+  "software": {
+    "framework": "vLLM",
+    "framework_version": "0.20.1+transformers-5.8.1",
+    "driver_version": "580.65.06",
+    "runtime_version": "CUDA 13.0",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.12.0"
+  },
+  "model": {
+    "model_id": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_revision": "0e9e39f249a16976918f6564b8830bc894c89659",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "offline",
+    "num_runs": 2,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "offline": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 1,
+          "throughput_tokens_per_sec": 65.15,
+          "throughput_tokens_per_sec_per_chip": 65.15,
+          "throughput_tokens_per_sec_total": 7353.82,
+          "elapsed_seconds_median": 196.5,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 4,
+          "throughput_tokens_per_sec": 65.12,
+          "throughput_tokens_per_sec_per_chip": 65.12,
+          "throughput_tokens_per_sec_total": 7349.93,
+          "elapsed_seconds_median": 196.6,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "JuhaoLiang1997",
+    "submission_type": "individual",
+    "date": "2026-05-18",
+    "time": "07:23:02",
+    "run_id": "43e96189",
+    "run_name": "nvidia_a100_sxm4_80gbx1_suite_D_nvidia_vllm020_0f6c56e4_43e96189",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_vllm020_0f6c56e4/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-18T07:03:23.326807+00:00",
+    "benchmark_end_time": "2026-05-18T07:23:02.065079+00:00",
+    "benchmark_elapsed_minutes": 19.6,
+    "model_load_seconds": 41.7
+  }
+}
diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_vllm020_0f6c56e4_43e96189/online/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_vllm020_0f6c56e4_43e96189/online/result.json
new file mode 100644
index 00000000..fd6b20e6
--- /dev/null
+++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_vllm020_0f6c56e4_43e96189/online/result.json
@@ -0,0 +1,169 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_D",
+  "implementation_id": "nvidia_vllm020_0f6c56e4",
+  "chip": {
+    "name": "NVIDIA A100-SXM4-80GB",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 80.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-18T07:00:53.162228+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A100-SXM4-80GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 80.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_platform": "nvidia",
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tPXB\tSYS\tSYS\t0-63,128-191\t0\t\tN/A\nNIC0\tPXB\t X \tSYS\tSYS\t\t\t\t\nNIC1\tSYS\tSYS\t X \tPIX\t\t\t\t\nNIC2\tSYS\tSYS\tPIX\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "AMD EPYC 7742 64-Core Processor",
+      "physical_cores": 128,
+      "logical_cores": 255,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.7,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.12.0",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 13.0",
+    "pytorch_version": "2.11.0+cu130"
+  },
+  "software": {
+    "framework": "vLLM",
+    "framework_version": "0.20.1+transformers-5.8.1",
+    "driver_version": "580.65.06",
+    "runtime_version": "CUDA 13.0",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.12.0"
+  },
+  "model": {
+    "model_id": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_revision": "0e9e39f249a16976918f6564b8830bc894c89659",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": null,
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "online",
+    "num_runs": 2,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": {
+      "tensor_parallel_size": 1,
+      "enforce_eager": false,
+      "max_num_seqs": 64,
+      "gpu_memory_utilization": 0.85
+    },
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "online": {
+      "sla_ttft_ms": 5000,
+      "max_valid_qps": 0.0,
+      "results_by_qps": [
+        {
+          "target_qps": 0.5,
+          "achieved_qps": 0.5,
+          "ttft_ms_p50": 129241.71,
+          "ttft_ms_p90": 238515.99,
+          "ttft_ms_p99": 255266.98,
+          "tpot_ms_p50": 231.96,
+          "tpot_ms_p90": 236.23,
+          "tpot_ms_p99": 238.5,
+          "elapsed_seconds_median": 459.2,
+          "sla_met": false
+        },
+        {
+          "target_qps": 1,
+          "achieved_qps": 1.0,
+          "ttft_ms_p50": 163924.47,
+          "ttft_ms_p90": 304663.59,
+          "ttft_ms_p99": 340432.59,
+          "tpot_ms_p50": 232.21,
+          "tpot_ms_p90": 236.44,
+          "tpot_ms_p99": 238.73,
+          "elapsed_seconds_median": 461.6,
+          "sla_met": false
+        },
+        {
+          "target_qps": 2,
+          "achieved_qps": 2.0,
+          "ttft_ms_p50": 197613.68,
+          "ttft_ms_p90": 361816.51,
+          "ttft_ms_p99": 400408.53,
+          "tpot_ms_p50": 232.17,
+          "tpot_ms_p90": 236.5,
+          "tpot_ms_p99": 238.76,
+          "elapsed_seconds_median": 459.2,
+          "sla_met": false
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "JuhaoLiang1997",
+    "submission_type": "individual",
+    "date": "2026-05-18",
+    "time": "10:05:01",
+    "run_id": "43e96189",
+    "run_name": "nvidia_a100_sxm4_80gbx1_suite_D_nvidia_vllm020_0f6c56e4_43e96189",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_vllm020_0f6c56e4/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-18T09:19:01.620038+00:00",
+    "benchmark_end_time": "2026-05-18T10:05:01.503808+00:00",
+    "benchmark_elapsed_minutes": 46.0,
+    "model_load_seconds": 51.9
+  }
+}
\ No newline at end of file
diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_vllm020_0f6c56e4_43e96189/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_vllm020_0f6c56e4_43e96189/result.json
new file mode 100644
index 00000000..ac745b18
--- /dev/null
+++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_vllm020_0f6c56e4_43e96189/result.json
@@ -0,0 +1,519 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_D",
+  "implementation_id": "nvidia_vllm020_0f6c56e4",
+  "chip": {
+    "name": "NVIDIA A100-SXM4-80GB",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 80.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-18T07:00:53.162228+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A100-SXM4-80GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 80.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_platform": "nvidia",
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tPXB\tSYS\tSYS\t0-63,128-191\t0\t\tN/A\nNIC0\tPXB\t X \tSYS\tSYS\t\t\t\t\nNIC1\tSYS\tSYS\t X \tPIX\t\t\t\t\nNIC2\tSYS\tSYS\tPIX\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "AMD EPYC 7742 64-Core Processor",
+      "physical_cores": 128,
+      "logical_cores": 255,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.7,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.12.0",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 13.0",
+    "pytorch_version": "2.11.0+cu130"
+  },
+  "software": {
+    "framework": "vLLM",
+    "framework_version": "0.20.1+transformers-5.8.1",
+    "driver_version": "580.65.06",
+    "runtime_version": "CUDA 13.0",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.12.0"
+  },
+  "model": {
+    "model_id": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_revision": "0e9e39f249a16976918f6564b8830bc894c89659",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenarios_run": [
+      "offline",
+      "interactive",
+      "sustained",
+      "online"
+    ],
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "num_runs": 2,
+    "extra_config": null
+  },
+  "metrics": {
+    "derived": {},
+    "offline": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 1,
+          "throughput_tokens_per_sec": 65.15,
+          "throughput_tokens_per_sec_per_chip": 65.15,
+          "throughput_tokens_per_sec_total": 7353.82,
+          "elapsed_seconds_median": 196.5,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 4,
+          "throughput_tokens_per_sec": 65.12,
+          "throughput_tokens_per_sec_per_chip": 65.12,
+          "throughput_tokens_per_sec_total": 7349.93,
+          "elapsed_seconds_median": 196.6,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    },
+    "interactive": {
+      "ttft_ms_p50": 3178.55,
+      "ttft_ms_p90": 3335.27,
+      "ttft_ms_p99": 3376.37,
+      "tpot_ms_p50": 13.1,
+      "tpot_ms_p90": 13.17,
+      "tpot_ms_p99": 13.2,
+      "peak_memory_gb": null,
+      "elapsed_seconds_median": 651.9
+    },
+    "sustained": {
+      "sustained_concurrency": 8,
+      "duration_minutes": 30,
+      "warmup_minutes": 2,
+      "sample_interval_seconds": 60,
+      "samples": [
+        {
+          "minute": 1.0,
+          "is_warmup": true,
+          "throughput_tokens_per_sec": 34.1,
+          "tokens_out": 2048,
+          "tokens_in": 0,
+          "requests_completed": 8,
+          "ttft_ms_p50": 15012.6,
+          "ttft_ms_p99": 27511.4
+        },
+        {
+          "minute": 2.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 55.4,
+          "tokens_out": 3328,
+          "tokens_in": 0,
+          "requests_completed": 13,
+          "ttft_ms_p50": 6118.2,
+          "ttft_ms_p99": 6481.6
+        },
+        {
+          "minute": 3.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 59.7,
+          "tokens_out": 3584,
+          "tokens_in": 0,
+          "requests_completed": 14,
+          "ttft_ms_p50": 5323.4,
+          "ttft_ms_p99": 6114.9
+        },
+        {
+          "minute": 4.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 55.5,
+          "tokens_out": 3328,
+          "tokens_in": 0,
+          "requests_completed": 13,
+          "ttft_ms_p50": 5619.9,
+          "ttft_ms_p99": 6149.6
+        },
+        {
+          "minute": 5.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 64.0,
+          "tokens_out": 3840,
+          "tokens_in": 0,
+          "requests_completed": 15,
+          "ttft_ms_p50": 5341.4,
+          "ttft_ms_p99": 6110.4
+        },
+        {
+          "minute": 6.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 55.5,
+          "tokens_out": 3328,
+          "tokens_in": 0,
+          "requests_completed": 13,
+          "ttft_ms_p50": 5932.5,
+          "ttft_ms_p99": 6440.4
+        },
+        {
+          "minute": 7.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 59.7,
+          "tokens_out": 3584,
+          "tokens_in": 0,
+          "requests_completed": 14,
+          "ttft_ms_p50": 4964.1,
+          "ttft_ms_p99": 5812.6
+        },
+        {
+          "minute": 8.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 59.8,
+          "tokens_out": 3584,
+          "tokens_in": 0,
+          "requests_completed": 14,
+          "ttft_ms_p50": 5597.6,
+          "ttft_ms_p99": 6251.9
+        },
+        {
+          "minute": 9.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 59.7,
+          "tokens_out": 3584,
+          "tokens_in": 0,
+          "requests_completed": 14,
+          "ttft_ms_p50": 5486.4,
+          "ttft_ms_p99": 6180.1
+        },
+        {
+          "minute": 10.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 59.7,
+          "tokens_out": 3584,
+          "tokens_in": 0,
+          "requests_completed": 14,
+          "ttft_ms_p50": 5472.0,
+          "ttft_ms_p99": 6505.0
+        },
+        {
+          "minute": 11.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 55.5,
+          "tokens_out": 3328,
+          "tokens_in": 0,
+          "requests_completed": 13,
+          "ttft_ms_p50": 5850.6,
+          "ttft_ms_p99": 6694.6
+        },
+        {
+          "minute": 12.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 64.0,
+          "tokens_out": 3840,
+          "tokens_in": 0,
+          "requests_completed": 15,
+          "ttft_ms_p50": 5208.5,
+          "ttft_ms_p99": 5840.7
+        },
+        {
+          "minute": 13.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 55.5,
+          "tokens_out": 3328,
+          "tokens_in": 0,
+          "requests_completed": 13,
+          "ttft_ms_p50": 5909.3,
+          "ttft_ms_p99": 6251.2
+        },
+        {
+          "minute": 14.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 55.5,
+          "tokens_out": 3328,
+          "tokens_in": 0,
+          "requests_completed": 13,
+          "ttft_ms_p50": 5593.9,
+          "ttft_ms_p99": 6073.0
+        },
+        {
+          "minute": 15.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 59.7,
+          "tokens_out": 3584,
+          "tokens_in": 0,
+          "requests_completed": 14,
+          "ttft_ms_p50": 5297.1,
+          "ttft_ms_p99": 6684.1
+        },
+        {
+          "minute": 16.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 64.0,
+          "tokens_out": 3840,
+          "tokens_in": 0,
+          "requests_completed": 15,
+          "ttft_ms_p50": 5956.8,
+          "ttft_ms_p99": 6615.6
+        },
+        {
+          "minute": 17.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 55.4,
+          "tokens_out": 3328,
+          "tokens_in": 0,
+          "requests_completed": 13,
+          "ttft_ms_p50": 5954.7,
+          "ttft_ms_p99": 6462.3
+        },
+        {
+          "minute": 18.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 55.5,
+          "tokens_out": 3328,
+          "tokens_in": 0,
+          "requests_completed": 13,
+          "ttft_ms_p50": 5267.7,
+          "ttft_ms_p99": 6152.2
+        },
+        {
+          "minute": 19.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 64.0,
+          "tokens_out": 3840,
+          "tokens_in": 0,
+          "requests_completed": 15,
+          "ttft_ms_p50": 5455.5,
+          "ttft_ms_p99": 5958.6
+        },
+        {
+          "minute": 20.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 55.5,
+          "tokens_out": 3328,
+          "tokens_in": 0,
+          "requests_completed": 13,
+          "ttft_ms_p50": 5614.7,
+          "ttft_ms_p99": 6275.4
+        },
+        {
+          "minute": 21.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 59.7,
+          "tokens_out": 3584,
+          "tokens_in": 0,
+          "requests_completed": 14,
+          "ttft_ms_p50": 5592.8,
+          "ttft_ms_p99": 6443.6
+        },
+        {
+          "minute": 22.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 55.5,
+          "tokens_out": 3328,
+          "tokens_in": 0,
+          "requests_completed": 13,
+          "ttft_ms_p50": 5407.0,
+          "ttft_ms_p99": 6248.9
+        },
+        {
+          "minute": 23.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 64.0,
+          "tokens_out": 3840,
+          "tokens_in": 0,
+          "requests_completed": 15,
+          "ttft_ms_p50": 5348.3,
+          "ttft_ms_p99": 5840.6
+        },
+        {
+          "minute": 24.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 55.4,
+          "tokens_out": 3328,
+          "tokens_in": 0,
+          "requests_completed": 13,
+          "ttft_ms_p50": 5893.0,
+          "ttft_ms_p99": 6513.5
+        },
+        {
+          "minute": 25.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 59.8,
+          "tokens_out": 3584,
+          "tokens_in": 0,
+          "requests_completed": 14,
+          "ttft_ms_p50": 4939.8,
+          "ttft_ms_p99": 5825.9
+        },
+        {
+          "minute": 26.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 59.7,
+          "tokens_out": 3584,
+          "tokens_in": 0,
+          "requests_completed": 14,
+          "ttft_ms_p50": 4900.3,
+          "ttft_ms_p99": 6665.7
+        },
+        {
+          "minute": 27.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 59.7,
+          "tokens_out": 3584,
+          "tokens_in": 0,
+          "requests_completed": 14,
+          "ttft_ms_p50": 5623.0,
+          "ttft_ms_p99": 6163.1
+        },
+        {
+          "minute": 28.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 59.7,
+          "tokens_out": 3584,
+          "tokens_in": 0,
+          "requests_completed": 14,
+          "ttft_ms_p50": 5881.5,
+          "ttft_ms_p99": 6217.3
+        },
+        {
+          "minute": 29.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 55.5,
+          "tokens_out": 3328,
+          "tokens_in": 0,
+          "requests_completed": 13,
+          "ttft_ms_p50": 6084.9,
+          "ttft_ms_p99": 6683.6
+        }
+      ],
+      "sustained_throughput_tokens_per_sec": 58.7,
+      "throttle_ratio": 0.866,
+      "throttle_onset_minute": 2.0,
+      "ttft_p99_drift_ms": 202.0
+    },
+    "online": {
+      "sla_ttft_ms": 5000,
+      "max_valid_qps": 0.0,
+      "results_by_qps": [
+        {
+          "target_qps": 0.5,
+          "achieved_qps": 0.5,
+          "ttft_ms_p50": 129241.71,
+          "ttft_ms_p90": 238515.99,
+          "ttft_ms_p99": 255266.98,
+          "tpot_ms_p50": 231.96,
+          "tpot_ms_p90": 236.23,
+          "tpot_ms_p99": 238.5,
+          "elapsed_seconds_median": 459.2,
+          "sla_met": false
+        },
+        {
+          "target_qps": 1,
+          "achieved_qps": 1.0,
+          "ttft_ms_p50": 163924.47,
+          "ttft_ms_p90": 304663.59,
+          "ttft_ms_p99": 340432.59,
+          "tpot_ms_p50": 232.21,
+          "tpot_ms_p90": 236.44,
+          "tpot_ms_p99": 238.73,
+          "elapsed_seconds_median": 461.6,
+          "sla_met": false
+        },
+        {
+          "target_qps": 2,
+          "achieved_qps": 2.0,
+          "ttft_ms_p50": 197613.68,
+          "ttft_ms_p90": 361816.51,
+          "ttft_ms_p99": 400408.53,
+          "tpot_ms_p50": 232.17,
+          "tpot_ms_p90": 236.5,
+          "tpot_ms_p99": 238.76,
+          "elapsed_seconds_median": 459.2,
+          "sla_met": false
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": 0.56,
+    "baseline_delta": 0.0,
+    "valid": true,
+    "framework": "vLLM",
+    "precision": "BF16",
+    "notes": "Integrated accuracy check \u2014 used same vLLM instance as benchmark."
+  },
+  "meta": {
+    "submitted_by": "JuhaoLiang1997",
+    "submission_type": "individual",
+    "date": "2026-05-18",
+    "time": "07:23:02",
+    "run_id": "43e96189",
+    "run_name": "nvidia_a100_sxm4_80gbx1_suite_D_nvidia_vllm020_0f6c56e4_43e96189",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_vllm020_0f6c56e4/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-18T07:03:23.326807+00:00",
+    "benchmark_end_time": "2026-05-18T07:23:02.065079+00:00",
+    "benchmark_elapsed_minutes": 118.1,
+    "model_load_seconds": 41.7,
+    "benchmark_elapsed_minutes_note": "Total across ['offline', 'interactive', 'sustained', 'online'] scenarios.",
+    "scenario_dirs": {
+      "offline": "results/verified/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_vllm020_0f6c56e4_43e96189/offline",
+      "interactive": "results/verified/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_vllm020_0f6c56e4_43e96189/interactive",
+      "sustained": "results/verified/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_vllm020_0f6c56e4_43e96189/sustained",
+      "online": "results/verified/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_vllm020_0f6c56e4_43e96189/online"
+    }
+  }
+}
\ No newline at end of file
diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_vllm020_0f6c56e4_43e96189/sustained/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_vllm020_0f6c56e4_43e96189/sustained/result.json
new file mode 100644
index 00000000..097e0e91
--- /dev/null
+++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_D_nvidia_vllm020_0f6c56e4_43e96189/sustained/result.json
@@ -0,0 +1,424 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_D",
+  "implementation_id": "nvidia_vllm020_0f6c56e4",
+  "chip": {
+    "name": "NVIDIA A100-SXM4-80GB",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 80.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-18T07:00:53.162228+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A100-SXM4-80GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 80.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_platform": "nvidia",
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tPXB\tSYS\tSYS\t0-63,128-191\t0\t\tN/A\nNIC0\tPXB\t X \tSYS\tSYS\t\t\t\t\nNIC1\tSYS\tSYS\t X \tPIX\t\t\t\t\nNIC2\tSYS\tSYS\tPIX\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "AMD EPYC 7742 64-Core Processor",
+      "physical_cores": 128,
+      "logical_cores": 255,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.7,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.12.0",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 13.0",
+    "pytorch_version": "2.11.0+cu130"
+  },
+  "software": {
+    "framework": "vLLM",
+    "framework_version": "0.20.1+transformers-5.8.1",
+    "driver_version": "580.65.06",
+    "runtime_version": "CUDA 13.0",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.12.0"
+  },
+  "model": {
+    "model_id": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_revision": "0e9e39f249a16976918f6564b8830bc894c89659",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": null,
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "sustained",
+    "num_runs": 2,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": null,
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "sustained": {
+      "sustained_concurrency": 8,
+      "duration_minutes": 30,
+      "warmup_minutes": 2,
+      "sample_interval_seconds": 60,
+      "samples": [
+        {
+          "minute": 1.0,
+          "is_warmup": true,
+          "throughput_tokens_per_sec": 34.1,
+          "tokens_out": 2048,
+          "tokens_in": 0,
+          "requests_completed": 8,
+          "ttft_ms_p50": 15012.6,
+          "ttft_ms_p99": 27511.4
+        },
+        {
+          "minute": 2.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 55.4,
+          "tokens_out": 3328,
+          "tokens_in": 0,
+          "requests_completed": 13,
+          "ttft_ms_p50": 6118.2,
+          "ttft_ms_p99": 6481.6
+        },
+        {
+          "minute": 3.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 59.7,
+          "tokens_out": 3584,
+          "tokens_in": 0,
+          "requests_completed": 14,
+          "ttft_ms_p50": 5323.4,
+          "ttft_ms_p99": 6114.9
+        },
+        {
+          "minute": 4.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 55.5,
+          "tokens_out": 3328,
+          "tokens_in": 0,
+          "requests_completed": 13,
+          "ttft_ms_p50": 5619.9,
+          "ttft_ms_p99": 6149.6
+        },
+        {
+          "minute": 5.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 64.0,
+          "tokens_out": 3840,
+          "tokens_in": 0,
+          "requests_completed": 15,
+          "ttft_ms_p50": 5341.4,
+          "ttft_ms_p99": 6110.4
+        },
+        {
+          "minute": 6.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 55.5,
+          "tokens_out": 3328,
+          "tokens_in": 0,
+          "requests_completed": 13,
+          "ttft_ms_p50": 5932.5,
+          "ttft_ms_p99": 6440.4
+        },
+        {
+          "minute": 7.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 59.7,
+          "tokens_out": 3584,
+          "tokens_in": 0,
+          "requests_completed": 14,
+          "ttft_ms_p50": 4964.1,
+          "ttft_ms_p99": 5812.6
+        },
+        {
+          "minute": 8.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 59.8,
+          "tokens_out": 3584,
+          "tokens_in": 0,
+          "requests_completed": 14,
+          "ttft_ms_p50": 5597.6,
+          "ttft_ms_p99": 6251.9
+        },
+        {
+          "minute": 9.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 59.7,
+          "tokens_out": 3584,
+          "tokens_in": 0,
+          "requests_completed": 14,
+          "ttft_ms_p50": 5486.4,
+          "ttft_ms_p99": 6180.1
+        },
+        {
+          "minute": 10.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 59.7,
+          "tokens_out": 3584,
+          "tokens_in": 0,
+          "requests_completed": 14,
+          "ttft_ms_p50": 5472.0,
+          "ttft_ms_p99": 6505.0
+        },
+        {
+          "minute": 11.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 55.5,
+          "tokens_out": 3328,
+          "tokens_in": 0,
+          "requests_completed": 13,
+          "ttft_ms_p50": 5850.6,
+          "ttft_ms_p99": 6694.6
+        },
+        {
+          "minute": 12.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 64.0,
+          "tokens_out": 3840,
+          "tokens_in": 0,
+          "requests_completed": 15,
+          "ttft_ms_p50": 5208.5,
+          "ttft_ms_p99": 5840.7
+        },
+        {
+          "minute": 13.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 55.5,
+          "tokens_out": 3328,
+          "tokens_in": 0,
+          "requests_completed": 13,
+          "ttft_ms_p50": 5909.3,
+          "ttft_ms_p99": 6251.2
+        },
+        {
+          "minute": 14.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 55.5,
+          "tokens_out": 3328,
+          "tokens_in": 0,
+          "requests_completed": 13,
+          "ttft_ms_p50": 5593.9,
+          "ttft_ms_p99": 6073.0
+        },
+        {
+          "minute": 15.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 59.7,
+          "tokens_out": 3584,
+          "tokens_in": 0,
+          "requests_completed": 14,
+          "ttft_ms_p50": 5297.1,
+          "ttft_ms_p99": 6684.1
+        },
+        {
+          "minute": 16.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 64.0,
+          "tokens_out": 3840,
+          "tokens_in": 0,
+          "requests_completed": 15,
+          "ttft_ms_p50": 5956.8,
+          "ttft_ms_p99": 6615.6
+        },
+        {
+          "minute": 17.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 55.4,
+          "tokens_out": 3328,
+          "tokens_in": 0,
+          "requests_completed": 13,
+          "ttft_ms_p50": 5954.7,
+          "ttft_ms_p99": 6462.3
+        },
+        {
+          "minute": 18.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 55.5,
+          "tokens_out": 3328,
+          "tokens_in": 0,
+          "requests_completed": 13,
+          "ttft_ms_p50": 5267.7,
+          "ttft_ms_p99": 6152.2
+        },
+        {
+          "minute": 19.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 64.0,
+          "tokens_out": 3840,
+          "tokens_in": 0,
+          "requests_completed": 15,
+          "ttft_ms_p50": 5455.5,
+          "ttft_ms_p99": 5958.6
+        },
+        {
+          "minute": 20.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 55.5,
+          "tokens_out": 3328,
+          "tokens_in": 0,
+          "requests_completed": 13,
+          "ttft_ms_p50": 5614.7,
+          "ttft_ms_p99": 6275.4
+        },
+        {
+          "minute": 21.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 59.7,
+          "tokens_out": 3584,
+          "tokens_in": 0,
+          "requests_completed": 14,
+          "ttft_ms_p50": 5592.8,
+          "ttft_ms_p99": 6443.6
+        },
+        {
+          "minute": 22.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 55.5,
+          "tokens_out": 3328,
+          "tokens_in": 0,
+          "requests_completed": 13,
+          "ttft_ms_p50": 5407.0,
+          "ttft_ms_p99": 6248.9
+        },
+        {
+          "minute": 23.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 64.0,
+          "tokens_out": 3840,
+          "tokens_in": 0,
+          "requests_completed": 15,
+          "ttft_ms_p50": 5348.3,
+          "ttft_ms_p99": 5840.6
+        },
+        {
+          "minute": 24.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 55.4,
+          "tokens_out": 3328,
+          "tokens_in": 0,
+          "requests_completed": 13,
+          "ttft_ms_p50": 5893.0,
+          "ttft_ms_p99": 6513.5
+        },
+        {
+          "minute": 25.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 59.8,
+          "tokens_out": 3584,
+          "tokens_in": 0,
+          "requests_completed": 14,
+          "ttft_ms_p50": 4939.8,
+          "ttft_ms_p99": 5825.9
+        },
+        {
+          "minute": 26.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 59.7,
+          "tokens_out": 3584,
+          "tokens_in": 0,
+          "requests_completed": 14,
+          "ttft_ms_p50": 4900.3,
+          "ttft_ms_p99": 6665.7
+        },
+        {
+          "minute": 27.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 59.7,
+          "tokens_out": 3584,
+          "tokens_in": 0,
+          "requests_completed": 14,
+          "ttft_ms_p50": 5623.0,
+          "ttft_ms_p99": 6163.1
+        },
+        {
+          "minute": 28.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 59.7,
+          "tokens_out": 3584,
+          "tokens_in": 0,
+          "requests_completed": 14,
+          "ttft_ms_p50": 5881.5,
+          "ttft_ms_p99": 6217.3
+        },
+        {
+          "minute": 29.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 55.5,
+          "tokens_out": 3328,
+          "tokens_in": 0,
+          "requests_completed": 13,
+          "ttft_ms_p50": 6084.9,
+          "ttft_ms_p99": 6683.6
+        }
+      ],
+      "sustained_throughput_tokens_per_sec": 58.7,
+      "throttle_ratio": 0.866,
+      "throttle_onset_minute": 2.0,
+      "ttft_p99_drift_ms": 202.0
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "JuhaoLiang1997",
+    "submission_type": "individual",
+    "date": "2026-05-18",
+    "time": "08:18:12",
+    "run_id": "43e96189",
+    "run_name": "nvidia_a100_sxm4_80gbx1_suite_D_nvidia_vllm020_0f6c56e4_43e96189",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_vllm020_0f6c56e4/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-18T07:47:24.813895+00:00",
+    "benchmark_end_time": "2026-05-18T08:18:12.448165+00:00",
+    "benchmark_elapsed_minutes": 30.8,
+    "model_load_seconds": 40.8
+  }
+}
diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_F_nvidia_vllm020_0f6c56e4_a4e6a6e4/accuracy/accuracy.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_F_nvidia_vllm020_0f6c56e4_a4e6a6e4/accuracy/accuracy.json
new file mode 100644
index 00000000..21e4fec2
--- /dev/null
+++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_F_nvidia_vllm020_0f6c56e4_a4e6a6e4/accuracy/accuracy.json
@@ -0,0 +1,8 @@
+{
+  "subset_score": 0.41,
+  "baseline_delta": 0.03,
+  "valid": true,
+  "framework": "vLLM",
+  "precision": "BF16",
+  "notes": "Integrated accuracy check \u2014 used same vLLM instance as benchmark."
+}
\ No newline at end of file
diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_F_nvidia_vllm020_0f6c56e4_a4e6a6e4/env_info.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_F_nvidia_vllm020_0f6c56e4_a4e6a6e4/env_info.json
new file mode 100644
index 00000000..538f8e4c
--- /dev/null
+++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_F_nvidia_vllm020_0f6c56e4_a4e6a6e4/env_info.json
@@ -0,0 +1,49 @@
+{
+  "collected_at": "2026-05-18T10:05:28.924925+00:00",
+  "accelerators": [
+    {
+      "index": 0,
+      "name": "NVIDIA A100-SXM4-80GB",
+      "vendor": "NVIDIA",
+      "memory_gb": 80.0,
+      "driver_version": "580.65.06",
+      "firmware_version": null,
+      "compute_capability": "8.0",
+      "supports_bf16": true
+    }
+  ],
+  "accelerator_platform": "nvidia",
+  "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tPXB\tSYS\tSYS\t0-63,128-191\t0\t\tN/A\nNIC0\tPXB\t X \tSYS\tSYS\t\t\t\t\nNIC1\tSYS\tSYS\t X \tPIX\t\t\t\t\nNIC2\tSYS\tSYS\tPIX\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n\n",
+  "intra_node_interconnect": null,
+  "cpu": {
+    "model": "AMD EPYC 7742 64-Core Processor",
+    "physical_cores": 128,
+    "logical_cores": 255,
+    "numa_nodes": 2
+  },
+  "system_memory_gb": 1007.7,
+  "pcie_generation": "PCIe Gen 4",
+  "cpu_accelerator_bandwidth_gbs": null,
+  "network_interfaces": [
+    {
+      "name": "mlx5_0",
+      "type": "InfiniBand/RoCE",
+      "bandwidth_gbps": null
+    },
+    {
+      "name": "mlx5_1",
+      "type": "InfiniBand/RoCE",
+      "bandwidth_gbps": null
+    },
+    {
+      "name": "mlx5_2",
+      "type": "InfiniBand/RoCE",
+      "bandwidth_gbps": null
+    }
+  ],
+  "os": "Ubuntu 22.04.4 LTS",
+  "python_version": "3.12.0",
+  "kernel_version": "5.15.0-60-generic",
+  "runtime_version": "CUDA 13.0",
+  "pytorch_version": "2.11.0+cu130"
+}
\ No newline at end of file
diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_F_nvidia_vllm020_0f6c56e4_a4e6a6e4/interactive/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_F_nvidia_vllm020_0f6c56e4_a4e6a6e4/interactive/result.json
new file mode 100644
index 00000000..f6765408
--- /dev/null
+++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_F_nvidia_vllm020_0f6c56e4_a4e6a6e4/interactive/result.json
@@ -0,0 +1,137 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_F",
+  "implementation_id": "nvidia_vllm020_0f6c56e4",
+  "chip": {
+    "name": "NVIDIA A100-SXM4-80GB",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 80.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-18T10:05:28.924925+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A100-SXM4-80GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 80.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_platform": "nvidia",
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tPXB\tSYS\tSYS\t0-63,128-191\t0\t\tN/A\nNIC0\tPXB\t X \tSYS\tSYS\t\t\t\t\nNIC1\tSYS\tSYS\t X \tPIX\t\t\t\t\nNIC2\tSYS\tSYS\tPIX\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "AMD EPYC 7742 64-Core Processor",
+      "physical_cores": 128,
+      "logical_cores": 255,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.7,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.12.0",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 13.0",
+    "pytorch_version": "2.11.0+cu130"
+  },
+  "software": {
+    "framework": "vLLM",
+    "framework_version": "0.20.1+transformers-5.8.1",
+    "driver_version": "580.65.06",
+    "runtime_version": "CUDA 13.0",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.12.0"
+  },
+  "model": {
+    "model_id": "Qwen/Qwen2.5-0.5B-Instruct",
+    "model_revision": "7ae557604adf67be50417f59c2c2f167def9a775",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 0.5,
+    "precision": "BF16",
+    "effective_dtype": null,
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "interactive",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": {
+      "tensor_parallel_size": 1,
+      "enforce_eager": false,
+      "max_num_seqs": 128,
+      "gpu_memory_utilization": 0.9
+    },
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "interactive": {
+      "ttft_ms_p50": 11.24,
+      "ttft_ms_p90": 13.36,
+      "ttft_ms_p99": 14.74,
+      "tpot_ms_p50": 1.83,
+      "tpot_ms_p90": 1.83,
+      "tpot_ms_p99": 1.87,
+      "peak_memory_gb": null,
+      "elapsed_seconds_median": 59.1
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "JuhaoLiang1997",
+    "submission_type": "individual",
+    "date": "2026-05-18",
+    "time": "10:14:20",
+    "run_id": "a4e6a6e4",
+    "run_name": "nvidia_a100_sxm4_80gbx1_suite_F_nvidia_vllm020_0f6c56e4_a4e6a6e4",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_vllm020_0f6c56e4/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-18T10:11:23.809781+00:00",
+    "benchmark_end_time": "2026-05-18T10:14:20.932444+00:00",
+    "benchmark_elapsed_minutes": 3.0,
+    "model_load_seconds": 27.1
+  }
+}
\ No newline at end of file
diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_F_nvidia_vllm020_0f6c56e4_a4e6a6e4/offline/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_F_nvidia_vllm020_0f6c56e4_a4e6a6e4/offline/result.json
new file mode 100644
index 00000000..8c532a40
--- /dev/null
+++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_F_nvidia_vllm020_0f6c56e4_a4e6a6e4/offline/result.json
@@ -0,0 +1,170 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_F",
+  "implementation_id": "nvidia_vllm020_0f6c56e4",
+  "chip": {
+    "name": "NVIDIA A100-SXM4-80GB",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 80.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-18T10:05:28.924925+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A100-SXM4-80GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 80.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_platform": "nvidia",
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tPXB\tSYS\tSYS\t0-63,128-191\t0\t\tN/A\nNIC0\tPXB\t X \tSYS\tSYS\t\t\t\t\nNIC1\tSYS\tSYS\t X \tPIX\t\t\t\t\nNIC2\tSYS\tSYS\tPIX\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "AMD EPYC 7742 64-Core Processor",
+      "physical_cores": 128,
+      "logical_cores": 255,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.7,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.12.0",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 13.0",
+    "pytorch_version": "2.11.0+cu130"
+  },
+  "software": {
+    "framework": "vLLM",
+    "framework_version": "0.20.1+transformers-5.8.1",
+    "driver_version": "580.65.06",
+    "runtime_version": "CUDA 13.0",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.12.0"
+  },
+  "model": {
+    "model_id": "Qwen/Qwen2.5-0.5B-Instruct",
+    "model_revision": "7ae557604adf67be50417f59c2c2f167def9a775",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 0.5,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "offline",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": {
+      "tensor_parallel_size": 1,
+      "enforce_eager": false,
+      "max_num_seqs": 128,
+      "gpu_memory_utilization": 0.9
+    },
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "offline": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 4,
+          "throughput_tokens_per_sec": 22462.36,
+          "throughput_tokens_per_sec_per_chip": 22462.36,
+          "throughput_tokens_per_sec_total": 33497.61,
+          "elapsed_seconds_median": 1.9,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 16,
+          "throughput_tokens_per_sec": 22493.12,
+          "throughput_tokens_per_sec_per_chip": 22493.12,
+          "throughput_tokens_per_sec_total": 33569.32,
+          "elapsed_seconds_median": 1.9,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 64,
+          "throughput_tokens_per_sec": 22884.92,
+          "throughput_tokens_per_sec_per_chip": 22884.92,
+          "throughput_tokens_per_sec_total": 34039.23,
+          "elapsed_seconds_median": 1.9,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "JuhaoLiang1997",
+    "submission_type": "individual",
+    "date": "2026-05-18",
+    "time": "10:07:40",
+    "run_id": "a4e6a6e4",
+    "run_name": "nvidia_a100_sxm4_80gbx1_suite_F_nvidia_vllm020_0f6c56e4_a4e6a6e4",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_vllm020_0f6c56e4/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-18T10:07:16.851531+00:00",
+    "benchmark_end_time": "2026-05-18T10:07:40.157696+00:00",
+    "benchmark_elapsed_minutes": 0.4,
+    "model_load_seconds": 28.0
+  }
+}
\ No newline at end of file
diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_F_nvidia_vllm020_0f6c56e4_a4e6a6e4/online/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_F_nvidia_vllm020_0f6c56e4_a4e6a6e4/online/result.json
new file mode 100644
index 00000000..7df48900
--- /dev/null
+++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_F_nvidia_vllm020_0f6c56e4_a4e6a6e4/online/result.json
@@ -0,0 +1,157 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_F",
+  "implementation_id": "nvidia_vllm020_0f6c56e4",
+  "chip": {
+    "name": "NVIDIA A100-SXM4-80GB",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 80.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-18T10:05:28.924925+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A100-SXM4-80GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 80.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_platform": "nvidia",
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tPXB\tSYS\tSYS\t0-63,128-191\t0\t\tN/A\nNIC0\tPXB\t X \tSYS\tSYS\t\t\t\t\nNIC1\tSYS\tSYS\t X \tPIX\t\t\t\t\nNIC2\tSYS\tSYS\tPIX\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "AMD EPYC 7742 64-Core Processor",
+      "physical_cores": 128,
+      "logical_cores": 255,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.7,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.12.0",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 13.0",
+    "pytorch_version": "2.11.0+cu130"
+  },
+  "software": {
+    "framework": "vLLM",
+    "framework_version": "0.20.1+transformers-5.8.1",
+    "driver_version": "580.65.06",
+    "runtime_version": "CUDA 13.0",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.12.0"
+  },
+  "model": {
+    "model_id": "Qwen/Qwen2.5-0.5B-Instruct",
+    "model_revision": "7ae557604adf67be50417f59c2c2f167def9a775",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 0.5,
+    "precision": "BF16",
+    "effective_dtype": null,
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "online",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": {
+      "tensor_parallel_size": 1,
+      "enforce_eager": false,
+      "max_num_seqs": 128,
+      "gpu_memory_utilization": 0.9
+    },
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "online": {
+      "sla_ttft_ms": 500,
+      "max_valid_qps": 40,
+      "results_by_qps": [
+        {
+          "target_qps": 10,
+          "achieved_qps": 10.0,
+          "ttft_ms_p50": 10.04,
+          "ttft_ms_p90": 12.66,
+          "ttft_ms_p99": 18.27,
+          "tpot_ms_p50": 2.13,
+          "tpot_ms_p90": 2.19,
+          "tpot_ms_p99": 2.37,
+          "elapsed_seconds_median": 32.0,
+          "sla_met": true
+        },
+        {
+          "target_qps": 40,
+          "achieved_qps": 40.0,
+          "ttft_ms_p50": 11.96,
+          "ttft_ms_p90": 15.96,
+          "ttft_ms_p99": 19.93,
+          "tpot_ms_p50": 2.5,
+          "tpot_ms_p90": 2.65,
+          "tpot_ms_p99": 2.87,
+          "elapsed_seconds_median": 7.9,
+          "sla_met": true
+        }
+      ]
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "JuhaoLiang1997",
+    "submission_type": "individual",
+    "date": "2026-05-18",
+    "time": "10:10:30",
+    "run_id": "a4e6a6e4",
+    "run_name": "nvidia_a100_sxm4_80gbx1_suite_F_nvidia_vllm020_0f6c56e4_a4e6a6e4",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_vllm020_0f6c56e4/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-18T10:08:31.412469+00:00",
+    "benchmark_end_time": "2026-05-18T10:10:30.424981+00:00",
+    "benchmark_elapsed_minutes": 2.0,
+    "model_load_seconds": 24.1
+  }
+}
\ No newline at end of file
diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_F_nvidia_vllm020_0f6c56e4_a4e6a6e4/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_F_nvidia_vllm020_0f6c56e4_a4e6a6e4/result.json
new file mode 100644
index 00000000..2e7e0ce3
--- /dev/null
+++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_F_nvidia_vllm020_0f6c56e4_a4e6a6e4/result.json
@@ -0,0 +1,375 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_F",
+  "implementation_id": "nvidia_vllm020_0f6c56e4",
+  "chip": {
+    "name": "NVIDIA A100-SXM4-80GB",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 80.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-18T10:05:28.924925+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A100-SXM4-80GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 80.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_platform": "nvidia",
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tPXB\tSYS\tSYS\t0-63,128-191\t0\t\tN/A\nNIC0\tPXB\t X \tSYS\tSYS\t\t\t\t\nNIC1\tSYS\tSYS\t X \tPIX\t\t\t\t\nNIC2\tSYS\tSYS\tPIX\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "AMD EPYC 7742 64-Core Processor",
+      "physical_cores": 128,
+      "logical_cores": 255,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.7,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.12.0",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 13.0",
+    "pytorch_version": "2.11.0+cu130"
+  },
+  "software": {
+    "framework": "vLLM",
+    "framework_version": "0.20.1+transformers-5.8.1",
+    "driver_version": "580.65.06",
+    "runtime_version": "CUDA 13.0",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.12.0"
+  },
+  "model": {
+    "model_id": "Qwen/Qwen2.5-0.5B-Instruct",
+    "model_revision": "7ae557604adf67be50417f59c2c2f167def9a775",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 0.5,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenarios_run": [
+      "offline",
+      "online",
+      "interactive",
+      "sustained"
+    ],
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "num_runs": 3,
+    "extra_config": {
+      "tensor_parallel_size": 1,
+      "enforce_eager": false,
+      "max_num_seqs": 128,
+      "gpu_memory_utilization": 0.9
+    }
+  },
+  "metrics": {
+    "derived": {},
+    "offline": {
+      "results_by_concurrency": [
+        {
+          "client_concurrency": 4,
+          "throughput_tokens_per_sec": 22462.36,
+          "throughput_tokens_per_sec_per_chip": 22462.36,
+          "throughput_tokens_per_sec_total": 33497.61,
+          "elapsed_seconds_median": 1.9,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 16,
+          "throughput_tokens_per_sec": 22493.12,
+          "throughput_tokens_per_sec_per_chip": 22493.12,
+          "throughput_tokens_per_sec_total": 33569.32,
+          "elapsed_seconds_median": 1.9,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        },
+        {
+          "client_concurrency": 64,
+          "throughput_tokens_per_sec": 22884.92,
+          "throughput_tokens_per_sec_per_chip": 22884.92,
+          "throughput_tokens_per_sec_total": 34039.23,
+          "elapsed_seconds_median": 1.9,
+          "peak_memory_gb": null,
+          "power_watts_avg": null,
+          "power_watts_peak": null,
+          "oom": false,
+          "_throughput_note": "output_only",
+          "_concurrency_note": "client_concurrency is the number of requests sent simultaneously. The inference engine batches internally; this does not directly set engine parameters like max_num_seqs."
+        }
+      ]
+    },
+    "online": {
+      "sla_ttft_ms": 500,
+      "max_valid_qps": 40,
+      "results_by_qps": [
+        {
+          "target_qps": 10,
+          "achieved_qps": 10.0,
+          "ttft_ms_p50": 10.04,
+          "ttft_ms_p90": 12.66,
+          "ttft_ms_p99": 18.27,
+          "tpot_ms_p50": 2.13,
+          "tpot_ms_p90": 2.19,
+          "tpot_ms_p99": 2.37,
+          "elapsed_seconds_median": 32.0,
+          "sla_met": true
+        },
+        {
+          "target_qps": 40,
+          "achieved_qps": 40.0,
+          "ttft_ms_p50": 11.96,
+          "ttft_ms_p90": 15.96,
+          "ttft_ms_p99": 19.93,
+          "tpot_ms_p50": 2.5,
+          "tpot_ms_p90": 2.65,
+          "tpot_ms_p99": 2.87,
+          "elapsed_seconds_median": 7.9,
+          "sla_met": true
+        }
+      ]
+    },
+    "interactive": {
+      "ttft_ms_p50": 11.24,
+      "ttft_ms_p90": 13.36,
+      "ttft_ms_p99": 14.74,
+      "tpot_ms_p50": 1.83,
+      "tpot_ms_p90": 1.83,
+      "tpot_ms_p99": 1.87,
+      "peak_memory_gb": null,
+      "elapsed_seconds_median": 59.1
+    },
+    "sustained": {
+      "sustained_concurrency": 32,
+      "duration_minutes": 15,
+      "warmup_minutes": 1,
+      "sample_interval_seconds": 60,
+      "samples": [
+        {
+          "minute": 1.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 11541.5,
+          "tokens_out": 692740,
+          "tokens_in": 0,
+          "requests_completed": 3291,
+          "ttft_ms_p50": 12.9,
+          "ttft_ms_p99": 36.1
+        },
+        {
+          "minute": 2.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 11672.6,
+          "tokens_out": 700107,
+          "tokens_in": 0,
+          "requests_completed": 3324,
+          "ttft_ms_p50": 12.8,
+          "ttft_ms_p99": 20.3
+        },
+        {
+          "minute": 3.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 11721.9,
+          "tokens_out": 703664,
+          "tokens_in": 0,
+          "requests_completed": 3337,
+          "ttft_ms_p50": 12.7,
+          "ttft_ms_p99": 19.1
+        },
+        {
+          "minute": 4.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 11526.8,
+          "tokens_out": 691780,
+          "tokens_in": 0,
+          "requests_completed": 3289,
+          "ttft_ms_p50": 13.3,
+          "ttft_ms_p99": 20.6
+        },
+        {
+          "minute": 5.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 11228.1,
+          "tokens_out": 673578,
+          "tokens_in": 0,
+          "requests_completed": 3190,
+          "ttft_ms_p50": 13.8,
+          "ttft_ms_p99": 21.5
+        },
+        {
+          "minute": 6.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 11380.9,
+          "tokens_out": 682977,
+          "tokens_in": 0,
+          "requests_completed": 3245,
+          "ttft_ms_p50": 13.8,
+          "ttft_ms_p99": 21.0
+        },
+        {
+          "minute": 7.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 11711.7,
+          "tokens_out": 702201,
+          "tokens_in": 0,
+          "requests_completed": 3331,
+          "ttft_ms_p50": 12.8,
+          "ttft_ms_p99": 20.1
+        },
+        {
+          "minute": 8.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 11643.5,
+          "tokens_out": 698683,
+          "tokens_in": 0,
+          "requests_completed": 3317,
+          "ttft_ms_p50": 12.7,
+          "ttft_ms_p99": 20.3
+        },
+        {
+          "minute": 9.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 11662.7,
+          "tokens_out": 700038,
+          "tokens_in": 0,
+          "requests_completed": 3323,
+          "ttft_ms_p50": 12.7,
+          "ttft_ms_p99": 20.2
+        },
+        {
+          "minute": 10.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 11612.6,
+          "tokens_out": 696555,
+          "tokens_in": 0,
+          "requests_completed": 3294,
+          "ttft_ms_p50": 12.8,
+          "ttft_ms_p99": 19.0
+        },
+        {
+          "minute": 11.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 11623.8,
+          "tokens_out": 697495,
+          "tokens_in": 0,
+          "requests_completed": 3317,
+          "ttft_ms_p50": 12.8,
+          "ttft_ms_p99": 19.7
+        },
+        {
+          "minute": 12.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 11639.3,
+          "tokens_out": 698222,
+          "tokens_in": 0,
+          "requests_completed": 3311,
+          "ttft_ms_p50": 12.8,
+          "ttft_ms_p99": 20.2
+        },
+        {
+          "minute": 13.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 11651.4,
+          "tokens_out": 699229,
+          "tokens_in": 0,
+          "requests_completed": 3321,
+          "ttft_ms_p50": 12.7,
+          "ttft_ms_p99": 20.7
+        },
+        {
+          "minute": 14.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 11450.2,
+          "tokens_out": 686752,
+          "tokens_in": 0,
+          "requests_completed": 3257,
+          "ttft_ms_p50": 13.6,
+          "ttft_ms_p99": 20.7
+        }
+      ],
+      "sustained_throughput_tokens_per_sec": 11576.2,
+      "throttle_ratio": 0.958,
+      "throttle_onset_minute": null,
+      "ttft_p99_drift_ms": -15.4
+    }
+  },
+  "accuracy": {
+    "subset_score": 0.41,
+    "baseline_delta": 0.03,
+    "valid": true,
+    "framework": "vLLM",
+    "precision": "BF16",
+    "notes": "Integrated accuracy check \u2014 used same vLLM instance as benchmark."
+  },
+  "meta": {
+    "submitted_by": "JuhaoLiang1997",
+    "submission_type": "individual",
+    "date": "2026-05-18",
+    "time": "10:07:40",
+    "run_id": "a4e6a6e4",
+    "run_name": "nvidia_a100_sxm4_80gbx1_suite_F_nvidia_vllm020_0f6c56e4_a4e6a6e4",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_vllm020_0f6c56e4/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-18T10:07:16.851531+00:00",
+    "benchmark_end_time": "2026-05-18T10:07:40.157696+00:00",
+    "benchmark_elapsed_minutes": 20.4,
+    "model_load_seconds": 28.0,
+    "benchmark_elapsed_minutes_note": "Total across ['offline', 'online', 'interactive', 'sustained'] scenarios.",
+    "scenario_dirs": {
+      "offline": "results/verified/nvidia_a100_sxm4_80gbx1_suite_F_nvidia_vllm020_0f6c56e4_a4e6a6e4/offline",
+      "online": "results/verified/nvidia_a100_sxm4_80gbx1_suite_F_nvidia_vllm020_0f6c56e4_a4e6a6e4/online",
+      "interactive": "results/verified/nvidia_a100_sxm4_80gbx1_suite_F_nvidia_vllm020_0f6c56e4_a4e6a6e4/interactive",
+      "sustained": "results/verified/nvidia_a100_sxm4_80gbx1_suite_F_nvidia_vllm020_0f6c56e4_a4e6a6e4/sustained"
+    }
+  }
+}
\ No newline at end of file
diff --git a/results/verified/nvidia_a100_sxm4_80gbx1_suite_F_nvidia_vllm020_0f6c56e4_a4e6a6e4/sustained/result.json b/results/verified/nvidia_a100_sxm4_80gbx1_suite_F_nvidia_vllm020_0f6c56e4_a4e6a6e4/sustained/result.json
new file mode 100644
index 00000000..6851ff63
--- /dev/null
+++ b/results/verified/nvidia_a100_sxm4_80gbx1_suite_F_nvidia_vllm020_0f6c56e4_a4e6a6e4/sustained/result.json
@@ -0,0 +1,279 @@
+{
+  "schema_version": "1.0",
+  "suite_id": "suite_F",
+  "implementation_id": "nvidia_vllm020_0f6c56e4",
+  "chip": {
+    "name": "NVIDIA A100-SXM4-80GB",
+    "vendor": "NVIDIA",
+    "count": 1,
+    "memory_gb": 80.0,
+    "interconnect_intra_node": null,
+    "interconnect_inter_node": null
+  },
+  "environment": {
+    "collected_at": "2026-05-18T10:05:28.924925+00:00",
+    "accelerators": [
+      {
+        "index": 0,
+        "name": "NVIDIA A100-SXM4-80GB",
+        "vendor": "NVIDIA",
+        "memory_gb": 80.0,
+        "driver_version": "580.65.06",
+        "firmware_version": null,
+        "compute_capability": "8.0",
+        "supports_bf16": true
+      }
+    ],
+    "accelerator_platform": "nvidia",
+    "accelerator_topology": "\tGPU0\tNIC0\tNIC1\tNIC2\tCPU Affinity\tNUMA Affinity\tGPU NUMA ID\nGPU0\t X \tPXB\tSYS\tSYS\t0-63,128-191\t0\t\tN/A\nNIC0\tPXB\t X \tSYS\tSYS\t\t\t\t\nNIC1\tSYS\tSYS\t X \tPIX\t\t\t\t\nNIC2\tSYS\tSYS\tPIX\t X \t\t\t\t\n\nLegend:\n\n  X    = Self\n  SYS  = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)\n  NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node\n  PHB  = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)\n  PXB  = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)\n  PIX  = Connection traversing at most a single PCIe bridge\n  NV#  = Connection traversing a bonded set of # NVLinks\n\nNIC Legend:\n\n  NIC0: mlx5_0\n  NIC1: mlx5_1\n  NIC2: mlx5_2\n\n",
+    "intra_node_interconnect": null,
+    "cpu": {
+      "model": "AMD EPYC 7742 64-Core Processor",
+      "physical_cores": 128,
+      "logical_cores": 255,
+      "numa_nodes": 2
+    },
+    "system_memory_gb": 1007.7,
+    "pcie_generation": "PCIe Gen 4",
+    "cpu_accelerator_bandwidth_gbs": null,
+    "network_interfaces": [
+      {
+        "name": "mlx5_0",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_1",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      },
+      {
+        "name": "mlx5_2",
+        "type": "InfiniBand/RoCE",
+        "bandwidth_gbps": null
+      }
+    ],
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.12.0",
+    "kernel_version": "5.15.0-60-generic",
+    "runtime_version": "CUDA 13.0",
+    "pytorch_version": "2.11.0+cu130"
+  },
+  "software": {
+    "framework": "vLLM",
+    "framework_version": "0.20.1+transformers-5.8.1",
+    "driver_version": "580.65.06",
+    "runtime_version": "CUDA 13.0",
+    "os": "Ubuntu 22.04.4 LTS",
+    "python_version": "3.12.0"
+  },
+  "model": {
+    "model_id": "Qwen/Qwen2.5-0.5B-Instruct",
+    "model_revision": "7ae557604adf67be50417f59c2c2f167def9a775",
+    "model_name": null,
+    "model_note": null,
+    "model_source": "local",
+    "architecture": "dense",
+    "parameter_count_b": 0.5,
+    "precision": "BF16",
+    "effective_dtype": null,
+    "quantization_method": null,
+    "model_format": "HuggingFace original"
+  },
+  "task": {
+    "scenario": "sustained",
+    "num_runs": 3,
+    "warmup_runs": 1,
+    "parallelism": {
+      "tensor_parallel_size": 1,
+      "pipeline_parallel_size": 1,
+      "expert_parallel_size": 1,
+      "data_parallel_size": 1
+    },
+    "extra_config": {
+      "tensor_parallel_size": 1,
+      "enforce_eager": false,
+      "max_num_seqs": 128,
+      "gpu_memory_utilization": 0.9
+    },
+    "runtime_metrics": null
+  },
+  "metrics": {
+    "sustained": {
+      "sustained_concurrency": 32,
+      "duration_minutes": 15,
+      "warmup_minutes": 1,
+      "sample_interval_seconds": 60,
+      "samples": [
+        {
+          "minute": 1.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 11541.5,
+          "tokens_out": 692740,
+          "tokens_in": 0,
+          "requests_completed": 3291,
+          "ttft_ms_p50": 12.9,
+          "ttft_ms_p99": 36.1
+        },
+        {
+          "minute": 2.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 11672.6,
+          "tokens_out": 700107,
+          "tokens_in": 0,
+          "requests_completed": 3324,
+          "ttft_ms_p50": 12.8,
+          "ttft_ms_p99": 20.3
+        },
+        {
+          "minute": 3.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 11721.9,
+          "tokens_out": 703664,
+          "tokens_in": 0,
+          "requests_completed": 3337,
+          "ttft_ms_p50": 12.7,
+          "ttft_ms_p99": 19.1
+        },
+        {
+          "minute": 4.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 11526.8,
+          "tokens_out": 691780,
+          "tokens_in": 0,
+          "requests_completed": 3289,
+          "ttft_ms_p50": 13.3,
+          "ttft_ms_p99": 20.6
+        },
+        {
+          "minute": 5.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 11228.1,
+          "tokens_out": 673578,
+          "tokens_in": 0,
+          "requests_completed": 3190,
+          "ttft_ms_p50": 13.8,
+          "ttft_ms_p99": 21.5
+        },
+        {
+          "minute": 6.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 11380.9,
+          "tokens_out": 682977,
+          "tokens_in": 0,
+          "requests_completed": 3245,
+          "ttft_ms_p50": 13.8,
+          "ttft_ms_p99": 21.0
+        },
+        {
+          "minute": 7.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 11711.7,
+          "tokens_out": 702201,
+          "tokens_in": 0,
+          "requests_completed": 3331,
+          "ttft_ms_p50": 12.8,
+          "ttft_ms_p99": 20.1
+        },
+        {
+          "minute": 8.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 11643.5,
+          "tokens_out": 698683,
+          "tokens_in": 0,
+          "requests_completed": 3317,
+          "ttft_ms_p50": 12.7,
+          "ttft_ms_p99": 20.3
+        },
+        {
+          "minute": 9.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 11662.7,
+          "tokens_out": 700038,
+          "tokens_in": 0,
+          "requests_completed": 3323,
+          "ttft_ms_p50": 12.7,
+          "ttft_ms_p99": 20.2
+        },
+        {
+          "minute": 10.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 11612.6,
+          "tokens_out": 696555,
+          "tokens_in": 0,
+          "requests_completed": 3294,
+          "ttft_ms_p50": 12.8,
+          "ttft_ms_p99": 19.0
+        },
+        {
+          "minute": 11.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 11623.8,
+          "tokens_out": 697495,
+          "tokens_in": 0,
+          "requests_completed": 3317,
+          "ttft_ms_p50": 12.8,
+          "ttft_ms_p99": 19.7
+        },
+        {
+          "minute": 12.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 11639.3,
+          "tokens_out": 698222,
+          "tokens_in": 0,
+          "requests_completed": 3311,
+          "ttft_ms_p50": 12.8,
+          "ttft_ms_p99": 20.2
+        },
+        {
+          "minute": 13.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 11651.4,
+          "tokens_out": 699229,
+          "tokens_in": 0,
+          "requests_completed": 3321,
+          "ttft_ms_p50": 12.7,
+          "ttft_ms_p99": 20.7
+        },
+        {
+          "minute": 14.0,
+          "is_warmup": false,
+          "throughput_tokens_per_sec": 11450.2,
+          "tokens_out": 686752,
+          "tokens_in": 0,
+          "requests_completed": 3257,
+          "ttft_ms_p50": 13.6,
+          "ttft_ms_p99": 20.7
+        }
+      ],
+      "sustained_throughput_tokens_per_sec": 11576.2,
+      "throttle_ratio": 0.958,
+      "throttle_onset_minute": null,
+      "ttft_p99_drift_ms": -15.4
+    }
+  },
+  "accuracy": {
+    "subset_score": null,
+    "baseline_delta": null,
+    "valid": false,
+    "notes": "Run --scenario accuracy to check model accuracy."
+  },
+  "meta": {
+    "submitted_by": "JuhaoLiang1997",
+    "submission_type": "individual",
+    "date": "2026-05-18",
+    "time": "10:30:10",
+    "run_id": "a4e6a6e4",
+    "run_name": "nvidia_a100_sxm4_80gbx1_suite_F_nvidia_vllm020_0f6c56e4_a4e6a6e4",
+    "flagged": null,
+    "reproduce_script": "runners/nvidia_vllm020_0f6c56e4/runner.py",
+    "env_info_file": "../env_info.json",
+    "log_file": "run.log",
+    "samples_file": "samples.jsonl",
+    "notes": null,
+    "benchmark_start_time": "2026-05-18T10:15:08.957236+00:00",
+    "benchmark_end_time": "2026-05-18T10:30:10.126252+00:00",
+    "benchmark_elapsed_minutes": 15.0,
+    "model_load_seconds": 21.3
+  }
+}
\ No newline at end of file
diff --git a/run.py b/run.py
index 85849c39..d7b9dbad 100644
--- a/run.py
+++ b/run.py
@@ -128,8 +128,11 @@ def cmd_list(args) -> int:
             print(f"      {meta.get('description', '')}")
             if supersedes_chain:
                 print(f"      Replaces: {supersedes_chain[0]}")
+            install_sh = RUNNERS_DIR / rid / "install.sh"
             req_path = RUNNERS_DIR / rid / "requirements.txt"
-            if req_path.exists():
+            if install_sh.exists():
+                print(f"      Install: bash runners/{rid}/install.sh")
+            elif req_path.exists():
                 print(f"      Install: pip install -r runners/{rid}/requirements.txt")
             print()
 
diff --git a/runners/benchmark_runner.py b/runners/benchmark_runner.py
index 30afb70c..5b0c2747 100644
--- a/runners/benchmark_runner.py
+++ b/runners/benchmark_runner.py
@@ -561,8 +561,19 @@ def _compute_implementation_id(self) -> str | None:
         unexpected path or from the base class directly).
         """
         try:
-            # Get the path of the concrete subclass file (not benchmark_runner.py)
-            runner_file = Path(inspect.getfile(self.__class__))
+            # Resolve runner.py path. Prefer the defining module's __file__ because
+            # torch may patch inspect.getfile() and break dynamic imports.
+            runner_file = None
+            mod = sys.modules.get(self.__class__.__module__)
+            if mod is not None:
+                mod_file = getattr(mod, "__file__", None)
+                if mod_file:
+                    runner_file = Path(mod_file).resolve()
+            if runner_file is None or runner_file.name != "runner.py":
+                try:
+                    runner_file = Path(inspect.getfile(self.__class__)).resolve()
+                except (TypeError, OSError):
+                    return None
 
             # The runner must be inside a folder named {platform}_{name}_{hash8}
             folder      = runner_file.parent
diff --git a/runners/nvidia_vllm020_0f6c56e4/README.md b/runners/nvidia_vllm020_0f6c56e4/README.md
new file mode 100644
index 00000000..2d1657f7
--- /dev/null
+++ b/runners/nvidia_vllm020_0f6c56e4/README.md
@@ -0,0 +1,167 @@
+# nvidia_vllm020_0f6c56e4 — NVIDIA vLLM Runner (0.20.x)
+
+AccelMark reference runner for NVIDIA GPUs running **vLLM 0.20.x**.
+
+Supersedes [`nvidia_vllm_47f5d58e`](../nvidia_vllm_47f5d58e/) (vLLM 0.7.3). Use the predecessor for CUDA 11.8 / legacy stacks; use this runner for Ampere+ datacenter GPUs with CUDA 12.8 or 13.0.
+
+## Supported suites
+
+| Suite | Description | Notes |
+|-------|-------------|-------|
+| Suite A | Single-chip, Llama-3-8B | Speculative and burst extra scenarios |
+| Suite B | Multi-chip, Llama-3-70B | Requires 4× A100/H100 or equivalent |
+| Suite C | Quantization, Llama-3.1-8B | **Requires `enforce_eager: true` in runner config** — see below |
+| Suite D | Long context ~28K input | `max_model_len` 30,208 |
+| Suite E | Multi-chip scaling, Llama-3-8B | NVLink recommended |
+| Suite F | Consumer/edge, Qwen2.5-0.5B | Pre-Ampere: use predecessor + `--enforce-eager` |
+| Suite G | MoE multi-chip, Mixtral-8x7B | ≥2× A100-80GB |
+
+## What changed vs nvidia_vllm_47f5d58e
+
+| Area | 0.7.3 (predecessor) | 0.20.x (this runner) |
+|---|---|---|
+| Default CUDA | 12.1 | **13.0** (12.8 via `PYTORCH_INDEX`) |
+| PyTorch | 2.5.1 | **2.11** (pulled by vLLM) |
+| Python | 3.10+ | **3.10–3.12** |
+| Transformers | v4.57 | vLLM-pinned (see `result.json` version string) |
+| FlashAttention | FA2 | FA4 (MLA prefill default on supported models) |
+| Quantization | fp8, compressed-tensors, gptq_marlin | + **turboquant** |
+| Model runner | V1 | V2 |
+
+Release notes: [v0.20.0](https://github.com/vllm-project/vllm/releases/tag/v0.20.0) · [v0.20.1](https://github.com/vllm-project/vllm/releases/tag/v0.20.1).
+
+## Installation
+
+### Prerequisites
+
+- NVIDIA GPU, compute capability ≥ 7.0 (Volta+; Ampere+ recommended)
+- **CUDA 13.0** driver/runtime (default for this stack), or **CUDA 12.8** via PyTorch index below
+- **Python 3.10, 3.11, or 3.12** (not 3.13+ until vLLM supports it)
+- A clean virtualenv/conda env if upgrading from `vllm==0.7.3` (mixed installs break imports)
+
+### Recommended: `install.sh`
+
+From the AccelMark repo root:
+
+```bash
+# Create and activate a fresh env (example)
+conda create -n accel python=3.12 -y
+conda activate accel
+
+# Default install (CUDA 13.0 wheels from vLLM)
+bash runners/nvidia_vllm020_0f6c56e4/install.sh
+```
+
+CUDA **12.8** hosts must point pip at the cu128 PyTorch index:
+
+```bash
+PYTORCH_INDEX=https://download.pytorch.org/whl/cu128 \
+  bash runners/nvidia_vllm020_0f6c56e4/install.sh
+```
+
+`install.sh` reads versions from `requirements.txt` and installs in three stages (pip cannot resolve `vllm` and `mistral-common[image]` in one pass). **Do not** run `pip install -r requirements.txt` directly.
+
+### Verify
+
+```bash
+python -c "import vllm, torch; print('vllm', vllm.__version__, 'torch', torch.__version__, 'cuda', torch.cuda.is_available())"
+```
+
+### Manual install (equivalent to `install.sh`)
+
+```bash
+pip install mistral-common==1.11.2
+pip install vllm==0.20.1    # add --extra-index-url if using PYTORCH_INDEX above
+pip install "numpy>=1.26.0,<2.0" "jsonschema>=4.20.0" "psutil>=7.0.0" "tqdm>=4.66.0" "nvidia-ml-py>=13.0" "PyYAML>=6.0"
+```
+
+### Submitter profile and local models
+
+```bash
+cp configs/submitter.yaml.example configs/submitter.yaml   # set submitted_by
+cp configs/models_local.yaml.example configs/models_local.yaml   # optional local paths
+```
+
+## Usage
+
+```bash
+python run.py --runner nvidia_vllm020_0f6c56e4 --suite suite_A
+python run.py --runner nvidia_vllm020_0f6c56e4 --suite suite_B --tensor-parallel-size 4
+python run.py --runner nvidia_vllm020_0f6c56e4 --suite suite_C
+```
+
+Or invoke the runner directly:
+
+```bash
+python runners/nvidia_vllm020_0f6c56e4/runner.py --suite suite_F --scenario offline
+```
+
+## Runner config
+
+```bash
+cp configs/runner_configs/runner_nvidia_vllm020_0f6c56e4.yaml.example \
+   configs/runner_configs/runner_nvidia_vllm020_0f6c56e4.yaml
+```
+
+Merge priority: CLI flags > suite-specific section > global defaults > runner defaults.
+
+### Suite C — quantization
+
+Copy `runner_nvidia_vllm020_0f6c56e4.yaml` from the example and keep the `suite_C` override:
+
+```yaml
+suites:
+  suite_C:
+    enforce_eager: true
+```
+
+**`enforce_eager` (required for W8A8 / W8A16 on all GPUs):** vLLM 0.20 + CUDA graphs + `compressed-tensors` can yield repetitive garbage (`-addon-addon-…`) with normal-looking offline throughput. Suite C must set `enforce_eager: true` (or pass `--enforce-eager`).
+
+**FP8 on Ampere (A100 / A800 / RTX 30xx, compute capability &lt; 8.9):** vLLM 0.20 does **not** run RedHatAI FP8 checkpoints correctly. The engine falls back to weight-only Marlin FP8 (`marlin_utils_fp8` warning in the log) and accuracy stays ~0 even with `enforce_eager: true`. This is a vLLM 0.20 limitation, not an AccelMark bug. On these GPUs, Suite C **W8A8 / W8A16 / BF16** are valid; for FP8 use **H100+** (sm ≥ 8.9) or the [`nvidia_vllm_47f5d58e`](../nvidia_vllm_47f5d58e/) runner on vLLM 0.7.3.
+
+### Optional `engine_kwargs` (0.20)
+
+```yaml
+engine_kwargs:
+  attention_backend: FLASH_ATTN_4
+  # compilation_config:
+  #   cudagraph_mode: full_and_piecewise
+  # kv_cache_dtype: turboquant   # experimental; suite C
+```
+
+See [vLLM EngineArgs](https://docs.vllm.ai/en/latest/api/vllm/engine/arg_utils.html).
+
+## Troubleshooting
+
+### Large-memory GPUs (H20, A100 80GB) — SIGFPE / silent crash
+
+Symptom: subprocess exits with `SIGFPE (return code -8)` after model load or on first batch.
+
+```bash
+pip install --upgrade nvidia-cublas-cu13
+```
+
+On CUDA 12.8 stacks use `nvidia-cublas-cu12` instead. Details: [predecessor README](../nvidia_vllm_47f5d58e/README.md#large-memory-gpus-h20-a100-80-gb-etc).
+
+### Pre-Ampere (V100, T4, RTX 20xx)
+
+This runner targets Ampere+ with CUDA 12.8/13.0. For Volta/Turing, use [`nvidia_vllm_47f5d58e`](../nvidia_vllm_47f5d58e/) with `--enforce-eager` (BF16→FP16 fallback, no CUDA graphs). See the predecessor README for Suite F / Suite A on V100.
+
+### Suite C accuracy ~0 but offline OK
+
+1. Confirm `configs/runner_configs/runner_nvidia_vllm020_0f6c56e4.yaml` exists and has `suites.suite_C.enforce_eager: true`.
+2. Re-run accuracy with `--force` (or delete the format’s `accuracy/` folder).
+3. If the log shows `Weight-only FP8 compression will be used leveraging the Marlin kernel` on an **A100**, FP8 will stay ~0 on vLLM 0.20 — use W8A8/W8A16 or H100+ for FP8 (see Suite C section above).
+
+## Hardware matrix
+
+Full GPU compatibility table: [`nvidia_vllm_47f5d58e/README.md`](../nvidia_vllm_47f5d58e/README.md#hardware-compatibility).
+
+## Files
+
+| File | Purpose |
+|------|---------|
+| `runner.py` | Runner implementation |
+| `meta.json` | Runner metadata and suite support |
+| `requirements.txt` | Pinned dependency list (source of truth) |
+| `install.sh` | Staged pip install |
diff --git a/runners/nvidia_vllm020_0f6c56e4/install.sh b/runners/nvidia_vllm020_0f6c56e4/install.sh
new file mode 100644
index 00000000..e4e82924
--- /dev/null
+++ b/runners/nvidia_vllm020_0f6c56e4/install.sh
@@ -0,0 +1,27 @@
+#!/usr/bin/env bash
+# Install dependencies from requirements.txt in three stages.
+# pip cannot resolve vllm and mistral-common[image] in a single install pass.
+set -euo pipefail
+
+RUNNER_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+REQ="${RUNNER_DIR}/requirements.txt"
+EXTRA=()
+if [[ -n "${PYTORCH_INDEX:-}" ]]; then
+  EXTRA=(--extra-index-url "${PYTORCH_INDEX}")
+fi
+# line PKG: print the first requirements.txt line that starts with PKG plus a version operator.
+line() { awk -v p="$1" '$0 ~ "^" p "[=<>]" { print; exit }' "${REQ}"; }
+
+echo "==> $(line mistral-common)"
+pip install "$(line mistral-common)"
+# Stage 2. ${EXTRA[@]+...} keeps an empty array from tripping `set -u` on bash < 4.4.
+echo "==> $(line vllm)"
+pip install "$(line vllm)" ${EXTRA[@]+"${EXTRA[@]}"}
+# Stage 3: everything in requirements.txt except comments, blank lines, and the staged packages.
+TMP="$(mktemp)"
+trap 'rm -f "${TMP}"' EXIT
+awk '!/^#/ && NF && $0 !~ /^mistral-common/ && $0 !~ /^vllm/' "${REQ}" > "${TMP}"
+echo "==> AccelMark utilities"
+pip install -r "${TMP}"
+
+python -c "import vllm; print('OK — vllm', vllm.__version__)"
diff --git a/runners/nvidia_vllm020_0f6c56e4/meta.json b/runners/nvidia_vllm020_0f6c56e4/meta.json
new file mode 100644
index 00000000..fcf9f0c9
--- /dev/null
+++ b/runners/nvidia_vllm020_0f6c56e4/meta.json
@@ -0,0 +1,21 @@
+{
+  "id": "nvidia_vllm020_0f6c56e4",
+  "platform": "nvidia",
+  "name": "vLLM 0.20 on NVIDIA",
+  "framework": "vLLM",
+  "submitted_by": "JuhaoLiang1997",
+  "description": "AccelMark reference runner for NVIDIA GPUs using vLLM 0.20.x. Supersedes nvidia_vllm_47f5d58e (vLLM 0.7.3). Supports suites A–G.",
+  "supersedes_chain": [],
+  "notes": "vLLM 0.20.x line: torch 2.11, CUDA 13.0 default. Adds turboquant backend. Suite C requires enforce_eager in runner config (see README).",
+  "created": "2026-05-15",
+  "hardware_label": null,
+  "suite_support": {
+    "A": "pending",
+    "B": "pending",
+    "C": "pending",
+    "D": "pending",
+    "E": "pending",
+    "F": "pending",
+    "G": "pending"
+  }
+}
diff --git a/runners/nvidia_vllm020_0f6c56e4/requirements.txt b/runners/nvidia_vllm020_0f6c56e4/requirements.txt
new file mode 100644
index 00000000..b09fcee1
--- /dev/null
+++ b/runners/nvidia_vllm020_0f6c56e4/requirements.txt
@@ -0,0 +1,19 @@
+# AccelMark — NVIDIA vLLM 0.20.x dependencies
+#
+# Install:  bash install.sh
+# Do not:   pip install -r requirements.txt   (pip cannot resolve vllm and mistral-common[image] in a single pass)
+#
+# Python 3.10–3.12. Reference stack: torch 2.11 + vllm 0.20.1 + CUDA 13.0
+# CUDA 12.8: PYTORCH_INDEX=https://download.pytorch.org/whl/cu128 bash install.sh
+
+# --- vLLM stack (install.sh stages these; torch/transformers pulled by vllm) ---
+mistral-common==1.11.2
+vllm==0.20.1
+
+# --- AccelMark utilities ---
+numpy>=1.26.0,<2.0
+jsonschema>=4.20.0
+psutil>=7.0.0
+tqdm>=4.66.0
+nvidia-ml-py>=13.0
+PyYAML>=6.0
diff --git a/runners/nvidia_vllm020_0f6c56e4/runner.py b/runners/nvidia_vllm020_0f6c56e4/runner.py
new file mode 100644
index 00000000..8383dfaf
--- /dev/null
+++ b/runners/nvidia_vllm020_0f6c56e4/runner.py
@@ -0,0 +1,378 @@
+"""
+AccelMark — NVIDIA vLLM benchmark script (vLLM 0.20.x).
+
+Implements BenchmarkRunner for vLLM 0.20.x on NVIDIA GPUs.
+All orchestration logic lives in runners/benchmark_runner.py.
+"""
+
+import asyncio
+import sys
+import time
+from pathlib import Path
+from typing import Optional
+
+_REPO_ROOT = Path(__file__).resolve().parent.parent.parent
+sys.path.insert(0, str(_REPO_ROOT))
+
+import torch
+from vllm import LLM, AsyncLLMEngine, SamplingParams
+from vllm.engine.arg_utils import AsyncEngineArgs
+from transformers import AutoTokenizer
+
+from runners.benchmark_runner import BenchmarkRunner, InferenceRequest
+from loadgen.types import InferenceResult
+
+
+import logging
+logging.getLogger("vllm.engine.async_llm_engine").setLevel(logging.WARNING)
+logging.getLogger("vllm.engine.llm_engine").setLevel(logging.WARNING)
+
+
+class VLLMRunner(BenchmarkRunner):
+    """AccelMark benchmark runner using vLLM on NVIDIA GPUs."""
+
+    SUPPORTS_STREAMING = True
+    SUPPORTS_BATCHING = True
+    SUPPORTS_ONLINE = True
+    SUPPORTS_MULTI_CHIP = True
+
+    SUPPORTED_PRECISIONS = ["bf16", "fp16", "fp32"]
+    SUPPORTED_QUANTIZATION_BACKENDS = [
+        "fp8",
+        "compressed-tensors",
+        "gptq_marlin",
+        "turboquant",
+    ]
+
+    def __init__(self) -> None:
+        self.llm: Optional[LLM] = None  # sync engine; assigned by load_model when use_async is false
+        self.engine: Optional[AsyncLLMEngine] = None  # async engine; assigned by load_model when use_async is true
+        self.tokenizer: Optional[AutoTokenizer] = None  # assigned by load_model
+        self.sampling_params: Optional[SamplingParams] = None  # assigned by load_model (temperature 0.0)
+        self._loop: Optional[asyncio.AbstractEventLoop] = None  # event loop created by load_model for the async engine
+
+    def _get_chip_count(self) -> int:
+        """Return the number of available CUDA GPUs."""
+        try:
+            import torch
+            n = torch.cuda.device_count()
+            return n if n > 0 else 1  # never report fewer than one chip
+        except Exception:
+            return 1  # same fallback when the device query itself fails
+
+    def _get_framework_name(self) -> str:
+        return "vLLM"  # fixed framework label for this runner
+
+    def _get_framework_version(self) -> str:  # "<vllm>+transformers-<tfm>", or just "<vllm>"
+        vllm_v = "unknown"  # kept when vllm cannot be imported or has no __version__
+        try:
+            import vllm
+            vllm_v = vllm.__version__
+        except Exception:
+            pass
+        # The transformers version is optional: it is appended only when it can be read.
+        tfm_v = None
+        try:
+            import transformers
+            tfm_v = transformers.__version__
+        except Exception:
+            pass
+
+        if tfm_v:
+            return f"{vllm_v}+transformers-{tfm_v}"
+        return vllm_v
+
+    def load_model(self, model_path: str, parallelism: dict) -> None:
+        """Load model — sync LLM for offline/accuracy, async engine for streaming."""
+        tp_size = parallelism["tensor_parallel_size"]
+        pp_size = parallelism["pipeline_parallel_size"]
+        ep_size = parallelism.get("expert_parallel_size", 1)
+        assert pp_size <= 1, "Pipeline parallelism is not supported in VLLMRunner"
+
+        max_tokens    = parallelism["max_tokens"]
+        max_model_len = parallelism["max_model_len"]
+        use_async     = parallelism["use_async"]
+        enforce_eager = getattr(self, "_enforce_eager", False)
+
+        cfg             = getattr(self, "_runner_config", {})
+        max_num_seqs    = cfg.get("max_num_seqs", 512)
+        gpu_memory_util = cfg.get("gpu_memory_utilization", 0.90)
+        extra_kwargs    = dict(cfg.get("engine_kwargs") or {})
+
+        try:
+            import dataclasses
+            from vllm.engine.arg_utils import EngineArgs as _EngineArgs
+            _valid = {f.name for f in dataclasses.fields(_EngineArgs)}
+            _dropped = {k: v for k, v in extra_kwargs.items() if k not in _valid}
+            if _dropped:
+                print(f"  Warning: engine_kwargs keys not supported by this "
+                      f"vLLM version and will be ignored: {list(_dropped)}")
+            extra_kwargs = {k: v for k, v in extra_kwargs.items() if k in _valid}
+        except Exception:
+            pass
+
+        effective_precision = getattr(self, "_effective_precision", "BF16").upper()
+        precision           = getattr(self, "_precision", None) or effective_precision
+
+        _dtype_override  = getattr(self, "_precision_dtype_override", None)
+        _prec_eng_kwargs = dict(getattr(self, "_precision_engine_kwargs", None) or {})
+
+        quantization = _prec_eng_kwargs.pop("quantization", None)
+
+        _NATIVE_DTYPE_MAP = {
+            "BF16":  "bfloat16",
+            "FP16":  "float16",
+            "FP32":  "float32",
+        }
+        dtype = _NATIVE_DTYPE_MAP.get(precision, "auto")
+        self._quantization_method = quantization
+
+        if _dtype_override:
+            dtype = _dtype_override
+
+        if _prec_eng_kwargs:
+            _prec_eng_kwargs.update(extra_kwargs)
+            extra_kwargs = _prec_eng_kwargs
+
+        print(f"Loading model: precision={precision}, dtype={dtype}"
+              + (f", quantization_method={self._quantization_method}"
+                 if self._quantization_method else ""))
+
+        self.tokenizer = AutoTokenizer.from_pretrained(
+            model_path, trust_remote_code=False
+        )
+
+        self.sampling_params = SamplingParams(
+            max_tokens=max_tokens,
+            temperature=0.0,
+        )
+
+        if not use_async:
+            llm_kwargs = dict(
+                model=model_path,
+                dtype=dtype,
+                tensor_parallel_size=tp_size,
+                trust_remote_code=False,
+                enforce_eager=enforce_eager,
+                max_num_seqs=max_num_seqs,
+                gpu_memory_utilization=gpu_memory_util,
+                **extra_kwargs,
+            )
+            if ep_size > 1:
+                llm_kwargs["enable_expert_parallel"] = True
+                llm_kwargs["tensor_parallel_size"]   = tp_size
+            if quantization:
+                llm_kwargs["quantization"] = quantization
+            if max_model_len:
+                llm_kwargs["max_model_len"] = max_model_len
+            self.llm = LLM(**llm_kwargs)
+        else:
+            self._loop = asyncio.new_event_loop()
+            asyncio.set_event_loop(self._loop)
+            engine_kwargs = dict(
+                model=model_path,
+                dtype=dtype,
+                tensor_parallel_size=tp_size,
+                trust_remote_code=False,
+                enforce_eager=enforce_eager,
+                gpu_memory_utilization=gpu_memory_util,
+                **extra_kwargs,
+            )
+            if ep_size > 1:
+                engine_kwargs["enable_expert_parallel"] = True
+            if max_model_len:
+                engine_kwargs["max_model_len"] = max_model_len
+            engine_args = AsyncEngineArgs(**engine_kwargs)
+            self.engine = AsyncLLMEngine.from_engine_args(engine_args)
+
+    def get_effective_dtype(self) -> Optional[str]:  # dtype name read from the loaded engine, else a stored fallback
+        try:
+            if self.llm is not None:
+                dtype = self.llm.llm_engine.model_config.dtype
+                return str(dtype).replace("torch.", "")  # drop the "torch." prefix from the string form
+            elif self.engine is not None:
+                dtype = self.engine.engine.model_config.dtype  # NOTE(review): confirm this attribute path exists on the async engine in use
+                return str(dtype).replace("torch.", "")
+        except Exception:
+            pass  # attribute layout differs from what is expected; use the fallback below
+        return getattr(self, "_effective_dtype", None)  # None when that attribute is not set either
+
+    def inference_fn_offline(self, requests: list[InferenceRequest]) -> list[InferenceResult]:
+        formatted = [self._format_prompt(r.prompt) for r in requests]
+        t_start = time.perf_counter()
+        outputs = self.llm.generate(formatted, self.sampling_params)  # one call for the whole batch
+        elapsed = time.perf_counter() - t_start  # wall time of that single generate() call
+        # Keep the generated texts on the instance; they are not read again in this method.
+        self._last_accuracy_outputs = [o.outputs[0].text for o in outputs]
+        # One result per output. Timing is batch-level, so no per-request first-token time is available.
+        results = []
+        for output in outputs:
+            results.append(InferenceResult(
+                first_token_time_ms=None,
+                total_time_ms=elapsed * 1000,  # the same batch wall time on every result
+                output_tokens=len(output.outputs[0].token_ids),
+                input_tokens=len(output.prompt_token_ids),
+                success=True,
+                output_text=output.outputs[0].text,
+            ))
+        return results
+
+    async def inference_fn_streaming(self, request: InferenceRequest) -> InferenceResult:
+        from vllm.utils import random_uuid
+
+        formatted = self._format_prompt(request.prompt)
+        request_id = random_uuid()
+        t_start = time.perf_counter()
+        first_token_time_ms = None
+        output_tokens = 0
+        output_text = ""
+
+        async for output in self.engine.generate(
+            formatted, self.sampling_params, request_id
+        ):
+            if (
+                first_token_time_ms is None
+                and len(output.outputs[0].token_ids) > 0
+            ):
+                first_token_time_ms = (time.perf_counter() - t_start) * 1000
+            output_tokens = len(output.outputs[0].token_ids)
+            output_text = output.outputs[0].text
+
+        total_time_ms = (time.perf_counter() - t_start) * 1000
+        return InferenceResult(
+            first_token_time_ms=first_token_time_ms,
+            total_time_ms=total_time_ms,
+            output_tokens=output_tokens,
+            input_tokens=0,
+            success=True,
+            output_text=output_text,
+        )
+
+    async def inference_fn_token_stream(self, request: InferenceRequest):  # async generator yielding text deltas
+        from vllm.utils import random_uuid
+
+        formatted   = self._format_prompt(request.prompt)
+        request_id  = random_uuid()
+        prev_length = 0  # length of the text that has already been yielded
+        # Slicing by prev_length treats each output's text as cumulative — NOTE(review): confirm for the vLLM version in use.
+        async for output in self.engine.generate(
+            formatted, self.sampling_params, request_id
+        ):
+            current_text = output.outputs[0].text
+            delta = current_text[prev_length:]
+            if delta:  # nothing is yielded for an update that adds no text
+                yield delta
+                prev_length = len(current_text)
+
+    def get_peak_memory_gb(self) -> Optional[float]:  # GiB (bytes / 1024**3), or None when the query fails
+        try:
+            return torch.cuda.max_memory_allocated() / (1024 ** 3)  # NOTE(review): confirm this covers memory used by vLLM worker processes
+        except Exception:
+            return None
+
+    def release_resources(self) -> None:
+        if self.llm is not None:
+            try:
+                del self.llm
+            except Exception:
+                pass
+            self.llm = None
+
+        if self.engine is not None:
+            try:
+                if self._loop and not self._loop.is_closed():
+                    self._loop.run_until_complete(self.engine.shutdown())
+            except Exception:
+                pass
+            try:
+                del self.engine
+            except Exception:
+                pass
+            self.engine = None
+
+        try:
+            from vllm.distributed.parallel_state import cleanup_dist_env_and_memory
+            cleanup_dist_env_and_memory(shutdown_ray=False)
+        except Exception:
+            try:
+                from vllm.distributed.parallel_state import (
+                    destroy_model_parallel, destroy_distributed_environment,
+                )
+                destroy_model_parallel()
+                destroy_distributed_environment()
+            except Exception:
+                pass
+
+        try:
+            if torch.distributed.is_initialized():
+                torch.distributed.destroy_process_group()
+        except Exception:
+            pass
+
+    def parse_args(self):  # returns the base-class args unchanged; also sets _enforce_eager, _parallelism, _chip_count, _precision
+        args = super().parse_args()
+        cfg = self._runner_config  # presumably populated by the base-class parse_args — verify in BenchmarkRunner
+        # A second, help-less parser picks up the runner-specific flags; unknown flags are ignored.
+        import argparse
+        parser = argparse.ArgumentParser(add_help=False)
+        parser.add_argument("--tensor-parallel-size", type=int, default=None,
+                            dest="tensor_parallel_size")
+        parser.add_argument("--pipeline-parallel-size", type=int, default=None,
+                            dest="pipeline_parallel_size")
+        parser.add_argument("--expert-parallel-size", type=int, default=None,
+                            dest="expert_parallel_size")
+        parser.add_argument("--enforce-eager", action="store_true", default=False,
+                            dest="enforce_eager")
+        extra, _ = parser.parse_known_args()
+        # The helper returns the size plus a label for where it came from (printed below).
+        tp_size, _tp_source = self._resolve_tensor_parallel_size(
+            extra.tensor_parallel_size
+        )
+        # For pp/ep a CLI value wins over the runner config; the default is 1.
+        pp_size = (extra.pipeline_parallel_size
+                   if extra.pipeline_parallel_size is not None
+                   else cfg.get("pipeline_parallel_size", 1))
+        ep_size = (extra.expert_parallel_size
+                   if extra.expert_parallel_size is not None
+                   else cfg.get("expert_parallel_size", 1))
+        self._enforce_eager = extra.enforce_eager or cfg.get("enforce_eager", False)  # on if either source enables it
+
+        print(f"  tensor_parallel_size = {tp_size}  [{_tp_source}]")
+        if ep_size > 1:
+            print(f"  expert_parallel_size = {ep_size}  [cli/yaml]")
+        # Runners that declare no multi-chip support are forced back to a single chip.
+        if not self.SUPPORTS_MULTI_CHIP and tp_size * pp_size > 1:
+            print(f"Warning: {self.__class__.__name__} does not support multi-chip. "
+                  f"Ignoring tensor_parallel_size={tp_size}, using 1.")
+            tp_size = 1
+            pp_size = 1
+            ep_size = 1
+
+        self._parallelism = {
+            "tensor_parallel_size":   tp_size,
+            "pipeline_parallel_size": pp_size,
+            "expert_parallel_size":   ep_size,
+            "data_parallel_size":     1,  # always 1 in this runner
+        }
+        self._chip_count = tp_size * pp_size  # ep_size is not part of the chip count
+        self._precision  = getattr(args, "precision", None)
+        return args
+
+    def get_extra_subprocess_args(self, args) -> list[str]:  # CLI flags mirroring the settings stored by parse_args
+        extra = [
+            "--tensor-parallel-size",  # always forwarded, even when the value is 1
+            str(self._parallelism.get("tensor_parallel_size", 1)),
+        ]
+        if self._parallelism.get("pipeline_parallel_size", 1) > 1:  # forwarded only when above 1
+            extra += ["--pipeline-parallel-size",
+                      str(self._parallelism["pipeline_parallel_size"])]
+        if self._parallelism.get("expert_parallel_size", 1) > 1:  # forwarded only when above 1
+            extra += ["--expert-parallel-size",
+                      str(self._parallelism["expert_parallel_size"])]
+        if self._enforce_eager:  # set in parse_args; the flag takes no value
+            extra += ["--enforce-eager"]
+        return extra
+
+
+if __name__ == "__main__":  # script entry point
+    VLLMRunner().main()  # main() is not defined on VLLMRunner; it is inherited from the base class