{
  "schema": "localllm.advisor.gpu_summary@1.0.0",
  "generated_at": "2026-05-22T06:53:43.121Z",
  "gpu": {
    "slug": "nvidia-rtx-4090",
    "name": "NVIDIA RTX 4090",
    "vendor": "nvidia",
    "vram_mb": 24576,
    "bandwidth_gbps": 1008,
    "memory_type": "GDDR6X",
    "architecture": "Ada Lovelace",
    "tdp_watts": 450,
    "price_usd": 1999
  },
  "by_use_case": {
    "chat": [
      {
        "id": "qwen3-30b-a3b-instruct-2507",
        "slug": "qwen3-30b-a3b-instruct-2507",
        "name": "Qwen3-30B-A3B-Instruct-2507",
        "family": "qwen",
        "params_b": 30,
        "architecture": "moe",
        "quality_score": 73,
        "quant": "Q4_K_M",
        "bpw": 4.5,
        "vram_mb": 16875,
        "vram_pct": 68.7,
        "estimated_tps": 52
      },
      {
        "id": "deepseek-r1-0528-qwen3-8b",
        "slug": "deepseek-r1-0528-qwen3-8b",
        "name": "DeepSeek-R1-0528-Qwen3-8B",
        "family": "qwen",
        "params_b": 8.2,
        "architecture": "dense",
        "quality_score": 70,
        "quant": "FP16",
        "bpw": 16,
        "vram_mb": 16400,
        "vram_pct": 66.7,
        "estimated_tps": 53
      },
      {
        "id": "phi-4-14b-base",
        "slug": "phi-4-14b",
        "name": "Phi-4 14B",
        "family": "phi",
        "params_b": 14,
        "architecture": "dense",
        "quality_score": 69,
        "quant": "Q8_0",
        "bpw": 8,
        "vram_mb": 16486,
        "vram_pct": 67.1,
        "estimated_tps": 53
      },
      {
        "id": "qwen3-coder-30b-a3b",
        "slug": "qwen3-coder-30b-a3b",
        "name": "Qwen3-Coder 30B A3B",
        "family": "qwen",
        "params_b": 30,
        "architecture": "moe",
        "quality_score": 69,
        "quant": "Q4_K_M",
        "bpw": 4.5,
        "vram_mb": 16875,
        "vram_pct": 68.7,
        "estimated_tps": 52
      },
      {
        "id": "qwen3-4b-instruct-2507",
        "slug": "qwen3-4b-instruct-2507",
        "name": "Qwen3-4B-Instruct-2507",
        "family": "qwen",
        "params_b": 4,
        "architecture": "dense",
        "quality_score": 66,
        "quant": "FP16",
        "bpw": 16,
        "vram_mb": 8000,
        "vram_pct": 32.6,
        "estimated_tps": 110
      },
      {
        "id": "nemotron-nemo-31.6b",
        "slug": "nvidia-nemotron-3-nano-30b-a3b-bf16",
        "name": "NVIDIA-Nemotron-3-Nano-30B-A3B-BF16",
        "family": "nemotron",
        "params_b": 31.6,
        "architecture": "dense",
        "quality_score": 65,
        "quant": "Q4_K_M",
        "bpw": 4.5,
        "vram_mb": 18702,
        "vram_pct": 76.1,
        "estimated_tps": 47
      },
      {
        "id": "qwen-27.8b",
        "slug": "qwen3.5-27b",
        "name": "Qwen3.5-27B",
        "family": "qwen",
        "params_b": 27.8,
        "architecture": "dense",
        "quality_score": 64,
        "quant": "Q4_K_M",
        "bpw": 4.5,
        "vram_mb": 16513,
        "vram_pct": 67.2,
        "estimated_tps": 53
      },
      {
        "id": "qwen1.5-32b",
        "slug": "qwen-1.5-32b",
        "name": "Qwen 1.5 32B",
        "family": "qwen",
        "params_b": 32,
        "architecture": "dense",
        "quality_score": 64,
        "quant": "Q4_K_M",
        "bpw": 4.5,
        "vram_mb": 18932,
        "vram_pct": 77,
        "estimated_tps": 46
      },
      {
        "id": "qwen-32.5b",
        "slug": "qwen2.5-32b",
        "name": "Qwen2.5-32B",
        "family": "qwen",
        "params_b": 32.5,
        "architecture": "dense",
        "quality_score": 64,
        "quant": "Q4_K_M",
        "bpw": 4.5,
        "vram_mb": 19220,
        "vram_pct": 78.2,
        "estimated_tps": 46
      },
      {
        "id": "qwen-r1-32.8b",
        "slug": "deepseek-r1-distill-qwen-32b",
        "name": "DeepSeek-R1-Distill-Qwen-32B",
        "family": "qwen",
        "params_b": 32.8,
        "architecture": "dense",
        "quality_score": 64,
        "quant": "Q4_K_M",
        "bpw": 4.5,
        "vram_mb": 19393,
        "vram_pct": 78.9,
        "estimated_tps": 45
      },
      {
        "id": "qwen-coder-32.8b",
        "slug": "qwen2.5-coder-32b",
        "name": "Qwen2.5-Coder-32B",
        "family": "qwen",
        "params_b": 32.8,
        "architecture": "dense",
        "quality_score": 64,
        "quant": "Q4_K_M",
        "bpw": 4.5,
        "vram_mb": 19393,
        "vram_pct": 78.9,
        "estimated_tps": 45
      },
      {
        "id": "qwen-coder-32.8b",
        "slug": "qwen2.5-coder-32b",
        "name": "Qwen2.5-Coder-32B",
        "family": "qwen",
        "params_b": 32.8,
        "architecture": "dense",
        "quality_score": 64,
        "quant": "Q4_K_M",
        "bpw": 4.5,
        "vram_mb": 19393,
        "vram_pct": 78.9,
        "estimated_tps": 45
      },
      {
        "id": "qwen-r1-32.8b",
        "slug": "deepseek-r1-distill-qwen-32b",
        "name": "DeepSeek-R1-Distill-Qwen-32B",
        "family": "qwen",
        "params_b": 32.8,
        "architecture": "dense",
        "quality_score": 64,
        "quant": "Q4_K_M",
        "bpw": 4.5,
        "vram_mb": 19393,
        "vram_pct": 78.9,
        "estimated_tps": 45
      },
      {
        "id": "solar-pro-22b",
        "slug": "solar-pro-22b",
        "name": "SOLAR-Pro 22B",
        "family": "solar",
        "params_b": 22.1,
        "architecture": "dense",
        "quality_score": 63,
        "quant": "Q6_K",
        "bpw": 6.5,
        "vram_mb": 19187,
        "vram_pct": 78.1,
        "estimated_tps": 46
      },
      {
        "id": "mistral-small-3.2-24b",
        "slug": "mistral-small-3.2-24b",
        "name": "Mistral Small 3.2 24B",
        "family": "mistral",
        "params_b": 24,
        "architecture": "dense",
        "quality_score": 63,
        "quant": "Q6_K",
        "bpw": 6.5,
        "vram_mb": 19500,
        "vram_pct": 79.3,
        "estimated_tps": 45
      },
      {
        "id": "mistral-small-22b-2409",
        "slug": "mistral-small-22b",
        "name": "Mistral Small 22B",
        "family": "mistral",
        "params_b": 22.2,
        "architecture": "dense",
        "quality_score": 61,
        "quant": "Q6_K",
        "bpw": 6.5,
        "vram_mb": 19270,
        "vram_pct": 78.4,
        "estimated_tps": 46
      },
      {
        "id": "mllama-11b",
        "slug": "llama-3.2-11b-vision-instruct",
        "name": "Llama-3.2-11B-Vision-Instruct",
        "family": "llama",
        "params_b": 11,
        "architecture": "dense",
        "quality_score": 60,
        "quant": "Q8_0",
        "bpw": 8,
        "vram_mb": 13000,
        "vram_pct": 52.9,
        "estimated_tps": 67
      },
      {
        "id": "mllama-11b",
        "slug": "llama-3.2-11b-vision-instruct",
        "name": "Llama-3.2-11B-Vision-Instruct",
        "family": "llama",
        "params_b": 11,
        "architecture": "dense",
        "quality_score": 60,
        "quant": "Q8_0",
        "bpw": 8,
        "vram_mb": 13000,
        "vram_pct": 52.9,
        "estimated_tps": 67
      },
      {
        "id": "phi-3.5-mini-3.8b",
        "slug": "phi-3.5-mini-3.8b",
        "name": "Phi-3.5 Mini 3.8B",
        "family": "phi",
        "params_b": 3.82,
        "architecture": "dense",
        "quality_score": 59,
        "quant": "FP16",
        "bpw": 16,
        "vram_mb": 8273,
        "vram_pct": 33.7,
        "estimated_tps": 106
      },
      {
        "id": "gemma-3-12b",
        "slug": "gemma-3-12b",
        "name": "Gemma 3 12B",
        "family": "gemma",
        "params_b": 12.2,
        "architecture": "dense",
        "quality_score": 58,
        "quant": "Q8_0",
        "bpw": 8,
        "vram_mb": 12993,
        "vram_pct": 52.9,
        "estimated_tps": 68
      },
      {
        "id": "gemma-3-12b",
        "slug": "gemma-3-12b",
        "name": "Gemma 3 12B",
        "family": "gemma",
        "params_b": 12.2,
        "architecture": "dense",
        "quality_score": 58,
        "quant": "Q8_0",
        "bpw": 8,
        "vram_mb": 12993,
        "vram_pct": 52.9,
        "estimated_tps": 68
      },
      {
        "id": "qwen1.5-14b",
        "slug": "qwen-1.5-14b",
        "name": "Qwen 1.5 14B",
        "family": "qwen",
        "params_b": 14,
        "architecture": "dense",
        "quality_score": 58,
        "quant": "Q8_0",
        "bpw": 8,
        "vram_mb": 14836,
        "vram_pct": 60.4,
        "estimated_tps": 59
      },
      {
        "id": "qwen-coder-14.8b",
        "slug": "qwen2.5-coder-14b",
        "name": "Qwen2.5-Coder-14B",
        "family": "qwen",
        "params_b": 14.8,
        "architecture": "dense",
        "quality_score": 58,
        "quant": "Q8_0",
        "bpw": 8,
        "vram_mb": 15655,
        "vram_pct": 63.7,
        "estimated_tps": 56
      },
      {
        "id": "qwen-14.8b",
        "slug": "qwen2.5-14b",
        "name": "Qwen2.5-14B",
        "family": "qwen",
        "params_b": 14.8,
        "architecture": "dense",
        "quality_score": 58,
        "quant": "Q8_0",
        "bpw": 8,
        "vram_mb": 15655,
        "vram_pct": 63.7,
        "estimated_tps": 56
      },
      {
        "id": "ministral-8b",
        "slug": "ministral-8b",
        "name": "Ministral 8B",
        "family": "mistral",
        "params_b": 8,
        "architecture": "dense",
        "quality_score": 57,
        "quant": "FP16",
        "bpw": 16,
        "vram_mb": 16884,
        "vram_pct": 68.7,
        "estimated_tps": 52
      }
    ],
    "coding": [
      {
        "id": "qwen3-coder-30b-a3b",
        "slug": "qwen3-coder-30b-a3b",
        "name": "Qwen3-Coder 30B A3B",
        "family": "qwen",
        "params_b": 30,
        "architecture": "moe",
        "quality_score": 70,
        "quant": "Q4_K_M",
        "bpw": 4.5,
        "vram_mb": 16875,
        "vram_pct": 68.7,
        "estimated_tps": 52
      },
      {
        "id": "qwen-32.5b",
        "slug": "qwen2.5-32b",
        "name": "Qwen2.5-32B",
        "family": "qwen",
        "params_b": 32.5,
        "architecture": "dense",
        "quality_score": 70,
        "quant": "Q4_K_M",
        "bpw": 4.5,
        "vram_mb": 19220,
        "vram_pct": 78.2,
        "estimated_tps": 46
      },
      {
        "id": "qwen-coder-32.8b",
        "slug": "qwen2.5-coder-32b",
        "name": "Qwen2.5-Coder-32B",
        "family": "qwen",
        "params_b": 32.8,
        "architecture": "dense",
        "quality_score": 70,
        "quant": "Q4_K_M",
        "bpw": 4.5,
        "vram_mb": 19393,
        "vram_pct": 78.9,
        "estimated_tps": 45
      },
      {
        "id": "qwen-coder-32.8b",
        "slug": "qwen2.5-coder-32b",
        "name": "Qwen2.5-Coder-32B",
        "family": "qwen",
        "params_b": 32.8,
        "architecture": "dense",
        "quality_score": 70,
        "quant": "Q4_K_M",
        "bpw": 4.5,
        "vram_mb": 19393,
        "vram_pct": 78.9,
        "estimated_tps": 45
      },
      {
        "id": "qwen2.5-7b",
        "slug": "qwen-2.5-7b",
        "name": "Qwen 2.5 7B",
        "family": "qwen",
        "params_b": 7.6,
        "architecture": "dense",
        "quality_score": 68,
        "quant": "FP16",
        "bpw": 16,
        "vram_mb": 16065,
        "vram_pct": 65.4,
        "estimated_tps": 55
      },
      {
        "id": "qwen2.5-7b",
        "slug": "qwen-2.5-7b",
        "name": "Qwen 2.5 7B",
        "family": "qwen",
        "params_b": 7.6,
        "architecture": "dense",
        "quality_score": 68,
        "quant": "FP16",
        "bpw": 16,
        "vram_mb": 16065,
        "vram_pct": 65.4,
        "estimated_tps": 55
      },
      {
        "id": "qwen2.5-coder-7b",
        "slug": "qwen-2.5-coder-7b",
        "name": "Qwen 2.5 Coder 7B",
        "family": "qwen",
        "params_b": 7.6,
        "architecture": "dense",
        "quality_score": 65,
        "quant": "FP16",
        "bpw": 16,
        "vram_mb": 16065,
        "vram_pct": 65.4,
        "estimated_tps": 55
      },
      {
        "id": "qwen2.5-coder-7b",
        "slug": "qwen-2.5-coder-7b",
        "name": "Qwen 2.5 Coder 7B",
        "family": "qwen",
        "params_b": 7.6,
        "architecture": "dense",
        "quality_score": 65,
        "quant": "FP16",
        "bpw": 16,
        "vram_mb": 16065,
        "vram_pct": 65.4,
        "estimated_tps": 55
      },
      {
        "id": "phi-4-mini-3.8b",
        "slug": "phi-4-mini-3.8b",
        "name": "Phi-4-mini 3.8B",
        "family": "phi",
        "params_b": 3.8,
        "architecture": "dense",
        "quality_score": 63,
        "quant": "FP16",
        "bpw": 16,
        "vram_mb": 8282,
        "vram_pct": 33.7,
        "estimated_tps": 106
      },
      {
        "id": "mistral-small-3.2-24b",
        "slug": "mistral-small-3.2-24b",
        "name": "Mistral Small 3.2 24B",
        "family": "mistral",
        "params_b": 24,
        "architecture": "dense",
        "quality_score": 63,
        "quant": "Q6_K",
        "bpw": 6.5,
        "vram_mb": 19500,
        "vram_pct": 79.3,
        "estimated_tps": 45
      },
      {
        "id": "internlm2.5-20b",
        "slug": "internlm2.5-20b",
        "name": "InternLM2.5 20B",
        "family": "internlm",
        "params_b": 19.8,
        "architecture": "dense",
        "quality_score": 62,
        "quant": "Q6_K",
        "bpw": 6.5,
        "vram_mb": 17274,
        "vram_pct": 70.3,
        "estimated_tps": 51
      },
      {
        "id": "internlm2.5-7b",
        "slug": "internlm2.5-7b",
        "name": "InternLM2.5 7B",
        "family": "internlm",
        "params_b": 7.74,
        "architecture": "dense",
        "quality_score": 58,
        "quant": "FP16",
        "bpw": 16,
        "vram_mb": 16352,
        "vram_pct": 66.5,
        "estimated_tps": 54
      },
      {
        "id": "nemotron-3-nano-4b",
        "slug": "nemotron-3-nano-4b",
        "name": "Nemotron 3 Nano 4B",
        "family": "nemotron",
        "params_b": 4,
        "architecture": "dense",
        "quality_score": 57,
        "quant": "FP16",
        "bpw": 16,
        "vram_mb": 8000,
        "vram_pct": 32.6,
        "estimated_tps": 110
      },
      {
        "id": "qwen2.5-3b",
        "slug": "qwen-2.5-3b",
        "name": "Qwen 2.5 3B",
        "family": "qwen",
        "params_b": 3.1,
        "architecture": "dense",
        "quality_score": 55,
        "quant": "FP16",
        "bpw": 16,
        "vram_mb": 6849,
        "vram_pct": 27.9,
        "estimated_tps": 128
      },
      {
        "id": "phi-3.5-mini-3.8b",
        "slug": "phi-3.5-mini-3.8b",
        "name": "Phi-3.5 Mini 3.8B",
        "family": "phi",
        "params_b": 3.82,
        "architecture": "dense",
        "quality_score": 55,
        "quant": "FP16",
        "bpw": 16,
        "vram_mb": 8273,
        "vram_pct": 33.7,
        "estimated_tps": 106
      },
      {
        "id": "internlm2-20b",
        "slug": "internlm2-20b",
        "name": "InternLM2 20B",
        "family": "internlm",
        "params_b": 19.8,
        "architecture": "dense",
        "quality_score": 52,
        "quant": "Q6_K",
        "bpw": 6.5,
        "vram_mb": 17274,
        "vram_pct": 70.3,
        "estimated_tps": 51
      },
      {
        "id": "phi-4-reasoning-14b",
        "slug": "phi-4-reasoning-14b",
        "name": "Phi-4-reasoning 14B",
        "family": "phi",
        "params_b": 14,
        "architecture": "dense",
        "quality_score": 51,
        "quant": "Q8_0",
        "bpw": 8,
        "vram_mb": 14836,
        "vram_pct": 60.4,
        "estimated_tps": 59
      },
      {
        "id": "qwen-coder-14.8b",
        "slug": "qwen2.5-coder-14b",
        "name": "Qwen2.5-Coder-14B",
        "family": "qwen",
        "params_b": 14.8,
        "architecture": "dense",
        "quality_score": 51,
        "quant": "Q8_0",
        "bpw": 8,
        "vram_mb": 15655,
        "vram_pct": 63.7,
        "estimated_tps": 56
      },
      {
        "id": "qwen-14.8b",
        "slug": "qwen2.5-14b",
        "name": "Qwen2.5-14B",
        "family": "qwen",
        "params_b": 14.8,
        "architecture": "dense",
        "quality_score": 51,
        "quant": "Q8_0",
        "bpw": 8,
        "vram_mb": 15655,
        "vram_pct": 63.7,
        "estimated_tps": 56
      },
      {
        "id": "granite-3.1-8b",
        "slug": "granite-3.1-8b",
        "name": "Granite 3.1 8B",
        "family": "granite",
        "params_b": 8.17,
        "architecture": "dense",
        "quality_score": 51,
        "quant": "FP16",
        "bpw": 16,
        "vram_mb": 17232,
        "vram_pct": 70.1,
        "estimated_tps": 51
      },
      {
        "id": "nemotron-nemo-31.6b",
        "slug": "nvidia-nemotron-3-nano-30b-a3b-bf16",
        "name": "NVIDIA-Nemotron-3-Nano-30B-A3B-BF16",
        "family": "nemotron",
        "params_b": 31.6,
        "architecture": "dense",
        "quality_score": 49,
        "quant": "Q4_K_M",
        "bpw": 4.5,
        "vram_mb": 18702,
        "vram_pct": 76.1,
        "estimated_tps": 47
      },
      {
        "id": "mistral-small-22b-2409",
        "slug": "mistral-small-22b",
        "name": "Mistral Small 22B",
        "family": "mistral",
        "params_b": 22.2,
        "architecture": "dense",
        "quality_score": 49,
        "quant": "Q6_K",
        "bpw": 6.5,
        "vram_mb": 19270,
        "vram_pct": 78.4,
        "estimated_tps": 46
      },
      {
        "id": "exaone-32b",
        "slug": "exaone-4.0-32b",
        "name": "EXAONE-4.0-32B",
        "family": "exaone",
        "params_b": 32,
        "architecture": "dense",
        "quality_score": 48,
        "quant": "Q4_K_M",
        "bpw": 4.5,
        "vram_mb": 18932,
        "vram_pct": 77,
        "estimated_tps": 46
      },
      {
        "id": "qwen2.5-1.5b",
        "slug": "qwen-2.5-1.5b",
        "name": "Qwen 2.5 1.5B",
        "family": "qwen",
        "params_b": 1.5,
        "architecture": "dense",
        "quality_score": 47,
        "quant": "FP16",
        "bpw": 16,
        "vram_mb": 3572,
        "vram_pct": 14.5,
        "estimated_tps": 246
      },
      {
        "id": "granite-3.0-8b",
        "slug": "granite-3.0-8b",
        "name": "Granite 3.0 8B",
        "family": "granite",
        "params_b": 8.17,
        "architecture": "dense",
        "quality_score": 47,
        "quant": "FP16",
        "bpw": 16,
        "vram_mb": 17232,
        "vram_pct": 70.1,
        "estimated_tps": 51
      }
    ],
    "reasoning": [
      {
        "id": "phi-4-14b-base",
        "slug": "phi-4-14b",
        "name": "Phi-4 14B",
        "family": "phi",
        "params_b": 14,
        "architecture": "dense",
        "quality_score": 68,
        "quant": "Q8_0",
        "bpw": 8,
        "vram_mb": 16486,
        "vram_pct": 67.1,
        "estimated_tps": 53
      },
      {
        "id": "deepseek-r1-0528-qwen3-8b",
        "slug": "deepseek-r1-0528-qwen3-8b",
        "name": "DeepSeek-R1-0528-Qwen3-8B",
        "family": "qwen",
        "params_b": 8.2,
        "architecture": "dense",
        "quality_score": 67,
        "quant": "FP16",
        "bpw": 16,
        "vram_mb": 16400,
        "vram_pct": 66.7,
        "estimated_tps": 53
      },
      {
        "id": "qwen3-30b-a3b-instruct-2507",
        "slug": "qwen3-30b-a3b-instruct-2507",
        "name": "Qwen3-30B-A3B-Instruct-2507",
        "family": "qwen",
        "params_b": 30,
        "architecture": "moe",
        "quality_score": 58,
        "quant": "Q4_K_M",
        "bpw": 4.5,
        "vram_mb": 16875,
        "vram_pct": 68.7,
        "estimated_tps": 52
      },
      {
        "id": "qwen3-4b-instruct-2507",
        "slug": "qwen3-4b-instruct-2507",
        "name": "Qwen3-4B-Instruct-2507",
        "family": "qwen",
        "params_b": 4,
        "architecture": "dense",
        "quality_score": 54,
        "quant": "FP16",
        "bpw": 16,
        "vram_mb": 8000,
        "vram_pct": 32.6,
        "estimated_tps": 110
      },
      {
        "id": "phi-4-reasoning-plus-14b",
        "slug": "phi-4-reasoning-plus-14b",
        "name": "Phi-4 Reasoning Plus 14B",
        "family": "phi",
        "params_b": 14,
        "architecture": "dense",
        "quality_score": 53,
        "quant": "Q8_0",
        "bpw": 8,
        "vram_mb": 14000,
        "vram_pct": 57,
        "estimated_tps": 63
      },
      {
        "id": "qwen3-coder-30b-a3b",
        "slug": "qwen3-coder-30b-a3b",
        "name": "Qwen3-Coder 30B A3B",
        "family": "qwen",
        "params_b": 30,
        "architecture": "moe",
        "quality_score": 53,
        "quant": "Q4_K_M",
        "bpw": 4.5,
        "vram_mb": 16875,
        "vram_pct": 68.7,
        "estimated_tps": 52
      },
      {
        "id": "deepseek-r1-distill-qwen-14b",
        "slug": "deepseek-r1-distill-qwen-14b",
        "name": "DeepSeek R1 Distill Qwen 14B",
        "family": "deepseek",
        "params_b": 14.8,
        "architecture": "dense",
        "quality_score": 49,
        "quant": "Q8_0",
        "bpw": 8,
        "vram_mb": 15655,
        "vram_pct": 63.7,
        "estimated_tps": 56
      },
      {
        "id": "phi-3.5-mini-3.8b",
        "slug": "phi-3.5-mini-3.8b",
        "name": "Phi-3.5 Mini 3.8B",
        "family": "phi",
        "params_b": 3.82,
        "architecture": "dense",
        "quality_score": 44,
        "quant": "FP16",
        "bpw": 16,
        "vram_mb": 8273,
        "vram_pct": 33.7,
        "estimated_tps": 106
      },
      {
        "id": "nemotron-3-nano-4b",
        "slug": "nemotron-3-nano-4b",
        "name": "Nemotron 3 Nano 4B",
        "family": "nemotron",
        "params_b": 4,
        "architecture": "dense",
        "quality_score": 41,
        "quant": "FP16",
        "bpw": 16,
        "vram_mb": 8000,
        "vram_pct": 32.6,
        "estimated_tps": 110
      },
      {
        "id": "phi-4-mini-3.8b",
        "slug": "phi-4-mini-3.8b",
        "name": "Phi-4-mini 3.8B",
        "family": "phi",
        "params_b": 3.8,
        "architecture": "dense",
        "quality_score": 40,
        "quant": "FP16",
        "bpw": 16,
        "vram_mb": 8282,
        "vram_pct": 33.7,
        "estimated_tps": 106
      },
      {
        "id": "nemotron-nemo-31.6b",
        "slug": "nvidia-nemotron-3-nano-30b-a3b-bf16",
        "name": "NVIDIA-Nemotron-3-Nano-30B-A3B-BF16",
        "family": "nemotron",
        "params_b": 31.6,
        "architecture": "dense",
        "quality_score": 40,
        "quant": "Q4_K_M",
        "bpw": 4.5,
        "vram_mb": 18702,
        "vram_pct": 76.1,
        "estimated_tps": 47
      },
      {
        "id": "yi-1.5-34b",
        "slug": "yi-1.5-34b",
        "name": "Yi-1.5 34B",
        "family": "yi",
        "params_b": 34.4,
        "architecture": "dense",
        "quality_score": 39,
        "quant": "Q4_K_M",
        "bpw": 4.5,
        "vram_mb": 20614,
        "vram_pct": 83.9,
        "estimated_tps": 43
      },
      {
        "id": "deepseek-r1-distill-llama-8b",
        "slug": "deepseek-r1-distill-llama-8b",
        "name": "DeepSeek R1 Distill Llama 8B",
        "family": "deepseek",
        "params_b": 8,
        "architecture": "dense",
        "quality_score": 38,
        "quant": "FP16",
        "bpw": 16,
        "vram_mb": 16884,
        "vram_pct": 68.7,
        "estimated_tps": 52
      },
      {
        "id": "mllama-11b",
        "slug": "llama-3.2-11b-vision-instruct",
        "name": "Llama-3.2-11B-Vision-Instruct",
        "family": "llama",
        "params_b": 11,
        "architecture": "dense",
        "quality_score": 36,
        "quant": "Q8_0",
        "bpw": 8,
        "vram_mb": 13000,
        "vram_pct": 52.9,
        "estimated_tps": 67
      },
      {
        "id": "mllama-11b",
        "slug": "llama-3.2-11b-vision-instruct",
        "name": "Llama-3.2-11B-Vision-Instruct",
        "family": "llama",
        "params_b": 11,
        "architecture": "dense",
        "quality_score": 36,
        "quant": "Q8_0",
        "bpw": 8,
        "vram_mb": 13000,
        "vram_pct": 52.9,
        "estimated_tps": 67
      },
      {
        "id": "qwen2.5-7b",
        "slug": "qwen-2.5-7b",
        "name": "Qwen 2.5 7B",
        "family": "qwen",
        "params_b": 7.6,
        "architecture": "dense",
        "quality_score": 36,
        "quant": "FP16",
        "bpw": 16,
        "vram_mb": 16065,
        "vram_pct": 65.4,
        "estimated_tps": 55
      },
      {
        "id": "qwen2.5-7b",
        "slug": "qwen-2.5-7b",
        "name": "Qwen 2.5 7B",
        "family": "qwen",
        "params_b": 7.6,
        "architecture": "dense",
        "quality_score": 36,
        "quant": "FP16",
        "bpw": 16,
        "vram_mb": 16065,
        "vram_pct": 65.4,
        "estimated_tps": 55
      },
      {
        "id": "qwen-27.8b",
        "slug": "qwen3.5-27b",
        "name": "Qwen3.5-27B",
        "family": "qwen",
        "params_b": 27.8,
        "architecture": "dense",
        "quality_score": 36,
        "quant": "Q4_K_M",
        "bpw": 4.5,
        "vram_mb": 16513,
        "vram_pct": 67.2,
        "estimated_tps": 53
      },
      {
        "id": "deepseek-v2-lite-16b",
        "slug": "deepseek-v2-lite-16b",
        "name": "DeepSeek V2 Lite 16B",
        "family": "deepseek",
        "params_b": 16,
        "architecture": "moe",
        "quality_score": 36,
        "quant": "Q8_0",
        "bpw": 8,
        "vram_mb": 16884,
        "vram_pct": 68.7,
        "estimated_tps": 52
      },
      {
        "id": "qwen1.5-32b",
        "slug": "qwen-1.5-32b",
        "name": "Qwen 1.5 32B",
        "family": "qwen",
        "params_b": 32,
        "architecture": "dense",
        "quality_score": 36,
        "quant": "Q4_K_M",
        "bpw": 4.5,
        "vram_mb": 18932,
        "vram_pct": 77,
        "estimated_tps": 46
      },
      {
        "id": "qwen-32.5b",
        "slug": "qwen2.5-32b",
        "name": "Qwen2.5-32B",
        "family": "qwen",
        "params_b": 32.5,
        "architecture": "dense",
        "quality_score": 36,
        "quant": "Q4_K_M",
        "bpw": 4.5,
        "vram_mb": 19220,
        "vram_pct": 78.2,
        "estimated_tps": 46
      },
      {
        "id": "qwen-r1-32.8b",
        "slug": "deepseek-r1-distill-qwen-32b",
        "name": "DeepSeek-R1-Distill-Qwen-32B",
        "family": "qwen",
        "params_b": 32.8,
        "architecture": "dense",
        "quality_score": 36,
        "quant": "Q4_K_M",
        "bpw": 4.5,
        "vram_mb": 19393,
        "vram_pct": 78.9,
        "estimated_tps": 45
      },
      {
        "id": "qwen-coder-32.8b",
        "slug": "qwen2.5-coder-32b",
        "name": "Qwen2.5-Coder-32B",
        "family": "qwen",
        "params_b": 32.8,
        "architecture": "dense",
        "quality_score": 36,
        "quant": "Q4_K_M",
        "bpw": 4.5,
        "vram_mb": 19393,
        "vram_pct": 78.9,
        "estimated_tps": 45
      },
      {
        "id": "qwen-coder-32.8b",
        "slug": "qwen2.5-coder-32b",
        "name": "Qwen2.5-Coder-32B",
        "family": "qwen",
        "params_b": 32.8,
        "architecture": "dense",
        "quality_score": 36,
        "quant": "Q4_K_M",
        "bpw": 4.5,
        "vram_mb": 19393,
        "vram_pct": 78.9,
        "estimated_tps": 45
      },
      {
        "id": "qwen-r1-32.8b",
        "slug": "deepseek-r1-distill-qwen-32b",
        "name": "DeepSeek-R1-Distill-Qwen-32B",
        "family": "qwen",
        "params_b": 32.8,
        "architecture": "dense",
        "quality_score": 36,
        "quant": "Q4_K_M",
        "bpw": 4.5,
        "vram_mb": 19393,
        "vram_pct": 78.9,
        "estimated_tps": 45
      }
    ],
    "creative": [
      {
        "id": "qwen3-30b-a3b-instruct-2507",
        "slug": "qwen3-30b-a3b-instruct-2507",
        "name": "Qwen3-30B-A3B-Instruct-2507",
        "family": "qwen",
        "params_b": 30,
        "architecture": "moe",
        "quality_score": 73,
        "quant": "Q4_K_M",
        "bpw": 4.5,
        "vram_mb": 16875,
        "vram_pct": 68.7,
        "estimated_tps": 52
      },
      {
        "id": "deepseek-r1-0528-qwen3-8b",
        "slug": "deepseek-r1-0528-qwen3-8b",
        "name": "DeepSeek-R1-0528-Qwen3-8B",
        "family": "qwen",
        "params_b": 8.2,
        "architecture": "dense",
        "quality_score": 70,
        "quant": "FP16",
        "bpw": 16,
        "vram_mb": 16400,
        "vram_pct": 66.7,
        "estimated_tps": 53
      },
      {
        "id": "phi-4-14b-base",
        "slug": "phi-4-14b",
        "name": "Phi-4 14B",
        "family": "phi",
        "params_b": 14,
        "architecture": "dense",
        "quality_score": 69,
        "quant": "Q8_0",
        "bpw": 8,
        "vram_mb": 16486,
        "vram_pct": 67.1,
        "estimated_tps": 53
      },
      {
        "id": "qwen3-coder-30b-a3b",
        "slug": "qwen3-coder-30b-a3b",
        "name": "Qwen3-Coder 30B A3B",
        "family": "qwen",
        "params_b": 30,
        "architecture": "moe",
        "quality_score": 69,
        "quant": "Q4_K_M",
        "bpw": 4.5,
        "vram_mb": 16875,
        "vram_pct": 68.7,
        "estimated_tps": 52
      },
      {
        "id": "qwen3-4b-instruct-2507",
        "slug": "qwen3-4b-instruct-2507",
        "name": "Qwen3-4B-Instruct-2507",
        "family": "qwen",
        "params_b": 4,
        "architecture": "dense",
        "quality_score": 66,
        "quant": "FP16",
        "bpw": 16,
        "vram_mb": 8000,
        "vram_pct": 32.6,
        "estimated_tps": 110
      },
      {
        "id": "nemotron-nemo-31.6b",
        "slug": "nvidia-nemotron-3-nano-30b-a3b-bf16",
        "name": "NVIDIA-Nemotron-3-Nano-30B-A3B-BF16",
        "family": "nemotron",
        "params_b": 31.6,
        "architecture": "dense",
        "quality_score": 65,
        "quant": "Q4_K_M",
        "bpw": 4.5,
        "vram_mb": 18702,
        "vram_pct": 76.1,
        "estimated_tps": 47
      },
      {
        "id": "qwen-27.8b",
        "slug": "qwen3.5-27b",
        "name": "Qwen3.5-27B",
        "family": "qwen",
        "params_b": 27.8,
        "architecture": "dense",
        "quality_score": 64,
        "quant": "Q4_K_M",
        "bpw": 4.5,
        "vram_mb": 16513,
        "vram_pct": 67.2,
        "estimated_tps": 53
      },
      {
        "id": "qwen1.5-32b",
        "slug": "qwen-1.5-32b",
        "name": "Qwen 1.5 32B",
        "family": "qwen",
        "params_b": 32,
        "architecture": "dense",
        "quality_score": 64,
        "quant": "Q4_K_M",
        "bpw": 4.5,
        "vram_mb": 18932,
        "vram_pct": 77,
        "estimated_tps": 46
      },
      {
        "id": "qwen-32.5b",
        "slug": "qwen2.5-32b",
        "name": "Qwen2.5-32B",
        "family": "qwen",
        "params_b": 32.5,
        "architecture": "dense",
        "quality_score": 64,
        "quant": "Q4_K_M",
        "bpw": 4.5,
        "vram_mb": 19220,
        "vram_pct": 78.2,
        "estimated_tps": 46
      },
      {
        "id": "qwen-r1-32.8b",
        "slug": "deepseek-r1-distill-qwen-32b",
        "name": "DeepSeek-R1-Distill-Qwen-32B",
        "family": "qwen",
        "params_b": 32.8,
        "architecture": "dense",
        "quality_score": 64,
        "quant": "Q4_K_M",
        "bpw": 4.5,
        "vram_mb": 19393,
        "vram_pct": 78.9,
        "estimated_tps": 45
      },
      {
        "id": "qwen-coder-32.8b",
        "slug": "qwen2.5-coder-32b",
        "name": "Qwen2.5-Coder-32B",
        "family": "qwen",
        "params_b": 32.8,
        "architecture": "dense",
        "quality_score": 64,
        "quant": "Q4_K_M",
        "bpw": 4.5,
        "vram_mb": 19393,
        "vram_pct": 78.9,
        "estimated_tps": 45
      },
      {
        "id": "qwen-coder-32.8b",
        "slug": "qwen2.5-coder-32b",
        "name": "Qwen2.5-Coder-32B",
        "family": "qwen",
        "params_b": 32.8,
        "architecture": "dense",
        "quality_score": 64,
        "quant": "Q4_K_M",
        "bpw": 4.5,
        "vram_mb": 19393,
        "vram_pct": 78.9,
        "estimated_tps": 45
      },
      {
        "id": "qwen-r1-32.8b",
        "slug": "deepseek-r1-distill-qwen-32b",
        "name": "DeepSeek-R1-Distill-Qwen-32B",
        "family": "qwen",
        "params_b": 32.8,
        "architecture": "dense",
        "quality_score": 64,
        "quant": "Q4_K_M",
        "bpw": 4.5,
        "vram_mb": 19393,
        "vram_pct": 78.9,
        "estimated_tps": 45
      },
      {
        "id": "solar-pro-22b",
        "slug": "solar-pro-22b",
        "name": "SOLAR-Pro 22B",
        "family": "solar",
        "params_b": 22.1,
        "architecture": "dense",
        "quality_score": 63,
        "quant": "Q6_K",
        "bpw": 6.5,
        "vram_mb": 19187,
        "vram_pct": 78.1,
        "estimated_tps": 46
      },
      {
        "id": "mistral-small-3.2-24b",
        "slug": "mistral-small-3.2-24b",
        "name": "Mistral Small 3.2 24B",
        "family": "mistral",
        "params_b": 24,
        "architecture": "dense",
        "quality_score": 63,
        "quant": "Q6_K",
        "bpw": 6.5,
        "vram_mb": 19500,
        "vram_pct": 79.3,
        "estimated_tps": 45
      },
      {
        "id": "mistral-small-22b-2409",
        "slug": "mistral-small-22b",
        "name": "Mistral Small 22B",
        "family": "mistral",
        "params_b": 22.2,
        "architecture": "dense",
        "quality_score": 61,
        "quant": "Q6_K",
        "bpw": 6.5,
        "vram_mb": 19270,
        "vram_pct": 78.4,
        "estimated_tps": 46
      },
      {
        "id": "mllama-11b",
        "slug": "llama-3.2-11b-vision-instruct",
        "name": "Llama-3.2-11B-Vision-Instruct",
        "family": "llama",
        "params_b": 11,
        "architecture": "dense",
        "quality_score": 60,
        "quant": "Q8_0",
        "bpw": 8,
        "vram_mb": 13000,
        "vram_pct": 52.9,
        "estimated_tps": 67
      },
      {
        "id": "mllama-11b",
        "slug": "llama-3.2-11b-vision-instruct",
        "name": "Llama-3.2-11B-Vision-Instruct",
        "family": "llama",
        "params_b": 11,
        "architecture": "dense",
        "quality_score": 60,
        "quant": "Q8_0",
        "bpw": 8,
        "vram_mb": 13000,
        "vram_pct": 52.9,
        "estimated_tps": 67
      },
      {
        "id": "phi-3.5-mini-3.8b",
        "slug": "phi-3.5-mini-3.8b",
        "name": "Phi-3.5 Mini 3.8B",
        "family": "phi",
        "params_b": 3.82,
        "architecture": "dense",
        "quality_score": 59,
        "quant": "FP16",
        "bpw": 16,
        "vram_mb": 8273,
        "vram_pct": 33.7,
        "estimated_tps": 106
      },
      {
        "id": "gemma-3-12b",
        "slug": "gemma-3-12b",
        "name": "Gemma 3 12B",
        "family": "gemma",
        "params_b": 12.2,
        "architecture": "dense",
        "quality_score": 58,
        "quant": "Q8_0",
        "bpw": 8,
        "vram_mb": 12993,
        "vram_pct": 52.9,
        "estimated_tps": 68
      },
      {
        "id": "gemma-3-12b",
        "slug": "gemma-3-12b",
        "name": "Gemma 3 12B",
        "family": "gemma",
        "params_b": 12.2,
        "architecture": "dense",
        "quality_score": 58,
        "quant": "Q8_0",
        "bpw": 8,
        "vram_mb": 12993,
        "vram_pct": 52.9,
        "estimated_tps": 68
      },
      {
        "id": "qwen1.5-14b",
        "slug": "qwen-1.5-14b",
        "name": "Qwen 1.5 14B",
        "family": "qwen",
        "params_b": 14,
        "architecture": "dense",
        "quality_score": 58,
        "quant": "Q8_0",
        "bpw": 8,
        "vram_mb": 14836,
        "vram_pct": 60.4,
        "estimated_tps": 59
      },
      {
        "id": "qwen-coder-14.8b",
        "slug": "qwen2.5-coder-14b",
        "name": "Qwen2.5-Coder-14B",
        "family": "qwen",
        "params_b": 14.8,
        "architecture": "dense",
        "quality_score": 58,
        "quant": "Q8_0",
        "bpw": 8,
        "vram_mb": 15655,
        "vram_pct": 63.7,
        "estimated_tps": 56
      },
      {
        "id": "qwen-14.8b",
        "slug": "qwen2.5-14b",
        "name": "Qwen2.5-14B",
        "family": "qwen",
        "params_b": 14.8,
        "architecture": "dense",
        "quality_score": 58,
        "quant": "Q8_0",
        "bpw": 8,
        "vram_mb": 15655,
        "vram_pct": 63.7,
        "estimated_tps": 56
      },
      {
        "id": "ministral-8b",
        "slug": "ministral-8b",
        "name": "Ministral 8B",
        "family": "mistral",
        "params_b": 8,
        "architecture": "dense",
        "quality_score": 57,
        "quant": "FP16",
        "bpw": 16,
        "vram_mb": 16884,
        "vram_pct": 68.7,
        "estimated_tps": 52
      }
    ]
  }
}