{
  "$schema": "https://tokenscost.com/data/pricing.schema.json",
  "source": "https://tokenscost.com",
  "canonical": "https://tokenscost.com/data/pricing.json",
  "license": "CC-BY-4.0",
  "licenseUrl": "https://creativecommons.org/licenses/by/4.0/",
  "attribution": "Pricing data via Tokenscost (tokenscost.com)",
  "generatedAt": "2026-04-18T23:56:24.109Z",
  "lastUpdated": "2026-04-18",
  "unit": "USD per 1,000,000 tokens",
  "sources": {
    "openai": "https://platform.openai.com/docs/pricing",
    "anthropic": "https://www.anthropic.com/pricing",
    "google": "https://ai.google.dev/gemini-api/docs/pricing",
    "mistral": "https://mistral.ai/pricing",
    "groq": "https://groq.com/pricing",
    "meta": "https://www.together.ai/pricing",
    "nvidia": "https://build.nvidia.com/"
  },
  "modelCount": 27,
  "models": [
    {
      "id": "gpt-5",
      "name": "GPT-5",
      "provider": "OpenAI",
      "providerLogo": "openai",
      "inputPricePerM": 1.25,
      "outputPricePerM": 10,
      "contextWindow": 400000,
      "notes": "GPT-5 base — strong general reasoning, 8× output ratio",
      "batchDiscount": 0.5,
      "deprecated": false,
      "tokenizer": "tiktoken",
      "isReasoning": true,
      "reasoningRatio": 4
    },
    {
      "id": "gpt-5-mini",
      "name": "GPT-5 mini",
      "provider": "OpenAI",
      "providerLogo": "openai",
      "inputPricePerM": 0.25,
      "outputPricePerM": 2,
      "contextWindow": 400000,
      "notes": "Mid-tier GPT-5 — best price/perf for most production traffic",
      "batchDiscount": 0.5,
      "deprecated": false,
      "tokenizer": "tiktoken",
      "isReasoning": true,
      "reasoningRatio": 3
    },
    {
      "id": "gpt-5-nano",
      "name": "GPT-5 nano",
      "provider": "OpenAI",
      "providerLogo": "openai",
      "inputPricePerM": 0.05,
      "outputPricePerM": 0.4,
      "contextWindow": 400000,
      "notes": "Cheapest GPT-5 — ultra-high-volume tasks",
      "batchDiscount": 0.5,
      "deprecated": false,
      "tokenizer": "tiktoken"
    },
    {
      "id": "gpt-4.1",
      "name": "GPT-4.1",
      "provider": "OpenAI",
      "providerLogo": "openai",
      "inputPricePerM": 2,
      "outputPricePerM": 8,
      "contextWindow": 1050000,
      "notes": "Legacy — 1M context, still strong on coding/instructions",
      "batchDiscount": 0.5,
      "deprecated": false,
      "tokenizer": "tiktoken"
    },
    {
      "id": "gpt-4o",
      "name": "GPT-4o",
      "provider": "OpenAI",
      "providerLogo": "openai",
      "inputPricePerM": 2.5,
      "outputPricePerM": 10,
      "contextWindow": 128000,
      "notes": "Multimodal flagship (legacy lineup)",
      "batchDiscount": 0.5,
      "deprecated": false,
      "tokenizer": "tiktoken"
    },
    {
      "id": "gpt-4o-mini",
      "name": "GPT-4o mini",
      "provider": "OpenAI",
      "providerLogo": "openai",
      "inputPricePerM": 0.15,
      "outputPricePerM": 0.6,
      "contextWindow": 128000,
      "notes": "Cheap multimodal workhorse",
      "batchDiscount": 0.5,
      "deprecated": false,
      "tokenizer": "tiktoken"
    },
    {
      "id": "claude-opus-4-7",
      "name": "Claude Opus 4.7",
      "provider": "Anthropic",
      "providerLogo": "anthropic",
      "inputPricePerM": 5,
      "outputPricePerM": 25,
      "contextWindow": 200000,
      "notes": "Most capable Claude — 1M context in beta",
      "batchDiscount": 0.5,
      "deprecated": false,
      "tokenizer": "approx"
    },
    {
      "id": "claude-sonnet-4-6",
      "name": "Claude Sonnet 4.6",
      "provider": "Anthropic",
      "providerLogo": "anthropic",
      "inputPricePerM": 3,
      "outputPricePerM": 15,
      "contextWindow": 200000,
      "notes": "Best balance of speed and intelligence — 1M context in beta",
      "batchDiscount": 0.5,
      "deprecated": false,
      "tokenizer": "approx"
    },
    {
      "id": "claude-haiku-4-5",
      "name": "Claude Haiku 4.5",
      "provider": "Anthropic",
      "providerLogo": "anthropic",
      "inputPricePerM": 1,
      "outputPricePerM": 5,
      "contextWindow": 200000,
      "notes": "Fastest, most compact Claude",
      "batchDiscount": 0.5,
      "deprecated": false,
      "tokenizer": "approx"
    },
    {
      "id": "gemini-3.1-pro",
      "name": "Gemini 3.1 Pro",
      "provider": "Google",
      "providerLogo": "google",
      "inputPricePerM": 2,
      "outputPricePerM": 12,
      "contextWindow": 1000000,
      "notes": "New flagship (Feb 2026) — top-tier reasoning + native video",
      "batchDiscount": null,
      "deprecated": false,
      "tokenizer": "approx"
    },
    {
      "id": "gemini-2.5-pro",
      "name": "Gemini 2.5 Pro",
      "provider": "Google",
      "providerLogo": "google",
      "inputPricePerM": 1.25,
      "outputPricePerM": 10,
      "inputPricePerMHighContext": 2.5,
      "outputPricePerMHighContext": 15,
      "highContextThreshold": 200000,
      "contextWindow": 1000000,
      "notes": "Tiered pricing: ≤200K vs >200K input context",
      "batchDiscount": null,
      "deprecated": false,
      "tokenizer": "approx"
    },
    {
      "id": "gemini-2.5-flash",
      "name": "Gemini 2.5 Flash",
      "provider": "Google",
      "providerLogo": "google",
      "inputPricePerM": 0.15,
      "outputPricePerM": 0.6,
      "contextWindow": 1000000,
      "notes": "High-throughput, cost-sensitive tasks",
      "batchDiscount": null,
      "deprecated": false,
      "tokenizer": "approx"
    },
    {
      "id": "gemini-2.5-flash-lite",
      "name": "Gemini 2.5 Flash-Lite",
      "provider": "Google",
      "providerLogo": "google",
      "inputPricePerM": 0.1,
      "outputPricePerM": 0.4,
      "contextWindow": 1000000,
      "notes": "Cheapest current Gemini — high-volume tasks",
      "batchDiscount": null,
      "deprecated": false,
      "tokenizer": "approx"
    },
    {
      "id": "mistral-large-2411",
      "name": "Mistral Large 2411",
      "provider": "Mistral",
      "providerLogo": "mistral",
      "inputPricePerM": 2,
      "outputPricePerM": 6,
      "contextWindow": 131000,
      "notes": "Mistral's flagship (Nov 2024 release, current top-tier)",
      "batchDiscount": null,
      "deprecated": false,
      "tokenizer": "approx"
    },
    {
      "id": "mistral-medium-3",
      "name": "Mistral Medium 3",
      "provider": "Mistral",
      "providerLogo": "mistral",
      "inputPricePerM": 1,
      "outputPricePerM": 3,
      "contextWindow": 128000,
      "notes": "Strong mid-tier Mistral with function calling",
      "batchDiscount": null,
      "deprecated": false,
      "tokenizer": "approx"
    },
    {
      "id": "mistral-small-3-1",
      "name": "Mistral Small 3.1",
      "provider": "Mistral",
      "providerLogo": "mistral",
      "inputPricePerM": 0.1,
      "outputPricePerM": 0.3,
      "contextWindow": 131000,
      "notes": "Extremely cost-efficient",
      "batchDiscount": null,
      "deprecated": false,
      "tokenizer": "approx"
    },
    {
      "id": "llama-3.3-70b-groq",
      "name": "Llama 3.3 70B Versatile",
      "provider": "Groq",
      "providerLogo": "groq",
      "inputPricePerM": 0.59,
      "outputPricePerM": 0.79,
      "contextWindow": 128000,
      "notes": "Meta Llama 3.3 70B on Groq LPU — flagship general-purpose",
      "batchDiscount": null,
      "deprecated": false,
      "tokenizer": "approx"
    },
    {
      "id": "llama-3.1-8b-instant-groq",
      "name": "Llama 3.1 8B Instant",
      "provider": "Groq",
      "providerLogo": "groq",
      "inputPricePerM": 0.05,
      "outputPricePerM": 0.08,
      "contextWindow": 128000,
      "notes": "Cheapest, fastest Groq model — sub-second responses",
      "batchDiscount": null,
      "deprecated": false,
      "tokenizer": "approx"
    },
    {
      "id": "gpt-oss-120b-groq",
      "name": "GPT-OSS 120B",
      "provider": "Groq",
      "providerLogo": "groq",
      "inputPricePerM": 0.15,
      "outputPricePerM": 0.75,
      "contextWindow": 131000,
      "notes": "OpenAI open-weight 120B reasoning model on Groq",
      "batchDiscount": null,
      "deprecated": false,
      "tokenizer": "approx",
      "isReasoning": true,
      "reasoningRatio": 3
    },
    {
      "id": "kimi-k2-groq",
      "name": "Kimi K2 Instruct",
      "provider": "Groq",
      "providerLogo": "groq",
      "inputPricePerM": 1,
      "outputPricePerM": 3,
      "contextWindow": 131000,
      "notes": "Moonshot Kimi K2 — strong tool use & coding",
      "batchDiscount": null,
      "deprecated": false,
      "tokenizer": "approx"
    },
    {
      "id": "qwen3-32b-groq",
      "name": "Qwen3 32B",
      "provider": "Groq",
      "providerLogo": "groq",
      "inputPricePerM": 0.29,
      "outputPricePerM": 0.59,
      "contextWindow": 131000,
      "notes": "Alibaba Qwen3 32B with reasoning mode",
      "batchDiscount": null,
      "deprecated": false,
      "tokenizer": "approx",
      "isReasoning": true,
      "reasoningRatio": 2
    },
    {
      "id": "llama-4-maverick",
      "name": "Llama 4 Maverick",
      "provider": "Meta",
      "providerLogo": "meta",
      "inputPricePerM": 0.27,
      "outputPricePerM": 0.85,
      "contextWindow": 1000000,
      "notes": "Meta's flagship MoE — 400B params, 17B active. Pricing via Together AI.",
      "batchDiscount": null,
      "deprecated": false,
      "tokenizer": "approx"
    },
    {
      "id": "llama-4-scout",
      "name": "Llama 4 Scout",
      "provider": "Meta",
      "providerLogo": "meta",
      "inputPricePerM": 0.18,
      "outputPricePerM": 0.59,
      "contextWindow": 10000000,
      "notes": "10M context window — long-doc specialist. Pricing via Together AI.",
      "batchDiscount": null,
      "deprecated": false,
      "tokenizer": "approx"
    },
    {
      "id": "llama-3.3-70b-meta",
      "name": "Llama 3.3 70B",
      "provider": "Meta",
      "providerLogo": "meta",
      "inputPricePerM": 0.88,
      "outputPricePerM": 0.88,
      "contextWindow": 128000,
      "notes": "Llama 3.3 70B Instruct — pricing via Together AI reference host.",
      "batchDiscount": null,
      "deprecated": false,
      "tokenizer": "approx"
    },
    {
      "id": "nemotron-3-super-120b",
      "name": "Nemotron 3 Super 120B",
      "provider": "Nvidia",
      "providerLogo": "nvidia",
      "inputPricePerM": 0.3,
      "outputPricePerM": 0.8,
      "contextWindow": 262000,
      "notes": "NVIDIA flagship (Mar 2026 GTC) — hybrid Mamba-Transformer MoE, 120B/12B active. Median across hosted providers.",
      "batchDiscount": null,
      "deprecated": false,
      "tokenizer": "approx",
      "isReasoning": true,
      "reasoningRatio": 3
    },
    {
      "id": "llama-3-3-nemotron-super-49b",
      "name": "Llama 3.3 Nemotron Super 49B",
      "provider": "Nvidia",
      "providerLogo": "nvidia",
      "inputPricePerM": 0.4,
      "outputPricePerM": 0.4,
      "contextWindow": 128000,
      "notes": "NVIDIA-tuned Llama 3.3 49B — strong agentic reasoning, symmetric in/out pricing.",
      "batchDiscount": null,
      "deprecated": false,
      "tokenizer": "approx"
    },
    {
      "id": "nemotron-nano-9b-v2",
      "name": "Nemotron Nano 9B v2",
      "provider": "Nvidia",
      "providerLogo": "nvidia",
      "inputPricePerM": 0.1,
      "outputPricePerM": 0.3,
      "contextWindow": 128000,
      "notes": "Cheapest, fastest Nemotron — high-throughput agent + RAG workloads.",
      "batchDiscount": null,
      "deprecated": false,
      "tokenizer": "approx"
    }
  ]
}
