{"object":"list","data":[{"api":"chat","id":"anthropic/claude-sonnet-4","object":"model","created":1747933971,"owned_by":"system","input_price":0.000003,"caching_price":0.00000375,"cached_price":3e-7,"output_price":0.000015,"max_output_tokens":64000,"context_window":1000000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Claude Sonnet 4 significantly improves on Sonnet 3.7's industry-leading capabilities, excelling in coding with a state-of-the-art 72.7% on SWE-bench. The model balances performance and efficiency for internal and external use cases, with enhanced steerability for greater control over implementations.","data_retention":true,"data_retention_days":30,"geolocation":"global"},{"api":"chat","id":"anthropic/claude-opus-4-6","object":"model","created":1770314250,"owned_by":"system","input_price":0.000005,"caching_price":0.00000625,"cached_price":5e-7,"output_price":0.000025,"max_output_tokens":128000,"context_window":1000000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Claude Opus 4.6 is Anthropic's most powerful model yet and the best coding model in the world.","data_retention":true,"data_retention_days":30,"geolocation":"global"},{"api":"chat","id":"anthropic/claude-sonnet-4-6","object":"model","created":1771351251,"owned_by":"system","input_price":0.000003,"caching_price":0.00000375,"cached_price":3e-7,"output_price":0.000015,"max_output_tokens":64000,"context_window":1000000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Claude Sonnet 4.6 is Anthropic's best coding model in the world, leading on SWE-bench Verified (77.2%) and OSWorld (61.4%). It delivers sustained autonomous performance on complex tasks for over 30 hours—up from seven hours for Opus 4—maintaining focus and reliability throughout the entire software development lifecycle, with enhanced capabilities in tool handling, memory management, and context processing that make it the strongest model for building complex agents.","data_retention":true,"data_retention_days":30,"geolocation":"global"},{"api":"chat","id":"anthropic/claude-opus-4-5","object":"model","created":1754414845,"owned_by":"system","input_price":0.000005,"caching_price":0.00000625,"cached_price":5e-7,"output_price":0.000025,"max_output_tokens":64000,"context_window":200000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Claude Opus 4 is Anthropic's most powerful model yet and the best coding model in the world, leading on SWE-bench (72.5%) and Terminal-bench (43.2%). It delivers sustained performance on long-running tasks that require focused effort and thousands of steps, with the ability to work continuously for several hours—dramatically outperforming all Sonnet models and significantly expanding what AI agents can accomplish.","data_retention":true,"data_retention_days":30,"geolocation":"global"},{"api":"chat","id":"anthropic/claude-opus-4-1","object":"model","created":1754414845,"owned_by":"system","input_price":0.000015,"caching_price":0.00001875,"cached_price":0.0000015,"output_price":0.000075,"max_output_tokens":32000,"context_window":200000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Claude Opus 4 is Anthropic's most powerful model yet and the best coding model in the world, leading on SWE-bench (72.5%) and Terminal-bench (43.2%). It delivers sustained performance on long-running tasks that require focused effort and thousands of steps, with the ability to work continuously for several hours—dramatically outperforming all Sonnet models and significantly expanding what AI agents can accomplish.","data_retention":true,"data_retention_days":30,"geolocation":"global"},{"api":"chat","id":"anthropic/claude-sonnet-4-5","object":"model","created":1759165971,"owned_by":"system","input_price":0.000003,"caching_price":0.00000375,"cached_price":3e-7,"output_price":0.000015,"max_output_tokens":64000,"context_window":1000000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Claude Sonnet 4.5 is Anthropic's best coding model in the world, leading on SWE-bench Verified (77.2%) and OSWorld (61.4%). It delivers sustained autonomous performance on complex tasks for over 30 hours—up from seven hours for Opus 4—maintaining focus and reliability throughout the entire software development lifecycle, with enhanced capabilities in tool handling, memory management, and context processing that make it the strongest model for building complex agents.","data_retention":true,"data_retention_days":30,"geolocation":"global"},{"api":"chat","id":"anthropic/claude-opus-4-7","object":"model","created":1776356268,"owned_by":"system","input_price":0.000005,"caching_price":0.00000625,"cached_price":5e-7,"output_price":0.000025,"max_output_tokens":128000,"context_window":1000000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Opus 4.7 is the next generation of Anthropic's Opus family, built for long-running, asynchronous agents. Building on the coding and agentic strengths of Opus 4.6, it delivers stronger performance on complex, multi-step tasks and more reliable agentic execution across extended workflows. It is especially effective for asynchronous agent pipelines where tasks unfold over time - large codebases, multi-stage debugging, and end-to-end project orchestration.","data_retention":true,"data_retention_days":30,"privacy_comments":"N/A","geolocation":"global"},{"api":"chat","id":"anthropic/claude-opus-4","object":"model","created":1747934845,"owned_by":"system","input_price":0.000015,"caching_price":0.00001875,"cached_price":0.0000015,"output_price":0.000075,"max_output_tokens":32000,"context_window":200000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Claude Opus 4 is Anthropic's most powerful model yet and the best coding model in the world, leading on SWE-bench (72.5%) and Terminal-bench (43.2%). It delivers sustained performance on long-running tasks that require focused effort and thousands of steps, with the ability to work continuously for several hours—dramatically outperforming all Sonnet models and significantly expanding what AI agents can accomplish.","data_retention":true,"data_retention_days":30,"geolocation":"global"},{"api":"chat","id":"anthropic/claude-haiku-4-5","object":"model","created":1764004371,"owned_by":"system","input_price":0.000001,"caching_price":0.00000125,"cached_price":1e-7,"output_price":0.000005,"max_output_tokens":64000,"context_window":200000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"Anthropic Haiku 4.5","data_retention":true,"data_retention_days":30,"geolocation":"global"},{"api":"chat","id":"google/gemini-3.1-pro-preview","object":"model","created":1771524000,"owned_by":"system","input_price":0.000002,"caching_price":0.0000045,"cached_price":2e-7,"output_price":0.000012,"max_output_tokens":65535,"context_window":1048576,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Gemini 3.1 Pro is the next iteration in the Gemini 3 series of models, a suite of highly capable, natively multimodal reasoning models. As of this model card’s date of publication, Gemini 3.1 Pro is Google’s most advanced model for complex tasks. Geminin 3.1 Pro can comprehend vast datasets and challenging problems from massively multimodal information sources, including text, audio, images, video, and entire code repositories.","geolocation":"global"},{"api":"chat","id":"google/gemini-3.1-flash-image-preview@europe-west1","object":"model","created":1772125200,"owned_by":"system","input_price":5e-7,"caching_price":0,"cached_price":0,"output_price":0.000002,"max_output_tokens":32768,"context_window":131072,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":true,"supports_tool_calling":true,"description":"Gemini 3.1 Flash Image is optimized for image understanding and generation and offers a balance of price and performance.","privacy_comments":"N/A","geolocation":"global"},{"api":"chat","id":"google/gemini-3.1-flash-image-preview@europe-west4","object":"model","created":1772125200,"owned_by":"system","input_price":5e-7,"caching_price":0,"cached_price":0,"output_price":0.000002,"max_output_tokens":32768,"context_window":131072,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":true,"supports_tool_calling":true,"description":"Gemini 3.1 Flash Image is optimized for image understanding and generation and offers a balance of price and performance.","privacy_comments":"N/A","geolocation":"global"},{"api":"chat","id":"google/gemini-2.5-flash-lite","object":"model","created":1747765524,"owned_by":"system","input_price":1e-7,"caching_price":1.8333e-7,"cached_price":1e-8,"output_price":4e-7,"max_output_tokens":65535,"context_window":1048576,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Google's smallest and most cost effective model, built for at scale usage.","geolocation":"global"},{"api":"chat","id":"google/gemini-3-flash-preview","object":"model","created":1766022113,"owned_by":"system","input_price":5e-7,"caching_price":0.000001,"cached_price":5e-8,"output_price":0.000003,"max_output_tokens":65535,"context_window":1048576,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Gemini 3 Flash Preview is designed to deliver strong agentic capabilities (near-Pro level) at substantial speed and value. Making it perfect for engaging multi-turn chats, and collaborating back and forth with your coding agent without getting out of flow. Compared to 2.5 Flash it delivers significant improvements across the board.","geolocation":"global"},{"api":"chat","id":"google/gemini-3.1-flash-image-preview@us-central1","object":"model","created":1772125200,"owned_by":"system","input_price":5e-7,"caching_price":0,"cached_price":0,"output_price":0.000002,"max_output_tokens":32768,"context_window":131072,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":true,"supports_tool_calling":true,"description":"Gemini 3.1 Flash Image is optimized for image understanding and generation and offers a balance of price and performance.","privacy_comments":"N/A","geolocation":"global"},{"api":"chat","id":"google/gemini-3.1-flash-image-preview@us-east1","object":"model","created":1772125200,"owned_by":"system","input_price":5e-7,"caching_price":0,"cached_price":0,"output_price":0.000002,"max_output_tokens":32768,"context_window":131072,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":true,"supports_tool_calling":true,"description":"Gemini 3.1 Flash Image is optimized for image understanding and generation and offers a balance of price and performance.","privacy_comments":"N/A","geolocation":"global"},{"api":"chat","id":"google/gemini-3.1-flash-image-preview@europe-west8","object":"model","created":1772125200,"owned_by":"system","input_price":5e-7,"caching_price":0,"cached_price":0,"output_price":0.000002,"max_output_tokens":32768,"context_window":131072,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":true,"supports_tool_calling":true,"description":"Gemini 3.1 Flash Image is optimized for image understanding and generation and offers a balance of price and performance.","privacy_comments":"N/A","geolocation":"global"},{"api":"chat","id":"google/gemini-3.1-flash-lite-preview","object":"model","created":1772559935,"owned_by":"system","input_price":2.5e-7,"caching_price":8.333e-8,"cached_price":2.5e-8,"output_price":0.0000015,"max_output_tokens":65535,"context_window":1048576,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"Gemini 3.1 Flash Lite Preview is the most cost-efficient model in the Gemini family, optimized for high-volume, low-latency tasks. It delivers fast responses with solid quality for everyday use cases including summarization, classification, and simple reasoning.","geolocation":"global"},{"api":"chat","id":"google/gemini-2.5-flash","object":"model","created":1747765524,"owned_by":"system","input_price":3e-7,"caching_price":5.5e-7,"cached_price":7.5e-8,"output_price":0.0000025,"max_output_tokens":65535,"context_window":1048576,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Google's first hybrid reasoning model which supports a 1M token context window and has thinking budgets. Most balanced Gemini model, optimized for low latency use cases.","geolocation":"global"},{"api":"chat","id":"google/gemini-2.0-flash-001","object":"model","created":1738769413,"owned_by":"system","input_price":1e-7,"caching_price":1e-7,"cached_price":1e-7,"output_price":4e-7,"max_output_tokens":8192,"context_window":1048576,"supports_caching":false,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"Gemini Flash 2.0 offers a significantly faster time to first token (TTFT) compared to [Gemini Flash 1.5](/google/gemini-flash-1.5), while maintaining quality on par with larger models like [Gemini Pro 1.5](/google/gemini-pro-1.5). It introduces notable enhancements in multimodal understanding, coding capabilities, complex instruction following, and function calling. These advancements come together to deliver more seamless and robust agentic experiences.","geolocation":"global"},{"api":"chat","id":"google/gemini-3-pro-preview","object":"model","created":1771524000,"owned_by":"system","input_price":0.000002,"caching_price":0.0000045,"cached_price":2e-7,"output_price":0.000012,"max_output_tokens":65535,"context_window":1048576,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Gemini 3.1 Pro is the next iteration in the Gemini 3 series of models, a suite of highly capable, natively multimodal reasoning models. As of this model card’s date of publication, Gemini 3.1 Pro is Google’s most advanced model for complex tasks. Geminin 3.1 Pro can comprehend vast datasets and challenging problems from massively multimodal information sources, including text, audio, images, video, and entire code repositories.","geolocation":"global"},{"api":"chat","id":"google/gemini-3.1-flash-image-preview","object":"model","created":1772125200,"owned_by":"system","input_price":5e-7,"caching_price":0,"cached_price":0,"output_price":0.000002,"max_output_tokens":32768,"context_window":131072,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":true,"supports_tool_calling":true,"description":"Gemini 3.1 Flash Image is optimized for image understanding and generation and offers a balance of price and performance.","privacy_comments":"N/A","geolocation":"global"},{"api":"chat","id":"google/gemini-3.1-flash-image-preview@us-west1","object":"model","created":1772125200,"owned_by":"system","input_price":5e-7,"caching_price":0,"cached_price":0,"output_price":0.000002,"max_output_tokens":32768,"context_window":131072,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":true,"supports_tool_calling":true,"description":"Gemini 3.1 Flash Image is optimized for image understanding and generation and offers a balance of price and performance.","privacy_comments":"N/A","geolocation":"global"},{"api":"chat","id":"google/gemini-2.5-pro","object":"model","created":1746582113,"owned_by":"system","input_price":0.00000125,"caching_price":0.000002375,"cached_price":3.1e-7,"output_price":0.00001,"max_output_tokens":65535,"context_window":1048576,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Gemini 2.5 Pro is Google’s state-of-the-art AI model designed for advanced reasoning, coding, mathematics, and scientific tasks. It employs “thinking” capabilities, enabling it to reason through responses with enhanced accuracy and nuanced context handling. Gemini 2.5 Pro achieves top-tier performance on multiple benchmarks, including first-place positioning on the LMArena leaderboard, reflecting superior human-preference alignment and complex problem-solving abilities.","geolocation":"global"},{"api":"chat","id":"google/gemini-3.1-flash-image-preview@us-east5","object":"model","created":1772125200,"owned_by":"system","input_price":5e-7,"caching_price":0,"cached_price":0,"output_price":0.000002,"max_output_tokens":32768,"context_window":131072,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":true,"supports_tool_calling":true,"description":"Gemini 3.1 Flash Image is optimized for image understanding and generation and offers a balance of price and performance.","privacy_comments":"N/A","geolocation":"global"},{"api":"chat","id":"google/gemini-3.1-flash-image-preview@us-south1","object":"model","created":1772125200,"owned_by":"system","input_price":5e-7,"caching_price":0,"cached_price":0,"output_price":0.000002,"max_output_tokens":32768,"context_window":131072,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":true,"supports_tool_calling":true,"description":"Gemini 3.1 Flash Image is optimized for image understanding and generation and offers a balance of price and performance.","privacy_comments":"N/A","geolocation":"global"},{"api":"chat","id":"google/gemini-3.1-flash-image-preview@europe-central2","object":"model","created":1772125200,"owned_by":"system","input_price":5e-7,"caching_price":0,"cached_price":0,"output_price":0.000002,"max_output_tokens":32768,"context_window":131072,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":true,"supports_tool_calling":true,"description":"Gemini 3.1 Flash Image is optimized for image understanding and generation and offers a balance of price and performance.","privacy_comments":"N/A","geolocation":"global"},{"api":"chat","id":"google/gemini-3.1-flash-image-preview@europe-north1","object":"model","created":1772125200,"owned_by":"system","input_price":5e-7,"caching_price":0,"cached_price":0,"output_price":0.000002,"max_output_tokens":32768,"context_window":131072,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":true,"supports_tool_calling":true,"description":"Gemini 3.1 Flash Image is optimized for image understanding and generation and offers a balance of price and performance.","privacy_comments":"N/A","geolocation":"global"},{"api":"chat","id":"deepinfra/deepseek-ai/DeepSeek-V3.1","object":"model","created":1756232308,"owned_by":"system","input_price":3e-7,"caching_price":0.000001,"cached_price":3e-7,"output_price":0.000001,"max_output_tokens":0,"context_window":163840,"supports_caching":false,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"","privacy_comments":"Don't store prompts and responses","geolocation":"global"},{"api":"chat","id":"deepinfra/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo","object":"model","created":1715697754,"owned_by":"system","input_price":2e-8,"caching_price":2e-8,"cached_price":2e-8,"output_price":5e-8,"max_output_tokens":0,"context_window":131072,"supports_caching":false,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"A lightweight and ultra-fast variant of Llama 3.3 70B, for use when quick response times are needed most.","privacy_comments":"Don't store prompts and responses","geolocation":"global"},{"api":"chat","id":"deepinfra/meta-llama/Meta-Llama-3.1-405B-Instruct","object":"model","created":1738865908,"owned_by":"system","input_price":8e-7,"caching_price":8e-7,"cached_price":8e-7,"output_price":8e-7,"max_output_tokens":0,"context_window":130815,"supports_caching":false,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":false,"description":"A lightweight and ultra-fast variant of Llama 3.3 70B, for use when quick response times are needed most.","privacy_comments":"Don't store prompts and responses","geolocation":"global"},{"api":"chat","id":"deepinfra/meta-llama/Llama-3.2-90B-Vision-Instruct","object":"model","created":1738865908,"owned_by":"system","input_price":3.5e-7,"caching_price":3.5e-7,"cached_price":3.5e-7,"output_price":4e-7,"max_output_tokens":4096,"context_window":131072,"supports_caching":false,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":false,"description":"A lightweight and ultra-fast variant of Llama 3.3 70B, for use when quick response times are needed most.","privacy_comments":"Don't store prompts and responses","geolocation":"global"},{"api":"chat","id":"deepinfra/zai-org/GLM-4.5-Air","object":"model","created":1753899508,"owned_by":"system","input_price":2e-7,"caching_price":0.0000011,"cached_price":2e-7,"output_price":0.0000011,"max_output_tokens":4096,"context_window":131072,"supports_caching":false,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"The GLM-4.5 series models are foundation models designed for intelligent agents. GLM-4.5 has 355 billion total parameters with 32 billion active parameters, while GLM-4.5-Air adopts a more compact design with 106 billion total parameters and 12 billion active parameters. GLM-4.5 models unify reasoning, coding, and intelligent agent capabilities to meet the complex demands of intelligent agent applications.","privacy_comments":"Don't store prompts and responses","geolocation":"global"},{"api":"chat","id":"deepinfra/deepseek-ai/DeepSeek-R1","object":"model","created":1738263837,"owned_by":"system","input_price":8.5e-7,"caching_price":8.5e-7,"cached_price":8.5e-7,"output_price":0.0000025,"max_output_tokens":8192,"context_window":64000,"supports_caching":false,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"DeepSeek-R1-Distill-Qwen-7B is a 7 billion parameter dense language model distilled from DeepSeek-R1, leveraging reinforcement learning-enhanced reasoning data generated by DeepSeek's larger models. The distillation process transfers advanced reasoning, math, and code capabilities into a smaller, more efficient model architecture based on Qwen2.5-Math-7B. This model demonstrates strong performance across mathematical benchmarks (92.8% pass@1 on MATH-500), coding tasks (Codeforces rating 1189), and general reasoning (49.1% pass@1 on GPQA Diamond), achieving competitive accuracy relative to larger models while maintaining smaller inference costs.","privacy_comments":"Don't store prompts and responses","geolocation":"global"},{"api":"chat","id":"deepinfra/Qwen/Qwen3-235B-A22B","object":"model","created":1745882204,"owned_by":"system","input_price":2e-7,"caching_price":2e-7,"cached_price":2e-7,"output_price":6e-7,"max_output_tokens":4096,"context_window":40000,"supports_caching":false,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"Qwen3, the latest generation in the Qwen large language model series, features both dense and mixture-of-experts (MoE) architectures to excel in reasoning, multilingual support, and advanced agent tasks. Its unique ability to switch seamlessly between a thinking mode for complex reasoning and a non-thinking mode for efficient dialogue ensures versatile, high-quality performance.\n\nSignificantly outperforming prior models like QwQ and Qwen2.5, Qwen3 delivers superior mathematics, coding, commonsense reasoning, creative writing, and interactive dialogue capabilities. The Qwen3-30B-A3B variant includes 30.5 billion parameters (3.3 billion activated), 48 layers, 128 experts (8 activated per task), and supports up to 131K token contexts with YaRN, setting a new standard among open-source models.","privacy_comments":"Don't store prompts and responses","geolocation":"global"},{"api":"chat","id":"deepinfra/meta-llama/Llama-3.3-70B-Instruct","object":"model","created":1738865908,"owned_by":"system","input_price":2.3e-7,"caching_price":2.3e-7,"cached_price":2.3e-7,"output_price":4e-7,"max_output_tokens":0,"context_window":131072,"supports_caching":false,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"A lightweight and ultra-fast variant of Llama 3.3 70B, for use when quick response times are needed most.","privacy_comments":"Don't store prompts and responses","geolocation":"global"},{"api":"chat","id":"deepinfra/meta-llama/Llama-3.3-70B-Instruct-Turbo","object":"model","created":1738865908,"owned_by":"system","input_price":1.2e-7,"caching_price":1.2e-7,"cached_price":1.2e-7,"output_price":3e-7,"max_output_tokens":0,"context_window":131072,"supports_caching":false,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"A lightweight and ultra-fast variant of Llama 3.3 70B, for use when quick response times are needed most.","privacy_comments":"Don't store prompts and responses","geolocation":"global"},{"api":"chat","id":"deepinfra/Qwen/Qwen3-32B","object":"model","created":1745882204,"owned_by":"system","input_price":1e-7,"caching_price":1e-7,"cached_price":1e-7,"output_price":3e-7,"max_output_tokens":0,"context_window":40000,"supports_caching":false,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"Qwen3, the latest generation in the Qwen large language model series, features both dense and mixture-of-experts (MoE) architectures to excel in reasoning, multilingual support, and advanced agent tasks. Its unique ability to switch seamlessly between a thinking mode for complex reasoning and a non-thinking mode for efficient dialogue ensures versatile, high-quality performance.\n\nSignificantly outperforming prior models like QwQ and Qwen2.5, Qwen3 delivers superior mathematics, coding, commonsense reasoning, creative writing, and interactive dialogue capabilities. The Qwen3-30B-A3B variant includes 30.5 billion parameters (3.3 billion activated), 48 layers, 128 experts (8 activated per task), and supports up to 131K token contexts with YaRN, setting a new standard among open-source models.","privacy_comments":"Don't store prompts and responses","geolocation":"global"},{"api":"chat","id":"deepinfra/Qwen/Qwen2.5-Coder-32B-Instruct","object":"model","created":1745882204,"owned_by":"system","input_price":7e-8,"caching_price":7e-8,"cached_price":7e-8,"output_price":1.6e-7,"max_output_tokens":0,"context_window":16384,"supports_caching":false,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"Qwen3, the latest generation in the Qwen large language model series, features both dense and mixture-of-experts (MoE) architectures to excel in reasoning, multilingual support, and advanced agent tasks. Its unique ability to switch seamlessly between a thinking mode for complex reasoning and a non-thinking mode for efficient dialogue ensures versatile, high-quality performance.\n\nSignificantly outperforming prior models like QwQ and Qwen2.5, Qwen3 delivers superior mathematics, coding, commonsense reasoning, creative writing, and interactive dialogue capabilities. The Qwen3-30B-A3B variant includes 30.5 billion parameters (3.3 billion activated), 48 layers, 128 experts (8 activated per task), and supports up to 131K token contexts with YaRN, setting a new standard among open-source models.","privacy_comments":"Don't store prompts and responses","geolocation":"global"},{"api":"chat","id":"deepinfra/deepseek-ai/DeepSeek-V3","object":"model","created":1738263837,"owned_by":"system","input_price":8.5e-7,"caching_price":8.5e-7,"cached_price":8.5e-7,"output_price":9e-7,"max_output_tokens":8192,"context_window":128000,"supports_caching":false,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"DeepSeek-R1-Distill-Qwen-7B is a 7 billion parameter dense language model distilled from DeepSeek-R1, leveraging reinforcement learning-enhanced reasoning data generated by DeepSeek's larger models. The distillation process transfers advanced reasoning, math, and code capabilities into a smaller, more efficient model architecture based on Qwen2.5-Math-7B. This model demonstrates strong performance across mathematical benchmarks (92.8% pass@1 on MATH-500), coding tasks (Codeforces rating 1189), and general reasoning (49.1% pass@1 on GPQA Diamond), achieving competitive accuracy relative to larger models while maintaining smaller inference costs.","privacy_comments":"Don't store prompts and responses","geolocation":"global"},{"api":"chat","id":"deepinfra/Kimi K2.5","object":"model","created":1774953861,"owned_by":"system","input_price":4.5e-7,"cached_price":7e-8,"output_price":0.00000225,"max_output_tokens":131072,"context_window":262100,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"Kimi K2.5 is an open-source, native multimodal agentic model built through continual pretraining on approximately 15 trillion mixed visual and text tokens atop Kimi-K2-Base. It seamlessly integrates vision and language understanding with advanced agentic capabilities, instant and thinking modes, as well as conversational and agentic paradigms.","geolocation":"global"},{"api":"chat","id":"deepinfra/meta-llama/Meta-Llama-3.1-70B-Instruct","object":"model","created":1738865908,"owned_by":"system","input_price":2.3e-7,"caching_price":2.3e-7,"cached_price":2.3e-7,"output_price":4e-7,"max_output_tokens":0,"context_window":130815,"supports_caching":false,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"A lightweight and ultra-fast variant of Llama 3.3 70B, for use when quick response times are needed most.","privacy_comments":"Don't store prompts and responses","geolocation":"global"},{"api":"chat","id":"deepinfra/Qwen/Qwen2.5-72B-Instruct","object":"model","created":1745882204,"owned_by":"system","input_price":2.3e-7,"caching_price":2.3e-7,"cached_price":2.3e-7,"output_price":4e-7,"max_output_tokens":0,"context_window":131072,"supports_caching":false,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"Qwen3, the latest generation in the Qwen large language model series, features both dense and mixture-of-experts (MoE) architectures to excel in reasoning, multilingual support, and advanced agent tasks. Its unique ability to switch seamlessly between a thinking mode for complex reasoning and a non-thinking mode for efficient dialogue ensures versatile, high-quality performance.\n\nSignificantly outperforming prior models like QwQ and Qwen2.5, Qwen3 delivers superior mathematics, coding, commonsense reasoning, creative writing, and interactive dialogue capabilities. The Qwen3-30B-A3B variant includes 30.5 billion parameters (3.3 billion activated), 48 layers, 128 experts (8 activated per task), and supports up to 131K token contexts with YaRN, setting a new standard among open-source models.","privacy_comments":"Don't store prompts and responses","geolocation":"global"},{"api":"chat","id":"deepinfra/Qwen/Qwen3-Coder-480B-A35B-Instruct","object":"model","created":1753454294,"owned_by":"system","input_price":4e-7,"caching_price":0.0000016,"cached_price":4e-7,"output_price":0.0000016,"max_output_tokens":0,"context_window":262144,"supports_caching":false,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"","privacy_comments":"Don't store prompts and responses","geolocation":"global"},{"api":"chat","id":"deepinfra/microsoft/phi-4","object":"model","created":1746134561,"owned_by":"system","input_price":7e-8,"caching_price":7e-8,"cached_price":7e-8,"output_price":1.4e-7,"max_output_tokens":0,"context_window":16384,"supports_caching":false,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":false,"description":"Phi-4-reasoning-plus is an enhanced 14B parameter model from Microsoft, fine-tuned from Phi-4 with additional reinforcement learning to boost accuracy on math, science, and code reasoning tasks. It uses the same dense decoder-only transformer architecture as Phi-4, but generates longer, more comprehensive outputs structured into a step-by-step reasoning trace and final answer.\n\nWhile it offers improved benchmark scores over Phi-4-reasoning across tasks like AIME, OmniMath, and HumanEvalPlus, its responses are typically ~50% longer, resulting in higher latency. Designed for English-only applications, it is well-suited for structured reasoning workflows where output quality takes priority over response speed.","privacy_comments":"Don't store prompts and responses","geolocation":"global"},{"api":"chat","id":"deepinfra/deepseek-ai/DeepSeek-R1-Distill-Llama-70B","object":"model","created":1738263837,"owned_by":"system","input_price":2.3e-7,"caching_price":2.3e-7,"cached_price":2.3e-7,"output_price":6.9e-7,"max_output_tokens":8192,"context_window":64000,"supports_caching":false,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":false,"description":"DeepSeek-R1-Distill-Qwen-7B is a 7 billion parameter dense language model distilled from DeepSeek-R1, leveraging reinforcement learning-enhanced reasoning data generated by DeepSeek's larger models. The distillation process transfers advanced reasoning, math, and code capabilities into a smaller, more efficient model architecture based on Qwen2.5-Math-7B. This model demonstrates strong performance across mathematical benchmarks (92.8% pass@1 on MATH-500), coding tasks (Codeforces rating 1189), and general reasoning (49.1% pass@1 on GPQA Diamond), achieving competitive accuracy relative to larger models while maintaining smaller inference costs.","privacy_comments":"Don't store prompts and responses","geolocation":"global"},{"api":"chat","id":"deepinfra/zai-org/GLM-4.5","object":"model","created":1753899508,"owned_by":"system","input_price":6e-7,"caching_price":0.0000022,"cached_price":6e-7,"output_price":0.0000022,"max_output_tokens":4096,"context_window":131072,"supports_caching":false,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"The GLM-4.5 series models are foundation models designed for intelligent agents. GLM-4.5 has 355 billion total parameters with 32 billion active parameters, while GLM-4.5-Air adopts a more compact design with 106 billion total parameters and 12 billion active parameters. GLM-4.5 models unify reasoning, coding, and intelligent agent capabilities to meet the complex demands of intelligent agent applications.","privacy_comments":"Don't store prompts and responses","geolocation":"global"},{"api":"chat","id":"xai/grok-4.3","object":"model","created":1777635042,"owned_by":"system","input_price":0.00000125,"caching_price":0.00000125,"cached_price":2e-7,"output_price":0.0000025,"max_output_tokens":0,"context_window":1000000,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Grok 4.3 is a reasoning model from xAI. It accepts text and image inputs with text output, and is suited for agentic workflows, instruction-following tasks, and applications requiring high factual accuracy. Reasoning is always active and cannot be disabled or configured by effort level. It supports a 1 million token context window with no output token limit, making it well-suited for long-document analysis, deep research, and multi-step agentic tasks. Pricing is tiered: requests exceeding 200k total tokens are billed at a higher rate.","data_retention":true,"privacy_comments":"N/A","geolocation":"global"},{"api":"chat","id":"xai/grok-code-fast-1","object":"model","created":1760609788,"owned_by":"system","input_price":2e-7,"caching_price":2e-7,"cached_price":2e-8,"output_price":0.0000015,"max_output_tokens":0,"context_window":256000,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"A speedy and economical reasoning model that excels at agentic coding","data_retention":true,"geolocation":"global"},{"api":"chat","id":"xai/grok-3-mini","object":"model","created":1760609788,"owned_by":"system","input_price":3e-7,"caching_price":3e-7,"cached_price":3e-7,"output_price":5e-7,"max_output_tokens":0,"context_window":131072,"supports_caching":false,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"A lightweight model that thinks before responding. Fast, smart, and great for logic-based tasks that do not require deep domain knowledge. The raw thinking traces are accessible.","data_retention":true,"geolocation":"global"},{"api":"chat","id":"xai/grok-4-fast-non-reasoning","object":"model","created":1760609788,"owned_by":"system","input_price":2e-7,"caching_price":2e-7,"cached_price":5e-8,"output_price":5e-7,"max_output_tokens":0,"context_window":2000000,"supports_caching":true,"supports_vision":false,"supports_computer_use":true,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"xAI's latest advancement in cost-efficient reasoning models","data_retention":true,"geolocation":"global"},{"api":"chat","id":"xai/grok-4.2-beta","object":"model","created":1773944308,"owned_by":"system","input_price":0.000002,"caching_price":0.000002,"cached_price":2e-7,"output_price":0.000006,"max_output_tokens":0,"context_window":2000000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Grok 4.20 Beta is xAI's newest flagship model with industry-leading speed and agentic tool calling capabilities. It combines the lowest hallucination rate on the market with strict prompt adherance, delivering consistently precise and truthful responses.","data_retention":true,"privacy_comments":"N/A","geolocation":"global"},{"api":"chat","id":"xai/grok-3-mini:low","object":"model","created":1760609788,"owned_by":"system","input_price":3e-7,"caching_price":3e-7,"cached_price":3e-7,"output_price":5e-7,"max_output_tokens":0,"context_window":131072,"supports_caching":false,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"A lightweight model that thinks before responding. Fast, smart, and great for logic-based tasks that do not require deep domain knowledge. The raw thinking traces are accessible.","data_retention":true,"geolocation":"global"},{"api":"chat","id":"xai/grok-3","object":"model","created":1760609788,"owned_by":"system","input_price":0.000005,"caching_price":0.000005,"cached_price":0.000005,"output_price":0.000025,"max_output_tokens":0,"context_window":131072,"supports_caching":false,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"Excels at enterprise use cases like data extraction, coding, and text summarization. Possesses deep domain knowledge in finance, healthcare, law, and science.","data_retention":true,"geolocation":"global"},{"api":"chat","id":"xai/grok-4","object":"model","created":1760609788,"owned_by":"system","input_price":0.000003,"caching_price":0.000003,"cached_price":7.5e-7,"output_price":0.000015,"max_output_tokens":0,"context_window":256000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"xAI's latest and greatest flagship model, offering unparalleled performance in natural language, math and reasoning - the perfect jack of all trades.","data_retention":true,"geolocation":"global"},{"api":"chat","id":"xai/grok-3-mini:high","object":"model","created":1760609788,"owned_by":"system","input_price":3e-7,"caching_price":3e-7,"cached_price":3e-7,"output_price":5e-7,"max_output_tokens":0,"context_window":131072,"supports_caching":false,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"A lightweight model that thinks before responding. Fast, smart, and great for logic-based tasks that do not require deep domain knowledge. The raw thinking traces are accessible.","data_retention":true,"geolocation":"global"},{"api":"chat","id":"xai/grok-4-fast","object":"model","created":1760609788,"owned_by":"system","input_price":2e-7,"caching_price":2e-7,"cached_price":5e-8,"output_price":5e-7,"max_output_tokens":0,"context_window":2000000,"supports_caching":true,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"xAI's latest advancement in cost-efficient reasoning models","data_retention":true,"geolocation":"global"},{"api":"chat","id":"xai/grok-2-1212","object":"model","created":1760609788,"owned_by":"system","input_price":0.000002,"caching_price":0.000002,"cached_price":0.000002,"output_price":0.00001,"max_output_tokens":0,"context_window":131072,"supports_caching":false,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"x AI's Our previous generation chat model.\n","data_retention":true,"geolocation":"global"},{"api":"chat","id":"xai/grok-4-1-fast-reasoning","object":"model","created":1764609284,"owned_by":"system","input_price":2e-7,"caching_price":2e-7,"cached_price":5e-8,"output_price":5e-7,"max_output_tokens":0,"context_window":2000000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"A frontier multimodal model optimized specifically for high-performance agentic tool calling.","data_retention":true,"geolocation":"global"},{"api":"chat","id":"xai/grok-4-1-fast-non-reasoning","object":"model","created":1764609284,"owned_by":"system","input_price":2e-7,"caching_price":2e-7,"cached_price":5e-8,"output_price":5e-7,"max_output_tokens":0,"context_window":2000000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"A frontier multimodal model optimized specifically for high-performance agentic tool calling.","data_retention":true,"geolocation":"global"},{"api":"chat","id":"moonshot/kimi-k2-thinking-turbo","object":"model","created":1754522204,"owned_by":"system","input_price":6e-7,"caching_price":6e-7,"cached_price":1.5e-7,"output_price":0.0000025,"max_output_tokens":0,"context_window":131072,"supports_caching":false,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"High-speed version of kimi-k2-thinking, suitable for scenarios requiring both deep reasoning and extremely fast responses","data_retention":true,"data_used_for_training":true,"geolocation":"global"},{"api":"chat","id":"moonshot/kimi-k2-0905-preview","object":"model","created":1754522204,"owned_by":"system","input_price":6e-7,"caching_price":6e-7,"cached_price":1.5e-7,"output_price":0.0000025,"max_output_tokens":0,"context_window":131072,"supports_caching":false,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"Based on kimi-k2-0711-preview, with enhanced agentic coding abilities, improved frontend code quality and practicality, and better context understanding","data_retention":true,"data_used_for_training":true,"geolocation":"global"},{"api":"chat","id":"moonshot/kimi-k2-thinking","object":"model","created":1754522204,"owned_by":"system","input_price":6e-7,"caching_price":6e-7,"cached_price":1.5e-7,"output_price":0.0000025,"max_output_tokens":0,"context_window":131072,"supports_caching":false,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"A thinking model with general agentic and reasoning capabilities, specializing in deep reasoning tasks.","data_retention":true,"data_used_for_training":true,"geolocation":"global"},{"api":"chat","id":"moonshot/kimi-k2-turbo-preview","object":"model","created":1754522204,"owned_by":"system","input_price":0.0000012,"caching_price":0.000005,"cached_price":3e-7,"output_price":0.000005,"max_output_tokens":0,"context_window":131072,"supports_caching":false,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"A Mixture-of-Experts (MoE) foundation model with exceptional coding and agent capabilities, featuring 1 trillion total parameters and 32 billion activated parameters. In benchmark evaluations covering general knowledge reasoning, programming, mathematics, and agent-related tasks, the K2 model outperforms other leading open-source models.","data_retention":true,"data_used_for_training":true,"geolocation":"global"},{"api":"chat","id":"moonshot/kimi-k2.6","object":"model","created":1776704451,"owned_by":"system","input_price":9.5e-7,"caching_price":9.6e-7,"cached_price":1.6e-7,"output_price":0.000004,"max_output_tokens":262144,"context_window":262144,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Kimi K2.6 is Moonshot AI's next-generation multimodal model, designed for long-horizon coding, coding-driven UI/UX generation, and multi-agent orchestration. It handles complex end-to-end coding tasks across Python, Rust, and Go, and can convert prompts and visual inputs into production-ready interfaces. Its agent swarm architecture scales to hundreds of parallel sub-agents for autonomous task decomposition - delivering documents, websites, and spreadsheets in a single run without human oversight.","privacy_comments":"N/A","geolocation":"global"},{"api":"chat","id":"moonshot/kimi-k2-0711-preview","object":"model","created":1752535004,"owned_by":"system","input_price":6e-7,"caching_price":6e-7,"cached_price":1.5e-7,"output_price":0.0000025,"max_output_tokens":0,"context_window":131072,"supports_caching":false,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"A Mixture-of-Experts (MoE) foundation model with exceptional coding and agent capabilities, featuring 1 trillion total parameters and 32 billion activated parameters. In benchmark evaluations covering general knowledge reasoning, programming, mathematics, and agent-related tasks, the K2 model outperforms other leading open-source models.","data_retention":true,"data_used_for_training":true,"geolocation":"global"},{"api":"chat","id":"moonshot/kimi-k2.5","object":"model","created":1770289011,"owned_by":"system","input_price":6e-7,"caching_price":6e-7,"cached_price":1e-7,"output_price":0.000003,"max_output_tokens":262144,"context_window":262144,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Kimi K2.5 is Moonshot AI's native multimodal model, delivering state-of-the-art visual coding capability and a self-directed agent swarm paradigm. Built on Kimi K2 with continued pretraining over approximately 15T mixed visual and text tokens, it delivers strong performance in general reasoning, visual coding, and agentic tool-calling.","geolocation":"global"},{"api":"chat","id":"azure/gpt-4.1@eastus2","object":"model","created":1744654985,"owned_by":"system","input_price":0.000002,"caching_price":0.000002,"cached_price":5e-7,"output_price":0.000008,"max_output_tokens":32768,"context_window":1047576,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"GPT-4.1 is a flagship large language model optimized for advanced instruction following, real-world software engineering, and long-context reasoning. It supports a 1 million token context window and outperforms GPT-4o and GPT-4.5 across coding (54.6% SWE-bench Verified), instruction compliance (87.4% IFEval), and multimodal understanding benchmarks. It is tuned for precise code diffs, agent reliability, and high recall in large document contexts, making it ideal for agents, IDE tooling, and enterprise knowledge retrieval.","privacy_comments":"N/A","geolocation":"us"},{"api":"chat","id":"azure/gpt-4.1-mini@eastus2","object":"model","created":1744654981,"owned_by":"system","input_price":4e-7,"caching_price":4e-7,"cached_price":1e-7,"output_price":0.0000016,"max_output_tokens":32768,"context_window":1047576,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"GPT-4.1 Mini is a mid-sized model delivering performance competitive with GPT-4o at substantially lower latency and cost. It retains a 1 million token context window and scores 45.1% on hard instruction evals, 35.8% on MultiChallenge, and 84.1% on IFEval. Mini also shows strong coding ability (e.g., 31.6% on Aider’s polyglot diff benchmark) and vision understanding, making it suitable for interactive applications with tight performance constraints.","privacy_comments":"N/A","geolocation":"us"},{"api":"chat","id":"azure/gpt-4.1","object":"model","created":1744654985,"owned_by":"system","input_price":0.000002,"caching_price":0.000002,"cached_price":5e-7,"output_price":0.000008,"max_output_tokens":32768,"context_window":1047576,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"GPT-4.1 is a flagship large language model optimized for advanced instruction following, real-world software engineering, and long-context reasoning. It supports a 1 million token context window and outperforms GPT-4o and GPT-4.5 across coding (54.6% SWE-bench Verified), instruction compliance (87.4% IFEval), and multimodal understanding benchmarks. It is tuned for precise code diffs, agent reliability, and high recall in large document contexts, making it ideal for agents, IDE tooling, and enterprise knowledge retrieval.","geolocation":"global"},{"api":"chat","id":"azure/openai-responses/gpt-5.4@swedencentral","object":"model","created":1765472906,"owned_by":"system","input_price":0.0000025,"caching_price":0.0000025,"cached_price":2.5e-7,"output_price":0.000015,"max_output_tokens":128000,"context_window":1050000,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"GPT-5.4 is OpenAI’s latest frontier model, unifying the Codex and GPT lines into a single system. It features a 1M+ token context window (922K input, 128K output) with support for text and image inputs, enabling high-context reasoning, coding, and multimodal analysis within the same workflow.\n\nThe model delivers improved performance in coding, document understanding, tool use, and instruction following. It is designed as a strong default for both general-purpose tasks and software engineering, capable of generating production-quality code, synthesizing information across multiple sources, and executing complex multi-step workflows with fewer iterations and greater token efficiency.\n","privacy_comments":"N/A","geolocation":"us"},{"api":"chat","id":"azure/gpt-5@swedencentral","object":"model","created":1744824542,"owned_by":"system","input_price":0.00000125,"caching_price":0.00001,"cached_price":1.25e-7,"output_price":0.00001,"max_output_tokens":100000,"context_window":200000,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"GPT-5 is OpenAI’s most advanced model, offering major improvements in reasoning, code quality, and user experience. It is optimized for complex tasks that require step-by-step reasoning, instruction following, and accuracy in high-stakes use cases. It supports test-time routing features and advanced prompt understanding, including user-specified intent like \"think hard about this.\" Improvements include reductions in hallucination, sycophancy, and better performance in coding, writing, and health-related tasks.","privacy_comments":"N/A","geolocation":"eu"},{"api":"chat","id":"azure/o4-mini@swedencentral","object":"model","created":1744824542,"owned_by":"system","input_price":0.0000011,"caching_price":0.0000011,"cached_price":2.75e-7,"output_price":0.0000044,"max_output_tokens":100000,"context_window":200000,"supports_caching":true,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"o3-mini is OpenAI's most recent small reasoning model, providing high intelligence at the same cost and latency targets of o1-mini. o3-mini also supports key developer features, like Structured Outputs, function calling, Batch API, and more. Like other models in the o-series, it is designed to excel at science, math, and coding tasks.","privacy_comments":"N/A","geolocation":"eu"},{"api":"chat","id":"azure/openai-responses/gpt-4.1@swedencentral","object":"model","created":1744654985,"owned_by":"system","input_price":0.000002,"caching_price":0.000002,"cached_price":5e-7,"output_price":0.000008,"max_output_tokens":32768,"context_window":1047576,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"GPT-4.1 is a flagship large language model optimized for advanced instruction following, real-world software engineering, and long-context reasoning. It supports a 1 million token context window and outperforms GPT-4o and GPT-4.5 across coding (54.6% SWE-bench Verified), instruction compliance (87.4% IFEval), and multimodal understanding benchmarks. It is tuned for precise code diffs, agent reliability, and high recall in large document contexts, making it ideal for agents, IDE tooling, and enterprise knowledge retrieval.","privacy_comments":"N/A","geolocation":"eu"},{"api":"chat","id":"azure/openai-responses/gpt-4.1-nano@eastus2","object":"model","created":1744654969,"owned_by":"system","input_price":1e-7,"caching_price":1e-7,"cached_price":2.5e-8,"output_price":4e-7,"max_output_tokens":32768,"context_window":1047576,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"For tasks that demand low latency, GPT‑4.1 nano is the fastest and cheapest model in the GPT-4.1 series. It delivers exceptional performance at a small size with its 1 million token context window, and scores 80.1% on MMLU, 50.3% on GPQA, and 9.8% on Aider polyglot coding – even higher than GPT‑4o mini. It’s ideal for tasks like classification or autocompletion.","privacy_comments":"N/A","geolocation":"us"},{"api":"chat","id":"azure/openai-responses/gpt-5.4-pro@eastus2","object":"model","created":1765472906,"owned_by":"system","input_price":0.00003,"caching_price":0.00018,"cached_price":0.00003,"output_price":0.00018,"max_output_tokens":128000,"context_window":1050000,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"GPT-5.4 Pro is OpenAI's most advanced model, building on GPT-5.4's unified architecture with enhanced reasoning capabilities for complex, high-stakes tasks. It features a 1M+ token context window (922K input, 128K output) with support for text and image inputs. Optimized for step-by-step reasoning, instruction following, and accuracy, GPT-5.4 Pro excels at agentic coding, long-context workflows, and multi-step problem solving.","privacy_comments":"N/A","geolocation":"us"},{"api":"chat","id":"azure/gpt-5-nano@francecentral","object":"model","created":1744824542,"owned_by":"system","input_price":5e-8,"caching_price":5e-8,"cached_price":5e-9,"output_price":4e-7,"max_output_tokens":100000,"context_window":200000,"supports_caching":true,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":" ","privacy_comments":"N/A","geolocation":"eu"},{"api":"chat","id":"azure/openai-responses/gpt-4.1@eastus2","object":"model","created":1744654985,"owned_by":"system","input_price":0.000002,"caching_price":0.000002,"cached_price":5e-7,"output_price":0.000008,"max_output_tokens":32768,"context_window":1047576,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"GPT-4.1 is a flagship large language model optimized for advanced instruction following, real-world software engineering, and long-context reasoning. It supports a 1 million token context window and outperforms GPT-4o and GPT-4.5 across coding (54.6% SWE-bench Verified), instruction compliance (87.4% IFEval), and multimodal understanding benchmarks. It is tuned for precise code diffs, agent reliability, and high recall in large document contexts, making it ideal for agents, IDE tooling, and enterprise knowledge retrieval.","privacy_comments":"N/A","geolocation":"us"},{"api":"chat","id":"azure/gpt-5.4@eastus2","object":"model","created":1777213257,"owned_by":"system","input_price":0.0000025,"cached_price":2.5e-7,"output_price":0.000015,"max_output_tokens":128000,"context_window":1050000,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"GPT 5.4 is OpenAI's frontier model designed for complex professional workloads, with strong reasoning, high reliability, and improved token efficiency on hard tasks. It features a 1M+ token context window (922K input, 128K output) with support for text and image inputs, enabling large scale reasoning, coding, and multimodal workflows within a single system.","privacy_comments":"N/A","geolocation":"global"},{"api":"chat","id":"azure/o4-mini@westus3","object":"model","created":1744824542,"owned_by":"system","input_price":0.0000011,"caching_price":0.0000011,"cached_price":2.75e-7,"output_price":0.0000044,"max_output_tokens":100000,"context_window":200000,"supports_caching":true,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"o3-mini is OpenAI's most recent small reasoning model, providing high intelligence at the same cost and latency targets of o1-mini. o3-mini also supports key developer features, like Structured Outputs, function calling, Batch API, and more. Like other models in the o-series, it is designed to excel at science, math, and coding tasks.","privacy_comments":"N/A","geolocation":"us"},{"api":"chat","id":"azure/gpt-5-mini@swedencentral","object":"model","created":1744824542,"owned_by":"system","input_price":2.5e-7,"caching_price":2.5e-7,"cached_price":2.5e-8,"output_price":0.000002,"max_output_tokens":100000,"context_window":200000,"supports_caching":true,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":" ","privacy_comments":"N/A","geolocation":"eu"},{"api":"chat","id":"azure/openai-responses/gpt-5.5@eastus2","object":"model","created":1777109995,"owned_by":"system","input_price":0.000005,"cached_price":5e-7,"output_price":0.00003,"max_output_tokens":128000,"context_window":1050000,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"GPT-5.5 is OpenAI's frontier model designed for complex professional workloads, building on GPT-5.4 with stronger reasoning, higher reliability, and improved token efficiency on hard tasks. It features a 1M+ token context window (922K input, 128K output) with support for text and image inputs, enabling large-scale reasoning, coding, and multimodal workflows within a single system.","privacy_comments":"N/A","geolocation":"global"},{"api":"chat","id":"azure/gpt-5.1","object":"model","created":1764178142,"owned_by":"system","input_price":0.00000125,"caching_price":0.00001,"cached_price":1.25e-7,"output_price":0.00001,"max_output_tokens":100000,"context_window":200000,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"","geolocation":"global"},{"api":"chat","id":"azure/openai-responses/gpt-4.1-nano@westus3","object":"model","created":1744654969,"owned_by":"system","input_price":1e-7,"caching_price":1e-7,"cached_price":2.5e-8,"output_price":4e-7,"max_output_tokens":32768,"context_window":1047576,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"For tasks that demand low latency, GPT‑4.1 nano is the fastest and cheapest model in the GPT-4.1 series. It delivers exceptional performance at a small size with its 1 million token context window, and scores 80.1% on MMLU, 50.3% on GPQA, and 9.8% on Aider polyglot coding – even higher than GPT‑4o mini. It’s ideal for tasks like classification or autocompletion.","privacy_comments":"N/A","geolocation":"us"},{"api":"chat","id":"azure/gpt-4.1-nano@westus3","object":"model","created":1744654969,"owned_by":"system","input_price":1e-7,"caching_price":1e-7,"cached_price":2.5e-8,"output_price":4e-7,"max_output_tokens":32768,"context_window":1047576,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"For tasks that demand low latency, GPT‑4.1 nano is the fastest and cheapest model in the GPT-4.1 series. It delivers exceptional performance at a small size with its 1 million token context window, and scores 80.1% on MMLU, 50.3% on GPQA, and 9.8% on Aider polyglot coding – even higher than GPT‑4o mini. It’s ideal for tasks like classification or autocompletion.","privacy_comments":"N/A","geolocation":"us"},{"api":"chat","id":"azure/gpt-5-mini@uksouth","object":"model","created":1744824542,"owned_by":"system","input_price":2.5e-7,"caching_price":2.5e-7,"cached_price":2.5e-8,"output_price":0.000002,"max_output_tokens":100000,"context_window":200000,"supports_caching":true,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":" ","privacy_comments":"N/A","geolocation":"eu"},{"api":"chat","id":"azure/gpt-5","object":"model","created":1744824542,"owned_by":"system","input_price":0.00000125,"caching_price":0.00001,"cached_price":1.25e-7,"output_price":0.00001,"max_output_tokens":100000,"context_window":200000,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"","geolocation":"global"},{"api":"chat","id":"azure/openai-responses/gpt-5.2-codex@eastus2","object":"model","created":1769088542,"owned_by":"system","input_price":0.00000175,"caching_price":0.00000175,"cached_price":1.75e-7,"output_price":0.000014,"max_output_tokens":128000,"context_window":400000,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"OpenAI's most intelligent coding model optimized for long-horizon, agentic coding tasks.","geolocation":"us"},{"api":"chat","id":"azure/openai-responses/gpt-4.1@francecentral","object":"model","created":1744654985,"owned_by":"system","input_price":0.000002,"caching_price":0.000002,"cached_price":5e-7,"output_price":0.000008,"max_output_tokens":32768,"context_window":1047576,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"GPT-4.1 is a flagship large language model optimized for advanced instruction following, real-world software engineering, and long-context reasoning. It supports a 1 million token context window and outperforms GPT-4o and GPT-4.5 across coding (54.6% SWE-bench Verified), instruction compliance (87.4% IFEval), and multimodal understanding benchmarks. It is tuned for precise code diffs, agent reliability, and high recall in large document contexts, making it ideal for agents, IDE tooling, and enterprise knowledge retrieval.","privacy_comments":"N/A","geolocation":"eu"},{"api":"chat","id":"azure/openai-responses/gpt-4.1-mini@eastus2","object":"model","created":1744654981,"owned_by":"system","input_price":4e-7,"caching_price":4e-7,"cached_price":1e-7,"output_price":0.0000016,"max_output_tokens":32768,"context_window":1047576,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"GPT-4.1 Mini is a mid-sized model delivering performance competitive with GPT-4o at substantially lower latency and cost. It retains a 1 million token context window and scores 45.1% on hard instruction evals, 35.8% on MultiChallenge, and 84.1% on IFEval. Mini also shows strong coding ability (e.g., 31.6% on Aider’s polyglot diff benchmark) and vision understanding, making it suitable for interactive applications with tight performance constraints.","privacy_comments":"N/A","geolocation":"us"},{"api":"chat","id":"azure/openai-responses/gpt-4.1-nano@swedencentral","object":"model","created":1744654969,"owned_by":"system","input_price":1e-7,"caching_price":1e-7,"cached_price":2.5e-8,"output_price":4e-7,"max_output_tokens":32768,"context_window":1047576,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"For tasks that demand low latency, GPT‑4.1 nano is the fastest and cheapest model in the GPT-4.1 series. It delivers exceptional performance at a small size with its 1 million token context window, and scores 80.1% on MMLU, 50.3% on GPQA, and 9.8% on Aider polyglot coding – even higher than GPT‑4o mini. It’s ideal for tasks like classification or autocompletion.","privacy_comments":"N/A","geolocation":"eu"},{"api":"chat","id":"azure/openai-responses/gpt-4.1-mini@francecentral","object":"model","created":1744654981,"owned_by":"system","input_price":4e-7,"caching_price":4e-7,"cached_price":1e-7,"output_price":0.0000016,"max_output_tokens":32768,"context_window":1047576,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"GPT-4.1 Mini is a mid-sized model delivering performance competitive with GPT-4o at substantially lower latency and cost. It retains a 1 million token context window and scores 45.1% on hard instruction evals, 35.8% on MultiChallenge, and 84.1% on IFEval. Mini also shows strong coding ability (e.g., 31.6% on Aider’s polyglot diff benchmark) and vision understanding, making it suitable for interactive applications with tight performance constraints.","privacy_comments":"N/A","geolocation":"eu"},{"api":"chat","id":"azure/gpt-4.1-mini@francecentral","object":"model","created":1744654981,"owned_by":"system","input_price":4e-7,"caching_price":4e-7,"cached_price":1e-7,"output_price":0.0000016,"max_output_tokens":32768,"context_window":1047576,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"GPT-4.1 Mini is a mid-sized model delivering performance competitive with GPT-4o at substantially lower latency and cost. It retains a 1 million token context window and scores 45.1% on hard instruction evals, 35.8% on MultiChallenge, and 84.1% on IFEval. Mini also shows strong coding ability (e.g., 31.6% on Aider’s polyglot diff benchmark) and vision understanding, making it suitable for interactive applications with tight performance constraints.","privacy_comments":"N/A","geolocation":"eu"},{"api":"chat","id":"azure/gpt-5-nano@eastus2","object":"model","created":1744824542,"owned_by":"system","input_price":5e-8,"caching_price":5e-8,"cached_price":5e-9,"output_price":4e-7,"max_output_tokens":100000,"context_window":200000,"supports_caching":true,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":" ","privacy_comments":"N/A","geolocation":"us"},{"api":"chat","id":"azure/openai-responses/gpt-4.1@westus3","object":"model","created":1744654985,"owned_by":"system","input_price":0.000002,"caching_price":0.000002,"cached_price":5e-7,"output_price":0.000008,"max_output_tokens":32768,"context_window":1047576,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"GPT-4.1 is a flagship large language model optimized for advanced instruction following, real-world software engineering, and long-context reasoning. It supports a 1 million token context window and outperforms GPT-4o and GPT-4.5 across coding (54.6% SWE-bench Verified), instruction compliance (87.4% IFEval), and multimodal understanding benchmarks. It is tuned for precise code diffs, agent reliability, and high recall in large document contexts, making it ideal for agents, IDE tooling, and enterprise knowledge retrieval.","privacy_comments":"N/A","geolocation":"us"},{"api":"chat","id":"azure/gpt-5.1@francecentral","object":"model","created":1764178142,"owned_by":"system","input_price":0.00000125,"caching_price":0.00001,"cached_price":1.25e-7,"output_price":0.00001,"max_output_tokens":100000,"context_window":200000,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"GPT-5.1 is the latest frontier-grade model in the GPT-5 series, offering stronger general-purpose reasoning, improved instruction adherence, and a more natural conversational style compared to GPT-5. It uses adaptive reasoning to allocate computation dynamically, responding quickly to simple queries while spending more depth on complex tasks. The model produces clearer, more grounded explanations with reduced jargon, making it easier to follow even on technical or multi-step problems.\n\nBuilt for broad task coverage, GPT-5.1 delivers consistent gains across math, coding, and structured analysis workloads, with more coherent long-form answers and improved tool-use reliability. It also features refined conversational alignment, enabling warmer, more intuitive responses without compromising precision. GPT-5.1 serves as the primary full-capability successor to GPT-5","privacy_comments":"N/A","geolocation":"eu"},{"api":"chat","id":"azure/gpt-5.2","object":"model","created":1769448542,"owned_by":"system","input_price":0.00000175,"caching_price":0.000014,"cached_price":1.75e-7,"output_price":0.000014,"max_output_tokens":100000,"context_window":200000,"supports_caching":true,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"","geolocation":"global"},{"api":"chat","id":"azure/gpt-4.1@francecentral","object":"model","created":1744654985,"owned_by":"system","input_price":0.000002,"caching_price":0.000002,"cached_price":5e-7,"output_price":0.000008,"max_output_tokens":32768,"context_window":1047576,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"GPT-4.1 is a flagship large language model optimized for advanced instruction following, real-world software engineering, and long-context reasoning. It supports a 1 million token context window and outperforms GPT-4o and GPT-4.5 across coding (54.6% SWE-bench Verified), instruction compliance (87.4% IFEval), and multimodal understanding benchmarks. It is tuned for precise code diffs, agent reliability, and high recall in large document contexts, making it ideal for agents, IDE tooling, and enterprise knowledge retrieval.","privacy_comments":"N/A","geolocation":"eu"},{"api":"chat","id":"azure/gpt-4.1-nano@swedencentral","object":"model","created":1744654969,"owned_by":"system","input_price":1e-7,"caching_price":1e-7,"cached_price":2.5e-8,"output_price":4e-7,"max_output_tokens":32768,"context_window":1047576,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"For tasks that demand low latency, GPT‑4.1 nano is the fastest and cheapest model in the GPT-4.1 series. It delivers exceptional performance at a small size with its 1 million token context window, and scores 80.1% on MMLU, 50.3% on GPQA, and 9.8% on Aider polyglot coding – even higher than GPT‑4o mini. It’s ideal for tasks like classification or autocompletion.","privacy_comments":"N/A","geolocation":"eu"},{"api":"chat","id":"azure/gpt-4.1-nano@eastus2","object":"model","created":1744654969,"owned_by":"system","input_price":1e-7,"caching_price":1e-7,"cached_price":2.5e-8,"output_price":4e-7,"max_output_tokens":32768,"context_window":1047576,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"For tasks that demand low latency, GPT‑4.1 nano is the fastest and cheapest model in the GPT-4.1 series. It delivers exceptional performance at a small size with its 1 million token context window, and scores 80.1% on MMLU, 50.3% on GPQA, and 9.8% on Aider polyglot coding – even higher than GPT‑4o mini. It’s ideal for tasks like classification or autocompletion.","privacy_comments":"N/A","geolocation":"us"},{"api":"chat","id":"azure/o4-mini@eastus2","object":"model","created":1744824542,"owned_by":"system","input_price":0.0000011,"caching_price":0.0000011,"cached_price":2.75e-7,"output_price":0.0000044,"max_output_tokens":100000,"context_window":200000,"supports_caching":true,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"o3-mini is OpenAI's most recent small reasoning model, providing high intelligence at the same cost and latency targets of o1-mini. o3-mini also supports key developer features, like Structured Outputs, function calling, Batch API, and more. Like other models in the o-series, it is designed to excel at science, math, and coding tasks.","privacy_comments":"N/A","geolocation":"us"},{"api":"chat","id":"azure/gpt-5-mini@eastus2","object":"model","created":1744824542,"owned_by":"system","input_price":2.5e-7,"caching_price":2.5e-7,"cached_price":2.5e-8,"output_price":0.000002,"max_output_tokens":100000,"context_window":200000,"supports_caching":true,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":" ","privacy_comments":"N/A","geolocation":"us"},{"api":"chat","id":"azure/openai-responses/gpt-5.4@eastus2","object":"model","created":1765472906,"owned_by":"system","input_price":0.0000025,"caching_price":0.0000025,"cached_price":2.5e-7,"output_price":0.000015,"max_output_tokens":128000,"context_window":1050000,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"GPT-5.4 is OpenAI’s latest frontier model, unifying the Codex and GPT lines into a single system. It features a 1M+ token context window (922K input, 128K output) with support for text and image inputs, enabling high-context reasoning, coding, and multimodal analysis within the same workflow.\n\nThe model delivers improved performance in coding, document understanding, tool use, and instruction following. It is designed as a strong default for both general-purpose tasks and software engineering, capable of generating production-quality code, synthesizing information across multiple sources, and executing complex multi-step workflows with fewer iterations and greater token efficiency.\n","privacy_comments":"N/A","geolocation":"us"},{"api":"chat","id":"azure/gpt-5.2@eastus2","object":"model","created":1769448542,"owned_by":"system","input_price":0.00000175,"caching_price":0.000014,"cached_price":1.75e-7,"output_price":0.000014,"max_output_tokens":100000,"context_window":200000,"supports_caching":true,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"GPT-5.2 is the latest frontier-grade model in the GPT-5 series, offering stronger agentic and long context perfomance compared to GPT-5.1. It uses adaptive reasoning to allocate computation dynamically, responding quickly to simple queries while spending more depth on complex tasks.\n\nBuilt for broad task coverage, GPT-5.2 delivers consistent gains across math, coding, sciende, and tool calling workloads, with more coherent long-form answers and improved tool-use reliability.","privacy_comments":"N/A","geolocation":"us"},{"api":"chat","id":"azure/openai-responses/gpt-4.1-nano@francecentral","object":"model","created":1744654969,"owned_by":"system","input_price":1e-7,"caching_price":1e-7,"cached_price":2.5e-8,"output_price":4e-7,"max_output_tokens":32768,"context_window":1047576,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"For tasks that demand low latency, GPT‑4.1 nano is the fastest and cheapest model in the GPT-4.1 series. It delivers exceptional performance at a small size with its 1 million token context window, and scores 80.1% on MMLU, 50.3% on GPQA, and 9.8% on Aider polyglot coding – even higher than GPT‑4o mini. It’s ideal for tasks like classification or autocompletion.","privacy_comments":"N/A","geolocation":"eu"},{"api":"chat","id":"azure/gpt-5.4@swedencentral","object":"model","created":1777213257,"owned_by":"system","input_price":0.0000025,"cached_price":2.5e-7,"output_price":0.000015,"max_output_tokens":128000,"context_window":1050000,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"GPT 5.4 is OpenAI's frontier model designed for complex professional workloads, with strong reasoning, high reliability, and improved token efficiency on hard tasks. It features a 1M+ token context window (922K input, 128K output) with support for text and image inputs, enabling large scale reasoning, coding, and multimodal workflows within a single system.","privacy_comments":"N/A","geolocation":"global"},{"api":"chat","id":"azure/gpt-5-mini","object":"model","created":1744824542,"owned_by":"system","input_price":2.5e-7,"caching_price":2.5e-7,"cached_price":2.5e-8,"output_price":0.000002,"max_output_tokens":100000,"context_window":200000,"supports_caching":true,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"","geolocation":"global"},{"api":"chat","id":"azure/openai-responses/gpt-5.5@swedencentral","object":"model","created":1777109995,"owned_by":"system","input_price":0.000005,"cached_price":5e-7,"output_price":0.00003,"max_output_tokens":128000,"context_window":1050000,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"GPT-5.5 is OpenAI's frontier model designed for complex professional workloads, building on GPT-5.4 with stronger reasoning, higher reliability, and improved token efficiency on hard tasks. It features a 1M+ token context window (922K input, 128K output) with support for text and image inputs, enabling large-scale reasoning, coding, and multimodal workflows within a single system.","privacy_comments":"N/A","geolocation":"global"},{"api":"chat","id":"azure/openai-responses/gpt-4.1-mini","object":"model","created":1744654981,"owned_by":"system","input_price":4e-7,"caching_price":4e-7,"cached_price":1e-7,"output_price":0.0000016,"max_output_tokens":32768,"context_window":1047576,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"GPT-4.1 Mini is a mid-sized model delivering performance competitive with GPT-4o at substantially lower latency and cost. It retains a 1 million token context window and scores 45.1% on hard instruction evals, 35.8% on MultiChallenge, and 84.1% on IFEval. Mini also shows strong coding ability (e.g., 31.6% on Aider’s polyglot diff benchmark) and vision understanding, making it suitable for interactive applications with tight performance constraints.","geolocation":"global"},{"api":"chat","id":"azure/gpt-5.1@swedencentral","object":"model","created":1764178142,"owned_by":"system","input_price":0.00000125,"caching_price":0.00001,"cached_price":1.25e-7,"output_price":0.00001,"max_output_tokens":100000,"context_window":200000,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"GPT-5.1 is the latest frontier-grade model in the GPT-5 series, offering stronger general-purpose reasoning, improved instruction adherence, and a more natural conversational style compared to GPT-5. It uses adaptive reasoning to allocate computation dynamically, responding quickly to simple queries while spending more depth on complex tasks. The model produces clearer, more grounded explanations with reduced jargon, making it easier to follow even on technical or multi-step problems.\n\nBuilt for broad task coverage, GPT-5.1 delivers consistent gains across math, coding, and structured analysis workloads, with more coherent long-form answers and improved tool-use reliability. It also features refined conversational alignment, enabling warmer, more intuitive responses without compromising precision. GPT-5.1 serves as the primary full-capability successor to GPT-5","privacy_comments":"N/A","geolocation":"eu"},{"api":"chat","id":"azure/openai-responses/gpt-5.4@francecentral","object":"model","created":1765472906,"owned_by":"system","input_price":0.0000025,"caching_price":0.0000025,"cached_price":2.5e-7,"output_price":0.000015,"max_output_tokens":128000,"context_window":1050000,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"GPT-5.4 is OpenAI’s latest frontier model, unifying the Codex and GPT lines into a single system. It features a 1M+ token context window (922K input, 128K output) with support for text and image inputs, enabling high-context reasoning, coding, and multimodal analysis within the same workflow.\n\nThe model delivers improved performance in coding, document understanding, tool use, and instruction following. It is designed as a strong default for both general-purpose tasks and software engineering, capable of generating production-quality code, synthesizing information across multiple sources, and executing complex multi-step workflows with fewer iterations and greater token efficiency.\n","privacy_comments":"N/A","geolocation":"us"},{"api":"chat","id":"azure/gpt-4.1-nano","object":"model","created":1744654969,"owned_by":"system","input_price":1e-7,"caching_price":1e-7,"cached_price":2.5e-8,"output_price":4e-7,"max_output_tokens":32768,"context_window":1047576,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"For tasks that demand low latency, GPT‑4.1 nano is the fastest and cheapest model in the GPT-4.1 series. It delivers exceptional performance at a small size with its 1 million token context window, and scores 80.1% on MMLU, 50.3% on GPQA, and 9.8% on Aider polyglot coding – even higher than GPT‑4o mini. It’s ideal for tasks like classification or autocompletion.","geolocation":"global"},{"api":"chat","id":"azure/gpt-5@eastus2","object":"model","created":1744824542,"owned_by":"system","input_price":0.00000125,"caching_price":0.00001,"cached_price":1.25e-7,"output_price":0.00001,"max_output_tokens":100000,"context_window":200000,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"GPT-5 is OpenAI’s most advanced model, offering major improvements in reasoning, code quality, and user experience. It is optimized for complex tasks that require step-by-step reasoning, instruction following, and accuracy in high-stakes use cases. It supports test-time routing features and advanced prompt understanding, including user-specified intent like \"think hard about this.\" Improvements include reductions in hallucination, sycophancy, and better performance in coding, writing, and health-related tasks.","privacy_comments":"N/A","geolocation":"us"},{"api":"chat","id":"azure/o4-mini@francecentral","object":"model","created":1744824542,"owned_by":"system","input_price":0.0000011,"caching_price":0.0000011,"cached_price":2.75e-7,"output_price":0.0000044,"max_output_tokens":100000,"context_window":200000,"supports_caching":true,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"o3-mini is OpenAI's most recent small reasoning model, providing high intelligence at the same cost and latency targets of o1-mini. o3-mini also supports key developer features, like Structured Outputs, function calling, Batch API, and more. Like other models in the o-series, it is designed to excel at science, math, and coding tasks.","privacy_comments":"N/A","geolocation":"eu"},{"api":"chat","id":"azure/openai-responses/gpt-4.1","object":"model","created":1744654985,"owned_by":"system","input_price":0.000002,"caching_price":0.000002,"cached_price":5e-7,"output_price":0.000008,"max_output_tokens":32768,"context_window":1047576,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"GPT-4.1 is a flagship large language model optimized for advanced instruction following, real-world software engineering, and long-context reasoning. It supports a 1 million token context window and outperforms GPT-4o and GPT-4.5 across coding (54.6% SWE-bench Verified), instruction compliance (87.4% IFEval), and multimodal understanding benchmarks. It is tuned for precise code diffs, agent reliability, and high recall in large document contexts, making it ideal for agents, IDE tooling, and enterprise knowledge retrieval.","geolocation":"global"},{"api":"chat","id":"azure/gpt-5.1@eastus2","object":"model","created":1764178142,"owned_by":"system","input_price":0.00000125,"caching_price":0.00001,"cached_price":1.25e-7,"output_price":0.00001,"max_output_tokens":100000,"context_window":200000,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"GPT-5.1 is the latest frontier-grade model in the GPT-5 series, offering stronger general-purpose reasoning, improved instruction adherence, and a more natural conversational style compared to GPT-5. It uses adaptive reasoning to allocate computation dynamically, responding quickly to simple queries while spending more depth on complex tasks. The model produces clearer, more grounded explanations with reduced jargon, making it easier to follow even on technical or multi-step problems.\n\nBuilt for broad task coverage, GPT-5.1 delivers consistent gains across math, coding, and structured analysis workloads, with more coherent long-form answers and improved tool-use reliability. It also features refined conversational alignment, enabling warmer, more intuitive responses without compromising precision. GPT-5.1 serves as the primary full-capability successor to GPT-5","privacy_comments":"N/A","geolocation":"us"},{"api":"chat","id":"azure/gpt-4.1-mini@westus3","object":"model","created":1744654981,"owned_by":"system","input_price":4e-7,"caching_price":4e-7,"cached_price":1e-7,"output_price":0.0000016,"max_output_tokens":32768,"context_window":1047576,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"GPT-4.1 Mini is a mid-sized model delivering performance competitive with GPT-4o at substantially lower latency and cost. It retains a 1 million token context window and scores 45.1% on hard instruction evals, 35.8% on MultiChallenge, and 84.1% on IFEval. Mini also shows strong coding ability (e.g., 31.6% on Aider’s polyglot diff benchmark) and vision understanding, making it suitable for interactive applications with tight performance constraints.","privacy_comments":"N/A","geolocation":"us"},{"api":"chat","id":"azure/gpt-4.1-nano@francecentral","object":"model","created":1744654969,"owned_by":"system","input_price":1e-7,"caching_price":1e-7,"cached_price":2.5e-8,"output_price":4e-7,"max_output_tokens":32768,"context_window":1047576,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"For tasks that demand low latency, GPT‑4.1 nano is the fastest and cheapest model in the GPT-4.1 series. It delivers exceptional performance at a small size with its 1 million token context window, and scores 80.1% on MMLU, 50.3% on GPQA, and 9.8% on Aider polyglot coding – even higher than GPT‑4o mini. It’s ideal for tasks like classification or autocompletion.","privacy_comments":"N/A","geolocation":"eu"},{"api":"chat","id":"azure/gpt-5@uksouth","object":"model","created":1744824542,"owned_by":"system","input_price":0.00000125,"caching_price":0.00001,"cached_price":1.25e-7,"output_price":0.00001,"max_output_tokens":100000,"context_window":200000,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"GPT-5 is OpenAI’s most advanced model, offering major improvements in reasoning, code quality, and user experience. It is optimized for complex tasks that require step-by-step reasoning, instruction following, and accuracy in high-stakes use cases. It supports test-time routing features and advanced prompt understanding, including user-specified intent like \"think hard about this.\" Improvements include reductions in hallucination, sycophancy, and better performance in coding, writing, and health-related tasks.","privacy_comments":"N/A","geolocation":"eu"},{"api":"chat","id":"azure/gpt-4.1-nano@uksouth","object":"model","created":1744654969,"owned_by":"system","input_price":1e-7,"caching_price":1e-7,"cached_price":2.5e-8,"output_price":4e-7,"max_output_tokens":32768,"context_window":1047576,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"For tasks that demand low latency, GPT‑4.1 nano is the fastest and cheapest model in the GPT-4.1 series. It delivers exceptional performance at a small size with its 1 million token context window, and scores 80.1% on MMLU, 50.3% on GPQA, and 9.8% on Aider polyglot coding – even higher than GPT‑4o mini. It’s ideal for tasks like classification or autocompletion.","privacy_comments":"N/A","geolocation":"eu"},{"api":"chat","id":"azure/gpt-5-mini@francecentral","object":"model","created":1744824542,"owned_by":"system","input_price":2.5e-7,"caching_price":2.5e-7,"cached_price":2.5e-8,"output_price":0.000002,"max_output_tokens":100000,"context_window":200000,"supports_caching":true,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":" ","privacy_comments":"N/A","geolocation":"eu"},{"api":"chat","id":"azure/gpt-5-nano@swedencentral","object":"model","created":1744824542,"owned_by":"system","input_price":5e-8,"caching_price":5e-8,"cached_price":5e-9,"output_price":4e-7,"max_output_tokens":100000,"context_window":200000,"supports_caching":true,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":" ","privacy_comments":"N/A","geolocation":"eu"},{"api":"chat","id":"azure/openai-responses/gpt-4.1-mini@westus3","object":"model","created":1744654981,"owned_by":"system","input_price":4e-7,"caching_price":4e-7,"cached_price":1e-7,"output_price":0.0000016,"max_output_tokens":32768,"context_window":1047576,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"GPT-4.1 Mini is a mid-sized model delivering performance competitive with GPT-4o at substantially lower latency and cost. It retains a 1 million token context window and scores 45.1% on hard instruction evals, 35.8% on MultiChallenge, and 84.1% on IFEval. Mini also shows strong coding ability (e.g., 31.6% on Aider’s polyglot diff benchmark) and vision understanding, making it suitable for interactive applications with tight performance constraints.","privacy_comments":"N/A","geolocation":"us"},{"api":"chat","id":"azure/gpt-5.5@swedencentral","object":"model","created":1777109995,"owned_by":"system","input_price":0.000005,"cached_price":5e-7,"output_price":0.00003,"max_output_tokens":128000,"context_window":1050000,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"GPT-5.5 is OpenAI's frontier model designed for complex professional workloads, building on GPT-5.4 with stronger reasoning, higher reliability, and improved token efficiency on hard tasks. It features a 1M+ token context window (922K input, 128K output) with support for text and image inputs, enabling large-scale reasoning, coding, and multimodal workflows within a single system.","privacy_comments":"N/A","geolocation":"global"},{"api":"chat","id":"azure/gpt-4.1-mini@uksouth","object":"model","created":1744654981,"owned_by":"system","input_price":4e-7,"caching_price":4e-7,"cached_price":1e-7,"output_price":0.0000016,"max_output_tokens":32768,"context_window":1047576,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"GPT-4.1 Mini is a mid-sized model delivering performance competitive with GPT-4o at substantially lower latency and cost. It retains a 1 million token context window and scores 45.1% on hard instruction evals, 35.8% on MultiChallenge, and 84.1% on IFEval. Mini also shows strong coding ability (e.g., 31.6% on Aider’s polyglot diff benchmark) and vision understanding, making it suitable for interactive applications with tight performance constraints.","privacy_comments":"N/A","geolocation":"eu"},{"api":"chat","id":"azure/gpt-4.1-mini","object":"model","created":1744654981,"owned_by":"system","input_price":4e-7,"caching_price":4e-7,"cached_price":1e-7,"output_price":0.0000016,"max_output_tokens":32768,"context_window":1047576,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"GPT-4.1 Mini is a mid-sized model delivering performance competitive with GPT-4o at substantially lower latency and cost. It retains a 1 million token context window and scores 45.1% on hard instruction evals, 35.8% on MultiChallenge, and 84.1% on IFEval. Mini also shows strong coding ability (e.g., 31.6% on Aider’s polyglot diff benchmark) and vision understanding, making it suitable for interactive applications with tight performance constraints.","geolocation":"global"},{"api":"chat","id":"azure/gpt-5.5@eastus2","object":"model","created":1777109995,"owned_by":"system","input_price":0.000005,"cached_price":5e-7,"output_price":0.00003,"max_output_tokens":128000,"context_window":1050000,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"GPT-5.5 is OpenAI's frontier model designed for complex professional workloads, building on GPT-5.4 with stronger reasoning, higher reliability, and improved token efficiency on hard tasks. It features a 1M+ token context window (922K input, 128K output) with support for text and image inputs, enabling large-scale reasoning, coding, and multimodal workflows within a single system.","privacy_comments":"N/A","geolocation":"global"},{"api":"chat","id":"azure/openai-responses/gpt-4.1-nano","object":"model","created":1744654969,"owned_by":"system","input_price":1e-7,"caching_price":1e-7,"cached_price":2.5e-8,"output_price":4e-7,"max_output_tokens":32768,"context_window":1047576,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"For tasks that demand low latency, GPT‑4.1 nano is the fastest and cheapest model in the GPT-4.1 series. It delivers exceptional performance at a small size with its 1 million token context window, and scores 80.1% on MMLU, 50.3% on GPQA, and 9.8% on Aider polyglot coding – even higher than GPT‑4o mini. It’s ideal for tasks like classification or autocompletion.","geolocation":"global"},{"api":"chat","id":"azure/gpt-4.1@swedencentral","object":"model","created":1744654985,"owned_by":"system","input_price":0.000002,"caching_price":0.000002,"cached_price":5e-7,"output_price":0.000008,"max_output_tokens":32768,"context_window":1047576,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"GPT-4.1 is a flagship large language model optimized for advanced instruction following, real-world software engineering, and long-context reasoning. It supports a 1 million token context window and outperforms GPT-4o and GPT-4.5 across coding (54.6% SWE-bench Verified), instruction compliance (87.4% IFEval), and multimodal understanding benchmarks. It is tuned for precise code diffs, agent reliability, and high recall in large document contexts, making it ideal for agents, IDE tooling, and enterprise knowledge retrieval.","privacy_comments":"N/A","geolocation":"eu"},{"api":"chat","id":"azure/gpt-5@francecentral","object":"model","created":1744824542,"owned_by":"system","input_price":0.00000125,"caching_price":0.00001,"cached_price":1.25e-7,"output_price":0.00001,"max_output_tokens":100000,"context_window":200000,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"GPT-5 is OpenAI’s most advanced model, offering major improvements in reasoning, code quality, and user experience. It is optimized for complex tasks that require step-by-step reasoning, instruction following, and accuracy in high-stakes use cases. It supports test-time routing features and advanced prompt understanding, including user-specified intent like \"think hard about this.\" Improvements include reductions in hallucination, sycophancy, and better performance in coding, writing, and health-related tasks.","privacy_comments":"N/A","geolocation":"eu"},{"api":"chat","id":"azure/gpt-5.3-codex@eastus2","object":"model","created":1772010000,"owned_by":"system","input_price":0.00000175,"caching_price":0.000014,"cached_price":1.75e-7,"output_price":0.000014,"max_output_tokens":128000,"context_window":400000,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"GPT-5.3-Codex is OpenAI’s most advanced agentic coding model, combining the frontier software engineering performance of GPT-5.2-Codex with the broader reasoning and professional knowledge capabilities of GPT-5.2.","geolocation":"us"},{"api":"chat","id":"azure/gpt-5.4@francecentral","object":"model","created":1777213257,"owned_by":"system","input_price":0.0000025,"cached_price":2.5e-7,"output_price":0.000015,"max_output_tokens":128000,"context_window":1050000,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"GPT 5.4 is OpenAI's frontier model designed for complex professional workloads, with strong reasoning, high reliability, and improved token efficiency on hard tasks. It features a 1M+ token context window (922K input, 128K output) with support for text and image inputs, enabling large scale reasoning, coding, and multimodal workflows within a single system.","privacy_comments":"N/A","geolocation":"global"},{"api":"chat","id":"azure/gpt-4.1@uksouth","object":"model","created":1744654985,"owned_by":"system","input_price":0.000002,"caching_price":0.000002,"cached_price":5e-7,"output_price":0.000008,"max_output_tokens":32768,"context_window":1047576,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"GPT-4.1 is a flagship large language model optimized for advanced instruction following, real-world software engineering, and long-context reasoning. It supports a 1 million token context window and outperforms GPT-4o and GPT-4.5 across coding (54.6% SWE-bench Verified), instruction compliance (87.4% IFEval), and multimodal understanding benchmarks. It is tuned for precise code diffs, agent reliability, and high recall in large document contexts, making it ideal for agents, IDE tooling, and enterprise knowledge retrieval.","privacy_comments":"N/A","geolocation":"eu"},{"api":"chat","id":"azure/gpt-5-nano","object":"model","created":1744824542,"owned_by":"system","input_price":5e-8,"caching_price":5e-8,"cached_price":5e-9,"output_price":4e-7,"max_output_tokens":100000,"context_window":200000,"supports_caching":true,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"","geolocation":"global"},{"api":"chat","id":"azure/o4-mini@uksouth","object":"model","created":1744824542,"owned_by":"system","input_price":0.0000011,"caching_price":0.0000011,"cached_price":2.75e-7,"output_price":0.0000044,"max_output_tokens":100000,"context_window":200000,"supports_caching":true,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"o3-mini is OpenAI's most recent small reasoning model, providing high intelligence at the same cost and latency targets of o1-mini. o3-mini also supports key developer features, like Structured Outputs, function calling, Batch API, and more. Like other models in the o-series, it is designed to excel at science, math, and coding tasks.","privacy_comments":"N/A","geolocation":"eu"},{"api":"chat","id":"azure/gpt-4.1@westus3","object":"model","created":1744654985,"owned_by":"system","input_price":0.000002,"caching_price":0.000002,"cached_price":5e-7,"output_price":0.000008,"max_output_tokens":32768,"context_window":1047576,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"GPT-4.1 is a flagship large language model optimized for advanced instruction following, real-world software engineering, and long-context reasoning. It supports a 1 million token context window and outperforms GPT-4o and GPT-4.5 across coding (54.6% SWE-bench Verified), instruction compliance (87.4% IFEval), and multimodal understanding benchmarks. It is tuned for precise code diffs, agent reliability, and high recall in large document contexts, making it ideal for agents, IDE tooling, and enterprise knowledge retrieval.","privacy_comments":"N/A","geolocation":"us"},{"api":"chat","id":"inceptron/minimax-m2.5","object":"model","created":1777477859,"owned_by":"system","input_price":2.8e-7,"cached_price":3e-8,"output_price":0.0000011,"max_output_tokens":196608,"context_window":196608,"supports_caching":true,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"MiniMax M2.5 is a state of the art language model built for real world productivity and autonomous agent execution. Trained with large scale reinforcement learning across hundreds of thousands of complex digital environments, it delivers leading performance in coding, search, tool use, and professional office workflows, operating with significantly improved speed and token efficiency. Designed to plan like an architect and act with cost efficient precision, M2.5 extends beyond software development into finance, research, and enterprise grade office tasks, bringing high end agentic capability at a fraction of the typical frontier model cost.","privacy_comments":"No training on user data","geolocation":"eu"},{"api":"chat","id":"inceptron/glm-5.1","object":"model","created":1777476141,"owned_by":"system","input_price":0.0000014,"cached_price":2.6e-7,"output_price":0.0000044,"max_output_tokens":202752,"context_window":202752,"supports_caching":false,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"GLM 5.1 is the next generation flagship model from Z.ai, purpose built for long horizon agentic engineering and advanced coding workflows. Delivering state of the art results on benchmarks such as SWE Bench Pro, NL2Repo, and Terminal Bench 2.0, it significantly surpasses its predecessor in real world software development tasks. Unlike earlier models that plateau after initial progress, GLM 5.1 is designed to operate autonomously for extended periods, planning, experimenting, iterating, and refining its approach over hundreds of rounds and thousands of tool calls.","privacy_comments":"No training on user data","geolocation":"eu"},{"api":"chat","id":"inceptron/kimi-k2.6","object":"model","created":1777476469,"owned_by":"system","input_price":8e-7,"cached_price":2e-7,"output_price":0.0000035,"max_output_tokens":262144,"context_window":262144,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Kimi K2.6 is Moonshot AI's latest open weight reasoning model, built for long horizon coding, agentic execution, and multimodal reasoning. It retains the trillion parameter MoE architecture with roughly 32B active parameters and a 256k token context window, while improving agentic benchmark performance and knowledge reliability over K2.5. Native text, image, and video input plus tool driven workflows make it well suited for coding, research, and complex multi step tasks.","privacy_comments":"No training on user data","geolocation":"eu"},{"api":"chat","id":"bedrock/claude-sonnet-4@us-west-2","object":"model","created":1747933971,"owned_by":"system","input_price":0.000003,"caching_price":0.00000375,"cached_price":3e-7,"output_price":0.000015,"max_output_tokens":64000,"context_window":200000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Anthropic's most intelligent model. The first hybrid reasoning model on the market with the highest level of intelligence and capability with toggleable extended thinking. Top-tier results in reasoning, coding, multilingual tasks, long-context handling, honesty, and image processing.","privacy_comments":"https://docs.aws.amazon.com/bedrock/latest/userguide/data-protection.html","geolocation":"us"},{"api":"chat","id":"bedrock/claude-sonnet-4","object":"model","created":1747933971,"owned_by":"system","input_price":0.000003,"caching_price":0.00000375,"cached_price":3e-7,"output_price":0.000015,"max_output_tokens":64000,"context_window":200000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Anthropic's most intelligent model. The first hybrid reasoning model on the market with the highest level of intelligence and capability with toggleable extended thinking. Top-tier results in reasoning, coding, multilingual tasks, long-context handling, honesty, and image processing.","privacy_comments":"https://docs.aws.amazon.com/bedrock/latest/userguide/data-protection.html","geolocation":"global"},{"api":"chat","id":"bedrock/claude-haiku-4-5@eu-west-1","object":"model","created":1761216302,"owned_by":"system","input_price":0.0000011,"caching_price":0.000001375,"cached_price":1.1e-7,"output_price":0.0000055,"max_output_tokens":64000,"context_window":200000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"Anthropic Haiku 4.5","privacy_comments":"https://docs.aws.amazon.com/bedrock/latest/userguide/data-protection.html","geolocation":"eu"},{"api":"chat","id":"bedrock/claude-sonnet-4@eu-west-1","object":"model","created":1747933971,"owned_by":"system","input_price":0.000003,"caching_price":0.00000375,"cached_price":3e-7,"output_price":0.000015,"max_output_tokens":64000,"context_window":200000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Anthropic's most intelligent model. The first hybrid reasoning model on the market with the highest level of intelligence and capability with toggleable extended thinking. Top-tier results in reasoning, coding, multilingual tasks, long-context handling, honesty, and image processing.","privacy_comments":"https://docs.aws.amazon.com/bedrock/latest/userguide/data-protection.html","geolocation":"eu"},{"api":"chat","id":"bedrock/claude-sonnet-4@eu-west-3","object":"model","created":1747933971,"owned_by":"system","input_price":0.000003,"caching_price":0.00000375,"cached_price":3e-7,"output_price":0.000015,"max_output_tokens":64000,"context_window":200000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Anthropic's most intelligent model. The first hybrid reasoning model on the market with the highest level of intelligence and capability with toggleable extended thinking. Top-tier results in reasoning, coding, multilingual tasks, long-context handling, honesty, and image processing.","privacy_comments":"https://docs.aws.amazon.com/bedrock/latest/userguide/data-protection.html","geolocation":"eu"},{"api":"chat","id":"bedrock/claude-opus-4-5@eu-central-1","object":"model","created":1764627436,"owned_by":"system","input_price":0.0000055,"caching_price":0.000006875,"cached_price":5.5e-7,"output_price":0.0000275,"max_output_tokens":64000,"context_window":200000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Premium model combining maximum intelligence with practical performance","privacy_comments":"https://docs.aws.amazon.com/bedrock/latest/userguide/data-protection.html","geolocation":"eu"},{"api":"chat","id":"bedrock/claude-opus-4-5@us-east-2","object":"model","created":1764627436,"owned_by":"system","input_price":0.0000055,"caching_price":0.000006875,"cached_price":5.5e-7,"output_price":0.0000275,"max_output_tokens":64000,"context_window":200000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Premium model combining maximum intelligence with practical performance","privacy_comments":"https://docs.aws.amazon.com/bedrock/latest/userguide/data-protection.html","geolocation":"us"},{"api":"chat","id":"bedrock/claude-haiku-4-5@eu-west-3","object":"model","created":1761216302,"owned_by":"system","input_price":0.0000011,"caching_price":0.000001375,"cached_price":1.1e-7,"output_price":0.0000055,"max_output_tokens":64000,"context_window":200000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"Anthropic Haiku 4.5","privacy_comments":"https://docs.aws.amazon.com/bedrock/latest/userguide/data-protection.html","geolocation":"eu"},{"api":"chat","id":"bedrock/claude-sonnet-4-5@eu-central-1","object":"model","created":1747933971,"owned_by":"system","input_price":0.0000033,"caching_price":0.000004125,"cached_price":3e-7,"output_price":0.0000165,"max_output_tokens":64000,"context_window":200000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Anthropic's most intelligent model. The first hybrid reasoning model on the market with the highest level of intelligence and capability with toggleable extended thinking. Top-tier results in reasoning, coding, multilingual tasks, long-context handling, honesty, and image processing.","privacy_comments":"https://docs.aws.amazon.com/bedrock/latest/userguide/data-protection.html","geolocation":"eu"},{"api":"chat","id":"bedrock/claude-sonnet-4-6@eu-north-1","object":"model","created":1771498893,"owned_by":"system","input_price":0.0000033,"caching_price":0.000004125,"cached_price":3e-7,"output_price":0.0000165,"max_output_tokens":64000,"context_window":1000000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Claude Sonnet 4.6 is Anthropic's best coding model in the world, leading on SWE-bench Verified (77.2%) and OSWorld (61.4%). It delivers sustained autonomous performance on complex tasks for over 30 hours—up from seven hours for Opus 4—maintaining focus and reliability throughout the entire software development lifecycle, with enhanced capabilities in tool handling, memory management, and context processing that make it the strongest model for building complex agents.","privacy_comments":"https://docs.aws.amazon.com/bedrock/latest/userguide/data-protection.html","geolocation":"eu"},{"api":"chat","id":"bedrock/claude-sonnet-4-6@eu-west-1","object":"model","created":1771498893,"owned_by":"system","input_price":0.0000033,"caching_price":0.000004125,"cached_price":3e-7,"output_price":0.0000165,"max_output_tokens":64000,"context_window":1000000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Claude Sonnet 4.6 is Anthropic's best coding model in the world, leading on SWE-bench Verified (77.2%) and OSWorld (61.4%). It delivers sustained autonomous performance on complex tasks for over 30 hours—up from seven hours for Opus 4—maintaining focus and reliability throughout the entire software development lifecycle, with enhanced capabilities in tool handling, memory management, and context processing that make it the strongest model for building complex agents.","privacy_comments":"https://docs.aws.amazon.com/bedrock/latest/userguide/data-protection.html","geolocation":"eu"},{"api":"chat","id":"bedrock/claude-opus-4-5@eu-west-1","object":"model","created":1764627436,"owned_by":"system","input_price":0.0000055,"caching_price":0.000006875,"cached_price":5.5e-7,"output_price":0.0000275,"max_output_tokens":64000,"context_window":200000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Premium model combining maximum intelligence with practical performance","privacy_comments":"https://docs.aws.amazon.com/bedrock/latest/userguide/data-protection.html","geolocation":"eu"},{"api":"chat","id":"bedrock/claude-opus-4-6","object":"model","created":1770315610,"owned_by":"system","input_price":0.000005,"caching_price":0.00000625,"cached_price":5e-7,"output_price":0.000025,"max_output_tokens":128000,"context_window":1000000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Claude Opus 4.6 is Anthropic's most powerful model yet and the best coding model in the world.","privacy_comments":"https://docs.aws.amazon.com/bedrock/latest/userguide/data-protection.html","geolocation":"global"},{"api":"chat","id":"bedrock/claude-sonnet-4-6@eu-central-1","object":"model","created":1771498893,"owned_by":"system","input_price":0.0000033,"caching_price":0.000004125,"cached_price":3e-7,"output_price":0.0000165,"max_output_tokens":64000,"context_window":1000000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Claude Sonnet 4.6 is Anthropic's best coding model in the world, leading on SWE-bench Verified (77.2%) and OSWorld (61.4%). It delivers sustained autonomous performance on complex tasks for over 30 hours—up from seven hours for Opus 4—maintaining focus and reliability throughout the entire software development lifecycle, with enhanced capabilities in tool handling, memory management, and context processing that make it the strongest model for building complex agents.","privacy_comments":"https://docs.aws.amazon.com/bedrock/latest/userguide/data-protection.html","geolocation":"eu"},{"api":"chat","id":"bedrock/claude-sonnet-4@eu-north-1","object":"model","created":1747933971,"owned_by":"system","input_price":0.000003,"caching_price":0.00000375,"cached_price":3e-7,"output_price":0.000015,"max_output_tokens":64000,"context_window":200000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Anthropic's most intelligent model. The first hybrid reasoning model on the market with the highest level of intelligence and capability with toggleable extended thinking. Top-tier results in reasoning, coding, multilingual tasks, long-context handling, honesty, and image processing.","privacy_comments":"https://docs.aws.amazon.com/bedrock/latest/userguide/data-protection.html","geolocation":"eu"},{"api":"chat","id":"bedrock/claude-haiku-4-5@us-east-2","object":"model","created":1761216302,"owned_by":"system","input_price":0.0000011,"caching_price":0.000001375,"cached_price":1.1e-7,"output_price":0.0000055,"max_output_tokens":64000,"context_window":200000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"Anthropic Haiku 4.5","privacy_comments":"https://docs.aws.amazon.com/bedrock/latest/userguide/data-protection.html","geolocation":"us"},{"api":"chat","id":"bedrock/kimi-k2.5@us-west-2","object":"model","created":1774966161,"owned_by":"system","input_price":6e-7,"caching_price":6e-7,"cached_price":6e-7,"output_price":0.000003,"max_output_tokens":16000,"context_window":128000,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Kimi K2.5 on AWS","privacy_comments":"https://docs.aws.amazon.com/bedrock/latest/userguide/data-protection.html","geolocation":"us"},{"api":"chat","id":"bedrock/claude-opus-4-7@eu-west-1","object":"model","created":1776418380,"owned_by":"system","input_price":0.0000055,"caching_price":0.000006875,"cached_price":5.5e-7,"output_price":0.0000275,"max_output_tokens":128000,"context_window":1000000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Claude Opus 4.7 is Anthropic's most capable generally available model, advancing performance across coding, enterprise workflows, and long-running agentic tasks. Coding: Claude Opus 4.7 is built for agentic coding at scale, excelling at long-horizon projects, complex implementations, and polished UI design. It handles the full lifecycle from architecture to deployment, including design-quality UI so senior engineers can delegate complex work with confidence. Enterprise workflows: Claude Opus 4.7 sets the standard for enterprise knowledge work, carrying context across sessions to manage complex, multi-day projects end-to-end. It delivers professional polish on the documents, spreadsheets, and presentations that move work forward.","privacy_comments":"https://docs.aws.amazon.com/bedrock/latest/userguide/data-protection.html","geolocation":"eu"},{"api":"chat","id":"bedrock/claude-sonnet-4-6@eu-west-3","object":"model","created":1771498893,"owned_by":"system","input_price":0.0000033,"caching_price":0.000004125,"cached_price":3e-7,"output_price":0.0000165,"max_output_tokens":64000,"context_window":1000000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Claude Sonnet 4.6 is Anthropic's best coding model in the world, leading on SWE-bench Verified (77.2%) and OSWorld (61.4%). It delivers sustained autonomous performance on complex tasks for over 30 hours—up from seven hours for Opus 4—maintaining focus and reliability throughout the entire software development lifecycle, with enhanced capabilities in tool handling, memory management, and context processing that make it the strongest model for building complex agents.","privacy_comments":"https://docs.aws.amazon.com/bedrock/latest/userguide/data-protection.html","geolocation":"eu"},{"api":"chat","id":"bedrock/claude-opus-4-7@eu-west-3","object":"model","created":1776418380,"owned_by":"system","input_price":0.0000055,"caching_price":0.000006875,"cached_price":5.5e-7,"output_price":0.0000275,"max_output_tokens":128000,"context_window":1000000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Claude Opus 4.7 is Anthropic's most capable generally available model, advancing performance across coding, enterprise workflows, and long-running agentic tasks. Coding: Claude Opus 4.7 is built for agentic coding at scale, excelling at long-horizon projects, complex implementations, and polished UI design. It handles the full lifecycle from architecture to deployment, including design-quality UI so senior engineers can delegate complex work with confidence. Enterprise workflows: Claude Opus 4.7 sets the standard for enterprise knowledge work, carrying context across sessions to manage complex, multi-day projects end-to-end. It delivers professional polish on the documents, spreadsheets, and presentations that move work forward.","privacy_comments":"https://docs.aws.amazon.com/bedrock/latest/userguide/data-protection.html","geolocation":"eu"},{"api":"chat","id":"bedrock/claude-opus-4-7@eu-north-1","object":"model","created":1776418380,"owned_by":"system","input_price":0.0000055,"caching_price":0.000006875,"cached_price":5.5e-7,"output_price":0.0000275,"max_output_tokens":128000,"context_window":1000000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Claude Opus 4.7 is Anthropic's most capable generally available model, advancing performance across coding, enterprise workflows, and long-running agentic tasks. Coding: Claude Opus 4.7 is built for agentic coding at scale, excelling at long-horizon projects, complex implementations, and polished UI design. It handles the full lifecycle from architecture to deployment, including design-quality UI so senior engineers can delegate complex work with confidence. Enterprise workflows: Claude Opus 4.7 sets the standard for enterprise knowledge work, carrying context across sessions to manage complex, multi-day projects end-to-end. It delivers professional polish on the documents, spreadsheets, and presentations that move work forward.","privacy_comments":"https://docs.aws.amazon.com/bedrock/latest/userguide/data-protection.html","geolocation":"eu"},{"api":"chat","id":"bedrock/claude-opus-4-5@eu-north-1","object":"model","created":1764627436,"owned_by":"system","input_price":0.0000055,"caching_price":0.000006875,"cached_price":5.5e-7,"output_price":0.0000275,"max_output_tokens":64000,"context_window":200000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Premium model combining maximum intelligence with practical performance","privacy_comments":"https://docs.aws.amazon.com/bedrock/latest/userguide/data-protection.html","geolocation":"eu"},{"api":"chat","id":"bedrock/claude-3-7-sonnet@eu-central-1","object":"model","created":1718845200,"owned_by":"system","input_price":0.000003,"caching_price":0.00000375,"cached_price":3e-7,"output_price":0.000015,"max_output_tokens":64000,"context_window":200000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Anthropic's most intelligent model. The first hybrid reasoning model on the market with the highest level of intelligence and capability with toggleable extended thinking. Top-tier results in reasoning, coding, multilingual tasks, long-context handling, honesty, and image processing.","privacy_comments":"https://docs.aws.amazon.com/bedrock/latest/userguide/data-protection.html","geolocation":"eu"},{"api":"chat","id":"bedrock/claude-sonnet-4@us-east-1","object":"model","created":1747933971,"owned_by":"system","input_price":0.000003,"caching_price":0.00000375,"cached_price":3e-7,"output_price":0.000015,"max_output_tokens":64000,"context_window":200000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Anthropic's most intelligent model. The first hybrid reasoning model on the market with the highest level of intelligence and capability with toggleable extended thinking. Top-tier results in reasoning, coding, multilingual tasks, long-context handling, honesty, and image processing.","privacy_comments":"https://docs.aws.amazon.com/bedrock/latest/userguide/data-protection.html","geolocation":"us"},{"api":"chat","id":"bedrock/claude-sonnet-4-5@us-east-1","object":"model","created":1747933971,"owned_by":"system","input_price":0.0000033,"caching_price":0.000004125,"cached_price":3e-7,"output_price":0.0000165,"max_output_tokens":64000,"context_window":200000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Anthropic's most intelligent model. The first hybrid reasoning model on the market with the highest level of intelligence and capability with toggleable extended thinking. Top-tier results in reasoning, coding, multilingual tasks, long-context handling, honesty, and image processing.","privacy_comments":"https://docs.aws.amazon.com/bedrock/latest/userguide/data-protection.html","geolocation":"us"},{"api":"chat","id":"bedrock/claude-haiku-4-5@eu-central-1","object":"model","created":1761216302,"owned_by":"system","input_price":0.0000011,"caching_price":0.000001375,"cached_price":1.1e-7,"output_price":0.0000055,"max_output_tokens":64000,"context_window":200000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"Anthropic Haiku 4.5","privacy_comments":"https://docs.aws.amazon.com/bedrock/latest/userguide/data-protection.html","geolocation":"eu"},{"api":"chat","id":"bedrock/claude-3-7-sonnet@eu-west-1","object":"model","created":1718845200,"owned_by":"system","input_price":0.000003,"caching_price":0.00000375,"cached_price":3e-7,"output_price":0.000015,"max_output_tokens":64000,"context_window":200000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Anthropic's most intelligent model. The first hybrid reasoning model on the market with the highest level of intelligence and capability with toggleable extended thinking. Top-tier results in reasoning, coding, multilingual tasks, long-context handling, honesty, and image processing.","privacy_comments":"https://docs.aws.amazon.com/bedrock/latest/userguide/data-protection.html","geolocation":"eu"},{"api":"chat","id":"bedrock/claude-opus-4-7@eu-central-1","object":"model","created":1776418380,"owned_by":"system","input_price":0.0000055,"caching_price":0.000006875,"cached_price":5.5e-7,"output_price":0.0000275,"max_output_tokens":128000,"context_window":1000000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Claude Opus 4.7 is Anthropic's most capable generally available model, advancing performance across coding, enterprise workflows, and long-running agentic tasks. Coding: Claude Opus 4.7 is built for agentic coding at scale, excelling at long-horizon projects, complex implementations, and polished UI design. It handles the full lifecycle from architecture to deployment, including design-quality UI so senior engineers can delegate complex work with confidence. Enterprise workflows: Claude Opus 4.7 sets the standard for enterprise knowledge work, carrying context across sessions to manage complex, multi-day projects end-to-end. It delivers professional polish on the documents, spreadsheets, and presentations that move work forward.","privacy_comments":"https://docs.aws.amazon.com/bedrock/latest/userguide/data-protection.html","geolocation":"eu"},{"api":"chat","id":"bedrock/claude-sonnet-4-5@us-west-2","object":"model","created":1747933971,"owned_by":"system","input_price":0.0000033,"caching_price":0.000004125,"cached_price":3e-7,"output_price":0.0000165,"max_output_tokens":64000,"context_window":200000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Anthropic's most intelligent model. The first hybrid reasoning model on the market with the highest level of intelligence and capability with toggleable extended thinking. Top-tier results in reasoning, coding, multilingual tasks, long-context handling, honesty, and image processing.","privacy_comments":"https://docs.aws.amazon.com/bedrock/latest/userguide/data-protection.html","geolocation":"us"},{"api":"chat","id":"bedrock/claude-3-7-sonnet","object":"model","created":1718845200,"owned_by":"system","input_price":0.000003,"caching_price":0.00000375,"cached_price":3e-7,"output_price":0.000015,"max_output_tokens":64000,"context_window":200000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Anthropic's most intelligent model. The first hybrid reasoning model on the market with the highest level of intelligence and capability with toggleable extended thinking. Top-tier results in reasoning, coding, multilingual tasks, long-context handling, honesty, and image processing.","privacy_comments":"https://docs.aws.amazon.com/bedrock/latest/userguide/data-protection.html","geolocation":"global"},{"api":"chat","id":"bedrock/claude-3-7-sonnet@us-west-2","object":"model","created":1718845200,"owned_by":"system","input_price":0.000003,"caching_price":0.00000375,"cached_price":3e-7,"output_price":0.000015,"max_output_tokens":64000,"context_window":200000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Anthropic's most intelligent model. The first hybrid reasoning model on the market with the highest level of intelligence and capability with toggleable extended thinking. Top-tier results in reasoning, coding, multilingual tasks, long-context handling, honesty, and image processing.","privacy_comments":"https://docs.aws.amazon.com/bedrock/latest/userguide/data-protection.html","geolocation":"global"},{"api":"chat","id":"bedrock/claude-sonnet-4-5@eu-west-3","object":"model","created":1747933971,"owned_by":"system","input_price":0.0000033,"caching_price":0.000004125,"cached_price":3e-7,"output_price":0.0000165,"max_output_tokens":64000,"context_window":200000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Anthropic's most intelligent model. The first hybrid reasoning model on the market with the highest level of intelligence and capability with toggleable extended thinking. Top-tier results in reasoning, coding, multilingual tasks, long-context handling, honesty, and image processing.","privacy_comments":"https://docs.aws.amazon.com/bedrock/latest/userguide/data-protection.html","geolocation":"eu"},{"api":"chat","id":"bedrock/claude-haiku-4-5@eu-north-1","object":"model","created":1761216302,"owned_by":"system","input_price":0.0000011,"caching_price":0.000001375,"cached_price":1.1e-7,"output_price":0.0000055,"max_output_tokens":64000,"context_window":200000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"Anthropic Haiku 4.5","privacy_comments":"https://docs.aws.amazon.com/bedrock/latest/userguide/data-protection.html","geolocation":"eu"},{"api":"chat","id":"bedrock/claude-opus-4-5@us-east-1","object":"model","created":1764627436,"owned_by":"system","input_price":0.0000055,"caching_price":0.000006875,"cached_price":5.5e-7,"output_price":0.0000275,"max_output_tokens":64000,"context_window":200000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Premium model combining maximum intelligence with practical performance","privacy_comments":"https://docs.aws.amazon.com/bedrock/latest/userguide/data-protection.html","geolocation":"us"},{"api":"chat","id":"bedrock/kimi-k2.5@us-east-2","object":"model","created":1774966161,"owned_by":"system","input_price":6e-7,"caching_price":6e-7,"cached_price":6e-7,"output_price":0.000003,"max_output_tokens":16000,"context_window":128000,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Kimi K2.5 on AWS","privacy_comments":"https://docs.aws.amazon.com/bedrock/latest/userguide/data-protection.html","geolocation":"us"},{"api":"chat","id":"bedrock/claude-opus-4-7","object":"model","created":1776418380,"owned_by":"system","input_price":0.000005,"caching_price":0.00000625,"cached_price":5e-7,"output_price":0.000025,"max_output_tokens":128000,"context_window":1000000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Claude Opus 4.7 is Anthropic's most capable generally available model, advancing performance across coding, enterprise workflows, and long-running agentic tasks. Coding: Claude Opus 4.7 is built for agentic coding at scale, excelling at long-horizon projects, complex implementations, and polished UI design. It handles the full lifecycle from architecture to deployment, including design-quality UI so senior engineers can delegate complex work with confidence. Enterprise workflows: Claude Opus 4.7 sets the standard for enterprise knowledge work, carrying context across sessions to manage complex, multi-day projects end-to-end. It delivers professional polish on the documents, spreadsheets, and presentations that move work forward.","privacy_comments":"https://docs.aws.amazon.com/bedrock/latest/userguide/data-protection.html","geolocation":"global"},{"api":"chat","id":"bedrock/claude-opus-4-5@eu-west-3","object":"model","created":1764627436,"owned_by":"system","input_price":0.0000055,"caching_price":0.000006875,"cached_price":5.5e-7,"output_price":0.0000275,"max_output_tokens":64000,"context_window":200000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Premium model combining maximum intelligence with practical performance","privacy_comments":"https://docs.aws.amazon.com/bedrock/latest/userguide/data-protection.html","geolocation":"eu"},{"api":"chat","id":"bedrock/claude-haiku-4-5","object":"model","created":1761216302,"owned_by":"system","input_price":0.000001,"caching_price":0.00000125,"cached_price":1e-7,"output_price":0.000005,"max_output_tokens":64000,"context_window":200000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"Anthropic Haiku 4.5","privacy_comments":"https://docs.aws.amazon.com/bedrock/latest/userguide/data-protection.html","geolocation":"global"},{"api":"chat","id":"bedrock/claude-haiku-4-5@us-west-2","object":"model","created":1761216302,"owned_by":"system","input_price":0.0000011,"caching_price":0.000001375,"cached_price":1.1e-7,"output_price":0.0000055,"max_output_tokens":64000,"context_window":200000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"Anthropic Haiku 4.5","privacy_comments":"https://docs.aws.amazon.com/bedrock/latest/userguide/data-protection.html","geolocation":"us"},{"api":"chat","id":"bedrock/kimi-k2.5@eu-west-2","object":"model","created":1774966161,"owned_by":"system","input_price":7.2e-7,"caching_price":7.2e-7,"cached_price":7.2e-7,"output_price":0.0000036,"max_output_tokens":16000,"context_window":128000,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Kimi K2.5 on AWS","privacy_comments":"https://docs.aws.amazon.com/bedrock/latest/userguide/data-protection.html","geolocation":"eu"},{"api":"chat","id":"bedrock/claude-sonnet-4@eu-central-1","object":"model","created":1747933971,"owned_by":"system","input_price":0.000003,"caching_price":0.00000375,"cached_price":3e-7,"output_price":0.000015,"max_output_tokens":64000,"context_window":200000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Anthropic's most intelligent model. The first hybrid reasoning model on the market with the highest level of intelligence and capability with toggleable extended thinking. Top-tier results in reasoning, coding, multilingual tasks, long-context handling, honesty, and image processing.","privacy_comments":"https://docs.aws.amazon.com/bedrock/latest/userguide/data-protection.html","geolocation":"eu"},{"api":"chat","id":"bedrock/claude-sonnet-4-6@us-west-2","object":"model","created":1771498893,"owned_by":"system","input_price":0.0000033,"caching_price":0.000004125,"cached_price":3e-7,"output_price":0.0000165,"max_output_tokens":64000,"context_window":1000000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Claude Sonnet 4.6 is Anthropic's best coding model in the world, leading on SWE-bench Verified (77.2%) and OSWorld (61.4%). It delivers sustained autonomous performance on complex tasks for over 30 hours—up from seven hours for Opus 4—maintaining focus and reliability throughout the entire software development lifecycle, with enhanced capabilities in tool handling, memory management, and context processing that make it the strongest model for building complex agents.","privacy_comments":"https://docs.aws.amazon.com/bedrock/latest/userguide/data-protection.html","geolocation":"us"},{"api":"chat","id":"bedrock/claude-sonnet-4-6@us-east-1","object":"model","created":1771498893,"owned_by":"system","input_price":0.0000033,"caching_price":0.000004125,"cached_price":3e-7,"output_price":0.0000165,"max_output_tokens":64000,"context_window":1000000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Claude Sonnet 4.6 is Anthropic's best coding model in the world, leading on SWE-bench Verified (77.2%) and OSWorld (61.4%). It delivers sustained autonomous performance on complex tasks for over 30 hours—up from seven hours for Opus 4—maintaining focus and reliability throughout the entire software development lifecycle, with enhanced capabilities in tool handling, memory management, and context processing that make it the strongest model for building complex agents.","privacy_comments":"https://docs.aws.amazon.com/bedrock/latest/userguide/data-protection.html","geolocation":"us"},{"api":"chat","id":"bedrock/claude-sonnet-4-6@us-east-2","object":"model","created":1771498893,"owned_by":"system","input_price":0.0000033,"caching_price":0.000004125,"cached_price":3e-7,"output_price":0.0000165,"max_output_tokens":64000,"context_window":1000000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Claude Sonnet 4.6 is Anthropic's best coding model in the world, leading on SWE-bench Verified (77.2%) and OSWorld (61.4%). It delivers sustained autonomous performance on complex tasks for over 30 hours—up from seven hours for Opus 4—maintaining focus and reliability throughout the entire software development lifecycle, with enhanced capabilities in tool handling, memory management, and context processing that make it the strongest model for building complex agents.","privacy_comments":"https://docs.aws.amazon.com/bedrock/latest/userguide/data-protection.html","geolocation":"us"},{"api":"chat","id":"bedrock/kimi-k2.5@us-east-1","object":"model","created":1774966161,"owned_by":"system","input_price":6e-7,"caching_price":6e-7,"cached_price":6e-7,"output_price":0.000003,"max_output_tokens":16000,"context_window":128000,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Kimi K2.5 on AWS","privacy_comments":"https://docs.aws.amazon.com/bedrock/latest/userguide/data-protection.html","geolocation":"us"},{"api":"chat","id":"bedrock/claude-haiku-4-5@us-east-1","object":"model","created":1761216302,"owned_by":"system","input_price":0.0000011,"caching_price":0.000001375,"cached_price":1.1e-7,"output_price":0.0000055,"max_output_tokens":64000,"context_window":200000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"Anthropic Haiku 4.5","privacy_comments":"https://docs.aws.amazon.com/bedrock/latest/userguide/data-protection.html","geolocation":"us"},{"api":"chat","id":"bedrock/kimi-k2.5@eu-north-1","object":"model","created":1774966161,"owned_by":"system","input_price":7.2e-7,"caching_price":7.2e-7,"cached_price":7.2e-7,"output_price":0.0000036,"max_output_tokens":16000,"context_window":128000,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Kimi K2.5 on AWS","privacy_comments":"https://docs.aws.amazon.com/bedrock/latest/userguide/data-protection.html","geolocation":"eu"},{"api":"chat","id":"bedrock/claude-opus-4-5","object":"model","created":1764627436,"owned_by":"system","input_price":0.000005,"caching_price":0.00000625,"cached_price":5e-7,"output_price":0.000025,"max_output_tokens":64000,"context_window":200000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Premium model combining maximum intelligence with practical performance","privacy_comments":"https://docs.aws.amazon.com/bedrock/latest/userguide/data-protection.html","geolocation":"global"},{"api":"chat","id":"bedrock/claude-3-7-sonnet@eu-north-1","object":"model","created":1718845200,"owned_by":"system","input_price":0.000003,"caching_price":0.00000375,"cached_price":3e-7,"output_price":0.000015,"max_output_tokens":64000,"context_window":200000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Anthropic's most intelligent model. The first hybrid reasoning model on the market with the highest level of intelligence and capability with toggleable extended thinking. Top-tier results in reasoning, coding, multilingual tasks, long-context handling, honesty, and image processing.","privacy_comments":"https://docs.aws.amazon.com/bedrock/latest/userguide/data-protection.html","geolocation":"eu"},{"api":"chat","id":"bedrock/claude-sonnet-4@us-east-2","object":"model","created":1747933971,"owned_by":"system","input_price":0.000003,"caching_price":0.00000375,"cached_price":3e-7,"output_price":0.000015,"max_output_tokens":64000,"context_window":200000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Anthropic's most intelligent model. The first hybrid reasoning model on the market with the highest level of intelligence and capability with toggleable extended thinking. Top-tier results in reasoning, coding, multilingual tasks, long-context handling, honesty, and image processing.","privacy_comments":"https://docs.aws.amazon.com/bedrock/latest/userguide/data-protection.html","geolocation":"us"},{"api":"chat","id":"bedrock/claude-3-7-sonnet@us-east-1","object":"model","created":1718845200,"owned_by":"system","input_price":0.000003,"caching_price":0.00000375,"cached_price":3e-7,"output_price":0.000015,"max_output_tokens":64000,"context_window":200000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Anthropic's most intelligent model. The first hybrid reasoning model on the market with the highest level of intelligence and capability with toggleable extended thinking. Top-tier results in reasoning, coding, multilingual tasks, long-context handling, honesty, and image processing.","privacy_comments":"https://docs.aws.amazon.com/bedrock/latest/userguide/data-protection.html","geolocation":"global"},{"api":"chat","id":"bedrock/claude-3-7-sonnet@us-east-2","object":"model","created":1718845200,"owned_by":"system","input_price":0.000003,"caching_price":0.00000375,"cached_price":3e-7,"output_price":0.000015,"max_output_tokens":64000,"context_window":200000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Anthropic's most intelligent model. The first hybrid reasoning model on the market with the highest level of intelligence and capability with toggleable extended thinking. Top-tier results in reasoning, coding, multilingual tasks, long-context handling, honesty, and image processing.","privacy_comments":"https://docs.aws.amazon.com/bedrock/latest/userguide/data-protection.html","geolocation":"global"},{"api":"chat","id":"bedrock/claude-sonnet-4-5@eu-north-1","object":"model","created":1747933971,"owned_by":"system","input_price":0.0000033,"caching_price":0.000004125,"cached_price":3e-7,"output_price":0.0000165,"max_output_tokens":64000,"context_window":200000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Anthropic's most intelligent model. The first hybrid reasoning model on the market with the highest level of intelligence and capability with toggleable extended thinking. Top-tier results in reasoning, coding, multilingual tasks, long-context handling, honesty, and image processing.","privacy_comments":"https://docs.aws.amazon.com/bedrock/latest/userguide/data-protection.html","geolocation":"eu"},{"api":"chat","id":"bedrock/claude-sonnet-4-5@eu-west-1","object":"model","created":1747933971,"owned_by":"system","input_price":0.0000033,"caching_price":0.000004125,"cached_price":3e-7,"output_price":0.0000165,"max_output_tokens":64000,"context_window":200000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Anthropic's most intelligent model. The first hybrid reasoning model on the market with the highest level of intelligence and capability with toggleable extended thinking. Top-tier results in reasoning, coding, multilingual tasks, long-context handling, honesty, and image processing.","privacy_comments":"https://docs.aws.amazon.com/bedrock/latest/userguide/data-protection.html","geolocation":"eu"},{"api":"chat","id":"bedrock/claude-sonnet-4-5@us-east-2","object":"model","created":1747933971,"owned_by":"system","input_price":0.0000033,"caching_price":0.000004125,"cached_price":3e-7,"output_price":0.0000165,"max_output_tokens":64000,"context_window":200000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Anthropic's most intelligent model. The first hybrid reasoning model on the market with the highest level of intelligence and capability with toggleable extended thinking. Top-tier results in reasoning, coding, multilingual tasks, long-context handling, honesty, and image processing.","privacy_comments":"https://docs.aws.amazon.com/bedrock/latest/userguide/data-protection.html","geolocation":"us"},{"api":"chat","id":"bedrock/claude-sonnet-4-5","object":"model","created":1747933971,"owned_by":"system","input_price":0.000003,"caching_price":0.00000375,"cached_price":3e-7,"output_price":0.000015,"max_output_tokens":64000,"context_window":200000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Anthropic's most intelligent model. The first hybrid reasoning model on the market with the highest level of intelligence and capability with toggleable extended thinking. Top-tier results in reasoning, coding, multilingual tasks, long-context handling, honesty, and image processing.","privacy_comments":"https://docs.aws.amazon.com/bedrock/latest/userguide/data-protection.html","geolocation":"global"},{"api":"chat","id":"bedrock/claude-sonnet-4-6","object":"model","created":1771498893,"owned_by":"system","input_price":0.000003,"caching_price":0.00000375,"cached_price":3e-7,"output_price":0.000015,"max_output_tokens":64000,"context_window":1000000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Claude Sonnet 4.6 is Anthropic's best coding model in the world, leading on SWE-bench Verified (77.2%) and OSWorld (61.4%). It delivers sustained autonomous performance on complex tasks for over 30 hours—up from seven hours for Opus 4—maintaining focus and reliability throughout the entire software development lifecycle, with enhanced capabilities in tool handling, memory management, and context processing that make it the strongest model for building complex agents.","privacy_comments":"https://docs.aws.amazon.com/bedrock/latest/userguide/data-protection.html","geolocation":"global"},{"api":"chat","id":"bedrock/claude-opus-4-5@us-west-2","object":"model","created":1764627436,"owned_by":"system","input_price":0.0000055,"caching_price":0.000006875,"cached_price":5.5e-7,"output_price":0.0000275,"max_output_tokens":64000,"context_window":200000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Premium model combining maximum intelligence with practical performance","privacy_comments":"https://docs.aws.amazon.com/bedrock/latest/userguide/data-protection.html","geolocation":"us"},{"api":"chat","id":"bedrock/claude-3-7-sonnet@eu-west-3","object":"model","created":1718845200,"owned_by":"system","input_price":0.000003,"caching_price":0.00000375,"cached_price":3e-7,"output_price":0.000015,"max_output_tokens":64000,"context_window":200000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Anthropic's most intelligent model. The first hybrid reasoning model on the market with the highest level of intelligence and capability with toggleable extended thinking. Top-tier results in reasoning, coding, multilingual tasks, long-context handling, honesty, and image processing.","privacy_comments":"https://docs.aws.amazon.com/bedrock/latest/userguide/data-protection.html","geolocation":"eu"},{"api":"chat","id":"openai-responses/gpt-5.4-nano","object":"model","created":1773832106,"owned_by":"system","input_price":2e-7,"caching_price":0.00000125,"cached_price":2e-8,"output_price":0.00000125,"max_output_tokens":128000,"context_window":400000,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"GPT-5.4 nano is the most lightweight and cost-efficient variant of the GPT-5.4 family, optimized for speed-critical and high-volume tasks. It supports text and image inputs and is designed for low-latency use cases such as classification, data extraction, ranking, and sub-agent execution.\n\nThe model prioritizes responsiveness and efficiency over deep reasoning, making it ideal for pipelines that require fast, reliable outputs at scale. GPT-5.4 nano is well suited for background tasks, real-time systems, and distributed agent architectures where minimizing cost and latency is essential.","data_retention":true,"privacy_comments":"N/A","geolocation":"global"},{"api":"chat","id":"openai-responses/o3-pro","object":"model","created":1749601952,"owned_by":"system","input_price":0.00002,"caching_price":0.00002,"cached_price":0.00002,"output_price":0.00008,"max_output_tokens":100000,"context_window":200000,"supports_caching":false,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"The o3 series of models are trained with reinforcement learning to perform complex reasoning. o1 models think before they answer, producing a long internal chain of thought before responding to the user. The o1 reasoning model is designed to solve hard problems across domains. The knowledge cutoff for o1 and o1-mini models is October, 2023.","data_retention":true,"geolocation":"global"},{"api":"chat","id":"openai-responses/gpt-4.1","object":"model","created":1744654985,"owned_by":"system","input_price":0.000002,"caching_price":0.000002,"cached_price":5e-7,"output_price":0.000008,"max_output_tokens":32768,"context_window":1047576,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"GPT-4.1 is a flagship large language model optimized for advanced instruction following, real-world software engineering, and long-context reasoning. It supports a 1 million token context window and outperforms GPT-4o and GPT-4.5 across coding (54.6% SWE-bench Verified), instruction compliance (87.4% IFEval), and multimodal understanding benchmarks. It is tuned for precise code diffs, agent reliability, and high recall in large document contexts, making it ideal for agents, IDE tooling, and enterprise knowledge retrieval.","data_retention":true,"geolocation":"global"},{"api":"chat","id":"openai-responses/gpt-5.2","object":"model","created":1765472906,"owned_by":"system","input_price":0.00000175,"caching_price":1.75e-7,"cached_price":1.75e-7,"output_price":0.000014,"max_output_tokens":128000,"context_window":400000,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"The best model for coding and agentic tasks across industries","data_retention":true,"geolocation":"global"},{"api":"chat","id":"openai-responses/gpt-5-nano","object":"model","created":1754586455,"owned_by":"system","input_price":5e-8,"caching_price":5e-8,"cached_price":5e-9,"output_price":4e-7,"max_output_tokens":128000,"context_window":400000,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"GPT-5 nano is OpenAI's fastest, cheapest version of GPT-5. It's great for summarization and classification tasks.","data_retention":true,"geolocation":"global"},{"api":"chat","id":"openai-responses/o4-mini","object":"model","created":1744824542,"owned_by":"system","input_price":0.0000011,"caching_price":0.0000011,"cached_price":2.75e-7,"output_price":0.0000044,"max_output_tokens":100000,"context_window":200000,"supports_caching":true,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"o3-mini is OpenAI's most recent small reasoning model, providing high intelligence at the same cost and latency targets of o1-mini. o3-mini also supports key developer features, like Structured Outputs, function calling, Batch API, and more. Like other models in the o-series, it is designed to excel at science, math, and coding tasks.","data_retention":true,"geolocation":"global"},{"api":"chat","id":"openai-responses/gpt-5-codex","object":"model","created":1758711928,"owned_by":"system","input_price":0.00000125,"caching_price":0.00000125,"cached_price":1.25e-7,"output_price":0.00001,"max_output_tokens":128000,"context_window":400000,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"GPT-5-Codex is a version of GPT-5 optimized for agentic coding tasks in Codex or similar environments","data_retention":true,"geolocation":"global"},{"api":"chat","id":"openai-responses/o3-mini","object":"model","created":1738351721,"owned_by":"system","input_price":0.0000011,"caching_price":0.0000011,"cached_price":5.5e-7,"output_price":0.0000044,"max_output_tokens":100000,"context_window":200000,"supports_caching":true,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"o3-mini is OpenAI's most recent small reasoning model, providing high intelligence at the same cost and latency targets of o1-mini. o3-mini also supports key developer features, like Structured Outputs, function calling, Batch API, and more. Like other models in the o-series, it is designed to excel at science, math, and coding tasks.","data_retention":true,"geolocation":"global"},{"api":"chat","id":"openai-responses/gpt-5.3-chat","object":"model","created":1773832106,"owned_by":"system","input_price":0.00000175,"caching_price":0.000014,"cached_price":1.75e-7,"output_price":0.000014,"max_output_tokens":16384,"context_window":128000,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"GPT-5.3 Chat is an update to ChatGPT's most-used model that makes everyday conversations smoother, more useful, and more directly helpful. It delivers more accurate answers with better contextualization and significantly reduces unnecessary refusals, caveats, and overly cautious phrasing that can interrupt conversational flow.","data_retention":true,"privacy_comments":"N/A","geolocation":"global"},{"api":"chat","id":"openai-responses/gpt-5","object":"model","created":1754586506,"owned_by":"system","input_price":0.00000125,"caching_price":0.00001,"cached_price":1.25e-7,"output_price":0.00001,"max_output_tokens":128000,"context_window":400000,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"GPT-5 is OpenAI's flagship model for coding, reasoning, and agentic tasks across domains.","data_retention":true,"geolocation":"global"},{"api":"chat","id":"openai-responses/gpt-4.1-mini","object":"model","created":1744654981,"owned_by":"system","input_price":4e-7,"caching_price":4e-7,"cached_price":1e-7,"output_price":0.0000016,"max_output_tokens":32768,"context_window":1047576,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"GPT-4.1 Mini is a mid-sized model delivering performance competitive with GPT-4o at substantially lower latency and cost. It retains a 1 million token context window and scores 45.1% on hard instruction evals, 35.8% on MultiChallenge, and 84.1% on IFEval. Mini also shows strong coding ability (e.g., 31.6% on Aider’s polyglot diff benchmark) and vision understanding, making it suitable for interactive applications with tight performance constraints.","data_retention":true,"geolocation":"global"},{"api":"chat","id":"openai-responses/gpt-5.1-codex","object":"model","created":1764719016,"owned_by":"system","input_price":0.00000125,"caching_price":0.00000125,"cached_price":1.25e-7,"output_price":0.00001,"max_output_tokens":128000,"context_window":400000,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"GPT-5.1-Codex is a version of GPT-5 optimized for agentic coding tasks in Codex or similar environments","data_retention":true,"geolocation":"global"},{"api":"chat","id":"openai-responses/gpt-5.2-codex","object":"model","created":1764719016,"owned_by":"system","input_price":0.00000175,"caching_price":0.00000175,"cached_price":1.75e-7,"output_price":0.000014,"max_output_tokens":128000,"context_window":400000,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"GPT-5.2-Codex is a version of GPT-5.2 optimized for agentic coding tasks in Codex or similar environments","data_retention":true,"geolocation":"global"},{"api":"chat","id":"openai-responses/gpt-5.5-pro","object":"model","created":1777109483,"owned_by":"system","input_price":0.00003,"output_price":0.00018,"max_output_tokens":128000,"context_window":1050000,"supports_caching":false,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"GPT-5.5 Pro is OpenAI's high-capability model optimized for deep reasoning and accuracy on complex, high-stakes workloads. It features a 1M+ token context window (922K input, 128K output) with support for text and image inputs, and is designed for long-horizon problem solving, agentic coding, and precise execution across multi-step workflows.","data_retention":true,"privacy_comments":"N/A","geolocation":"global"},{"api":"chat","id":"openai-responses/gpt-5-pro","object":"model","created":1759748728,"owned_by":"system","input_price":0.000015,"caching_price":0.00012,"cached_price":0.000015,"output_price":0.00012,"max_output_tokens":272000,"context_window":400000,"supports_caching":false,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"GPT-5 Pro is OpenAI’s extended-reasoning tier of GPT-5, built to push reliability on hard problems, long tool chains, and agentic workflows. It keeps GPT-5’s multimodal skills and very large context (API page lists up to 400K tokens) while allocating more compute to think longer and plan better, improving code generation, math, and complex writing beyond standard GPT-5/“Thinking.” OpenAI positions Pro as the version that “uses extended reasoning for even more comprehensive and accurate answers,” targeting high-stakes tasks and enterprise use.","data_retention":true,"geolocation":"global"},{"api":"chat","id":"openai-responses/gpt-5.1","object":"model","created":1771606800,"owned_by":"system","input_price":0.00000125,"caching_price":0.00001,"cached_price":1.25e-7,"output_price":0.00001,"max_output_tokens":128000,"context_window":400000,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"GPT-5.1 is our flagship model for coding and agentic tasks with configurable reasoning and non-reasoning effort.","data_retention":true,"privacy_comments":"N/A","geolocation":"global"},{"api":"chat","id":"openai-responses/gpt-5.3-codex","object":"model","created":1772010000,"owned_by":"system","input_price":0.00000175,"caching_price":0.000014,"cached_price":1.75e-7,"output_price":0.000014,"max_output_tokens":128000,"context_window":400000,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"GPT-5.3-Codex is OpenAI’s most advanced agentic coding model, combining the frontier software engineering performance of GPT-5.2-Codex with the broader reasoning and professional knowledge capabilities of GPT-5.2.","data_retention":true,"geolocation":"global"},{"api":"chat","id":"openai-responses/gpt-5.4-pro","object":"model","created":1765472906,"owned_by":"system","input_price":0.00003,"caching_price":0.00018,"cached_price":0.00003,"output_price":0.00018,"max_output_tokens":128000,"context_window":1050000,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"GPT-5.4 Pro is OpenAI's most advanced model, building on GPT-5.4's unified architecture with enhanced reasoning capabilities for complex, high-stakes tasks. It features a 1M+ token context window (922K input, 128K output) with support for text and image inputs. Optimized for step-by-step reasoning, instruction following, and accuracy, GPT-5.4 Pro excels at agentic coding, long-context workflows, and multi-step problem solving.","data_retention":true,"privacy_comments":"N/A","geolocation":"global"},{"api":"chat","id":"openai-responses/gpt-5.5","object":"model","created":1777109681,"owned_by":"system","input_price":0.000005,"cached_price":5e-7,"output_price":0.00003,"max_output_tokens":128000,"context_window":1050000,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"GPT-5.5 is OpenAI's frontier model designed for complex professional workloads, building on GPT-5.4 with stronger reasoning, higher reliability, and improved token efficiency on hard tasks. It features a 1M+ token context window (922K input, 128K output) with support for text and image inputs, enabling large-scale reasoning, coding, and multimodal workflows within a single system.","data_retention":true,"privacy_comments":"N/A","geolocation":"global"},{"api":"chat","id":"openai-responses/gpt-4.1-nano","object":"model","created":1744654969,"owned_by":"system","input_price":1e-7,"caching_price":1e-7,"cached_price":2.5e-8,"output_price":4e-7,"max_output_tokens":32768,"context_window":1047576,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"For tasks that demand low latency, GPT‑4.1 nano is the fastest and cheapest model in the GPT-4.1 series. It delivers exceptional performance at a small size with its 1 million token context window, and scores 80.1% on MMLU, 50.3% on GPQA, and 9.8% on Aider polyglot coding – even higher than GPT‑4o mini. It’s ideal for tasks like classification or autocompletion.","data_retention":true,"geolocation":"global"},{"api":"chat","id":"openai-responses/gpt-5-mini","object":"model","created":1754586319,"owned_by":"system","input_price":2.5e-7,"caching_price":2.5e-7,"cached_price":2.5e-8,"output_price":0.000002,"max_output_tokens":128000,"context_window":400000,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"GPT-5 mini is a faster, more cost-efficient version of GPT-5. It's great for well-defined tasks and precise prompts.","data_retention":true,"geolocation":"global"},{"api":"chat","id":"openai-responses/gpt-5.4-mini","object":"model","created":1773832106,"owned_by":"system","input_price":7.5e-7,"caching_price":0.0000045,"cached_price":7.5e-8,"output_price":0.0000045,"max_output_tokens":128000,"context_window":400000,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"GPT-5.4 mini brings the core capabilities of GPT-5.4 to a faster, more efficient model optimized for high-throughput workloads. It supports text and image inputs with strong performance across reasoning, coding, and tool use, while reducing latency and cost for large-scale deployments.\n\nThe model is designed for production environments that require a balance of capability and efficiency, making it well suited for chat applications, coding assistants, and agent workflows that operate at scale. GPT-5.4 mini delivers reliable instruction following, solid multi-step reasoning, and consistent performance across diverse tasks with improved cost efficiency.","data_retention":true,"privacy_comments":"N/A","geolocation":"global"},{"api":"chat","id":"openai-responses/gpt-5.4","object":"model","created":1772799588,"owned_by":"system","input_price":0.0000025,"caching_price":0.0000025,"cached_price":2.5e-7,"output_price":0.000015,"max_output_tokens":128000,"context_window":1050000,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"GPT-5.4 is OpenAI’s latest frontier model, unifying the Codex and GPT lines into a single system. It features a 1M+ token context window (922K input, 128K output) with support for text and image inputs, enabling high-context reasoning, coding, and multimodal analysis within the same workflow.\n\nThe model delivers improved performance in coding, document understanding, tool use, and instruction following. It is designed as a strong default for both general-purpose tasks and software engineering, capable of generating production-quality code, synthesizing information across multiple sources, and executing complex multi-step workflows with fewer iterations and greater token efficiency.\n","data_retention":true,"privacy_comments":"N/A","geolocation":"global"},{"api":"chat","id":"minimaxi/MiniMax-M2.7","object":"model","created":1773915388,"owned_by":"system","input_price":3e-7,"caching_price":0.0000012,"cached_price":6e-8,"output_price":0.0000012,"max_output_tokens":128000,"context_window":200000,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"MiniMax-M2.7 is a next-generation large language model designed for autonomous, real-world productivity and continuous improvement. Built to actively participate in its own evolution, M2.7 integrates advanced agentic capabilities through multi-agent collaboration, enabling it to plan, execute, and refine complex tasks across dynamic environments.\n\nTrained for production-grade performance, M2.7 handles workflows such as live debugging, root cause analysis, financial modeling, and full document generation across Word, Excel, and PowerPoint. It delivers strong results on benchmarks including 56.2% on SWE-Pro and 57.0% on Terminal Bench 2, while achieving a 1495 ELO on GDPval-AA, setting a new standard for multi-agent systems operating in real-world digital workflows.","privacy_comments":"N/A","geolocation":"global"},{"api":"chat","id":"minimaxi/MiniMax-M2.5-highspeed","object":"model","created":1770891388,"owned_by":"system","input_price":6e-7,"caching_price":0.0000024,"cached_price":6e-8,"output_price":0.0000024,"max_output_tokens":128000,"context_window":200000,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"MiniMax-M2.5 is a SOTA large language model designed for real-world productivity. Trained in a diverse range of complex real-world digital working environments, M2.5 builds upon the coding expertise of M2.1 to extend into general office work, reaching fluency in generating and operating Word, Excel, and Powerpoint files, context switching between diverse software environments, and working across different agent and human teams. Scoring 80.2% on SWE-Bench Verified, 51.3% on Multi-SWE-Bench, and 76.3% on BrowseComp, M2.5 is also more token efficient than previous generations, having been trained to optimize its actions and output through planning.","geolocation":"global"},{"api":"chat","id":"minimaxi/MiniMax-M2","object":"model","created":1761646588,"owned_by":"system","input_price":3e-7,"caching_price":0.0000012,"cached_price":3e-7,"output_price":0.0000012,"max_output_tokens":128000,"context_window":200000,"supports_caching":false,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"MiniMax-M2 is a compact, high-efficiency large language model optimized for end-to-end coding and agentic workflows. With 10 billion activated parameters (230 billion total), it delivers near-frontier intelligence across general reasoning, tool use, and multi-step task execution while maintaining low latency and deployment efficiency.","geolocation":"global"},{"api":"chat","id":"minimaxi/MiniMax-M2.7-highspeed","object":"model","created":1773915388,"owned_by":"system","input_price":6e-7,"caching_price":0.0000012,"cached_price":6e-8,"output_price":0.0000024,"max_output_tokens":128000,"context_window":200000,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"MiniMax-M2.7 is a next-generation large language model designed for autonomous, real-world productivity and continuous improvement. Built to actively participate in its own evolution, M2.7 integrates advanced agentic capabilities through multi-agent collaboration, enabling it to plan, execute, and refine complex tasks across dynamic environments.\n\nTrained for production-grade performance, M2.7 handles workflows such as live debugging, root cause analysis, financial modeling, and full document generation across Word, Excel, and PowerPoint. It delivers strong results on benchmarks including 56.2% on SWE-Pro and 57.0% on Terminal Bench 2, while achieving a 1495 ELO on GDPval-AA, setting a new standard for multi-agent systems operating in real-world digital workflows.","privacy_comments":"N/A","geolocation":"global"},{"api":"chat","id":"minimaxi/MiniMax-M2.5","object":"model","created":1770891388,"owned_by":"system","input_price":3e-7,"caching_price":0.0000012,"cached_price":6e-8,"output_price":0.0000012,"max_output_tokens":128000,"context_window":200000,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"MiniMax-M2.5 is a SOTA large language model designed for real-world productivity. Trained in a diverse range of complex real-world digital working environments, M2.5 builds upon the coding expertise of M2.1 to extend into general office work, reaching fluency in generating and operating Word, Excel, and Powerpoint files, context switching between diverse software environments, and working across different agent and human teams. Scoring 80.2% on SWE-Bench Verified, 51.3% on Multi-SWE-Bench, and 76.3% on BrowseComp, M2.5 is also more token efficient than previous generations, having been trained to optimize its actions and output through planning.","geolocation":"global"},{"api":"chat","id":"together/meta-llama/LlamaGuard-2-8b","object":"model","created":1738263837,"owned_by":"system","input_price":2e-7,"caching_price":2e-7,"cached_price":2e-7,"output_price":2e-7,"max_output_tokens":0,"context_window":8192,"supports_caching":false,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":false,"description":"A lightweight and ultra-fast variant of Llama 3.3 70B, for use when quick response times are needed most.","privacy_comments":"Don't store prompts and responses","geolocation":"global"},{"api":"chat","id":"together/deepseek-ai/DeepSeek-R1","object":"model","created":1748631837,"owned_by":"system","input_price":0.000003,"caching_price":0.000007,"cached_price":0.000003,"output_price":0.000007,"max_output_tokens":8192,"context_window":64000,"supports_caching":false,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":false,"description":"DeepSeek-R1-Distill-Qwen-7B is a 7 billion parameter dense language model distilled from DeepSeek-R1, leveraging reinforcement learning-enhanced reasoning data generated by DeepSeek's larger models. The distillation process transfers advanced reasoning, math, and code capabilities into a smaller, more efficient model architecture based on Qwen2.5-Math-7B. This model demonstrates strong performance across mathematical benchmarks (92.8% pass@1 on MATH-500), coding tasks (Codeforces rating 1189), and general reasoning (49.1% pass@1 on GPQA Diamond), achieving competitive accuracy relative to larger models while maintaining smaller inference costs.","privacy_comments":"Don't store prompts and responses","geolocation":"global"},{"api":"chat","id":"together/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo","object":"model","created":1738263837,"owned_by":"system","input_price":1.8e-7,"caching_price":1.8e-7,"cached_price":1.8e-7,"output_price":1.8e-7,"max_output_tokens":0,"context_window":131072,"supports_caching":false,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"A lightweight and ultra-fast variant of Llama 3.3 70B, for use when quick response times are needed most.","privacy_comments":"Don't store prompts and responses","geolocation":"global"},{"api":"chat","id":"together/meta-llama/Llama-3.3-70B-Instruct-Turbo","object":"model","created":1738263837,"owned_by":"system","input_price":8.8e-7,"caching_price":8.8e-7,"cached_price":8.8e-7,"output_price":8.8e-7,"max_output_tokens":0,"context_window":131072,"supports_caching":false,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"A lightweight and ultra-fast variant of Llama 3.3 70B, for use when quick response times are needed most.","privacy_comments":"Don't store prompts and responses","geolocation":"global"},{"api":"chat","id":"together/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo","object":"model","created":1738263837,"owned_by":"system","input_price":8.8e-7,"caching_price":8.8e-7,"cached_price":8.8e-7,"output_price":8.8e-7,"max_output_tokens":0,"context_window":131072,"supports_caching":false,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"A lightweight and ultra-fast variant of Llama 3.3 70B, for use when quick response times are needed most.","privacy_comments":"Don't store prompts and responses","geolocation":"global"},{"api":"chat","id":"together/Qwen/Qwen2.5-7B-Instruct-Turbo","object":"model","created":1745882204,"owned_by":"system","input_price":3e-7,"caching_price":3e-7,"cached_price":3e-7,"output_price":3e-7,"max_output_tokens":0,"context_window":32768,"supports_caching":false,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"Qwen3, the latest generation in the Qwen large language model series, features both dense and mixture-of-experts (MoE) architectures to excel in reasoning, multilingual support, and advanced agent tasks. Its unique ability to switch seamlessly between a thinking mode for complex reasoning and a non-thinking mode for efficient dialogue ensures versatile, high-quality performance.\n\nSignificantly outperforming prior models like QwQ and Qwen2.5, Qwen3 delivers superior mathematics, coding, commonsense reasoning, creative writing, and interactive dialogue capabilities. The Qwen3-30B-A3B variant includes 30.5 billion parameters (3.3 billion activated), 48 layers, 128 experts (8 activated per task), and supports up to 131K token contexts with YaRN, setting a new standard among open-source models.","privacy_comments":"Don't store prompts and responses","geolocation":"global"},{"api":"chat","id":"together/meta-llama/Meta-Llama-3-8B-Instruct-Lite","object":"model","created":1738263837,"owned_by":"system","input_price":1e-7,"caching_price":1e-7,"cached_price":1e-7,"output_price":1e-7,"max_output_tokens":0,"context_window":8192,"supports_caching":false,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":false,"description":"A lightweight and ultra-fast variant of Llama 3.3 70B, for use when quick response times are needed most.","privacy_comments":"Don't store prompts and responses","geolocation":"global"},{"api":"chat","id":"together/Kimi K2.5","object":"model","created":1774953437,"owned_by":"system","input_price":5e-7,"output_price":0.0000028,"max_output_tokens":131072,"context_window":262100,"supports_caching":false,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"Kimi K2.5 is an open-source, native multimodal agentic model built through continual pretraining on approximately 15 trillion mixed visual and text tokens atop Kimi-K2-Base. It seamlessly integrates vision and language understanding with advanced agentic capabilities, adopting a unified model architecture that combines vision and text, instant and thinking modes, as well as conversational and agentic paradigms.","geolocation":"global"},{"api":"chat","id":"together/meta-llama/Llama-3.2-3B-Instruct-Turbo","object":"model","created":1738263837,"owned_by":"system","input_price":6e-8,"caching_price":6e-8,"cached_price":6e-8,"output_price":6e-8,"max_output_tokens":0,"context_window":131072,"supports_caching":false,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"A lightweight and ultra-fast variant of Llama 3.3 70B, for use when quick response times are needed most.","privacy_comments":"Don't store prompts and responses","geolocation":"global"},{"api":"chat","id":"together/Qwen/Qwen2.5-72B-Instruct-Turbo","object":"model","created":1745882204,"owned_by":"system","input_price":0.0000012,"caching_price":0.0000012,"cached_price":0.0000012,"output_price":0.0000012,"max_output_tokens":0,"context_window":32768,"supports_caching":false,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"Qwen3, the latest generation in the Qwen large language model series, features both dense and mixture-of-experts (MoE) architectures to excel in reasoning, multilingual support, and advanced agent tasks. Its unique ability to switch seamlessly between a thinking mode for complex reasoning and a non-thinking mode for efficient dialogue ensures versatile, high-quality performance.\n\nSignificantly outperforming prior models like QwQ and Qwen2.5, Qwen3 delivers superior mathematics, coding, commonsense reasoning, creative writing, and interactive dialogue capabilities. The Qwen3-30B-A3B variant includes 30.5 billion parameters (3.3 billion activated), 48 layers, 128 experts (8 activated per task), and supports up to 131K token contexts with YaRN, setting a new standard among open-source models.","privacy_comments":"Don't store prompts and responses","geolocation":"global"},{"api":"chat","id":"together/deepseek-ai/DeepSeek-V3","object":"model","created":1748631837,"owned_by":"system","input_price":0.00000125,"caching_price":0.00000125,"cached_price":0.00000125,"output_price":0.00000125,"max_output_tokens":0,"context_window":131072,"supports_caching":false,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"DeepSeek-R1-Distill-Qwen-7B is a 7 billion parameter dense language model distilled from DeepSeek-R1, leveraging reinforcement learning-enhanced reasoning data generated by DeepSeek's larger models. The distillation process transfers advanced reasoning, math, and code capabilities into a smaller, more efficient model architecture based on Qwen2.5-Math-7B. This model demonstrates strong performance across mathematical benchmarks (92.8% pass@1 on MATH-500), coding tasks (Codeforces rating 1189), and general reasoning (49.1% pass@1 on GPQA Diamond), achieving competitive accuracy relative to larger models while maintaining smaller inference costs.","privacy_comments":"Don't store prompts and responses","geolocation":"global"},{"api":"chat","id":"groq/openai/gpt-oss-20b","object":"model","created":1754414204,"owned_by":"system","input_price":1e-7,"caching_price":5e-7,"cached_price":1e-7,"output_price":5e-7,"max_output_tokens":32768,"context_window":131072,"supports_caching":false,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"","geolocation":"global"},{"api":"chat","id":"groq/openai/gpt-oss-120b","object":"model","created":1754414204,"owned_by":"system","input_price":1.5e-7,"caching_price":7.5e-7,"cached_price":1.5e-7,"output_price":7.5e-7,"max_output_tokens":32768,"context_window":131072,"supports_caching":false,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"","geolocation":"global"},{"api":"chat","id":"fireworks/deepseek-v3.2","object":"model","created":1775122792,"owned_by":"system","input_price":5.6e-7,"cached_price":2.8e-7,"output_price":0.00000168,"max_output_tokens":20480,"context_window":163840,"supports_caching":true,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"Model from DeepSeek that harmonizes high computational efficiency with superior reasoning and agent performance.","privacy_comments":"N/A","geolocation":"global"},{"api":"chat","id":"fireworks/kimi-k2.6","object":"model","created":1777027118,"owned_by":"system","input_price":9.5e-7,"cached_price":1.6e-7,"output_price":0.000004,"max_output_tokens":32768,"context_window":262144,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Kimi K2.6 is an open-source, native multimodal agentic model that advances practical capabilities in long-horizon coding, coding-driven design, proactive autonomous execution, and swarm-based task orchestration. It features a 1028B Mixture-of-Experts architecture and supports vision, function calling, and agentic paradigms.","privacy_comments":"N/A","geolocation":"global"},{"api":"chat","id":"fireworks/deepseek-v4-pro","object":"model","created":1777470408,"owned_by":"system","input_price":0.00000174,"cached_price":1.5e-7,"output_price":0.00000348,"max_output_tokens":131072,"context_window":1000000,"supports_caching":true,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"DeepSeek V4 Pro is a flagship open source Mixture of Experts model designed for frontier reasoning, advanced coding, and long context intelligence at scale (up to 1M tokens). It introduces a hybrid attention architecture that dramatically improves long context efficiency while reducing KV and compute overhead, along with stability and training enhancements for deep multi step reasoning. It represents a top tier open source system for complex agentic workflows, high precision reasoning, and demanding production workloads.","privacy_comments":"N/A","geolocation":"global"},{"api":"chat","id":"fireworks/glm-5.1","object":"model","created":1777470462,"owned_by":"system","input_price":0.0000014,"cached_price":2.6e-7,"output_price":0.0000044,"max_output_tokens":25344,"context_window":202000,"supports_caching":true,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"GLM 5.1 is Z.ai's next generation flagship model built for agentic engineering, with stronger coding capabilities and sustained performance over long horizon tasks with hundreds of iteration rounds. It's a 754B parameter MoE model.","privacy_comments":"N/A","geolocation":"global"},{"api":"chat","id":"fireworks/qwen3.6-plus","object":"model","created":1777476184,"owned_by":"system","input_price":5e-7,"cached_price":1e-7,"output_price":0.000003,"max_output_tokens":4000,"context_window":131072,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"Qwen 3.6 Plus is Alibaba's latest flagship closed model, available exclusively through Fireworks AI outside of Alibaba's own infrastructure. Please contact Fireworks AI to get dedicated instances for Qwen 3.6 Plus.","privacy_comments":"N/A","geolocation":"global"},{"api":"chat","id":"fireworks/minimax-m2.5","object":"model","created":1775122792,"owned_by":"system","input_price":3e-7,"cached_price":3e-8,"output_price":0.0000012,"max_output_tokens":24576,"context_window":196608,"supports_caching":true,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"MiniMax M2.5 is built for state-of-the-art coding, agentic tool use, search, and office work, extensively trained with reinforcement learning across hundreds of thousands of real-world environments to plan like an architect and generalize across unfamiliar scaffolding and tools. It delivers significantly faster task completion, improved token efficiency, and exceptional cost-effectiveness, making it well-suited for production-scale agentic applications and complex, multi-step workflows.","privacy_comments":"N/A","geolocation":"global"},{"api":"chat","id":"fireworks/glm-5","object":"model","created":1775122792,"owned_by":"system","input_price":0.000001,"cached_price":2e-7,"output_price":0.0000032,"max_output_tokens":25344,"context_window":202752,"supports_caching":true,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"GLM-5 is Z.ai's SOTA model targeting complex systems engineering and long-horizon agentic tasks. It uses a mixture of experts architecture, so it only activates 40 billion of its 744 billion parameters. This model uses Deepseek Sparse Attention to select only the most relevant tokens for attention, reducing the cost of long-context processing. GLM-5 continues improving on top of GLM-4.7 for coding and agentic use cases, and it's also great for document generation for enterprise workloads.","privacy_comments":"N/A","geolocation":"global"},{"api":"chat","id":"fireworks/kimi-k2.5","object":"model","created":1774956928,"owned_by":"system","input_price":6e-7,"caching_price":0.000003,"cached_price":1e-7,"output_price":0.000003,"max_output_tokens":262144,"context_window":262144,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Kimi K2.5 is an open-source, native multimodal agentic model built through continual pretraining on approximately 15 trillion mixed visual and text tokens atop Kimi-K2-Base. It seamlessly integrates vision and language understanding with advanced agentic capabilities, instant and thinking modes, as well as conversational and agentic paradigms.","privacy_comments":"N/A","geolocation":"global"},{"api":"chat","id":"fireworks/minimax-m2.7","object":"model","created":1777470703,"owned_by":"system","input_price":3e-7,"cached_price":6e-8,"output_price":0.0000012,"max_output_tokens":24576,"context_window":196608,"supports_caching":true,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"Mixture of Experts language model. M2.7 is capable of building complex agent harnesses and completing highly elaborate productivity tasks, leveraging Agent Teams, complex Skills, and dynamic tool search.","privacy_comments":"N/A","geolocation":"global"},{"api":"chat","id":"mistral/mistral-large-latest","object":"model","created":1708792361,"owned_by":"system","input_price":5e-7,"caching_price":5e-7,"cached_price":5e-7,"output_price":0.0000015,"max_output_tokens":0,"context_window":131072,"supports_caching":false,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"","data_retention":true,"data_retention_days":30,"geolocation":"eu"},{"api":"chat","id":"mistral/mistral-medium-latest","object":"model","created":1723480361,"owned_by":"system","input_price":4e-7,"caching_price":0.000002,"cached_price":4e-7,"output_price":0.000002,"max_output_tokens":0,"context_window":131072,"supports_caching":false,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"","data_retention":true,"data_retention_days":30,"geolocation":"eu"},{"api":"chat","id":"mistral/devstral-medium-2507","object":"model","created":1723480361,"owned_by":"system","input_price":4e-7,"caching_price":0.000002,"cached_price":4e-7,"output_price":0.000002,"max_output_tokens":0,"context_window":131072,"supports_caching":false,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"","data_retention":true,"data_retention_days":30,"geolocation":"eu"},{"api":"chat","id":"mistral/devstral-small-latest","object":"model","created":1736964224,"owned_by":"system","input_price":1e-7,"caching_price":1e-7,"cached_price":1e-7,"output_price":3e-7,"max_output_tokens":0,"context_window":131072,"supports_caching":false,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"","data_retention":true,"data_retention_days":30,"geolocation":"eu"},{"api":"chat","id":"mistral/pixtral-large-latest","object":"model","created":1723480361,"owned_by":"system","input_price":0.000002,"caching_price":0.000005,"cached_price":0.000002,"output_price":0.000005,"max_output_tokens":0,"context_window":131072,"supports_caching":false,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"","data_retention":true,"data_retention_days":30,"geolocation":"eu"},{"api":"chat","id":"mistral/open-mistral-7b","object":"model","created":1708792361,"owned_by":"system","input_price":2.5e-7,"caching_price":2.5e-7,"cached_price":2.5e-7,"output_price":2.5e-7,"max_output_tokens":0,"context_window":32768,"supports_caching":false,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"","data_retention":true,"data_retention_days":30,"geolocation":"eu"},{"api":"chat","id":"mistral/mistral-small-2503","object":"model","created":1708792361,"owned_by":"system","input_price":1e-7,"caching_price":1e-7,"cached_price":1e-7,"output_price":3e-7,"max_output_tokens":0,"context_window":32768,"supports_caching":false,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"","data_retention":true,"data_retention_days":30,"geolocation":"eu"},{"api":"chat","id":"mistral/mistral-small-latest","object":"model","created":1773916361,"owned_by":"system","input_price":1.5e-7,"caching_price":1.5e-7,"cached_price":1.5e-7,"output_price":6e-7,"max_output_tokens":0,"context_window":256000,"supports_caching":false,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Mistral's powerful hybrid model unifying instruct, reasoning, and coding capabilities in a single model. 119B parameters with 6.5B active.","data_retention":true,"data_retention_days":30,"privacy_comments":"N/A","geolocation":"eu"},{"api":"chat","id":"mistral/mistral-small-2603","object":"model","created":1773916361,"owned_by":"system","input_price":1.5e-7,"caching_price":1.5e-7,"cached_price":1.5e-7,"output_price":6e-7,"max_output_tokens":0,"context_window":256000,"supports_caching":false,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Mistral's powerful hybrid model unifying instruct, reasoning, and coding capabilities in a single model. 119B parameters with 6.5B active.","data_retention":true,"data_retention_days":30,"privacy_comments":"N/A","geolocation":"eu"},{"api":"chat","id":"mistral/codestral-latest","object":"model","created":1723480361,"owned_by":"system","input_price":3e-7,"caching_price":9e-7,"cached_price":3e-7,"output_price":9e-7,"max_output_tokens":0,"context_window":131072,"supports_caching":false,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"","data_retention":true,"data_retention_days":30,"geolocation":"eu"},{"api":"chat","id":"mistral/devstral-latest","object":"model","created":1765324800,"owned_by":"system","input_price":4e-7,"caching_price":0.000002,"cached_price":4e-7,"output_price":0.000002,"max_output_tokens":0,"context_window":256000,"supports_caching":false,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"An enterprise grade text model, that excels at using tools to explore codebases, editing multiple files and power software engineering agents.","data_retention":true,"data_retention_days":30,"geolocation":"eu"},{"api":"chat","id":"mistral/devstral-small-2507","object":"model","created":1723480361,"owned_by":"system","input_price":1e-7,"caching_price":3e-7,"cached_price":1e-7,"output_price":3e-7,"max_output_tokens":0,"context_window":131072,"supports_caching":false,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"","data_retention":true,"data_retention_days":30,"geolocation":"eu"},{"api":"chat","id":"zai/GLM-5","object":"model","created":1770825600,"owned_by":"system","input_price":0.000001,"caching_price":0,"cached_price":2e-7,"output_price":0.0000032,"max_output_tokens":128000,"context_window":200000,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"GLM-5 is Zai’s new-generation flagship foundation model, designed for Agentic Engineering, capable of providing reliable productivity in complex system engineering and long-range Agent tasks. In terms of Coding and Agent capabilities, GLM-5 has achieved state-of-the-art (SOTA) performance in open source, with its usability in real programming scenarios approaching that of Claude Opus 4.5.","geolocation":"sg"},{"api":"chat","id":"zai/GLM-4.7","object":"model","created":1765880188,"owned_by":"system","input_price":6e-7,"caching_price":6e-7,"cached_price":1.1e-7,"output_price":0.0000022,"max_output_tokens":128000,"context_window":200000,"supports_caching":false,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"GLM-4.7 is Z AI’s latest flagship model, designed to push agentic and coding performance further. It expands the context window from 128K to 200K tokens, improves reasoning and tool-use capabilities, and delivers stronger results in coding benchmarks and real-world development workflows. GLM-4.6 demonstrates refined writing quality, more capable agent behavior, and higher token efficiency (≈15% fewer tokens vs. GLM-4.5).\n\nEvaluations show clear gains over GLM-4.5 across reasoning, agents, and coding, reaching near parity with Claude Sonnet 4 in practical tasks while outperforming other open-source baselines. GLM-4.6 is available through the Z.ai API platform, OpenRouter, coding agents (Claude Code, Roo Code, Cline, Kilo Code), and soon as downloadable weights on HuggingFace and ModelScope.","geolocation":"sg"},{"api":"chat","id":"zai/GLM-5.1","object":"model","created":1775577600,"owned_by":"system","input_price":0.0000014,"caching_price":0.0000044,"cached_price":2.6e-7,"output_price":0.0000044,"max_output_tokens":128000,"context_window":200000,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Compared with GLM-5, GLM-5.1 delivers significant improvements in coding, agentic tool usage, reasoning, role-play, and general chat quality. Besides, GLM-5.1 has outstanding capabilities in long-horizon agentic tasks like CUDA kernel optimization.","privacy_comments":"N/A","geolocation":"sg"},{"api":"chat","id":"zai/GLM-4.5","object":"model","created":1760609788,"owned_by":"system","input_price":6e-7,"caching_price":6e-7,"cached_price":1.1e-7,"output_price":0.0000022,"max_output_tokens":98304,"context_window":131072,"supports_caching":false,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"GLM-4.5 and GLM-4.5-Air are Z AI's latest flagship models, purpose-built as foundational models for agent-oriented applications. Both leverage a Mixture-of-Experts (MoE) architecture. GLM-4.5 has a total parameter count of 355B with 32B active parameters per forward pass, while GLM-4.5-Air adopts a more streamlined design with 106B total parameters and 12B active parameters.","geolocation":"sg"},{"api":"chat","id":"zai/GLM-4.6","object":"model","created":1760609788,"owned_by":"system","input_price":6e-7,"caching_price":6e-7,"cached_price":1.1e-7,"output_price":0.0000022,"max_output_tokens":128000,"context_window":200000,"supports_caching":false,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"GLM-4.6 is Z AI’s latest flagship model, designed to push agentic and coding performance further. It expands the context window from 128K to 200K tokens, improves reasoning and tool-use capabilities, and delivers stronger results in coding benchmarks and real-world development workflows. GLM-4.6 demonstrates refined writing quality, more capable agent behavior, and higher token efficiency (≈15% fewer tokens vs. GLM-4.5).\n\nEvaluations show clear gains over GLM-4.5 across reasoning, agents, and coding, reaching near parity with Claude Sonnet 4 in practical tasks while outperforming other open-source baselines. GLM-4.6 is available through the Z.ai API platform, OpenRouter, coding agents (Claude Code, Roo Code, Cline, Kilo Code), and soon as downloadable weights on HuggingFace and ModelScope.","geolocation":"sg"},{"api":"chat","id":"novita/meta-llama/llama-3-70b-instruct","object":"model","created":1738865908,"owned_by":"system","input_price":5.1e-7,"caching_price":5.1e-7,"cached_price":5.1e-7,"output_price":7.4e-7,"max_output_tokens":0,"context_window":8192,"supports_caching":false,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":false,"description":"Meta's latest class of model (Llama 3) launched with a variety of sizes \u0026 flavors. This 70B instruct-tuned version was optimized for high quality dialogue usecases. It has demonstrated strong performance compared to leading closed-source models in human evaluations.","privacy_comments":"NaN","geolocation":"global"},{"api":"chat","id":"novita/deepseek/deepseek-v3-0324","object":"model","created":1748631837,"owned_by":"system","input_price":4e-7,"caching_price":4e-7,"cached_price":4e-7,"output_price":0.0000013,"max_output_tokens":0,"context_window":128000,"supports_caching":false,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"DeepSeek R1 is the latest open-source model released by the DeepSeek team, featuring impressive reasoning capabilities, particularly achieving performance comparable to OpenAI's o1 model in mathematics, coding, and reasoning tasks.","privacy_comments":"NaN","geolocation":"global"},{"api":"chat","id":"novita/qwen/qwen-2.5-72b-instruct","object":"model","created":1745882204,"owned_by":"system","input_price":3.8e-7,"caching_price":3.8e-7,"cached_price":3.8e-7,"output_price":4e-7,"max_output_tokens":0,"context_window":32000,"supports_caching":false,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"Qwen2.5 is the latest series of Qwen large language models. For Qwen2.5, we release a number of base language models and instruction-tuned language models ranging from 0.5 to 72 billion parameters.","privacy_comments":"NaN","geolocation":"global"},{"api":"chat","id":"novita/mistralai/mistral-nemo","object":"model","created":1718037161,"owned_by":"system","input_price":1.7e-7,"caching_price":1.7e-7,"cached_price":1.7e-7,"output_price":1.7e-7,"max_output_tokens":0,"context_window":131072,"supports_caching":false,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":false,"description":"A 12B parameter model with a 128k token context length built by Mistral in collaboration with NVIDIA. The model is multilingual, supporting English, French, German, Spanish, Italian, Portuguese, Chinese, Japanese, Korean, Arabic, and Hindi. It supports function calling and is released under the Apache 2.0 license.","privacy_comments":"NaN","geolocation":"global"},{"api":"chat","id":"novita/sao10k/l3-70b-euryale-v2.1","object":"model","created":1734535928,"owned_by":"system","input_price":0.00000148,"caching_price":0.00000148,"cached_price":0.00000148,"output_price":0.00000148,"max_output_tokens":0,"context_window":16000,"supports_caching":false,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":false,"description":"The uncensored llama3 model is a powerhouse of creativity, excelling in both roleplay and story writing. It offers a liberating experience during roleplays, free from any restrictions. This model stands out for its immense creativity, boasting a vast array of unique ideas and plots, truly a treasure trove for those seeking originality. Its unrestricted nature during roleplays allows for the full breadth of imagination to unfold, akin to an enhanced, big-brained version of Stheno. Perfect for creative minds seeking a boundless platform for their imaginative expressions, the uncensored llama3 model is an ideal choice","privacy_comments":"NaN","geolocation":"global"},{"api":"chat","id":"novita/zai-org/glm-4.6","object":"model","created":1753899508,"owned_by":"system","input_price":6e-7,"caching_price":0.0000022,"cached_price":6e-7,"output_price":0.0000022,"max_output_tokens":131072,"context_window":204800,"supports_caching":false,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"GLM-4.6 is Z AI’s latest flagship model, designed to push agentic and coding performance further. It expands the context window from 128K to 200K tokens, improves reasoning and tool-use capabilities, and delivers stronger results in coding benchmarks and real-world development workflows. GLM-4.6 demonstrates refined writing quality, more capable agent behavior, and higher token efficiency (≈15% fewer tokens vs. GLM-4.5).\n\nEvaluations show clear gains over GLM-4.5 across reasoning, agents, and coding, reaching near parity with Claude Sonnet 4 in practical tasks while outperforming other open-source baselines. GLM-4.6 is available through the Z.ai API platform, OpenRouter, coding agents (Claude Code, Roo Code, Cline, Kilo Code), and soon as downloadable weights on HuggingFace and ModelScope.","privacy_comments":"NaN","geolocation":"global"},{"api":"chat","id":"novita/deepseek/deepseek-prover-v2-671b","object":"model","created":1738263837,"owned_by":"system","input_price":7e-7,"caching_price":7e-7,"cached_price":7e-7,"output_price":0.0000025,"max_output_tokens":0,"context_window":160000,"supports_caching":false,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":false,"description":"DeepSeek-R1-Distill-Qwen-7B is a 7 billion parameter dense language model distilled from DeepSeek-R1, leveraging reinforcement learning-enhanced reasoning data generated by DeepSeek's larger models. The distillation process transfers advanced reasoning, math, and code capabilities into a smaller, more efficient model architecture based on Qwen2.5-Math-7B. This model demonstrates strong performance across mathematical benchmarks (92.8% pass@1 on MATH-500), coding tasks (Codeforces rating 1189), and general reasoning (49.1% pass@1 on GPQA Diamond), achieving competitive accuracy relative to larger models while maintaining smaller inference costs.","privacy_comments":"NaN","geolocation":"global"},{"api":"chat","id":"novita/nousresearch/hermes-2-pro-llama-3-8b","object":"model","created":1738865908,"owned_by":"system","input_price":1.4e-7,"caching_price":1.4e-7,"cached_price":1.4e-7,"output_price":1.4e-7,"max_output_tokens":0,"context_window":8192,"supports_caching":false,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":false,"description":"Hermes 2 Pro is an upgraded, retrained version of Nous Hermes 2, consisting of an updated and cleaned version of the OpenHermes 2.5 Dataset, as well as a newly introduced Function Calling and JSON Mode dataset developed in-house.","privacy_comments":"NaN","geolocation":"global"},{"api":"chat","id":"novita/deepseek/deepseek-r1-distill-qwen-14b","object":"model","created":1738263837,"owned_by":"system","input_price":1.5e-7,"caching_price":1.5e-7,"cached_price":1.5e-7,"output_price":1.5e-7,"max_output_tokens":0,"context_window":128000,"supports_caching":false,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"DeepSeek R1 Distill Qwen 14B is a distilled large language model based on Qwen 2.5 14B, using outputs from DeepSeek R1. It outperforms OpenAI's o1-mini across various benchmarks, achieving new state-of-the-art results for dense models.\n\nOther benchmark results include:\n\nAIME 2024 pass@1: 69.7\nMATH-500 pass@1: 93.9\nCodeForces Rating: 1481\nThe model leverages fine-tuning from DeepSeek R1's outputs, enabling competitive performance comparable to larger frontier models.","privacy_comments":"NaN","geolocation":"global"},{"api":"chat","id":"novita/qwen/qwen3.5-397b-a17b","object":"model","created":1764371804,"owned_by":"system","input_price":6e-7,"caching_price":0.0000036,"cached_price":6e-7,"output_price":0.0000036,"max_output_tokens":65536,"context_window":262144,"supports_caching":false,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"The Qwen3.5 series 397B-A17B native vision-language model is based on a hybrid architecture design that integrates linear attention mechanisms with sparse Mixture-of-Experts (MoE), achieving higher inference efficiency. Across a variety of tasks—including language understanding, logical reasoning, code generation, agentic tasks, image understanding, video understanding, and graphical user interface (GUI) interaction—it demonstrates exceptional performance comparable to current top-tier frontier models. Possessing robust code generation and agentic capabilities, it exhibits strong generalization across various agent scenarios.","privacy_comments":"N/A","geolocation":"global"},{"api":"chat","id":"novita/deepseek/deepseek-r1-distill-qwen-32b","object":"model","created":1738263837,"owned_by":"system","input_price":3e-7,"caching_price":3e-7,"cached_price":3e-7,"output_price":3e-7,"max_output_tokens":0,"context_window":12800,"supports_caching":false,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"DeepSeek R1 Distill Qwen 32B is a distilled large language model based on Qwen 2.5 32B, using outputs from DeepSeek R1. It outperforms OpenAI's o1-mini across various benchmarks, achieving new state-of-the-art results for dense models.\n\nOther benchmark results include:\nAIME 2024 pass@1: 72.6\nMATH-500 pass@1: 94.3\nCodeForces Rating: 1691\nThe model leverages fine-tuning from DeepSeek R1's outputs, enabling competitive performance comparable to larger frontier models.","privacy_comments":"NaN","geolocation":"global"},{"api":"chat","id":"novita/meta-llama/llama-3.2-1b-instruct","object":"model","created":1738865908,"owned_by":"system","input_price":2e-8,"caching_price":2e-8,"cached_price":2e-8,"output_price":2e-8,"max_output_tokens":0,"context_window":131000,"supports_caching":false,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":false,"description":"The Meta Llama 3.2 collection of multilingual large language models (LLMs) is a collection of pretrained and instruction-tuned generative models in 1B and 3B sizes (text in/text out).","privacy_comments":"NaN","geolocation":"global"},{"api":"chat","id":"novita/deepseek/deepseek-v3.2","object":"model","created":1772283132,"owned_by":"system","input_price":2.69e-7,"caching_price":2.69e-7,"cached_price":1.345e-7,"output_price":4e-7,"max_output_tokens":65536,"context_window":163840,"supports_caching":true,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"DeepSeek-V3.2 is a next-generation foundation model designed to unify high computational efficiency with state-of-the-art reasoning and agentic performance. Built upon DeepSeek Sparse Attention (DSA) for efficient long-context reasoning, a scalable reinforcement learning framework reaching frontier-level performance, and a large-scale agentic task synthesis pipeline for reliable tool-use and multi-step decision-making.","privacy_comments":"NaN","geolocation":"global"},{"api":"chat","id":"novita/sao10k/l3-8b-lunaris","object":"model","created":1734535928,"owned_by":"system","input_price":5e-8,"caching_price":5e-8,"cached_price":5e-8,"output_price":5e-8,"max_output_tokens":0,"context_window":8192,"supports_caching":false,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":false,"description":"A generalist / roleplaying model merge based on Llama 3.","privacy_comments":"NaN","geolocation":"global"},{"api":"chat","id":"novita/deepseek/deepseek-r1-distill-llama-70b","object":"model","created":1738263837,"owned_by":"system","input_price":8e-7,"caching_price":8e-7,"cached_price":8e-7,"output_price":8e-7,"max_output_tokens":0,"context_window":32000,"supports_caching":false,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":false,"description":"DeepSeek R1 Distill LLama 70B","privacy_comments":"NaN","geolocation":"global"},{"api":"chat","id":"novita/Sao10K/L3-8B-Stheno-v3.2","object":"model","created":1734535928,"owned_by":"system","input_price":5e-8,"caching_price":5e-8,"cached_price":5e-8,"output_price":5e-8,"max_output_tokens":0,"context_window":8192,"supports_caching":false,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":false,"description":"Sao10K/L3-8B-Stheno-v3.2 is a highly skilled actor that excels at fully immersing itself in any role assigned.","privacy_comments":"NaN","geolocation":"global"},{"api":"chat","id":"novita/qwen/qwen2.5-vl-72b-instruct","object":"model","created":1745882204,"owned_by":"system","input_price":8e-7,"caching_price":8e-7,"cached_price":8e-7,"output_price":8e-7,"max_output_tokens":0,"context_window":96000,"supports_caching":false,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":false,"description":"Qwen2 VL 72B is a multimodal LLM from the Qwen Team with the following key enhancements:\n\nSoTA understanding of images of various resolution \u0026 ratio: Qwen2-VL achieves state-of-the-art performance on visual understanding benchmarks, including MathVista, DocVQA, RealWorldQA, MTVQA, etc.\n\nUnderstanding videos of 20min+: Qwen2-VL can understand videos over 20 minutes for high-quality video-based question answering, dialog, content creation, etc.\n\nAgent that can operate your mobiles, robots, etc.: with the abilities of complex reasoning and decision making, Qwen2-VL can be integrated with devices like mobile phones, robots, etc., for automatic operation based on visual environment and text instructions.\n\nMultilingual Support: to serve global users, besides English and Chinese, Qwen2-VL now supports the understanding of texts in different languages inside images, including most European languages, Japanese, Korean, Arabic, Vietnamese, etc.","privacy_comments":"NaN","geolocation":"global"},{"api":"chat","id":"novita/meta-llama/llama-3.1-8b-instruct","object":"model","created":1738865908,"owned_by":"system","input_price":5e-8,"caching_price":5e-8,"cached_price":5e-8,"output_price":5e-8,"max_output_tokens":0,"context_window":16384,"supports_caching":false,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":false,"description":"Meta's latest class of models, Llama 3.1, launched with a variety of sizes and configurations. The 8B instruct-tuned version is particularly fast and efficient. It has demonstrated strong performance in human evaluations, outperforming several leading closed-source models.","privacy_comments":"NaN","geolocation":"global"},{"api":"chat","id":"novita/GLM-5","object":"model","created":1774005451,"owned_by":"system","input_price":0.000001,"cached_price":2e-7,"output_price":0.0000032,"max_output_tokens":131072,"context_window":202800,"supports_caching":true,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"GLM-5 is an open-source foundation model engineered for complex system engineering and long-horizon Agent tasks, delivering reliable productivity for top-tier programmers. Transcending the boundary from \"writing code\" to \"building systems,\" it moves beyond traditional snippet generation to offer senior-architect-level planning and execution capabilities. By rejecting the \"frontend-heavy, logic-light\" approach, GLM-5 demonstrates exceptional reasoning and self-healing abilities in backend refactoring, complex algorithm implementation, and deep debugging—autonomously analyzing logs and iteratively fixing persistent bugs until the system runs. As the first open-source model featuring Opus-class style and system engineering depth, GLM-5 provides extreme logic density alongside the freedom of local deployment and high cost-effectiveness, making it the ideal choice for large-scale backend development and automated Agent construction.","geolocation":"global"},{"api":"chat","id":"novita/sao10k/l31-70b-euryale-v2.2","object":"model","created":1734535928,"owned_by":"system","input_price":0.00000148,"caching_price":0.00000148,"cached_price":0.00000148,"output_price":0.00000148,"max_output_tokens":0,"context_window":16000,"supports_caching":false,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":false,"description":"Euryale L3.1 70B v2.2 is a model focused on creative roleplay from Sao10k. It is the successor of Euryale L3 70B v2.1.","privacy_comments":"NaN","geolocation":"global"},{"api":"chat","id":"novita/deepseek/deepseek-v3-turbo","object":"model","created":1748631837,"owned_by":"system","input_price":4e-7,"caching_price":4e-7,"cached_price":4e-7,"output_price":0.0000013,"max_output_tokens":0,"context_window":128000,"supports_caching":false,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"DeepSeek R1 is the latest open-source model released by the DeepSeek team, featuring impressive reasoning capabilities, particularly achieving performance comparable to OpenAI's o1 model in mathematics, coding, and reasoning tasks.","privacy_comments":"NaN","geolocation":"global"},{"api":"chat","id":"novita/meta-llama/llama-3.2-3b-instruct","object":"model","created":1738865908,"owned_by":"system","input_price":3e-8,"caching_price":3e-8,"cached_price":3e-8,"output_price":5e-8,"max_output_tokens":0,"context_window":32768,"supports_caching":false,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"The Meta Llama 3.2 collection of multilingual large language models (LLMs) is a collection of pretrained and instruction-tuned generative models in 1B and 3B sizes (text in/text out)","privacy_comments":"NaN","geolocation":"global"},{"api":"chat","id":"novita/meta-llama/llama-3-8b-instruct","object":"model","created":1738865908,"owned_by":"system","input_price":4e-8,"caching_price":4e-8,"cached_price":4e-8,"output_price":4e-8,"max_output_tokens":0,"context_window":8192,"supports_caching":false,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":false,"description":"Meta's latest class of model (Llama 3) launched with a variety of sizes \u0026 flavors. This 8B instruct-tuned version was optimized for high quality dialogue usecases. It has demonstrated strong performance compared to leading closed-source models in human evaluations.","privacy_comments":"NaN","geolocation":"global"},{"api":"chat","id":"novita/qwen/qwen3-235b-a22b-fp8","object":"model","created":1745882204,"owned_by":"system","input_price":2e-7,"caching_price":2e-7,"cached_price":2e-7,"output_price":8e-7,"max_output_tokens":0,"context_window":128000,"supports_caching":false,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":false,"description":"Qwen3, the latest generation in the Qwen large language model series, features both dense and mixture-of-experts (MoE) architectures to excel in reasoning, multilingual support, and advanced agent tasks. Its unique ability to switch seamlessly between a thinking mode for complex reasoning and a non-thinking mode for efficient dialogue ensures versatile, high-quality performance.\n\nSignificantly outperforming prior models like QwQ and Qwen2.5, Qwen3 delivers superior mathematics, coding, commonsense reasoning, creative writing, and interactive dialogue capabilities. The Qwen3-30B-A3B variant includes 30.5 billion parameters (3.3 billion activated), 48 layers, 128 experts (8 activated per task), and supports up to 131K token contexts with YaRN, setting a new standard among open-source models.","privacy_comments":"NaN","geolocation":"global"},{"api":"chat","id":"novita/meta-llama/llama-4-maverick-17b-128e-instruct-fp8","object":"model","created":1738263837,"owned_by":"system","input_price":2e-7,"caching_price":2e-7,"cached_price":2e-7,"output_price":8.5e-7,"max_output_tokens":1048576,"context_window":1048576,"supports_caching":false,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":false,"description":"A lightweight and ultra-fast variant of Llama 3.3 70B, for use when quick response times are needed most.","geolocation":"global"},{"api":"chat","id":"novita/deepseek/deepseek-r1","object":"model","created":1738263837,"owned_by":"system","input_price":0.000004,"caching_price":0.000004,"cached_price":0.000004,"output_price":0.000004,"max_output_tokens":0,"context_window":64000,"supports_caching":false,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"DeepSeek R1 is the latest open-source model released by the DeepSeek team, featuring impressive reasoning capabilities, particularly achieving performance comparable to OpenAI's o1 model in mathematics, coding, and reasoning tasks.","privacy_comments":"NaN","geolocation":"global"},{"api":"chat","id":"novita/gryphe/mythomax-l2-13b","object":"model","created":1688259600,"owned_by":"system","input_price":9e-8,"caching_price":9e-8,"cached_price":9e-8,"output_price":9e-8,"max_output_tokens":0,"context_window":4096,"supports_caching":false,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"The idea behind this merge is that each layer is composed of several tensors, which are in turn responsible for specific functions. Using MythoLogic-L2's robust understanding as its input and Huginn's extensive writing capability as its output seems to have resulted in a model that exceeds at both, confirming my theory. (More details to be released at a later time).","privacy_comments":"NaN","geolocation":"global"},{"api":"chat","id":"novita/meta-llama/llama-3.3-70b-instruct","object":"model","created":1738865908,"owned_by":"system","input_price":3.9e-7,"caching_price":3.9e-7,"cached_price":3.9e-7,"output_price":3.9e-7,"max_output_tokens":0,"context_window":131072,"supports_caching":false,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"The Meta Llama 3.3 multilingual large language model (LLM) is a pretrained and instruction tuned generative model in 70B (text in/text out). The Llama 3.3 instruction tuned text only model is optimized for multilingual dialogue use cases and outperforms many of the available open source and closed chat models on common industry benchmarks.\n\nSupported languages: English, German, French, Italian, Portuguese, Hindi, Spanish, and Thai.","privacy_comments":"NaN","geolocation":"global"},{"api":"chat","id":"novita/microsoft/wizardlm-2-8x22b","object":"model","created":1738865908,"owned_by":"system","input_price":6.2e-7,"caching_price":6.2e-7,"cached_price":6.2e-7,"output_price":6.2e-7,"max_output_tokens":0,"context_window":65535,"supports_caching":false,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":false,"description":"WizardLM-2 8x22B is Microsoft AI's most advanced Wizard model. It demonstrates highly competitive performance compared to leading proprietary models, and it consistently outperforms all existing state-of-the-art opensource models.","privacy_comments":"NaN","geolocation":"global"},{"api":"chat","id":"novita/deepseek/deepseek-r1-turbo","object":"model","created":1738263837,"owned_by":"system","input_price":7e-7,"caching_price":0.0000025,"cached_price":7e-7,"output_price":0.0000025,"max_output_tokens":0,"context_window":64000,"supports_caching":false,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"DeepSeek R1 is the latest open-source model released by the DeepSeek team, featuring impressive reasoning capabilities, particularly achieving performance comparable to OpenAI's o1 model in mathematics, coding, and reasoning tasks.","privacy_comments":"NaN","geolocation":"global"},{"api":"chat","id":"novita/zai-org/glm-4.5","object":"model","created":1753899508,"owned_by":"system","input_price":6e-7,"caching_price":0.0000022,"cached_price":6e-7,"output_price":0.0000022,"max_output_tokens":0,"context_window":131072,"supports_caching":false,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"","privacy_comments":"NaN","geolocation":"global"},{"api":"chat","id":"novita/deepseek/deepseek_v3","object":"model","created":1748631837,"owned_by":"system","input_price":8.9e-7,"caching_price":8.9e-7,"cached_price":8.9e-7,"output_price":8.9e-7,"max_output_tokens":0,"context_window":64000,"supports_caching":false,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"DeepSeek-V3 is the latest model from the DeepSeek team, building upon the instruction following and coding abilities of the previous versions. Pre-trained on nearly 15 trillion tokens, the reported evaluations reveal that the model outperforms other open-source models and rivals leading closed-source models.","privacy_comments":"NaN","geolocation":"global"},{"api":"chat","id":"novita/minimax/minimax-m2.7","object":"model","created":1773915388,"owned_by":"system","input_price":3e-7,"caching_price":0.0000012,"cached_price":6e-8,"output_price":0.0000012,"max_output_tokens":128000,"context_window":200000,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"MiniMax-M2.7 is a next-generation large language model designed for autonomous, real-world productivity and continuous improvement. Built to actively participate in its own evolution, M2.7 integrates advanced agentic capabilities through multi-agent collaboration, enabling it to plan, execute, and refine complex tasks across dynamic environments.\n\nTrained for production-grade performance, M2.7 handles workflows such as live debugging, root cause analysis, financial modeling, and full document generation across Word, Excel, and PowerPoint. It delivers strong results on benchmarks including 56.2% on SWE-Pro and 57.0% on Terminal Bench 2, while achieving a 1495 ELO on GDPval-AA, setting a new standard for multi-agent systems operating in real-world digital workflows.","privacy_comments":"N/A","geolocation":"global"},{"api":"chat","id":"novita/moonshotai/kimi-k2-instruct","object":"model","created":1752257908,"owned_by":"system","input_price":5.7e-7,"caching_price":5.7e-7,"cached_price":5.7e-7,"output_price":0.0000023,"max_output_tokens":0,"context_window":131072,"supports_caching":false,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"","privacy_comments":"NaN","geolocation":"global"},{"api":"chat","id":"deepseek/deepseek-chat","object":"model","created":1738263837,"owned_by":"system","input_price":1.4e-7,"caching_price":1.4e-7,"cached_price":2.8e-8,"output_price":2.8e-7,"max_output_tokens":384000,"context_window":1000000,"supports_caching":true,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"An alias for DeepSeek v4 Flash in non-thinking mode.","data_retention":true,"privacy_comments":"No online information","geolocation":"global"},{"api":"chat","id":"deepseek/deepseek-reasoner","object":"model","created":1738263837,"owned_by":"system","input_price":1.4e-7,"caching_price":1.4e-7,"cached_price":2.8e-8,"output_price":2.8e-7,"max_output_tokens":384000,"context_window":1000000,"supports_caching":true,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"An alias for DeepSeek v4 Flash in thinking mode.","data_retention":true,"privacy_comments":"No online information","geolocation":"global"},{"api":"chat","id":"deepseek/deepseek-v4-flash","object":"model","created":1777029550,"owned_by":"system","input_price":1.4e-7,"caching_price":1.4e-7,"cached_price":2.8e-8,"output_price":2.8e-7,"max_output_tokens":384000,"context_window":1000000,"supports_caching":true,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"DeepSeek v4 Flash - 284B total / 13B active params. Your fast, efficient, and economical choice.","data_retention":true,"privacy_comments":"No online information","geolocation":"global"},{"api":"chat","id":"deepseek/deepseek-v4-pro","object":"model","created":1777029550,"owned_by":"system","input_price":0.00000174,"caching_price":0.00000174,"cached_price":1.45e-7,"output_price":0.00000348,"max_output_tokens":384000,"context_window":1000000,"supports_caching":true,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"DeepSeek v4 Pro - 1.6T total / 49B active params. Performance rivaling the world's top closed-source models.","data_retention":true,"privacy_comments":"No online information","geolocation":"global"},{"api":"chat","id":"coding/gemini-2.5-flash@europe-west1","object":"model","created":1747765524,"owned_by":"system","input_price":3e-7,"caching_price":5.5e-7,"cached_price":7.5e-8,"output_price":0.0000025,"max_output_tokens":65535,"context_window":1048576,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Google's first hybrid reasoning model which supports a 1M token context window and has thinking budgets. Most balanced Gemini model, optimized for low latency use cases.","geolocation":"global"},{"api":"chat","id":"coding/gemini-2.5-pro","object":"model","created":1746582113,"owned_by":"system","input_price":0.00000125,"caching_price":0.000002375,"cached_price":3.1e-7,"output_price":0.00001,"max_output_tokens":65535,"context_window":1048576,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Gemini 2.5 Pro is Google’s state-of-the-art AI model designed for advanced reasoning, coding, mathematics, and scientific tasks. It employs “thinking” capabilities, enabling it to reason through responses with enhanced accuracy and nuanced context handling. Gemini 2.5 Pro achieves top-tier performance on multiple benchmarks, including first-place positioning on the LMArena leaderboard, reflecting superior human-preference alignment and complex problem-solving abilities.","geolocation":"global"},{"api":"chat","id":"coding/gemini-2.5-pro@europe-west1","object":"model","created":1746582113,"owned_by":"system","input_price":0.00000125,"caching_price":0.000002375,"cached_price":3.1e-7,"output_price":0.00001,"max_output_tokens":65535,"context_window":1048576,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Gemini 2.5 Pro is Google’s state-of-the-art AI model designed for advanced reasoning, coding, mathematics, and scientific tasks. It employs “thinking” capabilities, enabling it to reason through responses with enhanced accuracy and nuanced context handling. Gemini 2.5 Pro achieves top-tier performance on multiple benchmarks, including first-place positioning on the LMArena leaderboard, reflecting superior human-preference alignment and complex problem-solving abilities.","geolocation":"global"},{"api":"chat","id":"coding/gemini-2.5-pro@europe-west4","object":"model","created":1746582113,"owned_by":"system","input_price":0.00000125,"caching_price":0.000002375,"cached_price":3.1e-7,"output_price":0.00001,"max_output_tokens":65535,"context_window":1048576,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Gemini 2.5 Pro is Google’s state-of-the-art AI model designed for advanced reasoning, coding, mathematics, and scientific tasks. It employs “thinking” capabilities, enabling it to reason through responses with enhanced accuracy and nuanced context handling. Gemini 2.5 Pro achieves top-tier performance on multiple benchmarks, including first-place positioning on the LMArena leaderboard, reflecting superior human-preference alignment and complex problem-solving abilities.","geolocation":"global"},{"api":"chat","id":"coding/gemini-2.5-flash@us-central1","object":"model","created":1747765524,"owned_by":"system","input_price":3e-7,"caching_price":5.5e-7,"cached_price":7.5e-8,"output_price":0.0000025,"max_output_tokens":65535,"context_window":1048576,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Google's first hybrid reasoning model which supports a 1M token context window and has thinking budgets. Most balanced Gemini model, optimized for low latency use cases.","geolocation":"global"},{"api":"chat","id":"coding/claude-sonnet-4-20250514","object":"model","created":1747933971,"owned_by":"system","input_price":0.000003,"caching_price":0.00000375,"cached_price":3e-7,"output_price":0.000015,"max_output_tokens":64000,"context_window":200000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Claude Sonnet 4 significantly improves on Sonnet 3.7's industry-leading capabilities, excelling in coding with a state-of-the-art 72.7% on SWE-bench. The model balances performance and efficiency for internal and external use cases, with enhanced steerability for greater control over implementations.","data_retention":true,"data_retention_days":30,"geolocation":"global"},{"api":"chat","id":"coding/gemini-2.5-pro@us-west1","object":"model","created":1746582113,"owned_by":"system","input_price":0.00000125,"caching_price":0.000002375,"cached_price":3.1e-7,"output_price":0.00001,"max_output_tokens":65535,"context_window":1048576,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Gemini 2.5 Pro is Google’s state-of-the-art AI model designed for advanced reasoning, coding, mathematics, and scientific tasks. It employs “thinking” capabilities, enabling it to reason through responses with enhanced accuracy and nuanced context handling. Gemini 2.5 Pro achieves top-tier performance on multiple benchmarks, including first-place positioning on the LMArena leaderboard, reflecting superior human-preference alignment and complex problem-solving abilities.","geolocation":"global"},{"api":"chat","id":"coding/gemini-2.5-pro@europe-north1","object":"model","created":1746582113,"owned_by":"system","input_price":0.00000125,"caching_price":0.000002375,"cached_price":3.1e-7,"output_price":0.00001,"max_output_tokens":65535,"context_window":1048576,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Gemini 2.5 Pro is Google’s state-of-the-art AI model designed for advanced reasoning, coding, mathematics, and scientific tasks. It employs “thinking” capabilities, enabling it to reason through responses with enhanced accuracy and nuanced context handling. Gemini 2.5 Pro achieves top-tier performance on multiple benchmarks, including first-place positioning on the LMArena leaderboard, reflecting superior human-preference alignment and complex problem-solving abilities.","geolocation":"global"},{"api":"chat","id":"coding/gemini-2.5-pro@europe-west8","object":"model","created":1746582113,"owned_by":"system","input_price":0.00000125,"caching_price":0.000002375,"cached_price":3.1e-7,"output_price":0.00001,"max_output_tokens":65535,"context_window":1048576,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Gemini 2.5 Pro is Google’s state-of-the-art AI model designed for advanced reasoning, coding, mathematics, and scientific tasks. It employs “thinking” capabilities, enabling it to reason through responses with enhanced accuracy and nuanced context handling. Gemini 2.5 Pro achieves top-tier performance on multiple benchmarks, including first-place positioning on the LMArena leaderboard, reflecting superior human-preference alignment and complex problem-solving abilities.","geolocation":"global"},{"api":"chat","id":"coding/gemini-2.5-flash@us-east5","object":"model","created":1747765524,"owned_by":"system","input_price":3e-7,"caching_price":5.5e-7,"cached_price":7.5e-8,"output_price":0.0000025,"max_output_tokens":65535,"context_window":1048576,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Google's first hybrid reasoning model which supports a 1M token context window and has thinking budgets. Most balanced Gemini model, optimized for low latency use cases.","geolocation":"global"},{"api":"chat","id":"coding/gemini-2.5-flash@us-south1","object":"model","created":1747765524,"owned_by":"system","input_price":3e-7,"caching_price":5.5e-7,"cached_price":7.5e-8,"output_price":0.0000025,"max_output_tokens":65535,"context_window":1048576,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Google's first hybrid reasoning model which supports a 1M token context window and has thinking budgets. Most balanced Gemini model, optimized for low latency use cases.","geolocation":"global"},{"api":"chat","id":"coding/gemini-2.5-flash@europe-central2","object":"model","created":1747765524,"owned_by":"system","input_price":3e-7,"caching_price":5.5e-7,"cached_price":7.5e-8,"output_price":0.0000025,"max_output_tokens":65535,"context_window":1048576,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Google's first hybrid reasoning model which supports a 1M token context window and has thinking budgets. Most balanced Gemini model, optimized for low latency use cases.","geolocation":"global"},{"api":"chat","id":"coding/gemini-2.5-pro@us-central1","object":"model","created":1746582113,"owned_by":"system","input_price":0.00000125,"caching_price":0.000002375,"cached_price":3.1e-7,"output_price":0.00001,"max_output_tokens":65535,"context_window":1048576,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Gemini 2.5 Pro is Google’s state-of-the-art AI model designed for advanced reasoning, coding, mathematics, and scientific tasks. It employs “thinking” capabilities, enabling it to reason through responses with enhanced accuracy and nuanced context handling. Gemini 2.5 Pro achieves top-tier performance on multiple benchmarks, including first-place positioning on the LMArena leaderboard, reflecting superior human-preference alignment and complex problem-solving abilities.","geolocation":"global"},{"api":"chat","id":"coding/gemini-2.5-pro@us-east1","object":"model","created":1746582113,"owned_by":"system","input_price":0.00000125,"caching_price":0.000002375,"cached_price":3.1e-7,"output_price":0.00001,"max_output_tokens":65535,"context_window":1048576,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Gemini 2.5 Pro is Google’s state-of-the-art AI model designed for advanced reasoning, coding, mathematics, and scientific tasks. It employs “thinking” capabilities, enabling it to reason through responses with enhanced accuracy and nuanced context handling. Gemini 2.5 Pro achieves top-tier performance on multiple benchmarks, including first-place positioning on the LMArena leaderboard, reflecting superior human-preference alignment and complex problem-solving abilities.","geolocation":"global"},{"api":"chat","id":"coding/gemini-2.5-pro@us-east5","object":"model","created":1746582113,"owned_by":"system","input_price":0.00000125,"caching_price":0.000002375,"cached_price":3.1e-7,"output_price":0.00001,"max_output_tokens":65535,"context_window":1048576,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Gemini 2.5 Pro is Google’s state-of-the-art AI model designed for advanced reasoning, coding, mathematics, and scientific tasks. It employs “thinking” capabilities, enabling it to reason through responses with enhanced accuracy and nuanced context handling. Gemini 2.5 Pro achieves top-tier performance on multiple benchmarks, including first-place positioning on the LMArena leaderboard, reflecting superior human-preference alignment and complex problem-solving abilities.","geolocation":"global"},{"api":"chat","id":"coding/gemini-2.5-flash","object":"model","created":1747765524,"owned_by":"system","input_price":3e-7,"caching_price":5.5e-7,"cached_price":7.5e-8,"output_price":0.0000025,"max_output_tokens":65535,"context_window":1048576,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Google's first hybrid reasoning model which supports a 1M token context window and has thinking budgets. Most balanced Gemini model, optimized for low latency use cases.","geolocation":"global"},{"api":"chat","id":"coding/gemini-2.5-flash@europe-west4","object":"model","created":1747765524,"owned_by":"system","input_price":3e-7,"caching_price":5.5e-7,"cached_price":7.5e-8,"output_price":0.0000025,"max_output_tokens":65535,"context_window":1048576,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Google's first hybrid reasoning model which supports a 1M token context window and has thinking budgets. Most balanced Gemini model, optimized for low latency use cases.","geolocation":"global"},{"api":"chat","id":"coding/gemini-2.5-flash@europe-west8","object":"model","created":1747765524,"owned_by":"system","input_price":3e-7,"caching_price":5.5e-7,"cached_price":7.5e-8,"output_price":0.0000025,"max_output_tokens":65535,"context_window":1048576,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Google's first hybrid reasoning model which supports a 1M token context window and has thinking budgets. Most balanced Gemini model, optimized for low latency use cases.","geolocation":"global"},{"api":"chat","id":"coding/claude-opus-4-20250514","object":"model","created":1747934845,"owned_by":"system","input_price":0.000015,"caching_price":0.00001875,"cached_price":0.0000015,"output_price":0.000075,"max_output_tokens":32000,"context_window":200000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Claude Opus 4 is Anthropic's most powerful model yet and the best coding model in the world, leading on SWE-bench (72.5%) and Terminal-bench (43.2%). It delivers sustained performance on long-running tasks that require focused effort and thousands of steps, with the ability to work continuously for several hours—dramatically outperforming all Sonnet models and significantly expanding what AI agents can accomplish.","data_retention":true,"data_retention_days":30,"geolocation":"global"},{"api":"chat","id":"coding/gemini-2.5-pro@us-south1","object":"model","created":1746582113,"owned_by":"system","input_price":0.00000125,"caching_price":0.000002375,"cached_price":3.1e-7,"output_price":0.00001,"max_output_tokens":65535,"context_window":1048576,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Gemini 2.5 Pro is Google’s state-of-the-art AI model designed for advanced reasoning, coding, mathematics, and scientific tasks. It employs “thinking” capabilities, enabling it to reason through responses with enhanced accuracy and nuanced context handling. Gemini 2.5 Pro achieves top-tier performance on multiple benchmarks, including first-place positioning on the LMArena leaderboard, reflecting superior human-preference alignment and complex problem-solving abilities.","geolocation":"global"},{"api":"chat","id":"coding/gemini-2.5-pro@europe-central2","object":"model","created":1746582113,"owned_by":"system","input_price":0.00000125,"caching_price":0.000002375,"cached_price":3.1e-7,"output_price":0.00001,"max_output_tokens":65535,"context_window":1048576,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Gemini 2.5 Pro is Google’s state-of-the-art AI model designed for advanced reasoning, coding, mathematics, and scientific tasks. It employs “thinking” capabilities, enabling it to reason through responses with enhanced accuracy and nuanced context handling. Gemini 2.5 Pro achieves top-tier performance on multiple benchmarks, including first-place positioning on the LMArena leaderboard, reflecting superior human-preference alignment and complex problem-solving abilities.","geolocation":"global"},{"api":"chat","id":"coding/gemini-2.5-flash@us-east1","object":"model","created":1747765524,"owned_by":"system","input_price":3e-7,"caching_price":5.5e-7,"cached_price":7.5e-8,"output_price":0.0000025,"max_output_tokens":65535,"context_window":1048576,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Google's first hybrid reasoning model which supports a 1M token context window and has thinking budgets. Most balanced Gemini model, optimized for low latency use cases.","geolocation":"global"},{"api":"chat","id":"coding/gemini-2.5-flash@us-west1","object":"model","created":1747765524,"owned_by":"system","input_price":3e-7,"caching_price":5.5e-7,"cached_price":7.5e-8,"output_price":0.0000025,"max_output_tokens":65535,"context_window":1048576,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Google's first hybrid reasoning model which supports a 1M token context window and has thinking budgets. Most balanced Gemini model, optimized for low latency use cases.","geolocation":"global"},{"api":"chat","id":"coding/gemini-2.5-flash@europe-north1","object":"model","created":1747765524,"owned_by":"system","input_price":3e-7,"caching_price":5.5e-7,"cached_price":7.5e-8,"output_price":0.0000025,"max_output_tokens":65535,"context_window":1048576,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Google's first hybrid reasoning model which supports a 1M token context window and has thinking budgets. Most balanced Gemini model, optimized for low latency use cases.","geolocation":"global"},{"api":"chat","id":"perplexity/sonar-reasoning-pro","object":"model","created":1741313308,"owned_by":"system","input_price":0.000002,"caching_price":0.000002,"cached_price":0.000002,"output_price":0.000008,"max_output_tokens":8192,"context_window":131072,"supports_caching":false,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":false,"description":"Premier reasoning offering powered by DeepSeek R1 with Chain of Thought (CoT).","geolocation":"global"},{"api":"chat","id":"perplexity/sonar","object":"model","created":1741313308,"owned_by":"system","input_price":0.000001,"caching_price":0.000001,"cached_price":0.000001,"output_price":0.000001,"max_output_tokens":8192,"context_window":131072,"supports_caching":false,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":false,"description":"Lightweight offering with search grounding, quicker and cheaper than Sonar Pro.","geolocation":"global"},{"api":"chat","id":"perplexity/sonar-pro","object":"model","created":1741313308,"owned_by":"system","input_price":0.000003,"caching_price":0.000003,"cached_price":0.000003,"output_price":0.000015,"max_output_tokens":8192,"context_window":204800,"supports_caching":false,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":false,"description":"Premier search offering with search grounding, supporting advanced queries and follow-ups.","geolocation":"global"},{"api":"chat","id":"parasail/Kimi K2.5","object":"model","created":1774953955,"owned_by":"system","input_price":6e-7,"output_price":0.0000028,"max_output_tokens":131072,"context_window":262144,"supports_caching":false,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"Kimi K2.5 is an open-source, native multimodal agentic model built through continual pretraining on approximately 15 trillion mixed visual and text tokens atop Kimi-K2-Base. It seamlessly integrates vision and language understanding with advanced agentic capabilities, instant and thinking modes, as well as conversational and agentic paradigms.","geolocation":"global"},{"api":"chat","id":"parasail/parasail-kimi-k2-instruct","object":"model","created":1755534220,"owned_by":"system","input_price":9.9e-7,"caching_price":0.00000299,"cached_price":9.9e-7,"output_price":0.00000299,"max_output_tokens":16384,"context_window":131072,"supports_caching":false,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"","geolocation":"global"},{"api":"chat","id":"parasail/parasail-qwen25-vl-72b-instruct","object":"model","created":1738865908,"owned_by":"system","input_price":7e-7,"caching_price":7e-7,"cached_price":7e-7,"output_price":7e-7,"max_output_tokens":8192,"context_window":32768,"supports_caching":false,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"","geolocation":"global"},{"api":"chat","id":"parasail/parasail-qwen3-235b-a22b-instruct-2507","object":"model","created":1753115020,"owned_by":"system","input_price":1.5e-7,"caching_price":8.5e-7,"cached_price":1.5e-7,"output_price":8.5e-7,"max_output_tokens":8192,"context_window":262144,"supports_caching":false,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"","geolocation":"global"},{"api":"chat","id":"parasail/parasail-gemma3-27b-it","object":"model","created":1741963556,"owned_by":"system","input_price":3e-7,"caching_price":5e-7,"cached_price":3e-7,"output_price":5e-7,"max_output_tokens":8192,"context_window":128000,"supports_caching":false,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"Gemma 3 1B is the smallest of the new Gemma 3 family. It handles context windows up to 32k tokens, understands over 140 languages, and offers improved math, reasoning, and chat capabilities, including structured outputs and function calling. Note: Gemma 3 1B is not multimodal. For the smallest multimodal Gemma 3 model, please see [Gemma 3 4B](google/gemma-3-4b-it)","geolocation":"global"},{"api":"chat","id":"openai/gpt-5.4-pro","object":"model","created":1772745489,"owned_by":"system","input_price":0.00003,"output_price":0.00018,"max_output_tokens":128000,"context_window":1050000,"supports_caching":false,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"GPT-5.4 Pro is OpenAI's most advanced model, building on GPT-5.4's unified architecture with enhanced reasoning capabilities for complex, high-stakes tasks. It features a 1M+ token context window (922K input, 128K output) with support for text and image inputs. Optimized for step-by-step reasoning, instruction following, and accuracy, GPT-5.4 Pro excels at agentic coding, long-context workflows, and multi-step problem solving.","data_retention":true,"geolocation":"global"},{"api":"chat","id":"openai/o3:flex","object":"model","created":1744827057,"owned_by":"system","input_price":0.000001,"caching_price":0.000001,"cached_price":2.5e-7,"output_price":0.000004,"max_output_tokens":100000,"context_window":200000,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"O3 Flex is a cheaper version of the o3 model","data_retention":true,"geolocation":"global"},{"api":"chat","id":"openai/gpt-5.5","object":"model","created":1777109681,"owned_by":"system","input_price":0.000005,"cached_price":5e-7,"output_price":0.00003,"max_output_tokens":128000,"context_window":1050000,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"GPT-5.5 is OpenAI's frontier model designed for complex professional workloads, building on GPT-5.4 with stronger reasoning, higher reliability, and improved token efficiency on hard tasks. It features a 1M+ token context window (922K input, 128K output) with support for text and image inputs, enabling large-scale reasoning, coding, and multimodal workflows within a single system.","data_retention":true,"geolocation":"global"},{"api":"chat","id":"openai/o1:medium","object":"model","created":1734459999,"owned_by":"system","input_price":0.000015,"caching_price":0.000015,"cached_price":0.0000075,"output_price":0.00006,"max_output_tokens":100000,"context_window":200000,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"The o1 series of models are trained with reinforcement learning to perform complex reasoning. o1 models think before they answer, producing a long internal chain of thought before responding to the user. The o1 reasoning model is designed to solve hard problems across domains. The knowledge cutoff for o1 and o1-mini models is October, 2023.","data_retention":true,"geolocation":"global"},{"api":"chat","id":"openai/o3-mini:low","object":"model","created":1738351721,"owned_by":"system","input_price":0.0000011,"caching_price":0.0000011,"cached_price":5.5e-7,"output_price":0.0000044,"max_output_tokens":100000,"context_window":200000,"supports_caching":true,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"o3-mini is OpenAI's most recent small reasoning model, providing high intelligence at the same cost and latency targets of o1-mini. o3-mini also supports key developer features, like Structured Outputs, function calling, Batch API, and more. Like other models in the o-series, it is designed to excel at science, math, and coding tasks.","data_retention":true,"geolocation":"global"},{"api":"chat","id":"openai/gpt-4.1-nano","object":"model","created":1744654969,"owned_by":"system","input_price":1e-7,"caching_price":1e-7,"cached_price":2.5e-8,"output_price":4e-7,"max_output_tokens":32768,"context_window":1047576,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"For tasks that demand low latency, GPT‑4.1 nano is the fastest and cheapest model in the GPT-4.1 series. It delivers exceptional performance at a small size with its 1 million token context window, and scores 80.1% on MMLU, 50.3% on GPQA, and 9.8% on Aider polyglot coding – even higher than GPT‑4o mini. It’s ideal for tasks like classification or autocompletion.","data_retention":true,"geolocation":"global"},{"api":"chat","id":"openai/gpt-5.4-mini","object":"model","created":1773832106,"owned_by":"system","input_price":7.5e-7,"caching_price":0.0000045,"cached_price":7.5e-8,"output_price":0.0000045,"max_output_tokens":128000,"context_window":400000,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"GPT-5.4 mini brings the core capabilities of GPT-5.4 to a faster, more efficient model optimized for high-throughput workloads. It supports text and image inputs with strong performance across reasoning, coding, and tool use, while reducing latency and cost for large-scale deployments.\n\nThe model is designed for production environments that require a balance of capability and efficiency, making it well suited for chat applications, coding assistants, and agent workflows that operate at scale. GPT-5.4 mini delivers reliable instruction following, solid multi-step reasoning, and consistent performance across diverse tasks with improved cost efficiency.","data_retention":true,"privacy_comments":"N/A","geolocation":"global"},{"api":"chat","id":"openai/gpt-5-mini:priority","object":"model","created":1754586506,"owned_by":"system","input_price":4.5e-7,"caching_price":0.0000036,"cached_price":4.5e-8,"output_price":0.0000036,"max_output_tokens":128000,"context_window":400000,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"GPT-5 is OpenAI's flagship model for coding, reasoning, and agentic tasks across domains.","data_retention":true,"geolocation":"global"},{"api":"chat","id":"openai/o4-mini:high","object":"model","created":1744824542,"owned_by":"system","input_price":0.0000011,"caching_price":0.0000011,"cached_price":2.75e-7,"output_price":0.0000044,"max_output_tokens":100000,"context_window":200000,"supports_caching":true,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"o3-mini is OpenAI's most recent small reasoning model, providing high intelligence at the same cost and latency targets of o1-mini. o3-mini also supports key developer features, like Structured Outputs, function calling, Batch API, and more. Like other models in the o-series, it is designed to excel at science, math, and coding tasks.","data_retention":true,"geolocation":"global"},{"api":"chat","id":"openai/gpt-5.4","object":"model","created":1772745445,"owned_by":"system","input_price":0.0000025,"cached_price":2.5e-7,"output_price":0.000015,"max_output_tokens":128000,"context_window":1050000,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"GPT-5.4 is OpenAI's latest frontier model, unifying the Codex and GPT lines into a single system. It features a 1M+ token context window (922K input, 128K output) with support for text and image inputs, enabling high-context reasoning, coding, and multimodal analysis within the same workflow. The model delivers improved performance in coding, document understanding, tool use, and instruction following, capable of generating production-quality code, synthesizing information across multiple sources, and executing complex multi-step workflows with fewer iterations and greater token efficiency.","data_retention":true,"geolocation":"global"},{"api":"chat","id":"openai/gpt-4o-2024-05-13","object":"model","created":1732127594,"owned_by":"system","input_price":0.0000025,"caching_price":0.0000025,"cached_price":0.0000025,"output_price":0.00001,"max_output_tokens":4096,"context_window":128000,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"The 2024-11-20 version of GPT-4o offers a leveled-up creative writing ability with more natural, engaging, and tailored writing to improve relevance \u0026 readability. It’s also better at working with uploaded files, providing deeper insights \u0026 more thorough responses.\n\nGPT-4o (\"o\" for \"omni\") is OpenAI's latest AI model, supporting both text and image inputs with text outputs. It maintains the intelligence level of [GPT-4 Turbo](/models/openai/gpt-4-turbo) while being twice as fast and 50% more cost-effective. GPT-4o also offers improved performance in processing non-English languages and enhanced visual capabilities.","data_retention":true,"geolocation":"global"},{"api":"chat","id":"openai/gpt-5.1-chat","object":"model","created":1764610091,"owned_by":"system","input_price":0.00000125,"caching_price":0.00001,"cached_price":1.25e-7,"output_price":0.00001,"max_output_tokens":16384,"context_window":128000,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"GPT-5.1 Chat points to the GPT-5.1 snapshot currently used in ChatGPT. We recommend GPT-5.1 for most API usage, but feel free to use this GPT-5.1 Chat model to test our latest improvements for chat use cases.","data_retention":true,"geolocation":"global"},{"api":"chat","id":"openai/o3-mini","object":"model","created":1738351721,"owned_by":"system","input_price":0.0000011,"caching_price":0.0000011,"cached_price":5.5e-7,"output_price":0.0000044,"max_output_tokens":100000,"context_window":200000,"supports_caching":true,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"o3-mini is OpenAI's most recent small reasoning model, providing high intelligence at the same cost and latency targets of o1-mini. o3-mini also supports key developer features, like Structured Outputs, function calling, Batch API, and more. Like other models in the o-series, it is designed to excel at science, math, and coding tasks.","data_retention":true,"geolocation":"global"},{"api":"chat","id":"openai/gpt-5.2-chat","object":"model","created":1766164106,"owned_by":"system","input_price":0.00000175,"caching_price":0.000014,"cached_price":1.75e-7,"output_price":0.000014,"max_output_tokens":16384,"context_window":128000,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"GPT‑5.2 sets a new state of the art across many benchmarks, including GDPval, where it outperforms industry professionals at well-specified knowledge work tasks spanning 44 occupations.","data_retention":true,"geolocation":"global"},{"api":"chat","id":"openai/gpt-5-mini","object":"model","created":1754586319,"owned_by":"system","input_price":2.5e-7,"caching_price":2.5e-7,"cached_price":2.5e-8,"output_price":0.000002,"max_output_tokens":128000,"context_window":400000,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"GPT-5 mini is a faster, more cost-efficient version of GPT-5. It's great for well-defined tasks and precise prompts.","data_retention":true,"geolocation":"global"},{"api":"chat","id":"openai/o3","object":"model","created":1744827057,"owned_by":"system","input_price":0.000002,"caching_price":0.000002,"cached_price":5e-7,"output_price":0.000008,"max_output_tokens":100000,"context_window":200000,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"The o1 series of models are trained with reinforcement learning to perform complex reasoning. o1 models think before they answer, producing a long internal chain of thought before responding to the user. The o1 reasoning model is designed to solve hard problems across domains. The knowledge cutoff for o1 and o1-mini models is October, 2023.","data_retention":true,"geolocation":"global"},{"api":"chat","id":"openai/gpt-5:priority","object":"model","created":1754586506,"owned_by":"system","input_price":0.0000025,"caching_price":0.00002,"cached_price":2.5e-7,"output_price":0.00002,"max_output_tokens":128000,"context_window":400000,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"GPT-5 is OpenAI's flagship model for coding, reasoning, and agentic tasks across domains.","data_retention":true,"geolocation":"global"},{"api":"chat","id":"openai/o1:high","object":"model","created":1734459999,"owned_by":"system","input_price":0.000015,"caching_price":0.000015,"cached_price":0.0000075,"output_price":0.00006,"max_output_tokens":100000,"context_window":200000,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"The o1 series of models are trained with reinforcement learning to perform complex reasoning. o1 models think before they answer, producing a long internal chain of thought before responding to the user. The o1 reasoning model is designed to solve hard problems across domains. The knowledge cutoff for o1 and o1-mini models is October, 2023.","data_retention":true,"geolocation":"global"},{"api":"chat","id":"openai/o4-mini:flex","object":"model","created":1744824542,"owned_by":"system","input_price":5.5e-7,"caching_price":5.5e-7,"cached_price":1.38e-7,"output_price":0.0000022,"max_output_tokens":100000,"context_window":200000,"supports_caching":true,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"o3-mini is OpenAI's most recent small reasoning model, providing high intelligence at the same cost and latency targets of o1-mini. o3-mini also supports key developer features, like Structured Outputs, function calling, Batch API, and more. Like other models in the o-series, it is designed to excel at science, math, and coding tasks.","data_retention":true,"geolocation":"global"},{"api":"chat","id":"openai/chatgpt-4o","object":"model","created":1723597200,"owned_by":"system","input_price":0.000005,"caching_price":0.000005,"cached_price":0.000005,"output_price":0.000015,"max_output_tokens":16000,"context_window":128000,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"OpenAI ChatGPT 4o is continually updated by OpenAI to point to the current version of GPT-4o used by ChatGPT. It therefore differs slightly from the API version of [GPT-4o](/models/openai/gpt-4o) in that it has additional RLHF. It is intended for research and evaluation.\n\nOpenAI notes that this model is not suited for production use-cases as it may be removed or redirected to another model in the future.","data_retention":true,"geolocation":"global"},{"api":"chat","id":"openai/gpt-4.1-mini","object":"model","created":1744654981,"owned_by":"system","input_price":4e-7,"caching_price":4e-7,"cached_price":1e-7,"output_price":0.0000016,"max_output_tokens":32768,"context_window":1047576,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"GPT-4.1 Mini is a mid-sized model delivering performance competitive with GPT-4o at substantially lower latency and cost. It retains a 1 million token context window and scores 45.1% on hard instruction evals, 35.8% on MultiChallenge, and 84.1% on IFEval. Mini also shows strong coding ability (e.g., 31.6% on Aider’s polyglot diff benchmark) and vision understanding, making it suitable for interactive applications with tight performance constraints.","data_retention":true,"geolocation":"global"},{"api":"chat","id":"openai/gpt-5.1","object":"model","created":1754586506,"owned_by":"system","input_price":0.00000125,"caching_price":0.00001,"cached_price":1.25e-7,"output_price":0.00001,"max_output_tokens":128000,"context_window":400000,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"GPT-5 is OpenAI's flagship model for coding, reasoning, and agentic tasks across domains.","data_retention":true,"geolocation":"global"},{"api":"chat","id":"openai/gpt-5-nano:flex","object":"model","created":1754586455,"owned_by":"system","input_price":2.5e-8,"caching_price":2.5e-8,"cached_price":2.5e-9,"output_price":2e-7,"max_output_tokens":128000,"context_window":400000,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"GPT-5 nano is OpenAI's fastest, cheapest version of GPT-5. It's great for summarization and classification tasks.","data_retention":true,"geolocation":"global"},{"api":"chat","id":"openai/gpt-5.2","object":"model","created":1765472906,"owned_by":"system","input_price":0.00000175,"caching_price":1.75e-7,"cached_price":1.75e-7,"output_price":0.000014,"max_output_tokens":128000,"context_window":400000,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"The best model for coding and agentic tasks across industries","data_retention":true,"geolocation":"global"},{"api":"chat","id":"openai/gpt-5.4-nano","object":"model","created":1773832106,"owned_by":"system","input_price":2e-7,"caching_price":0.00000125,"cached_price":2e-8,"output_price":0.00000125,"max_output_tokens":128000,"context_window":400000,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"GPT-5.4 mini brings the core capabilities of GPT-5.4 to a faster, more efficient model optimized for high-throughput workloads. It supports text and image inputs with strong performance across reasoning, coding, and tool use, while reducing latency and cost for large-scale deployments.\n\nThe model is designed for production environments that require a balance of capability and efficiency, making it well suited for chat applications, coding assistants, and agent workflows that operate at scale. GPT-5.4 mini delivers reliable instruction following, solid multi-step reasoning, and consistent performance across diverse tasks with improved cost efficiency.","data_retention":true,"privacy_comments":"N/A","geolocation":"global"},{"api":"chat","id":"openai/gpt-5.5-pro","object":"model","created":1777109483,"owned_by":"system","input_price":0.00003,"output_price":0.00018,"max_output_tokens":128000,"context_window":1050000,"supports_caching":false,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"GPT-5.5 Pro is OpenAI's high-capability model optimized for deep reasoning and accuracy on complex, high-stakes workloads. It features a 1M+ token context window (922K input, 128K output) with support for text and image inputs, and is designed for long-horizon problem solving, agentic coding, and precise execution across multi-step workflows.","data_retention":true,"geolocation":"global"},{"api":"chat","id":"openai/gpt-5","object":"model","created":1754586506,"owned_by":"system","input_price":0.00000125,"caching_price":0.00001,"cached_price":1.25e-7,"output_price":0.00001,"max_output_tokens":128000,"context_window":400000,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"GPT-5 is OpenAI's flagship model for coding, reasoning, and agentic tasks across domains.","data_retention":true,"geolocation":"global"},{"api":"chat","id":"openai/gpt-5.3-chat","object":"model","created":1773832106,"owned_by":"system","input_price":0.00000175,"caching_price":0.000014,"cached_price":1.75e-7,"output_price":0.000014,"max_output_tokens":16384,"context_window":128000,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"GPT-5.3 Chat is an update to ChatGPT's most-used model that makes everyday conversations smoother, more useful, and more directly helpful. It delivers more accurate answers with better contextualization and significantly reduces unnecessary refusals, caveats, and overly cautious phrasing that can interrupt conversational flow.","data_retention":true,"privacy_comments":"N/A","geolocation":"global"},{"api":"chat","id":"openai/o4-mini","object":"model","created":1744824542,"owned_by":"system","input_price":0.0000011,"caching_price":0.0000011,"cached_price":2.75e-7,"output_price":0.0000044,"max_output_tokens":100000,"context_window":200000,"supports_caching":true,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"o3-mini is OpenAI's most recent small reasoning model, providing high intelligence at the same cost and latency targets of o1-mini. o3-mini also supports key developer features, like Structured Outputs, function calling, Batch API, and more. Like other models in the o-series, it is designed to excel at science, math, and coding tasks.","data_retention":true,"geolocation":"global"},{"api":"chat","id":"openai/o3-mini:medium","object":"model","created":1738351721,"owned_by":"system","input_price":0.0000011,"caching_price":0.0000011,"cached_price":5.5e-7,"output_price":0.0000044,"max_output_tokens":100000,"context_window":200000,"supports_caching":true,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"o3-mini is OpenAI's most recent small reasoning model, providing high intelligence at the same cost and latency targets of o1-mini. o3-mini also supports key developer features, like Structured Outputs, function calling, Batch API, and more. Like other models in the o-series, it is designed to excel at science, math, and coding tasks.","data_retention":true,"geolocation":"global"},{"api":"chat","id":"openai/o3-mini:high","object":"model","created":1738351721,"owned_by":"system","input_price":0.0000011,"caching_price":0.0000011,"cached_price":5.5e-7,"output_price":0.0000044,"max_output_tokens":100000,"context_window":200000,"supports_caching":true,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"o3-mini is OpenAI's most recent small reasoning model, providing high intelligence at the same cost and latency targets of o1-mini. o3-mini also supports key developer features, like Structured Outputs, function calling, Batch API, and more. Like other models in the o-series, it is designed to excel at science, math, and coding tasks.","data_retention":true,"geolocation":"global"},{"api":"chat","id":"openai/gpt-4o-2024-08-06","object":"model","created":1732127594,"owned_by":"system","input_price":0.0000025,"caching_price":0.0000025,"cached_price":0.00000125,"output_price":0.00001,"max_output_tokens":16384,"context_window":128000,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"The 2024-11-20 version of GPT-4o offers a leveled-up creative writing ability with more natural, engaging, and tailored writing to improve relevance \u0026 readability. It’s also better at working with uploaded files, providing deeper insights \u0026 more thorough responses.\n\nGPT-4o (\"o\" for \"omni\") is OpenAI's latest AI model, supporting both text and image inputs with text outputs. It maintains the intelligence level of [GPT-4 Turbo](/models/openai/gpt-4-turbo) while being twice as fast and 50% more cost-effective. GPT-4o also offers improved performance in processing non-English languages and enhanced visual capabilities.","data_retention":true,"geolocation":"global"},{"api":"chat","id":"openai/gpt-4o","object":"model","created":1732127594,"owned_by":"system","input_price":0.0000025,"caching_price":0.0000025,"cached_price":0.00000125,"output_price":0.00001,"max_output_tokens":16384,"context_window":128000,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"The 2024-11-20 version of GPT-4o offers a leveled-up creative writing ability with more natural, engaging, and tailored writing to improve relevance \u0026 readability. It’s also better at working with uploaded files, providing deeper insights \u0026 more thorough responses.\n\nGPT-4o (\"o\" for \"omni\") is OpenAI's latest AI model, supporting both text and image inputs with text outputs. It maintains the intelligence level of [GPT-4 Turbo](/models/openai/gpt-4-turbo) while being twice as fast and 50% more cost-effective. GPT-4o also offers improved performance in processing non-English languages and enhanced visual capabilities.","data_retention":true,"geolocation":"global"},{"api":"chat","id":"openai/o4-mini-deep-research","object":"model","created":1752771650,"owned_by":"system","input_price":0.000002,"caching_price":0.000002,"cached_price":5e-7,"output_price":0.000008,"max_output_tokens":200000,"context_window":100000,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Optimized for fast reasoning and minimal latency, O4 Mini Deep Research supports caching and high-speed inference. Ideal for lightweight agent use.","data_retention":true,"geolocation":"global"},{"api":"chat","id":"openai/o1:low","object":"model","created":1734459999,"owned_by":"system","input_price":0.000015,"caching_price":0.000015,"cached_price":0.0000075,"output_price":0.00006,"max_output_tokens":100000,"context_window":200000,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"The o1 series of models are trained with reinforcement learning to perform complex reasoning. o1 models think before they answer, producing a long internal chain of thought before responding to the user. The o1 reasoning model is designed to solve hard problems across domains. The knowledge cutoff for o1 and o1-mini models is October, 2023.","data_retention":true,"geolocation":"global"},{"api":"chat","id":"openai/gpt-4o-2024-11-20","object":"model","created":1732127594,"owned_by":"system","input_price":0.0000025,"caching_price":0.0000025,"cached_price":0.00000125,"output_price":0.00001,"max_output_tokens":16384,"context_window":128000,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"The 2024-11-20 version of GPT-4o offers a leveled-up creative writing ability with more natural, engaging, and tailored writing to improve relevance \u0026 readability. It’s also better at working with uploaded files, providing deeper insights \u0026 more thorough responses.\n\nGPT-4o (\"o\" for \"omni\") is OpenAI's latest AI model, supporting both text and image inputs with text outputs. It maintains the intelligence level of [GPT-4 Turbo](/models/openai/gpt-4-turbo) while being twice as fast and 50% more cost-effective. GPT-4o also offers improved performance in processing non-English languages and enhanced visual capabilities.","data_retention":true,"geolocation":"global"},{"api":"chat","id":"openai/o3-deep-research","object":"model","created":1752771666,"owned_by":"system","input_price":0.00001,"caching_price":0.00001,"cached_price":0.0000025,"output_price":0.00004,"max_output_tokens":200000,"context_window":100000,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"O3 Deep Research is a premium OpenAI model tuned for long-context research and high-recall reasoning tasks, optimized for analytical depth.","data_retention":true,"geolocation":"global"},{"api":"chat","id":"openai/gpt-4o-mini","object":"model","created":1721264400,"owned_by":"system","input_price":1.5e-7,"caching_price":1.5e-7,"cached_price":7.5e-8,"output_price":6e-7,"max_output_tokens":16384,"context_window":128000,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"GPT-4o mini is OpenAI's newest model after [GPT-4 Omni](/models/openai/gpt-4o), supporting both text and image inputs with text outputs.\n\nAs their most advanced small model, it is many multiples more affordable than other recent frontier models, and more than 60% cheaper than [GPT-3.5 Turbo](/models/openai/gpt-3.5-turbo). It maintains SOTA intelligence, while being significantly more cost-effective.\n\nGPT-4o mini achieves an 82% score on MMLU and presently ranks higher than GPT-4 on chat preferences [common leaderboards](https://arena.lmsys.org/).\n\nCheck out the [launch announcement](https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/) to learn more.\n\n#multimodal","data_retention":true,"geolocation":"global"},{"api":"chat","id":"openai/gpt-5-nano","object":"model","created":1754586455,"owned_by":"system","input_price":5e-8,"caching_price":5e-8,"cached_price":5e-9,"output_price":4e-7,"max_output_tokens":128000,"context_window":400000,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"GPT-5 nano is OpenAI's fastest, cheapest version of GPT-5. It's great for summarization and classification tasks.","data_retention":true,"geolocation":"global"},{"api":"chat","id":"openai/o4-mini:low","object":"model","created":1744824542,"owned_by":"system","input_price":0.0000011,"caching_price":0.0000011,"cached_price":2.75e-7,"output_price":0.0000044,"max_output_tokens":100000,"context_window":200000,"supports_caching":true,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"o3-mini is OpenAI's most recent small reasoning model, providing high intelligence at the same cost and latency targets of o1-mini. o3-mini also supports key developer features, like Structured Outputs, function calling, Batch API, and more. Like other models in the o-series, it is designed to excel at science, math, and coding tasks.","data_retention":true,"geolocation":"global"},{"api":"chat","id":"openai/o1","object":"model","created":1734459999,"owned_by":"system","input_price":0.000015,"caching_price":0.000015,"cached_price":0.0000075,"output_price":0.00006,"max_output_tokens":100000,"context_window":200000,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"The o1 series of models are trained with reinforcement learning to perform complex reasoning. o1 models think before they answer, producing a long internal chain of thought before responding to the user. The o1 reasoning model is designed to solve hard problems across domains. The knowledge cutoff for o1 and o1-mini models is October, 2023.","data_retention":true,"geolocation":"global"},{"api":"chat","id":"openai/gpt-5:flex","object":"model","created":1754586506,"owned_by":"system","input_price":6.25e-7,"caching_price":6.25e-7,"cached_price":6.25e-8,"output_price":0.000005,"max_output_tokens":128000,"context_window":400000,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"GPT-5 is OpenAI's flagship model for coding, reasoning, and agentic tasks across domains.","data_retention":true,"geolocation":"global"},{"api":"chat","id":"openai/o1-pro","object":"model","created":1738865908,"owned_by":"system","input_price":0.00015,"caching_price":0.00015,"cached_price":0.00015,"output_price":0.0006,"max_output_tokens":100000,"context_window":200000,"supports_caching":false,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"The o1 series of models are trained with reinforcement learning to perform complex reasoning. o1 models think before they answer, producing a long internal chain of thought before responding to the user. The o1 reasoning model is designed to solve hard problems across domains. The knowledge cutoff for o1 and o1-mini models is October, 2023.","data_retention":true,"geolocation":"global"},{"api":"chat","id":"openai/gpt-5-chat","object":"model","created":1754586506,"owned_by":"system","input_price":0.00000125,"caching_price":0.00001,"cached_price":1.25e-7,"output_price":0.00001,"max_output_tokens":16384,"context_window":128000,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"GPT-5 is OpenAI's flagship model for coding, reasoning, and agentic tasks across domains.","data_retention":true,"geolocation":"global"},{"api":"chat","id":"openai/gpt-4.1","object":"model","created":1744654985,"owned_by":"system","input_price":0.000002,"caching_price":0.000002,"cached_price":5e-7,"output_price":0.000008,"max_output_tokens":32768,"context_window":1047576,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"GPT-4.1 is a flagship large language model optimized for advanced instruction following, real-world software engineering, and long-context reasoning. It supports a 1 million token context window and outperforms GPT-4o and GPT-4.5 across coding (54.6% SWE-bench Verified), instruction compliance (87.4% IFEval), and multimodal understanding benchmarks. It is tuned for precise code diffs, agent reliability, and high recall in large document contexts, making it ideal for agents, IDE tooling, and enterprise knowledge retrieval.","data_retention":true,"geolocation":"global"},{"api":"chat","id":"openai/o4-mini:medium","object":"model","created":1744824542,"owned_by":"system","input_price":0.0000011,"caching_price":0.0000011,"cached_price":2.75e-7,"output_price":0.0000044,"max_output_tokens":100000,"context_window":200000,"supports_caching":true,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"o3-mini is OpenAI's most recent small reasoning model, providing high intelligence at the same cost and latency targets of o1-mini. o3-mini also supports key developer features, like Structured Outputs, function calling, Batch API, and more. Like other models in the o-series, it is designed to excel at science, math, and coding tasks.","data_retention":true,"geolocation":"global"},{"api":"chat","id":"openai/gpt-5-mini:flex","object":"model","created":1754586319,"owned_by":"system","input_price":1.25e-7,"caching_price":1.25e-7,"cached_price":1.25e-8,"output_price":0.000001,"max_output_tokens":128000,"context_window":400000,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"GPT-5 mini is a faster, more cost-efficient version of GPT-5. It's great for well-defined tasks and precise prompts.","data_retention":true,"geolocation":"global"},{"api":"chat","id":"vertex/gemini-2.5-flash@us-central1","object":"model","created":1747765524,"owned_by":"system","input_price":3e-7,"caching_price":5.5e-7,"cached_price":7.5e-8,"output_price":0.0000025,"max_output_tokens":65535,"context_window":1048576,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Google's first hybrid reasoning model which supports a 1M token context window and has thinking budgets. Most balanced Gemini model, optimized for low latency use cases.","geolocation":"global"},{"api":"chat","id":"vertex/gemini-2.5-flash@us-east1","object":"model","created":1747765524,"owned_by":"system","input_price":3e-7,"caching_price":5.5e-7,"cached_price":7.5e-8,"output_price":0.0000025,"max_output_tokens":65535,"context_window":1048576,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Google's first hybrid reasoning model which supports a 1M token context window and has thinking budgets. Most balanced Gemini model, optimized for low latency use cases.","geolocation":"global"},{"api":"chat","id":"vertex/gemini-2.5-flash-image","object":"model","created":1758392724,"owned_by":"system","input_price":3e-7,"caching_price":0.0000025,"cached_price":3e-7,"output_price":0.0000025,"max_output_tokens":65535,"context_window":1048576,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":true,"supports_tool_calling":true,"description":"Google's first hybrid reasoning model which supports a 1M token context window and has thinking budgets. Most balanced Gemini model, optimized for low latency use cases.","geolocation":"global"},{"api":"chat","id":"vertex/gemini-3.1-pro-preview","object":"model","created":1771524000,"owned_by":"system","input_price":0.000002,"caching_price":0.0000045,"cached_price":2e-7,"output_price":0.000012,"max_output_tokens":65535,"context_window":1048576,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Gemini 3.1 Pro is the next iteration in the Gemini 3 series of models, a suite of highly capable, natively multimodal reasoning models. As of this model card’s date of publication, Gemini 3.1 Pro is Google’s most advanced model for complex tasks. Geminin 3.1 Pro can comprehend vast datasets and challenging problems from massively multimodal information sources, including text, audio, images, video, and entire code repositories.","geolocation":"global"},{"api":"chat","id":"vertex/claude-opus-4-7@us-west-1","object":"model","created":1770317868,"owned_by":"system","input_price":0.0000055,"caching_price":0.00000688,"cached_price":5.5e-7,"output_price":0.0000275,"max_output_tokens":128000,"context_window":1000000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Opus 4.7 is the next generation of Anthropic's Opus family, built for long-running, asynchronous agents. Building on the coding and agentic strengths of Opus 4.6, it delivers stronger performance on complex, multi-step tasks and more reliable agentic execution across extended workflows. It is especially effective for asynchronous agent pipelines where tasks unfold over time - large codebases, multi-stage debugging, and end-to-end project orchestration.\n\nBeyond coding, Opus 4.7 brings improved knowledge work capabilities - from drafting documents and building presentations to analyzing data. It maintains coherence across very long outputs and extended sessions, making it a strong default for tasks that require persistence, judgment, and follow-through.","data_retention_days":30,"privacy_comments":"https://cloud.google.com/vertex-ai/generative-ai/docs/data-governance#customer_data_retention_and_achieving_zero_data_retention","geolocation":"us"},{"api":"chat","id":"vertex/gemini-2.5-flash-lite","object":"model","created":1747765524,"owned_by":"system","input_price":1e-7,"caching_price":1.8333e-7,"cached_price":1e-8,"output_price":4e-7,"max_output_tokens":65535,"context_window":1048576,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Google's smallest and most cost effective model, built for at scale usage.","geolocation":"global"},{"api":"chat","id":"vertex/gemini-2.5-flash-lite@us-east5","object":"model","created":1747765524,"owned_by":"system","input_price":1e-7,"caching_price":1.8333e-7,"cached_price":1e-8,"output_price":4e-7,"max_output_tokens":65535,"context_window":1048576,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Google's smallest and most cost effective model, built for at scale usage.","geolocation":"global"},{"api":"chat","id":"vertex/claude-haiku-4-5","object":"model","created":1747933971,"owned_by":"system","input_price":0.000001,"caching_price":0.00000125,"cached_price":1e-7,"output_price":0.000005,"max_output_tokens":64000,"context_window":200000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"Anthropic Haiku 4.5","data_retention_days":30,"privacy_comments":"https://cloud.google.com/vertex-ai/generative-ai/docs/data-governance#customer_data_retention_and_achieving_zero_data_retention","geolocation":"global"},{"api":"chat","id":"vertex/gemini-3-pro-preview","object":"model","created":1771524000,"owned_by":"system","input_price":0.000002,"caching_price":0.0000045,"cached_price":2e-7,"output_price":0.000012,"max_output_tokens":65535,"context_window":1048576,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Gemini 3.1 Pro is the next iteration in the Gemini 3 series of models, a suite of highly capable, natively multimodal reasoning models. As of this model card’s date of publication, Gemini 3.1 Pro is Google’s most advanced model for complex tasks. Geminin 3.1 Pro can comprehend vast datasets and challenging problems from massively multimodal information sources, including text, audio, images, video, and entire code repositories.","geolocation":"global"},{"api":"chat","id":"vertex/claude-opus-4-7@us-central-1","object":"model","created":1770317868,"owned_by":"system","input_price":0.0000055,"caching_price":0.00000688,"cached_price":5.5e-7,"output_price":0.0000275,"max_output_tokens":128000,"context_window":1000000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Opus 4.7 is the next generation of Anthropic's Opus family, built for long-running, asynchronous agents. Building on the coding and agentic strengths of Opus 4.6, it delivers stronger performance on complex, multi-step tasks and more reliable agentic execution across extended workflows. It is especially effective for asynchronous agent pipelines where tasks unfold over time - large codebases, multi-stage debugging, and end-to-end project orchestration.\n\nBeyond coding, Opus 4.7 brings improved knowledge work capabilities - from drafting documents and building presentations to analyzing data. It maintains coherence across very long outputs and extended sessions, making it a strong default for tasks that require persistence, judgment, and follow-through.","data_retention_days":30,"privacy_comments":"https://cloud.google.com/vertex-ai/generative-ai/docs/data-governance#customer_data_retention_and_achieving_zero_data_retention","geolocation":"us"},{"api":"chat","id":"vertex/claude-sonnet-4@us-east5","object":"model","created":1747933971,"owned_by":"system","input_price":0.000003,"caching_price":0.00000375,"cached_price":3e-7,"output_price":0.000015,"max_output_tokens":64000,"context_window":200000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Anthropic's most intelligent model. The first hybrid reasoning model on the market with the highest level of intelligence and capability with toggleable extended thinking. Top-tier results in reasoning, coding, multilingual tasks, long-context handling, honesty, and image processing.","data_retention_days":30,"privacy_comments":"https://cloud.google.com/vertex-ai/generative-ai/docs/data-governance#customer_data_retention_and_achieving_zero_data_retention","geolocation":"global"},{"api":"chat","id":"vertex/gemini-2.5-flash-lite@us-east1","object":"model","created":1747765524,"owned_by":"system","input_price":1e-7,"caching_price":1.8333e-7,"cached_price":1e-8,"output_price":4e-7,"max_output_tokens":65535,"context_window":1048576,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Google's smallest and most cost effective model, built for at scale usage.","geolocation":"global"},{"api":"chat","id":"vertex/claude-opus-4","object":"model","created":1747933971,"owned_by":"system","input_price":0.000015,"caching_price":0.00001875,"cached_price":0.0000015,"output_price":0.000075,"max_output_tokens":32000,"context_window":200000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Anthropic's most intelligent model. The first hybrid reasoning model on the market with the highest level of intelligence and capability with toggleable extended thinking. Top-tier results in reasoning, coding, multilingual tasks, long-context handling, honesty, and image processing.","data_retention_days":30,"privacy_comments":"https://cloud.google.com/vertex-ai/generative-ai/docs/data-governance#customer_data_retention_and_achieving_zero_data_retention","geolocation":"global"},{"api":"chat","id":"vertex/claude-opus-4-7@europe-west3","object":"model","created":1770317868,"owned_by":"system","input_price":0.0000055,"caching_price":0.00000688,"cached_price":5.5e-7,"output_price":0.0000275,"max_output_tokens":128000,"context_window":1000000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Opus 4.7 is the next generation of Anthropic's Opus family, built for long-running, asynchronous agents. Building on the coding and agentic strengths of Opus 4.6, it delivers stronger performance on complex, multi-step tasks and more reliable agentic execution across extended workflows. It is especially effective for asynchronous agent pipelines where tasks unfold over time - large codebases, multi-stage debugging, and end-to-end project orchestration.\n\nBeyond coding, Opus 4.7 brings improved knowledge work capabilities - from drafting documents and building presentations to analyzing data. It maintains coherence across very long outputs and extended sessions, making it a strong default for tasks that require persistence, judgment, and follow-through.","data_retention_days":30,"privacy_comments":"https://cloud.google.com/vertex-ai/generative-ai/docs/data-governance#customer_data_retention_and_achieving_zero_data_retention","geolocation":"eu"},{"api":"chat","id":"vertex/claude-opus-4-7","object":"model","created":1770317868,"owned_by":"system","input_price":0.000005,"caching_price":0.00000625,"cached_price":5e-7,"output_price":0.000025,"max_output_tokens":128000,"context_window":1000000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Opus 4.7 is the next generation of Anthropic's Opus family, built for long-running, asynchronous agents. Building on the coding and agentic strengths of Opus 4.6, it delivers stronger performance on complex, multi-step tasks and more reliable agentic execution across extended workflows. It is especially effective for asynchronous agent pipelines where tasks unfold over time - large codebases, multi-stage debugging, and end-to-end project orchestration.\n\nBeyond coding, Opus 4.7 brings improved knowledge work capabilities - from drafting documents and building presentations to analyzing data. It maintains coherence across very long outputs and extended sessions, making it a strong default for tasks that require persistence, judgment, and follow-through.","data_retention_days":30,"privacy_comments":"https://cloud.google.com/vertex-ai/generative-ai/docs/data-governance#customer_data_retention_and_achieving_zero_data_retention","geolocation":"global"},{"api":"chat","id":"vertex/claude-opus-4-1@us-east5","object":"model","created":1747933971,"owned_by":"system","input_price":0.000015,"caching_price":0.00001875,"cached_price":0.0000015,"output_price":0.000075,"max_output_tokens":32000,"context_window":200000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Anthropic's most intelligent model. The first hybrid reasoning model on the market with the highest level of intelligence and capability with toggleable extended thinking. Top-tier results in reasoning, coding, multilingual tasks, long-context handling, honesty, and image processing.","data_retention_days":30,"privacy_comments":"https://cloud.google.com/vertex-ai/generative-ai/docs/data-governance#customer_data_retention_and_achieving_zero_data_retention","geolocation":"global"},{"api":"chat","id":"vertex/gemini-2.5-flash-lite@europe-north1","object":"model","created":1747765524,"owned_by":"system","input_price":1e-7,"caching_price":1.8333e-7,"cached_price":1e-8,"output_price":4e-7,"max_output_tokens":65535,"context_window":1048576,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Google's smallest and most cost effective model, built for at scale usage.","geolocation":"global"},{"api":"chat","id":"vertex/gemini-2.5-flash@us-east5","object":"model","created":1747765524,"owned_by":"system","input_price":3e-7,"caching_price":5.5e-7,"cached_price":7.5e-8,"output_price":0.0000025,"max_output_tokens":65535,"context_window":1048576,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Google's first hybrid reasoning model which supports a 1M token context window and has thinking budgets. Most balanced Gemini model, optimized for low latency use cases.","geolocation":"global"},{"api":"chat","id":"vertex/gemini-2.5-pro@europe-north1","object":"model","created":1746582113,"owned_by":"system","input_price":0.00000125,"caching_price":0.000002375,"cached_price":3.1e-7,"output_price":0.00001,"max_output_tokens":65535,"context_window":1048576,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Gemini 2.5 Pro is Google’s state-of-the-art AI model designed for advanced reasoning, coding, mathematics, and scientific tasks. It employs “thinking” capabilities, enabling it to reason through responses with enhanced accuracy and nuanced context handling. Gemini 2.5 Pro achieves top-tier performance on multiple benchmarks, including first-place positioning on the LMArena leaderboard, reflecting superior human-preference alignment and complex problem-solving abilities.","geolocation":"global"},{"api":"chat","id":"vertex/claude-3-7-sonnet","object":"model","created":1718845200,"owned_by":"system","input_price":0.000003,"caching_price":0.00000375,"cached_price":3e-7,"output_price":0.000015,"max_output_tokens":64000,"context_window":200000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Anthropic's most intelligent model. The first hybrid reasoning model on the market with the highest level of intelligence and capability with toggleable extended thinking. Top-tier results in reasoning, coding, multilingual tasks, long-context handling, honesty, and image processing.","data_retention_days":30,"privacy_comments":"https://cloud.google.com/vertex-ai/generative-ai/docs/data-governance#customer_data_retention_and_achieving_zero_data_retention","geolocation":"global"},{"api":"chat","id":"vertex/claude-opus-4-7@us-east1","object":"model","created":1770317868,"owned_by":"system","input_price":0.0000055,"caching_price":0.00000688,"cached_price":5.5e-7,"output_price":0.0000275,"max_output_tokens":128000,"context_window":1000000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Opus 4.7 is the next generation of Anthropic's Opus family, built for long-running, asynchronous agents. Building on the coding and agentic strengths of Opus 4.6, it delivers stronger performance on complex, multi-step tasks and more reliable agentic execution across extended workflows. It is especially effective for asynchronous agent pipelines where tasks unfold over time - large codebases, multi-stage debugging, and end-to-end project orchestration.\n\nBeyond coding, Opus 4.7 brings improved knowledge work capabilities - from drafting documents and building presentations to analyzing data. It maintains coherence across very long outputs and extended sessions, making it a strong default for tasks that require persistence, judgment, and follow-through.","data_retention_days":30,"privacy_comments":"https://cloud.google.com/vertex-ai/generative-ai/docs/data-governance#customer_data_retention_and_achieving_zero_data_retention","geolocation":"us"},{"api":"chat","id":"vertex/claude-sonnet-4-6@europe-west1","object":"model","created":1747933971,"owned_by":"system","input_price":0.000003,"caching_price":0.00000375,"cached_price":3e-7,"output_price":0.000015,"max_output_tokens":128000,"context_window":1000000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Sonnet 4.6 is Anthropic's most capable Sonnet-class model yet, with frontier performance across coding, agents, and professional work. It excels at iterative development, complex codebase navigation, end-to-end project management with memory, polished document creation, and confident computer use for web QA and workflow automation","data_retention_days":30,"privacy_comments":"https://cloud.google.com/vertex-ai/generative-ai/docs/data-governance#customer_data_retention_and_achieving_zero_data_retention","geolocation":"eu"},{"api":"chat","id":"vertex/gemini-2.5-flash","object":"model","created":1747765524,"owned_by":"system","input_price":3e-7,"caching_price":5.5e-7,"cached_price":7.5e-8,"output_price":0.0000025,"max_output_tokens":65535,"context_window":1048576,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Google's first hybrid reasoning model which supports a 1M token context window and has thinking budgets. Most balanced Gemini model, optimized for low latency use cases.","geolocation":"global"},{"api":"chat","id":"vertex/claude-sonnet-4-5@europe-west1","object":"model","created":1747933971,"owned_by":"system","input_price":0.0000033,"caching_price":0.00000413,"cached_price":3.3e-7,"output_price":0.0000165,"max_output_tokens":64000,"context_window":200000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"The first hybrid reasoning model on the market with the highest level of intelligence and capability with toggleable extended thinking. Top-tier results in reasoning, coding, multilingual tasks, long-context handling, honesty, and image processing.","data_retention_days":30,"privacy_comments":"https://cloud.google.com/vertex-ai/generative-ai/docs/data-governance#customer_data_retention_and_achieving_zero_data_retention","geolocation":"eu"},{"api":"chat","id":"vertex/gemini-2.5-pro@us-south1","object":"model","created":1746582113,"owned_by":"system","input_price":0.00000125,"caching_price":0.000002375,"cached_price":3.1e-7,"output_price":0.00001,"max_output_tokens":65535,"context_window":1048576,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Gemini 2.5 Pro is Google’s state-of-the-art AI model designed for advanced reasoning, coding, mathematics, and scientific tasks. It employs “thinking” capabilities, enabling it to reason through responses with enhanced accuracy and nuanced context handling. Gemini 2.5 Pro achieves top-tier performance on multiple benchmarks, including first-place positioning on the LMArena leaderboard, reflecting superior human-preference alignment and complex problem-solving abilities.","geolocation":"global"},{"api":"chat","id":"vertex/claude-sonnet-4-5","object":"model","created":1747933971,"owned_by":"system","input_price":0.000003,"caching_price":0.00000375,"cached_price":3e-7,"output_price":0.000015,"max_output_tokens":64000,"context_window":200000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"The first hybrid reasoning model on the market with the highest level of intelligence and capability with toggleable extended thinking. Top-tier results in reasoning, coding, multilingual tasks, long-context handling, honesty, and image processing.","data_retention_days":30,"privacy_comments":"https://cloud.google.com/vertex-ai/generative-ai/docs/data-governance#customer_data_retention_and_achieving_zero_data_retention","geolocation":"global"},{"api":"chat","id":"vertex/gemini-2.5-pro@us-east1","object":"model","created":1746582113,"owned_by":"system","input_price":0.00000125,"caching_price":0.000002375,"cached_price":3.1e-7,"output_price":0.00001,"max_output_tokens":65535,"context_window":1048576,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Gemini 2.5 Pro is Google’s state-of-the-art AI model designed for advanced reasoning, coding, mathematics, and scientific tasks. It employs “thinking” capabilities, enabling it to reason through responses with enhanced accuracy and nuanced context handling. Gemini 2.5 Pro achieves top-tier performance on multiple benchmarks, including first-place positioning on the LMArena leaderboard, reflecting superior human-preference alignment and complex problem-solving abilities.","geolocation":"global"},{"api":"chat","id":"vertex/gemini-2.5-pro@us-west1","object":"model","created":1746582113,"owned_by":"system","input_price":0.00000125,"caching_price":0.000002375,"cached_price":3.1e-7,"output_price":0.00001,"max_output_tokens":65535,"context_window":1048576,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Gemini 2.5 Pro is Google’s state-of-the-art AI model designed for advanced reasoning, coding, mathematics, and scientific tasks. It employs “thinking” capabilities, enabling it to reason through responses with enhanced accuracy and nuanced context handling. Gemini 2.5 Pro achieves top-tier performance on multiple benchmarks, including first-place positioning on the LMArena leaderboard, reflecting superior human-preference alignment and complex problem-solving abilities.","geolocation":"global"},{"api":"chat","id":"vertex/gemini-2.5-flash@europe-central2","object":"model","created":1747765524,"owned_by":"system","input_price":3e-7,"caching_price":5.5e-7,"cached_price":7.5e-8,"output_price":0.0000025,"max_output_tokens":65535,"context_window":1048576,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Google's first hybrid reasoning model which supports a 1M token context window and has thinking budgets. Most balanced Gemini model, optimized for low latency use cases.","geolocation":"global"},{"api":"chat","id":"vertex/claude-sonnet-4-5@us-east5","object":"model","created":1747933971,"owned_by":"system","input_price":0.0000033,"caching_price":0.00000413,"cached_price":3.3e-7,"output_price":0.0000165,"max_output_tokens":64000,"context_window":200000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"The first hybrid reasoning model on the market with the highest level of intelligence and capability with toggleable extended thinking. Top-tier results in reasoning, coding, multilingual tasks, long-context handling, honesty, and image processing.","data_retention_days":30,"privacy_comments":"https://cloud.google.com/vertex-ai/generative-ai/docs/data-governance#customer_data_retention_and_achieving_zero_data_retention","geolocation":"us"},{"api":"chat","id":"vertex/gemini-2.5-pro@us-central1","object":"model","created":1746582113,"owned_by":"system","input_price":0.00000125,"caching_price":0.000002375,"cached_price":3.1e-7,"output_price":0.00001,"max_output_tokens":65535,"context_window":1048576,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Gemini 2.5 Pro is Google’s state-of-the-art AI model designed for advanced reasoning, coding, mathematics, and scientific tasks. It employs “thinking” capabilities, enabling it to reason through responses with enhanced accuracy and nuanced context handling. Gemini 2.5 Pro achieves top-tier performance on multiple benchmarks, including first-place positioning on the LMArena leaderboard, reflecting superior human-preference alignment and complex problem-solving abilities.","geolocation":"global"},{"api":"chat","id":"vertex/gemini-2.5-pro@europe-central2","object":"model","created":1746582113,"owned_by":"system","input_price":0.00000125,"caching_price":0.000002375,"cached_price":3.1e-7,"output_price":0.00001,"max_output_tokens":65535,"context_window":1048576,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Gemini 2.5 Pro is Google’s state-of-the-art AI model designed for advanced reasoning, coding, mathematics, and scientific tasks. It employs “thinking” capabilities, enabling it to reason through responses with enhanced accuracy and nuanced context handling. Gemini 2.5 Pro achieves top-tier performance on multiple benchmarks, including first-place positioning on the LMArena leaderboard, reflecting superior human-preference alignment and complex problem-solving abilities.","geolocation":"global"},{"api":"chat","id":"vertex/gemini-2.5-flash@us-south1","object":"model","created":1747765524,"owned_by":"system","input_price":3e-7,"caching_price":5.5e-7,"cached_price":7.5e-8,"output_price":0.0000025,"max_output_tokens":65535,"context_window":1048576,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Google's first hybrid reasoning model which supports a 1M token context window and has thinking budgets. Most balanced Gemini model, optimized for low latency use cases.","geolocation":"global"},{"api":"chat","id":"vertex/claude-opus-4-6@us-east5","object":"model","created":1770317868,"owned_by":"system","input_price":0.0000055,"caching_price":0.00000688,"cached_price":5.5e-7,"output_price":0.0000275,"max_output_tokens":128000,"context_window":1000000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Claude Opus 4.6 is Anthropic's most powerful model yet and the best coding model in the world.","data_retention_days":30,"privacy_comments":"https://cloud.google.com/vertex-ai/generative-ai/docs/data-governance#customer_data_retention_and_achieving_zero_data_retention","geolocation":"us"},{"api":"chat","id":"vertex/gemini-2.5-flash@europe-west1","object":"model","created":1747765524,"owned_by":"system","input_price":3e-7,"caching_price":5.5e-7,"cached_price":7.5e-8,"output_price":0.0000025,"max_output_tokens":65535,"context_window":1048576,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Google's first hybrid reasoning model which supports a 1M token context window and has thinking budgets. Most balanced Gemini model, optimized for low latency use cases.","geolocation":"global"},{"api":"chat","id":"vertex/claude-opus-4-5@us-east5","object":"model","created":1764004371,"owned_by":"system","input_price":0.0000055,"caching_price":0.000006875,"cached_price":5.5e-7,"output_price":0.0000275,"max_output_tokens":64000,"context_window":200000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Anthropic Opus 4.5","data_retention_days":30,"privacy_comments":"https://cloud.google.com/vertex-ai/generative-ai/docs/data-governance#customer_data_retention_and_achieving_zero_data_retention","geolocation":"us"},{"api":"chat","id":"vertex/gemini-2.5-flash-lite@europe-west8","object":"model","created":1747765524,"owned_by":"system","input_price":1e-7,"caching_price":1.8333e-7,"cached_price":1e-8,"output_price":4e-7,"max_output_tokens":65535,"context_window":1048576,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Google's smallest and most cost effective model, built for at scale usage.","geolocation":"global"},{"api":"chat","id":"vertex/deepseek-v3.2","object":"model","created":1773944724,"owned_by":"system","input_price":5.6e-7,"caching_price":0.00000168,"cached_price":5.6e-8,"output_price":0.00000168,"max_output_tokens":65535,"context_window":163840,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":true,"supports_tool_calling":true,"description":"DeepSeek-V3.2 is a model that harmonizes high computational efficiency with superior reasoning and agent performance. DeepSeek's approach is built upon three key technical breakthroughs: DeepSeek Sparse Attention (DSA), scalable reinforcement learning framework, and large scale agentic task synthesis pipeline.","privacy_comments":"N/A","geolocation":"global"},{"api":"chat","id":"vertex/claude-sonnet-4-6","object":"model","created":1747933971,"owned_by":"system","input_price":0.000003,"caching_price":0.00000375,"cached_price":3e-7,"output_price":0.000015,"max_output_tokens":128000,"context_window":1000000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Sonnet 4.6 is Anthropic's most capable Sonnet-class model yet, with frontier performance across coding, agents, and professional work. It excels at iterative development, complex codebase navigation, end-to-end project management with memory, polished document creation, and confident computer use for web QA and workflow automation","data_retention_days":30,"privacy_comments":"https://cloud.google.com/vertex-ai/generative-ai/docs/data-governance#customer_data_retention_and_achieving_zero_data_retention","geolocation":"global"},{"api":"chat","id":"vertex/gemini-2.5-flash-lite@europe-central2","object":"model","created":1747765524,"owned_by":"system","input_price":1e-7,"caching_price":1.8333e-7,"cached_price":1e-8,"output_price":4e-7,"max_output_tokens":65535,"context_window":1048576,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Google's smallest and most cost effective model, built for at scale usage.","geolocation":"global"},{"api":"chat","id":"vertex/claude-opus-4-6","object":"model","created":1770317868,"owned_by":"system","input_price":0.000005,"caching_price":0.00000625,"cached_price":5e-7,"output_price":0.000025,"max_output_tokens":128000,"context_window":1000000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Claude Opus 4.6 is Anthropic's most powerful model yet and the best coding model in the world.","data_retention_days":30,"privacy_comments":"https://cloud.google.com/vertex-ai/generative-ai/docs/data-governance#customer_data_retention_and_achieving_zero_data_retention","geolocation":"global"},{"api":"chat","id":"vertex/claude-opus-4-1","object":"model","created":1747933971,"owned_by":"system","input_price":0.000015,"caching_price":0.00001875,"cached_price":0.0000015,"output_price":0.000075,"max_output_tokens":32000,"context_window":200000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Anthropic's most intelligent model. The first hybrid reasoning model on the market with the highest level of intelligence and capability with toggleable extended thinking. Top-tier results in reasoning, coding, multilingual tasks, long-context handling, honesty, and image processing.","data_retention_days":30,"privacy_comments":"https://cloud.google.com/vertex-ai/generative-ai/docs/data-governance#customer_data_retention_and_achieving_zero_data_retention","geolocation":"global"},{"api":"chat","id":"vertex/claude-opus-4-1@europe-west1","object":"model","created":1747933971,"owned_by":"system","input_price":0.000015,"caching_price":0.00001875,"cached_price":0.0000015,"output_price":0.000075,"max_output_tokens":32000,"context_window":200000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Anthropic's most intelligent model. The first hybrid reasoning model on the market with the highest level of intelligence and capability with toggleable extended thinking. Top-tier results in reasoning, coding, multilingual tasks, long-context handling, honesty, and image processing.","data_retention_days":30,"privacy_comments":"https://cloud.google.com/vertex-ai/generative-ai/docs/data-governance#customer_data_retention_and_achieving_zero_data_retention","geolocation":"global"},{"api":"chat","id":"vertex/gemini-2.5-flash-lite@us-west1","object":"model","created":1747765524,"owned_by":"system","input_price":1e-7,"caching_price":1.8333e-7,"cached_price":1e-8,"output_price":4e-7,"max_output_tokens":65535,"context_window":1048576,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Google's smallest and most cost effective model, built for at scale usage.","geolocation":"global"},{"api":"chat","id":"vertex/kimi-k2","object":"model","created":1776345053,"owned_by":"system","input_price":6e-7,"caching_price":0.0000025,"cached_price":6e-8,"output_price":0.0000025,"max_output_tokens":262144,"context_window":262144,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Kimi K2 Thinking is an open-source model that operates as a \"thinking agent,\" reasoning step-by-step while using tools to achieve state-of-the-art performance on various benchmarks. It is capable of executing up to 200-300 sequential tool calls without human intervention, allowing it to solve complex problems across a wide range of tasks. The model uses Quantization-Aware Training (QAT) to support INT4 inference, which provides a roughly 2x improvement in generation speed.","privacy_comments":"N/A","geolocation":"global"},{"api":"chat","id":"vertex/gemini-2.5-pro","object":"model","created":1746582113,"owned_by":"system","input_price":0.00000125,"caching_price":0.000002375,"cached_price":3.1e-7,"output_price":0.00001,"max_output_tokens":65535,"context_window":1048576,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Gemini 2.5 Pro is Google’s state-of-the-art AI model designed for advanced reasoning, coding, mathematics, and scientific tasks. It employs “thinking” capabilities, enabling it to reason through responses with enhanced accuracy and nuanced context handling. Gemini 2.5 Pro achieves top-tier performance on multiple benchmarks, including first-place positioning on the LMArena leaderboard, reflecting superior human-preference alignment and complex problem-solving abilities.","geolocation":"global"},{"api":"chat","id":"vertex/claude-opus-4-5@europe-west1","object":"model","created":1764004371,"owned_by":"system","input_price":0.0000055,"caching_price":0.000006875,"cached_price":5.5e-7,"output_price":0.0000275,"max_output_tokens":64000,"context_window":200000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Anthropic Opus 4.5","data_retention_days":30,"privacy_comments":"https://cloud.google.com/vertex-ai/generative-ai/docs/data-governance#customer_data_retention_and_achieving_zero_data_retention","geolocation":"eu"},{"api":"chat","id":"vertex/claude-opus-4@us-east5","object":"model","created":1747933971,"owned_by":"system","input_price":0.000015,"caching_price":0.00001875,"cached_price":0.0000015,"output_price":0.000075,"max_output_tokens":32000,"context_window":200000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Anthropic's most intelligent model. The first hybrid reasoning model on the market with the highest level of intelligence and capability with toggleable extended thinking. Top-tier results in reasoning, coding, multilingual tasks, long-context handling, honesty, and image processing.","data_retention_days":30,"privacy_comments":"https://cloud.google.com/vertex-ai/generative-ai/docs/data-governance#customer_data_retention_and_achieving_zero_data_retention","geolocation":"global"},{"api":"chat","id":"vertex/claude-opus-4@europe-west1","object":"model","created":1747933971,"owned_by":"system","input_price":0.000015,"caching_price":0.00001875,"cached_price":0.0000015,"output_price":0.000075,"max_output_tokens":32000,"context_window":200000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Anthropic's most intelligent model. The first hybrid reasoning model on the market with the highest level of intelligence and capability with toggleable extended thinking. Top-tier results in reasoning, coding, multilingual tasks, long-context handling, honesty, and image processing.","data_retention_days":30,"privacy_comments":"https://cloud.google.com/vertex-ai/generative-ai/docs/data-governance#customer_data_retention_and_achieving_zero_data_retention","geolocation":"global"},{"api":"chat","id":"vertex/gemini-2.5-flash-lite@us-central1","object":"model","created":1747765524,"owned_by":"system","input_price":1e-7,"caching_price":1.8333e-7,"cached_price":1e-8,"output_price":4e-7,"max_output_tokens":65535,"context_window":1048576,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Google's smallest and most cost effective model, built for at scale usage.","geolocation":"global"},{"api":"chat","id":"vertex/claude-3-7-sonnet@us-east5","object":"model","created":1718845200,"owned_by":"system","input_price":0.000003,"caching_price":0.00000375,"cached_price":3e-7,"output_price":0.000015,"max_output_tokens":64000,"context_window":200000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Anthropic's most intelligent model. The first hybrid reasoning model on the market with the highest level of intelligence and capability with toggleable extended thinking. Top-tier results in reasoning, coding, multilingual tasks, long-context handling, honesty, and image processing.","data_retention_days":30,"privacy_comments":"https://cloud.google.com/vertex-ai/generative-ai/docs/data-governance#customer_data_retention_and_achieving_zero_data_retention","geolocation":"global"},{"api":"chat","id":"vertex/claude-sonnet-4","object":"model","created":1747933971,"owned_by":"system","input_price":0.000003,"caching_price":0.00000375,"cached_price":3e-7,"output_price":0.000015,"max_output_tokens":64000,"context_window":200000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Anthropic's most intelligent model. The first hybrid reasoning model on the market with the highest level of intelligence and capability with toggleable extended thinking. Top-tier results in reasoning, coding, multilingual tasks, long-context handling, honesty, and image processing.","data_retention_days":30,"privacy_comments":"https://cloud.google.com/vertex-ai/generative-ai/docs/data-governance#customer_data_retention_and_achieving_zero_data_retention","geolocation":"global"},{"api":"chat","id":"vertex/claude-sonnet-4@europe-west1","object":"model","created":1747933971,"owned_by":"system","input_price":0.000003,"caching_price":0.00000375,"cached_price":3e-7,"output_price":0.000015,"max_output_tokens":64000,"context_window":200000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Anthropic's most intelligent model. The first hybrid reasoning model on the market with the highest level of intelligence and capability with toggleable extended thinking. Top-tier results in reasoning, coding, multilingual tasks, long-context handling, honesty, and image processing.","data_retention_days":30,"privacy_comments":"https://cloud.google.com/vertex-ai/generative-ai/docs/data-governance#customer_data_retention_and_achieving_zero_data_retention","geolocation":"global"},{"api":"chat","id":"vertex/gemini-2.5-pro@europe-west4","object":"model","created":1746582113,"owned_by":"system","input_price":0.00000125,"caching_price":0.000002375,"cached_price":3.1e-7,"output_price":0.00001,"max_output_tokens":65535,"context_window":1048576,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Gemini 2.5 Pro is Google’s state-of-the-art AI model designed for advanced reasoning, coding, mathematics, and scientific tasks. It employs “thinking” capabilities, enabling it to reason through responses with enhanced accuracy and nuanced context handling. Gemini 2.5 Pro achieves top-tier performance on multiple benchmarks, including first-place positioning on the LMArena leaderboard, reflecting superior human-preference alignment and complex problem-solving abilities.","geolocation":"global"},{"api":"chat","id":"vertex/claude-sonnet-4-6@us-east5","object":"model","created":1747933971,"owned_by":"system","input_price":0.000003,"caching_price":0.00000375,"cached_price":3e-7,"output_price":0.000015,"max_output_tokens":128000,"context_window":1000000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Sonnet 4.6 is Anthropic's most capable Sonnet-class model yet, with frontier performance across coding, agents, and professional work. It excels at iterative development, complex codebase navigation, end-to-end project management with memory, polished document creation, and confident computer use for web QA and workflow automation","data_retention_days":30,"privacy_comments":"https://cloud.google.com/vertex-ai/generative-ai/docs/data-governance#customer_data_retention_and_achieving_zero_data_retention","geolocation":"us"},{"api":"chat","id":"vertex/gemini-2.5-flash-lite@us-south1","object":"model","created":1747765524,"owned_by":"system","input_price":1e-7,"caching_price":1.8333e-7,"cached_price":1e-8,"output_price":4e-7,"max_output_tokens":65535,"context_window":1048576,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Google's smallest and most cost effective model, built for at scale usage.","geolocation":"global"},{"api":"chat","id":"vertex/gemini-2.5-flash@europe-west8","object":"model","created":1747765524,"owned_by":"system","input_price":3e-7,"caching_price":5.5e-7,"cached_price":7.5e-8,"output_price":0.0000025,"max_output_tokens":65535,"context_window":1048576,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Google's first hybrid reasoning model which supports a 1M token context window and has thinking budgets. Most balanced Gemini model, optimized for low latency use cases.","geolocation":"global"},{"api":"chat","id":"vertex/claude-opus-4-6@europe-west1","object":"model","created":1770317868,"owned_by":"system","input_price":0.0000055,"caching_price":0.00000688,"cached_price":5.5e-7,"output_price":0.0000275,"max_output_tokens":128000,"context_window":1000000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Claude Opus 4.6 is Anthropic's most powerful model yet and the best coding model in the world.","data_retention_days":30,"privacy_comments":"https://cloud.google.com/vertex-ai/generative-ai/docs/data-governance#customer_data_retention_and_achieving_zero_data_retention","geolocation":"eu"},{"api":"chat","id":"vertex/gemini-3.1-flash-lite","object":"model","created":1778175883,"owned_by":"system","input_price":2.5e-7,"caching_price":8.333e-8,"cached_price":2.5e-8,"output_price":0.0000015,"max_output_tokens":65535,"context_window":1048576,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"Gemini 3.1 Flash Lite Preview is the most cost-efficient model in the Gemini family, optimized for high-volume, low-latency tasks. It delivers fast responses with solid quality for everyday use cases including summarization, classification, and simple reasoning.","privacy_comments":"N/A","geolocation":"global"},{"api":"chat","id":"vertex/claude-3-7-sonnet@europe-west1","object":"model","created":1718845200,"owned_by":"system","input_price":0.000003,"caching_price":0.00000375,"cached_price":3e-7,"output_price":0.000015,"max_output_tokens":64000,"context_window":200000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Anthropic's most intelligent model. The first hybrid reasoning model on the market with the highest level of intelligence and capability with toggleable extended thinking. Top-tier results in reasoning, coding, multilingual tasks, long-context handling, honesty, and image processing.","data_retention_days":30,"privacy_comments":"https://cloud.google.com/vertex-ai/generative-ai/docs/data-governance#customer_data_retention_and_achieving_zero_data_retention","geolocation":"global"},{"api":"chat","id":"vertex/gemini-3-pro-image-preview","object":"model","created":1746582113,"owned_by":"system","input_price":0.000002,"caching_price":0.0000045,"cached_price":2e-7,"output_price":0.000012,"max_output_tokens":32768,"context_window":1048576,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":true,"supports_tool_calling":true,"description":"Gemini 3 Pro Image, or Gemini 3 Pro (with Nano Banana), is designed to tackle the most challenging image generation by incorporating state-of-the-art reasoning capabilities. It's the best model for complex and multi-turn image generation and editing, having improved accuracy and enhanced image quality.","geolocation":"global"},{"api":"chat","id":"vertex/claude-opus-4-7@europe-west2","object":"model","created":1770317868,"owned_by":"system","input_price":0.0000055,"caching_price":0.00000688,"cached_price":5.5e-7,"output_price":0.0000275,"max_output_tokens":128000,"context_window":1000000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Opus 4.7 is the next generation of Anthropic's Opus family, built for long-running, asynchronous agents. Building on the coding and agentic strengths of Opus 4.6, it delivers stronger performance on complex, multi-step tasks and more reliable agentic execution across extended workflows. It is especially effective for asynchronous agent pipelines where tasks unfold over time - large codebases, multi-stage debugging, and end-to-end project orchestration.\n\nBeyond coding, Opus 4.7 brings improved knowledge work capabilities - from drafting documents and building presentations to analyzing data. It maintains coherence across very long outputs and extended sessions, making it a strong default for tasks that require persistence, judgment, and follow-through.","data_retention_days":30,"privacy_comments":"https://cloud.google.com/vertex-ai/generative-ai/docs/data-governance#customer_data_retention_and_achieving_zero_data_retention","geolocation":"eu"},{"api":"chat","id":"vertex/claude-opus-4-7@europe-west4","object":"model","created":1770317868,"owned_by":"system","input_price":0.0000055,"caching_price":0.00000688,"cached_price":5.5e-7,"output_price":0.0000275,"max_output_tokens":128000,"context_window":1000000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Opus 4.7 is the next generation of Anthropic's Opus family, built for long-running, asynchronous agents. Building on the coding and agentic strengths of Opus 4.6, it delivers stronger performance on complex, multi-step tasks and more reliable agentic execution across extended workflows. It is especially effective for asynchronous agent pipelines where tasks unfold over time - large codebases, multi-stage debugging, and end-to-end project orchestration.\n\nBeyond coding, Opus 4.7 brings improved knowledge work capabilities - from drafting documents and building presentations to analyzing data. It maintains coherence across very long outputs and extended sessions, making it a strong default for tasks that require persistence, judgment, and follow-through.","data_retention_days":30,"privacy_comments":"https://cloud.google.com/vertex-ai/generative-ai/docs/data-governance#customer_data_retention_and_achieving_zero_data_retention","geolocation":"eu"},{"api":"chat","id":"vertex/gemini-2.5-flash-lite@europe-west4","object":"model","created":1747765524,"owned_by":"system","input_price":1e-7,"caching_price":1.8333e-7,"cached_price":1e-8,"output_price":4e-7,"max_output_tokens":65535,"context_window":1048576,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Google's smallest and most cost effective model, built for at scale usage.","geolocation":"global"},{"api":"chat","id":"vertex/gemini-2.5-pro@europe-west1","object":"model","created":1746582113,"owned_by":"system","input_price":0.00000125,"caching_price":0.000002375,"cached_price":3.1e-7,"output_price":0.00001,"max_output_tokens":65535,"context_window":1048576,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Gemini 2.5 Pro is Google’s state-of-the-art AI model designed for advanced reasoning, coding, mathematics, and scientific tasks. It employs “thinking” capabilities, enabling it to reason through responses with enhanced accuracy and nuanced context handling. Gemini 2.5 Pro achieves top-tier performance on multiple benchmarks, including first-place positioning on the LMArena leaderboard, reflecting superior human-preference alignment and complex problem-solving abilities.","geolocation":"global"},{"api":"chat","id":"vertex/gemini-2.5-flash@europe-west4","object":"model","created":1747765524,"owned_by":"system","input_price":3e-7,"caching_price":5.5e-7,"cached_price":7.5e-8,"output_price":0.0000025,"max_output_tokens":65535,"context_window":1048576,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Google's first hybrid reasoning model which supports a 1M token context window and has thinking budgets. Most balanced Gemini model, optimized for low latency use cases.","geolocation":"global"},{"api":"chat","id":"vertex/claude-haiku-4-5@europe-west1","object":"model","created":1747933971,"owned_by":"system","input_price":0.0000011,"caching_price":0.000001375,"cached_price":1.1e-7,"output_price":0.0000055,"max_output_tokens":64000,"context_window":200000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"Anthropic Haiku 4.5","data_retention_days":30,"privacy_comments":"https://cloud.google.com/vertex-ai/generative-ai/docs/data-governance#customer_data_retention_and_achieving_zero_data_retention","geolocation":"eu"},{"api":"chat","id":"vertex/claude-opus-4-7@europe-west1","object":"model","created":1770317868,"owned_by":"system","input_price":0.0000055,"caching_price":0.00000688,"cached_price":5.5e-7,"output_price":0.0000275,"max_output_tokens":128000,"context_window":1000000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Opus 4.7 is the next generation of Anthropic's Opus family, built for long-running, asynchronous agents. Building on the coding and agentic strengths of Opus 4.6, it delivers stronger performance on complex, multi-step tasks and more reliable agentic execution across extended workflows. It is especially effective for asynchronous agent pipelines where tasks unfold over time - large codebases, multi-stage debugging, and end-to-end project orchestration.\n\nBeyond coding, Opus 4.7 brings improved knowledge work capabilities - from drafting documents and building presentations to analyzing data. It maintains coherence across very long outputs and extended sessions, making it a strong default for tasks that require persistence, judgment, and follow-through.","data_retention_days":30,"privacy_comments":"https://cloud.google.com/vertex-ai/generative-ai/docs/data-governance#customer_data_retention_and_achieving_zero_data_retention","geolocation":"eu"},{"api":"chat","id":"vertex/claude-haiku-4-5@us-east5","object":"model","created":1747933971,"owned_by":"system","input_price":0.0000011,"caching_price":0.000001375,"cached_price":1.1e-7,"output_price":0.0000055,"max_output_tokens":64000,"context_window":200000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"Anthropic Haiku 4.5","data_retention_days":30,"privacy_comments":"https://cloud.google.com/vertex-ai/generative-ai/docs/data-governance#customer_data_retention_and_achieving_zero_data_retention","geolocation":"us"},{"api":"chat","id":"vertex/gemini-2.5-pro@us-east5","object":"model","created":1746582113,"owned_by":"system","input_price":0.00000125,"caching_price":0.000002375,"cached_price":3.1e-7,"output_price":0.00001,"max_output_tokens":65535,"context_window":1048576,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Gemini 2.5 Pro is Google’s state-of-the-art AI model designed for advanced reasoning, coding, mathematics, and scientific tasks. It employs “thinking” capabilities, enabling it to reason through responses with enhanced accuracy and nuanced context handling. Gemini 2.5 Pro achieves top-tier performance on multiple benchmarks, including first-place positioning on the LMArena leaderboard, reflecting superior human-preference alignment and complex problem-solving abilities.","geolocation":"global"},{"api":"chat","id":"vertex/gemini-3-flash-preview","object":"model","created":1766022113,"owned_by":"system","input_price":5e-7,"caching_price":0.000001,"cached_price":5e-8,"output_price":0.000003,"max_output_tokens":65535,"context_window":1048576,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Gemini 3 Flash Preview is designed to deliver strong agentic capabilities (near-Pro level) at substantial speed and value. Making it perfect for engaging multi-turn chats, and collaborating back and forth with your coding agent without getting out of flow. Compared to 2.5 Flash it delivers significant improvements across the board.","geolocation":"global"},{"api":"chat","id":"vertex/gemini-2.5-flash@us-west1","object":"model","created":1747765524,"owned_by":"system","input_price":3e-7,"caching_price":5.5e-7,"cached_price":7.5e-8,"output_price":0.0000025,"max_output_tokens":65535,"context_window":1048576,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Google's first hybrid reasoning model which supports a 1M token context window and has thinking budgets. Most balanced Gemini model, optimized for low latency use cases.","geolocation":"global"},{"api":"chat","id":"vertex/gemini-3.1-flash-lite-preview","object":"model","created":1772559883,"owned_by":"system","input_price":2.5e-7,"caching_price":8.333e-8,"cached_price":2.5e-8,"output_price":0.0000015,"max_output_tokens":65535,"context_window":1048576,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"Gemini 3.1 Flash Lite Preview is the most cost-efficient model in the Gemini family, optimized for high-volume, low-latency tasks. It delivers fast responses with solid quality for everyday use cases including summarization, classification, and simple reasoning.","geolocation":"global"},{"api":"chat","id":"vertex/gemini-2.5-flash-lite@europe-west1","object":"model","created":1747765524,"owned_by":"system","input_price":1e-7,"caching_price":1.8333e-7,"cached_price":1e-8,"output_price":4e-7,"max_output_tokens":65535,"context_window":1048576,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Google's smallest and most cost effective model, built for at scale usage.","geolocation":"global"},{"api":"chat","id":"vertex/gemini-2.5-pro@europe-west8","object":"model","created":1746582113,"owned_by":"system","input_price":0.00000125,"caching_price":0.000002375,"cached_price":3.1e-7,"output_price":0.00001,"max_output_tokens":65535,"context_window":1048576,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Gemini 2.5 Pro is Google’s state-of-the-art AI model designed for advanced reasoning, coding, mathematics, and scientific tasks. It employs “thinking” capabilities, enabling it to reason through responses with enhanced accuracy and nuanced context handling. Gemini 2.5 Pro achieves top-tier performance on multiple benchmarks, including first-place positioning on the LMArena leaderboard, reflecting superior human-preference alignment and complex problem-solving abilities.","geolocation":"global"},{"api":"chat","id":"vertex/gemini-3.1-flash-image-preview","object":"model","created":1772125200,"owned_by":"system","input_price":5e-7,"caching_price":0,"cached_price":0,"output_price":0.000002,"max_output_tokens":32768,"context_window":131072,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":true,"supports_tool_calling":true,"description":"Gemini 3.1 Flash Image is optimized for image understanding and generation and offers a balance of price and performance.","privacy_comments":"N/A","geolocation":"global"},{"api":"chat","id":"vertex/claude-opus-4-7@europe-west6","object":"model","created":1770317868,"owned_by":"system","input_price":0.0000055,"caching_price":0.00000688,"cached_price":5.5e-7,"output_price":0.0000275,"max_output_tokens":128000,"context_window":1000000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Opus 4.7 is the next generation of Anthropic's Opus family, built for long-running, asynchronous agents. Building on the coding and agentic strengths of Opus 4.6, it delivers stronger performance on complex, multi-step tasks and more reliable agentic execution across extended workflows. It is especially effective for asynchronous agent pipelines where tasks unfold over time - large codebases, multi-stage debugging, and end-to-end project orchestration.\n\nBeyond coding, Opus 4.7 brings improved knowledge work capabilities - from drafting documents and building presentations to analyzing data. It maintains coherence across very long outputs and extended sessions, making it a strong default for tasks that require persistence, judgment, and follow-through.","data_retention_days":30,"privacy_comments":"https://cloud.google.com/vertex-ai/generative-ai/docs/data-governance#customer_data_retention_and_achieving_zero_data_retention","geolocation":"eu"},{"api":"chat","id":"vertex/claude-opus-4-5","object":"model","created":1764004371,"owned_by":"system","input_price":0.000005,"caching_price":0.00000625,"cached_price":5e-7,"output_price":0.000025,"max_output_tokens":64000,"context_window":200000,"supports_caching":true,"supports_vision":true,"supports_computer_use":true,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Anthropic Opus 4.5","data_retention_days":30,"privacy_comments":"https://cloud.google.com/vertex-ai/generative-ai/docs/data-governance#customer_data_retention_and_achieving_zero_data_retention","geolocation":"global"},{"api":"chat","id":"vertex/gemini-2.5-flash@europe-north1","object":"model","created":1747765524,"owned_by":"system","input_price":3e-7,"caching_price":5.5e-7,"cached_price":7.5e-8,"output_price":0.0000025,"max_output_tokens":65535,"context_window":1048576,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Google's first hybrid reasoning model which supports a 1M token context window and has thinking budgets. Most balanced Gemini model, optimized for low latency use cases.","geolocation":"global"},{"api":"chat","id":"nebius/openai/gpt-oss-120b","object":"model","created":1772444580,"owned_by":"system","input_price":1.5e-7,"caching_price":1.5e-7,"cached_price":1.5e-7,"output_price":6e-7,"max_output_tokens":128000,"context_window":131000,"supports_caching":false,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Open-weight agentic model with configurable reasoning, full CoT visibility, strong tool use, and fine-tuning support.","privacy_comments":"Don't store prompts and responses","geolocation":"eu"},{"api":"chat","id":"nebius/moonshotai/kimi-k2.5","object":"model","created":1771249200,"owned_by":"system","input_price":5e-7,"caching_price":0.0000025,"cached_price":5e-7,"output_price":0.0000025,"max_output_tokens":128000,"context_window":256000,"supports_caching":false,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"Kimi K2.5 is an open-source, native multimodal agentic model built through continual pretraining on approximately 15 trillion mixed visual and text tokens atop Kimi-K2-Base","geolocation":"eu"},{"api":"chat","id":"nebius/meta-llama/Llama-3.3-70B-Instruct","object":"model","created":1715697754,"owned_by":"system","input_price":1.3e-7,"caching_price":1.3e-7,"cached_price":1.3e-7,"output_price":4e-7,"max_output_tokens":0,"context_window":128000,"supports_caching":false,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"A lightweight and ultra-fast variant of Llama 3.3 70B, for use when quick response times are needed most.","privacy_comments":"Don't store prompts and responses","geolocation":"eu"},{"api":"chat","id":"nebius/deepseek-ai/DeepSeek-V3.2","object":"model","created":1772282644,"owned_by":"system","input_price":3e-7,"caching_price":3e-7,"cached_price":3e-7,"output_price":4.5e-7,"max_output_tokens":128000,"context_window":164000,"supports_caching":false,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"A model designed to harmonize high computational efficiency with strong reasoning and agentic tool-use performance.","privacy_comments":"Don't store prompts and responses","geolocation":"global"},{"api":"chat","id":"alibaba/qwen-turbo","object":"model","created":1745882204,"owned_by":"system","input_price":5e-8,"caching_price":5e-8,"cached_price":5e-8,"output_price":2e-7,"max_output_tokens":0,"context_window":1000000,"supports_caching":false,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"Qwen3, the latest generation in the Qwen large language model series, features both dense and mixture-of-experts (MoE) architectures to excel in reasoning, multilingual support, and advanced agent tasks. Its unique ability to switch seamlessly between a thinking mode for complex reasoning and a non-thinking mode for efficient dialogue ensures versatile, high-quality performance.\n\nSignificantly outperforming prior models like QwQ and Qwen2.5, Qwen3 delivers superior mathematics, coding, commonsense reasoning, creative writing, and interactive dialogue capabilities. The Qwen3-30B-A3B variant includes 30.5 billion parameters (3.3 billion activated), 48 layers, 128 experts (8 activated per task), and supports up to 131K token contexts with YaRN, setting a new standard among open-source models.","geolocation":"global"},{"api":"chat","id":"alibaba/qwen3-30b-a3b-instruct-2507","object":"model","created":1753953463,"owned_by":"system","input_price":2e-7,"caching_price":8e-7,"cached_price":2e-7,"output_price":8e-7,"max_output_tokens":65536,"context_window":131072,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"Qwen3-30B-A3B-Instruct-2507 is a 30.5B-parameter mixture-of-experts language model from Qwen, with 3.3B active parameters per inference. It operates in non-thinking mode and is designed for high-quality instruction following, multilingual understanding, and agentic tool use. Post-trained on instruction data, it demonstrates competitive performance across reasoning (AIME, ZebraLogic), coding (MultiPL-E, LiveCodeBench), and alignment (IFEval, WritingBench) benchmarks. It outperforms its non-instruct variant on subjective and open-ended tasks while retaining strong factual and coding performance.","geolocation":"global"},{"api":"chat","id":"alibaba/qwen3-coder-plus","object":"model","created":1753348663,"owned_by":"system","input_price":0.000001,"output_price":0.000005,"max_output_tokens":65536,"context_window":1048576,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"","geolocation":"global"},{"api":"chat","id":"alibaba/qwen3.5","object":"model","created":1753953463,"owned_by":"system","input_price":6e-7,"caching_price":0.0000036,"cached_price":6e-7,"output_price":0.0000036,"max_output_tokens":65536,"context_window":256000,"supports_caching":false,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":true,"supports_image_generation":false,"supports_tool_calling":true,"description":"The Qwen3.5 series 397B-A17B native vision-language model is built on a hybrid architecture that integrates a linear attention mechanism with a sparse mixture-of-experts model, achieving higher inference efficiency. It delivers state-of-the-art performance comparable to leading-edge models across a wide range of tasks, including language understanding, logical reasoning, code generation, agent-based tasks, image understanding, video understanding, and graphical user interface (GUI) interactions. With its robust code-generation and agent capabilities, the model exhibits strong generalization across diverse agent.","geolocation":"global"},{"api":"chat","id":"alibaba/qwen-plus","object":"model","created":1745882204,"owned_by":"system","input_price":4e-7,"caching_price":4e-7,"cached_price":4e-7,"output_price":0.0000012,"max_output_tokens":0,"context_window":131072,"supports_caching":false,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"Qwen3, the latest generation in the Qwen large language model series, features both dense and mixture-of-experts (MoE) architectures to excel in reasoning, multilingual support, and advanced agent tasks. Its unique ability to switch seamlessly between a thinking mode for complex reasoning and a non-thinking mode for efficient dialogue ensures versatile, high-quality performance.\n\nSignificantly outperforming prior models like QwQ and Qwen2.5, Qwen3 delivers superior mathematics, coding, commonsense reasoning, creative writing, and interactive dialogue capabilities. The Qwen3-30B-A3B variant includes 30.5 billion parameters (3.3 billion activated), 48 layers, 128 experts (8 activated per task), and supports up to 131K token contexts with YaRN, setting a new standard among open-source models.","geolocation":"global"},{"api":"chat","id":"alibaba/qwen-max","object":"model","created":1745882204,"owned_by":"system","input_price":0.0000016,"caching_price":0.0000016,"cached_price":0.0000016,"output_price":0.0000064,"max_output_tokens":0,"context_window":32768,"supports_caching":false,"supports_vision":false,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"Qwen3, the latest generation in the Qwen large language model series, features both dense and mixture-of-experts (MoE) architectures to excel in reasoning, multilingual support, and advanced agent tasks. Its unique ability to switch seamlessly between a thinking mode for complex reasoning and a non-thinking mode for efficient dialogue ensures versatile, high-quality performance.\n\nSignificantly outperforming prior models like QwQ and Qwen2.5, Qwen3 delivers superior mathematics, coding, commonsense reasoning, creative writing, and interactive dialogue capabilities. The Qwen3-30B-A3B variant includes 30.5 billion parameters (3.3 billion activated), 48 layers, 128 experts (8 activated per task), and supports up to 131K token contexts with YaRN, setting a new standard among open-source models.","geolocation":"global"},{"api":"chat","id":"alibaba/qwen3-coder-flash","object":"model","created":1753348663,"owned_by":"system","input_price":3e-7,"caching_price":3e-7,"cached_price":8e-8,"output_price":0.0000015,"max_output_tokens":65536,"context_window":1048576,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"","geolocation":"global"},{"api":"chat","id":"alibaba/qwen3-max","object":"model","created":1757524929,"owned_by":"system","input_price":8.61e-7,"output_price":0.000003441,"max_output_tokens":65536,"context_window":262144,"supports_caching":true,"supports_vision":true,"supports_computer_use":false,"supports_reasoning":false,"supports_image_generation":false,"supports_tool_calling":true,"description":"This is the best-performing model in the Qwen series. It is ideal for complex, multi-step tasks.","geolocation":"global"}]}
