diff --git a/.changeset/provider-registry-runtime-v2.md b/.changeset/provider-registry-runtime-v2.md new file mode 100644 index 00000000000..b3b7f4209f9 --- /dev/null +++ b/.changeset/provider-registry-runtime-v2.md @@ -0,0 +1,4 @@ +--- +--- + +Internal provider/model data API and migration integration for v2 runtime. diff --git a/docs/en/references/data/provider-registry.md b/docs/en/references/data/provider-registry.md new file mode 100644 index 00000000000..d0790a8750d --- /dev/null +++ b/docs/en/references/data/provider-registry.md @@ -0,0 +1,473 @@ +# Provider Registry Reference + +This document describes the Provider/Model registry system architecture, schemas, and data flows. + +## Overview + +The registry system manages AI model and provider configurations with a three-layer merge architecture: + +1. **Preset Layer** (read-only, bundled in app) - Registry definitions +2. **Override Layer** (read-only) - Provider-specific model overrides +3. **User Layer** (SQLite, writable) - User customizations + +``` +┌─────────────────────────────────────────────────────────────────────────────────┐ +│ Data Layer Architecture │ +├─────────────────────────────────────────────────────────────────────────────────┤ +│ │ +│ Preset Layer (Read-only) User Layer (SQLite, Writable) │ +│ ════════════════════════ ═══════════════════════════ │ +│ │ +│ providers.json user_provider │ +│ • Provider configurations • Endpoint overrides │ +│ • Endpoint mappings • Multi API Key (1:N) │ +│ • API compatibility • API features override │ +│ │ +│ models.json user_model (merged table) │ +│ • Base model definitions • presetModelId → override │ +│ • Capabilities, modalities • presetModelId null → custom │ +│ • Context windows, pricing • Source tracking │ +│ │ +│ provider-models.json │ +│ • Provider-model mappings │ +│ • Provider-level overrides │ +│ • Variant configurations │ +│ │ +└─────────────────────────────────────────────────────────────────────────────────┘ +``` + +## Merge Priority + +When 
resolving a model or provider configuration: + +**Models**: `user_model` > `provider-models.json` > `models.json` + +**Providers**: `user_provider` > `providers.json` + +--- + +## Preset Schemas + +Location: `packages/provider-registry/src/schemas/` + +### Provider Schema (`provider.ts`) + +Defines how to connect to AI service providers. + +```typescript +// Endpoint types encode format information +// CHAT_COMPLETIONS → OpenAI format +// MESSAGES → Anthropic format +// RESPONSES → OpenAI Responses API +// GENERATE_CONTENT → Gemini format + +const EndpointTypeSchema = z.enum([ + // Text generation + 'CHAT_COMPLETIONS', // OpenAI /v1/chat/completions + 'TEXT_COMPLETIONS', // OpenAI /v1/completions (legacy) + 'MESSAGES', // Anthropic /v1/messages + 'RESPONSES', // OpenAI /v1/responses + 'GENERATE_CONTENT', // Gemini /v1beta/models/{model}:generateContent + + // Embeddings + 'EMBEDDINGS', // /v1/embeddings + 'RERANK', // /v1/rerank + + // Images + 'IMAGE_GENERATION', // /v1/images/generations + 'IMAGE_EDIT', // /v1/images/edits + + // Audio + 'AUDIO_TRANSCRIPTION', // /v1/audio/transcriptions + 'AUDIO_TRANSLATION', // /v1/audio/translations + 'TEXT_TO_SPEECH', // /v1/audio/speech + + // Video + 'VIDEO_GENERATION' +]) + +const ProviderConfigSchema = z.object({ + id: z.string(), // Provider ID + name: z.string(), // Display name + description: z.string().optional(), + + // Endpoint configuration: type → full URL + endpoints: z.record(z.string(), z.string().url()), + // Example: + // { + // 'CHAT_COMPLETIONS': 'https://api.openai.com/v1/chat/completions', + // 'EMBEDDINGS': 'https://api.openai.com/v1/embeddings' + // } + + default_chat_endpoint: EndpointTypeSchema.optional(), + + api_compatibility: z.object({ + supports_array_content: z.boolean().optional(), // default: true + supports_stream_options: z.boolean().optional(), // default: true + supports_developer_role: z.boolean().optional(), // default: true + supports_service_tier: z.boolean().optional(), // default: 
false + supports_thinking_control: z.boolean().optional() // default: true + }).optional(), + + website: z.string().url().optional(), + models_api_url: z.string().url().optional(), // Models list API + metadata: z.record(z.string(), z.any()).optional() +}) +``` + +### Model Schema (`model.ts`) + +Defines model capabilities and configurations. + +```typescript +const ModelCapabilityTypeSchema = z.enum([ + 'FUNCTION_CALL', // Function calling + 'REASONING', // Extended thinking + 'IMAGE_RECOGNITION', // Vision understanding + 'IMAGE_GENERATION', // Image creation + 'AUDIO_RECOGNITION', // Audio understanding + 'AUDIO_GENERATION', // Speech synthesis + 'EMBEDDING', // Vector embeddings + 'RERANK', // Result reranking + 'AUDIO_TRANSCRIPT', // Speech-to-text + 'VIDEO_RECOGNITION', // Video understanding + 'VIDEO_GENERATION', // Video creation + 'STRUCTURED_OUTPUT', // JSON mode + 'FILE_INPUT', // File attachments + 'WEB_SEARCH', // Built-in search + 'CODE_EXECUTION', // Code sandbox + 'FILE_SEARCH', // File search + 'COMPUTER_USE' // Computer control +]) + +const ModalitySchema = z.enum(['TEXT', 'VISION', 'AUDIO', 'VIDEO', 'VECTOR']) + +const ModelConfigSchema = z.object({ + id: z.string(), // Model ID for API calls + name: z.string().optional(), // Display name + description: z.string().optional(), + + capabilities: z.array(ModelCapabilityTypeSchema).optional(), + input_modalities: z.array(ModalitySchema).optional(), + output_modalities: z.array(ModalitySchema).optional(), + + context_window: z.number().optional(), + max_output_tokens: z.number().optional(), + max_input_tokens: z.number().optional(), + + pricing: ModelPricingSchema.optional(), + reasoning: ReasoningSchema.optional(), + parameters: ParameterSupportSchema.optional(), + + family: z.string().optional(), // e.g., "GPT-4", "Claude 3" + publisher: z.string().optional(), // e.g., "anthropic", "openai" + open_weights: z.boolean().optional(), // Weights publicly available + alias: z.array(z.string()).optional(), 
// Date version aliases + + metadata: z.record(z.string(), z.any()).optional() +}) +``` + +### Provider-Models Schema (`provider-models.ts`) + +Defines provider-specific model overrides. + +```typescript +const CapabilityOverrideSchema = z.object({ + add: z.array(ModelCapabilityTypeSchema).optional(), // Add capabilities + remove: z.array(ModelCapabilityTypeSchema).optional(), // Remove capabilities + force: z.array(ModelCapabilityTypeSchema).optional() // Complete replacement +}) + +const ProviderModelOverrideSchema = z.object({ + provider_id: z.string(), + model_id: z.string(), + + // Variant identifier for same model with different configurations + // Examples: 'free', 'thinking', 'nitro', 'search' + model_variant: z.string().optional(), + + capabilities: CapabilityOverrideSchema.optional(), + limits: z.object({ + context_window: z.number().optional(), + max_output_tokens: z.number().optional(), + max_input_tokens: z.number().optional() + }).optional(), + + pricing: ModelPricingSchema.partial().optional(), + reasoning: ReasoningSchema.optional(), + parameters: ParameterSupportSchema.partial().optional(), + + // Endpoint type overrides (when model uses different endpoints than provider default) + endpoint_types: z.array(EndpointTypeSchema).optional(), + // Modality overrides (when provider supports different modalities than base model) + input_modalities: z.array(ModalitySchema).optional(), + output_modalities: z.array(ModalitySchema).optional(), + + disabled: z.boolean().optional(), + replace_with: z.string().optional(), + + reason: z.string().optional(), // Override reason + priority: z.number().default(0) // Higher = takes precedence +}) +``` + +--- + +## Runtime Types + +Location: `packages/shared/data/types/` + +### UniqueModelId + +Format: `providerId::modelId` + +```typescript +type UniqueModelId = `${string}::${string}` + +// Create: createUniqueModelId('anthropic', 'claude-3-5-sonnet') +// → 'anthropic::claude-3-5-sonnet' + +// Parse: 
parseUniqueModelId('anthropic::claude-3-5-sonnet') +// → { providerId: 'anthropic', modelId: 'claude-3-5-sonnet' } +``` + +Uses `::` separator to avoid conflicts with model IDs containing `:` (e.g., `openrouter:anthropic/claude-3`). + +### RuntimeModel + +The merged "final state" model configuration for consumers. + +```typescript +// Type-safe union types (mirroring registry Zod enums) +type Modality = 'TEXT' | 'VISION' | 'AUDIO' | 'VIDEO' | 'VECTOR' +type EndpointType = + | 'CHAT_COMPLETIONS' | 'TEXT_COMPLETIONS' | 'MESSAGES' + | 'RESPONSES' | 'GENERATE_CONTENT' + | 'EMBEDDINGS' | 'RERANK' + | 'IMAGE_GENERATION' | 'IMAGE_EDIT' + | 'AUDIO_TRANSCRIPTION' | 'AUDIO_TRANSLATION' | 'TEXT_TO_SPEECH' + | 'VIDEO_GENERATION' + +interface RuntimeReasoning { + type: string // 'openai-chat', 'anthropic', 'gemini', etc. + supportedEfforts: string[] + defaultEffort?: string + thinkingTokenLimits?: { min?: number; max?: number; default?: number } + interleaved?: boolean // Supports interleaved thinking output +} + +interface RuntimeModel { + uniqueId: UniqueModelId // "anthropic::claude-3-5-sonnet" + id: string // "claude-3-5-sonnet" + providerId: string // "anthropic" + + name: string + description?: string + group?: string // UI grouping + family?: string // "Claude 3" + ownedBy?: string + + capabilities: ModelCapability[] + inputModalities?: Modality[] // Supported input: TEXT, VISION, AUDIO, VIDEO + outputModalities?: Modality[] // Supported output: TEXT, VISION, AUDIO, VIDEO, VECTOR + + contextWindow?: number + maxOutputTokens?: number + maxInputTokens?: number + + endpointTypes?: EndpointType[] // Supported endpoint types (array, model may support multiple) + supportsStreaming: boolean + + reasoning?: RuntimeReasoning + parameters?: RuntimeParameterSupport + pricing?: RuntimeModelPricing + + isEnabled: boolean + isHidden: boolean + replaceWith?: UniqueModelId +} +``` + +### RuntimeProvider + +The merged "final state" provider configuration. 
+ +```typescript +interface RuntimeProvider { + id: string + source: 'preset' | 'user' | 'merged' + presetProviderId?: string + + name: string + description?: string + + endpoints: Record<string, string> + defaultChatEndpoint?: string + + apiKeys: RuntimeApiKey[] + activeApiKeyId?: string + authType: 'api-key' | 'oauth' | 'iam-aws' | 'iam-gcp' | 'iam-azure' + + apiCompatibility: RuntimeApiCompatibility + settings: RuntimeProviderSettings + + isEnabled: boolean + isAuthenticated: boolean +} +``` + +--- + +## User Database Schemas + +Location: `src/main/data/db/schemas/` + +### user_provider Table + +Stores user's provider configurations. + +| Column | Type | Description | +|--------|------|-------------| +| id | UUID | Primary key | +| providerId | TEXT | User-defined unique ID | +| presetProviderId | TEXT | Links to registry preset | +| name | TEXT | Display name | +| endpoints | JSON | Endpoint URL overrides | +| defaultChatEndpoint | TEXT | Default text generation endpoint | +| apiKeys | JSON | Array of ApiKeyEntry | +| authConfig | JSON | Authentication configuration | +| apiCompatibility | JSON | API compatibility overrides | +| providerSettings | JSON | Provider-specific settings | +| isEnabled | BOOLEAN | Whether enabled | +| sortOrder | INTEGER | UI ordering | + +**Design principle**: One provider instance = One API host (1:1 relationship) + +### user_model Table + +Stores all user models with fully resolved configurations. Capabilities are resolved once at add-time from registry, so no runtime merge is needed. 
+ +| Column | Type | Description | +|--------|------|-------------| +| providerId | TEXT | Provider ID (part of PK) | +| modelId | TEXT | Model ID (part of PK) | +| presetModelId | TEXT | Traceability marker (which preset this came from) | +| name | TEXT | Display name | +| description | TEXT | Description | +| group | TEXT | UI grouping | +| capabilities | JSON | Complete capability list (resolved at add time) | +| inputModalities | JSON | Supported input modalities (e.g., TEXT, VISION, AUDIO) | +| outputModalities | JSON | Supported output modalities (e.g., TEXT, VISION, VECTOR) | +| endpointTypes | JSON | Endpoint type overrides (array) | +| customEndpointUrl | TEXT | Complete URL override | +| contextWindow | INTEGER | Context window override | +| maxOutputTokens | INTEGER | Max output override | +| supportsStreaming | BOOLEAN | Streaming support | +| reasoning | JSON | Reasoning configuration (includes `interleaved` flag) | +| parameters | JSON | Parameter support | +| isEnabled | BOOLEAN | Whether enabled | +| isHidden | BOOLEAN | Whether hidden in lists | +| sortOrder | INTEGER | UI ordering | + +**Note**: `presetModelId` is a traceability marker only — it records which preset model was used as the template, but is not used for runtime merging. + +--- + +## Merge Utilities + +Location: `packages/shared/data/utils/modelMerger.ts` + +### mergeModelConfig + +Merges model configurations with proper priority. + +```typescript +function mergeModelConfig( + userModel: UserModel | null, + registryOverride: RegistryProviderModelOverride | null, + presetModel: RegistryModel | null, + providerId: string +): RuntimeModel + +// Priority: userModel > registryOverride > presetModel +``` + +### mergeProviderConfig + +Merges provider configurations. 
+ +```typescript +function mergeProviderConfig( + userProvider: UserProvider | null, + presetProvider: RegistryProvider | null +): RuntimeProvider + +// Priority: userProvider > presetProvider +``` + +### applyCapabilityOverride + +Applies registry provider-model capability modifications (not user-level). + +```typescript +function applyCapabilityOverride( + base: string[], + override: { add?: string[]; remove?: string[]; force?: string[] } +): string[] + +// 'force' completely replaces base +// Otherwise: add new, then remove specified +``` + +--- + +## Model ID Variants + +### Variant Types + +| Type | Example | Handling | +|------|---------|----------| +| Pricing variant | `:free`, `:nitro`, `-free` | Separate provider-models entry | +| Capability variant | `-thinking`, `-search` | capabilities.add in provider-models | +| Date version | `-20251101` | alias array in models.json | +| Provider prefix | `anthropic/`, `google/` | Strip during import | + +### Normalization Rules + +1. Strip provider prefixes: `anthropic/claude-3` → `claude-3` +2. Strip pricing suffixes: `claude-3:free` → `claude-3` (with variant entry) +3. Preserve capability variants: `claude-3-thinking` → separate handling +4. 
Track date versions: `gpt-4-turbo-2024-04-09` → in `alias` array + +--- + +## Data Files + +| File | Description | +|------|-------------| +| `packages/provider-registry/data/providers.json` | Provider configurations | +| `packages/provider-registry/data/models.json` | Base model definitions | +| `packages/provider-registry/data/provider-models.json` | Provider-model overrides | +| `packages/provider-registry/data/openrouter-models.json` | OpenRouter import data | +| `packages/provider-registry/data/aihubmix-models.json` | AIHubMix import data | +| `packages/provider-registry/data/modelsdev-models.json` | models.dev import data | + +--- + +## API Compatibility Defaults + +| Feature | Default | Description | +|---------|---------|-------------| +| `supports_array_content` | true | Array format for content | +| `supports_stream_options` | true | stream_options parameter | +| `supports_developer_role` | true | Developer role in messages | +| `supports_service_tier` | false | service_tier parameter | +| `supports_thinking_control` | true | Thinking control parameters | + +--- + +## See Also + +- [Data Management Overview](./README.md) - System selection and patterns +- [Registry Web UI](../../../../packages/provider-registry/web/) - Review and edit interface diff --git a/electron.vite.config.ts b/electron.vite.config.ts index 1e039ba596c..d817c8f8431 100644 --- a/electron.vite.config.ts +++ b/electron.vite.config.ts @@ -36,6 +36,8 @@ export default defineConfig({ '@logger': resolve('src/main/services/LoggerService'), '@mcp-trace/trace-core': resolve('packages/mcp-trace/trace-core'), '@mcp-trace/trace-node': resolve('packages/mcp-trace/trace-node'), + '@cherrystudio/provider-registry/node': resolve('packages/provider-registry/src/registry-reader'), + '@cherrystudio/provider-registry': resolve('packages/provider-registry/src'), '@test-mocks': resolve('tests/__mocks__') } }, @@ -116,6 +118,8 @@ export default defineConfig({ '@cherrystudio/ai-core': 
resolve('packages/aiCore/src'), '@cherrystudio/extension-table-plus': resolve('packages/extension-table-plus/src'), '@cherrystudio/ai-sdk-provider': resolve('packages/ai-sdk-provider/src'), + '@cherrystudio/provider-registry/node': resolve('packages/provider-registry/src/registry-reader'), + '@cherrystudio/provider-registry': resolve('packages/provider-registry/src'), '@cherrystudio/ui/icons': resolve('packages/ui/src/components/icons'), '@cherrystudio/ui': resolve('packages/ui/src'), '@test-mocks': resolve('tests/__mocks__') diff --git a/migrations/sqlite-drizzle/0009_medical_giant_girl.sql b/migrations/sqlite-drizzle/0009_medical_giant_girl.sql new file mode 100644 index 00000000000..8fbd5fb98f5 --- /dev/null +++ b/migrations/sqlite-drizzle/0009_medical_giant_girl.sql @@ -0,0 +1,51 @@ +CREATE TABLE `user_model` ( + `provider_id` text NOT NULL, + `model_id` text NOT NULL, + `preset_model_id` text, + `name` text, + `description` text, + `group` text, + `capabilities` text, + `input_modalities` text, + `output_modalities` text, + `endpoint_types` text, + `custom_endpoint_url` text, + `context_window` integer, + `max_output_tokens` integer, + `supports_streaming` integer, + `reasoning` text, + `parameters` text, + `pricing` text, + `is_enabled` integer DEFAULT true, + `is_hidden` integer DEFAULT false, + `is_deprecated` integer DEFAULT false, + `sort_order` integer DEFAULT 0, + `notes` text, + `user_overrides` text, + `created_at` integer, + `updated_at` integer, + PRIMARY KEY(`provider_id`, `model_id`) +); +--> statement-breakpoint +CREATE INDEX `user_model_preset_idx` ON `user_model` (`preset_model_id`);--> statement-breakpoint +CREATE INDEX `user_model_provider_enabled_idx` ON `user_model` (`provider_id`,`is_enabled`);--> statement-breakpoint +CREATE INDEX `user_model_provider_sort_idx` ON `user_model` (`provider_id`,`sort_order`);--> statement-breakpoint +CREATE TABLE `user_provider` ( + `provider_id` text PRIMARY KEY NOT NULL, + `preset_provider_id` text, + 
`name` text NOT NULL, + `endpoint_configs` text, + `default_chat_endpoint` text, + `api_keys` text DEFAULT '[]', + `auth_config` text, + `api_features` text, + `provider_settings` text, + `websites` text, + `is_enabled` integer DEFAULT true, + `sort_order` integer DEFAULT 0, + `created_at` integer, + `updated_at` integer +); +--> statement-breakpoint +CREATE INDEX `user_provider_preset_idx` ON `user_provider` (`preset_provider_id`);--> statement-breakpoint +CREATE INDEX `user_provider_enabled_sort_idx` ON `user_provider` (`is_enabled`,`sort_order`); \ No newline at end of file diff --git a/migrations/sqlite-drizzle/meta/0009_snapshot.json b/migrations/sqlite-drizzle/meta/0009_snapshot.json new file mode 100644 index 00000000000..3a9ee620de1 --- /dev/null +++ b/migrations/sqlite-drizzle/meta/0009_snapshot.json @@ -0,0 +1,1722 @@ +{ + "version": "6", + "dialect": "sqlite", + "id": "06b58aff-1332-4170-be6b-45a53d5e3cda", + "prevId": "4105c4f2-d9df-4048-acab-0049e5a95941", + "tables": { + "app_state": { + "name": "app_state", + "columns": { + "key": { + "name": "key", + "type": "text", + "primaryKey": true, + "notNull": true, + "autoincrement": false + }, + "value": { + "name": "value", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "description": { + "name": "description", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "created_at": { + "name": "created_at", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "updated_at": { + "name": "updated_at", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "checkConstraints": {} + }, + "group": { + "name": "group", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true, + "autoincrement": false + }, + 
"entity_type": { + "name": "entity_type", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "name": { + "name": "name", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "sort_order": { + "name": "sort_order", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false, + "default": 0 + }, + "created_at": { + "name": "created_at", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "updated_at": { + "name": "updated_at", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false + } + }, + "indexes": { + "group_entity_sort_idx": { + "name": "group_entity_sort_idx", + "columns": ["entity_type", "sort_order"], + "isUnique": false + } + }, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "checkConstraints": {} + }, + "knowledge_base": { + "name": "knowledge_base", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true, + "autoincrement": false + }, + "name": { + "name": "name", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "description": { + "name": "description", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "dimensions": { + "name": "dimensions", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "embedding_model_id": { + "name": "embedding_model_id", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "rerank_model_id": { + "name": "rerank_model_id", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "file_processor_id": { + "name": "file_processor_id", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "chunk_size": { + "name": "chunk_size", + "type": 
"integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "chunk_overlap": { + "name": "chunk_overlap", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "threshold": { + "name": "threshold", + "type": "real", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "document_count": { + "name": "document_count", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "search_mode": { + "name": "search_mode", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "hybrid_alpha": { + "name": "hybrid_alpha", + "type": "real", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "created_at": { + "name": "created_at", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "updated_at": { + "name": "updated_at", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "checkConstraints": { + "knowledge_base_search_mode_check": { + "name": "knowledge_base_search_mode_check", + "value": "\"knowledge_base\".\"search_mode\" IN ('default', 'bm25', 'hybrid') OR \"knowledge_base\".\"search_mode\" IS NULL" + } + } + }, + "knowledge_item": { + "name": "knowledge_item", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true, + "autoincrement": false + }, + "base_id": { + "name": "base_id", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "group_id": { + "name": "group_id", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "type": { + "name": "type", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "data": { + "name": "data", + "type": "text", + 
"primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "status": { + "name": "status", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false, + "default": "'idle'" + }, + "error": { + "name": "error", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "created_at": { + "name": "created_at", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "updated_at": { + "name": "updated_at", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false + } + }, + "indexes": { + "knowledge_item_base_type_created_idx": { + "name": "knowledge_item_base_type_created_idx", + "columns": ["base_id", "type", "created_at"], + "isUnique": false + }, + "knowledge_item_base_group_created_idx": { + "name": "knowledge_item_base_group_created_idx", + "columns": ["base_id", "group_id", "created_at"], + "isUnique": false + }, + "knowledge_item_baseId_id_unique": { + "name": "knowledge_item_baseId_id_unique", + "columns": ["base_id", "id"], + "isUnique": true + } + }, + "foreignKeys": { + "knowledge_item_base_id_knowledge_base_id_fk": { + "name": "knowledge_item_base_id_knowledge_base_id_fk", + "tableFrom": "knowledge_item", + "tableTo": "knowledge_base", + "columnsFrom": ["base_id"], + "columnsTo": ["id"], + "onDelete": "cascade", + "onUpdate": "no action" + }, + "knowledge_item_base_id_group_id_knowledge_item_base_id_id_fk": { + "name": "knowledge_item_base_id_group_id_knowledge_item_base_id_id_fk", + "tableFrom": "knowledge_item", + "tableTo": "knowledge_item", + "columnsFrom": ["base_id", "group_id"], + "columnsTo": ["base_id", "id"], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "checkConstraints": { + "knowledge_item_type_check": { + "name": "knowledge_item_type_check", + "value": "\"knowledge_item\".\"type\" IN ('file', 'url', 'note', 'sitemap', 
'directory')" + }, + "knowledge_item_status_check": { + "name": "knowledge_item_status_check", + "value": "\"knowledge_item\".\"status\" IN ('idle', 'pending', 'ocr', 'read', 'embed', 'completed', 'failed')" + } + } + }, + "mcp_server": { + "name": "mcp_server", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true, + "autoincrement": false + }, + "name": { + "name": "name", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "type": { + "name": "type", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "description": { + "name": "description", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "base_url": { + "name": "base_url", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "command": { + "name": "command", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "registry_url": { + "name": "registry_url", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "args": { + "name": "args", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "env": { + "name": "env", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "headers": { + "name": "headers", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "provider": { + "name": "provider", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "provider_url": { + "name": "provider_url", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "logo_url": { + "name": "logo_url", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "tags": { + "name": "tags", + "type": "text", + "primaryKey": false, + 
"notNull": false, + "autoincrement": false + }, + "long_running": { + "name": "long_running", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "timeout": { + "name": "timeout", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "dxt_version": { + "name": "dxt_version", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "dxt_path": { + "name": "dxt_path", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "reference": { + "name": "reference", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "search_key": { + "name": "search_key", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "config_sample": { + "name": "config_sample", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "disabled_tools": { + "name": "disabled_tools", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "disabled_auto_approve_tools": { + "name": "disabled_auto_approve_tools", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "should_config": { + "name": "should_config", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "sort_order": { + "name": "sort_order", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false, + "default": 0 + }, + "is_active": { + "name": "is_active", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false, + "default": false + }, + "install_source": { + "name": "install_source", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "is_trusted": { + "name": "is_trusted", + "type": "integer", + "primaryKey": false, + "notNull": false, + 
"autoincrement": false + }, + "trusted_at": { + "name": "trusted_at", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "installed_at": { + "name": "installed_at", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "created_at": { + "name": "created_at", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "updated_at": { + "name": "updated_at", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false + } + }, + "indexes": { + "mcp_server_name_idx": { + "name": "mcp_server_name_idx", + "columns": ["name"], + "isUnique": false + }, + "mcp_server_is_active_idx": { + "name": "mcp_server_is_active_idx", + "columns": ["is_active"], + "isUnique": false + }, + "mcp_server_sort_order_idx": { + "name": "mcp_server_sort_order_idx", + "columns": ["sort_order"], + "isUnique": false + } + }, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "checkConstraints": { + "mcp_server_type_check": { + "name": "mcp_server_type_check", + "value": "\"mcp_server\".\"type\" IS NULL OR \"mcp_server\".\"type\" IN ('stdio', 'sse', 'streamableHttp', 'inMemory')" + }, + "mcp_server_install_source_check": { + "name": "mcp_server_install_source_check", + "value": "\"mcp_server\".\"install_source\" IS NULL OR \"mcp_server\".\"install_source\" IN ('builtin', 'manual', 'protocol', 'unknown')" + } + } + }, + "message": { + "name": "message", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true, + "autoincrement": false + }, + "parent_id": { + "name": "parent_id", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "topic_id": { + "name": "topic_id", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "role": { + "name": "role", + "type": "text", + "primaryKey": false, + "notNull": 
true, + "autoincrement": false + }, + "data": { + "name": "data", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "searchable_text": { + "name": "searchable_text", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "status": { + "name": "status", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "siblings_group_id": { + "name": "siblings_group_id", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false, + "default": 0 + }, + "assistant_id": { + "name": "assistant_id", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "assistant_meta": { + "name": "assistant_meta", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "model_id": { + "name": "model_id", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "model_meta": { + "name": "model_meta", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "trace_id": { + "name": "trace_id", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "stats": { + "name": "stats", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "created_at": { + "name": "created_at", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "updated_at": { + "name": "updated_at", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "deleted_at": { + "name": "deleted_at", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false + } + }, + "indexes": { + "message_parent_id_idx": { + "name": "message_parent_id_idx", + "columns": ["parent_id"], + "isUnique": false + }, + "message_topic_created_idx": { + "name": "message_topic_created_idx", + 
"columns": ["topic_id", "created_at"], + "isUnique": false + }, + "message_trace_id_idx": { + "name": "message_trace_id_idx", + "columns": ["trace_id"], + "isUnique": false + } + }, + "foreignKeys": { + "message_topic_id_topic_id_fk": { + "name": "message_topic_id_topic_id_fk", + "tableFrom": "message", + "tableTo": "topic", + "columnsFrom": ["topic_id"], + "columnsTo": ["id"], + "onDelete": "cascade", + "onUpdate": "no action" + }, + "message_parent_id_message_id_fk": { + "name": "message_parent_id_message_id_fk", + "tableFrom": "message", + "tableTo": "message", + "columnsFrom": ["parent_id"], + "columnsTo": ["id"], + "onDelete": "set null", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "checkConstraints": { + "message_role_check": { + "name": "message_role_check", + "value": "\"message\".\"role\" IN ('user', 'assistant', 'system')" + }, + "message_status_check": { + "name": "message_status_check", + "value": "\"message\".\"status\" IN ('pending', 'success', 'error', 'paused')" + } + } + }, + "miniapp": { + "name": "miniapp", + "columns": { + "app_id": { + "name": "app_id", + "type": "text", + "primaryKey": true, + "notNull": true, + "autoincrement": false + }, + "name": { + "name": "name", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "url": { + "name": "url", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "logo": { + "name": "logo", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "type": { + "name": "type", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false, + "default": "'custom'" + }, + "status": { + "name": "status", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false, + "default": "'enabled'" + }, + "sort_order": { + "name": "sort_order", + "type": "integer", + "primaryKey": false, + "notNull": false, + 
"autoincrement": false, + "default": 0 + }, + "bordered": { + "name": "bordered", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false, + "default": true + }, + "background": { + "name": "background", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "supported_regions": { + "name": "supported_regions", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "configuration": { + "name": "configuration", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "name_key": { + "name": "name_key", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "created_at": { + "name": "created_at", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "updated_at": { + "name": "updated_at", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false + } + }, + "indexes": { + "miniapp_status_sort_idx": { + "name": "miniapp_status_sort_idx", + "columns": ["status", "sort_order"], + "isUnique": false + }, + "miniapp_type_idx": { + "name": "miniapp_type_idx", + "columns": ["type"], + "isUnique": false + }, + "miniapp_status_type_idx": { + "name": "miniapp_status_type_idx", + "columns": ["status", "type"], + "isUnique": false + } + }, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "checkConstraints": { + "miniapp_status_check": { + "name": "miniapp_status_check", + "value": "\"miniapp\".\"status\" IN ('enabled', 'disabled', 'pinned')" + }, + "miniapp_type_check": { + "name": "miniapp_type_check", + "value": "\"miniapp\".\"type\" IN ('default', 'custom')" + } + } + }, + "preference": { + "name": "preference", + "columns": { + "scope": { + "name": "scope", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false, + "default": "'default'" + }, + "key": { + "name": 
"key", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "value": { + "name": "value", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "created_at": { + "name": "created_at", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "updated_at": { + "name": "updated_at", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": { + "preference_scope_key_pk": { + "columns": ["scope", "key"], + "name": "preference_scope_key_pk" + } + }, + "uniqueConstraints": {}, + "checkConstraints": {} + }, + "entity_tag": { + "name": "entity_tag", + "columns": { + "entity_type": { + "name": "entity_type", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "entity_id": { + "name": "entity_id", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "tag_id": { + "name": "tag_id", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "created_at": { + "name": "created_at", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "updated_at": { + "name": "updated_at", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false + } + }, + "indexes": { + "entity_tag_tag_id_idx": { + "name": "entity_tag_tag_id_idx", + "columns": ["tag_id"], + "isUnique": false + } + }, + "foreignKeys": { + "entity_tag_tag_id_tag_id_fk": { + "name": "entity_tag_tag_id_tag_id_fk", + "tableFrom": "entity_tag", + "tableTo": "tag", + "columnsFrom": ["tag_id"], + "columnsTo": ["id"], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": { + "entity_tag_entity_type_entity_id_tag_id_pk": { + "columns": ["entity_type", "entity_id", "tag_id"], + "name": 
"entity_tag_entity_type_entity_id_tag_id_pk" + } + }, + "uniqueConstraints": {}, + "checkConstraints": {} + }, + "tag": { + "name": "tag", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true, + "autoincrement": false + }, + "name": { + "name": "name", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "color": { + "name": "color", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "created_at": { + "name": "created_at", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "updated_at": { + "name": "updated_at", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false + } + }, + "indexes": { + "tag_name_unique": { + "name": "tag_name_unique", + "columns": ["name"], + "isUnique": true + } + }, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "checkConstraints": {} + }, + "topic": { + "name": "topic", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true, + "autoincrement": false + }, + "name": { + "name": "name", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "is_name_manually_edited": { + "name": "is_name_manually_edited", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false, + "default": false + }, + "assistant_id": { + "name": "assistant_id", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "assistant_meta": { + "name": "assistant_meta", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "prompt": { + "name": "prompt", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "active_node_id": { + "name": "active_node_id", + "type": "text", + "primaryKey": false, + "notNull": false, + 
"autoincrement": false + }, + "group_id": { + "name": "group_id", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "sort_order": { + "name": "sort_order", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false, + "default": 0 + }, + "is_pinned": { + "name": "is_pinned", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false, + "default": false + }, + "pinned_order": { + "name": "pinned_order", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false, + "default": 0 + }, + "created_at": { + "name": "created_at", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "updated_at": { + "name": "updated_at", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "deleted_at": { + "name": "deleted_at", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false + } + }, + "indexes": { + "topic_group_updated_idx": { + "name": "topic_group_updated_idx", + "columns": ["group_id", "updated_at"], + "isUnique": false + }, + "topic_group_sort_idx": { + "name": "topic_group_sort_idx", + "columns": ["group_id", "sort_order"], + "isUnique": false + }, + "topic_updated_at_idx": { + "name": "topic_updated_at_idx", + "columns": ["updated_at"], + "isUnique": false + }, + "topic_is_pinned_idx": { + "name": "topic_is_pinned_idx", + "columns": ["is_pinned", "pinned_order"], + "isUnique": false + }, + "topic_assistant_id_idx": { + "name": "topic_assistant_id_idx", + "columns": ["assistant_id"], + "isUnique": false + } + }, + "foreignKeys": { + "topic_group_id_group_id_fk": { + "name": "topic_group_id_group_id_fk", + "tableFrom": "topic", + "tableTo": "group", + "columnsFrom": ["group_id"], + "columnsTo": ["id"], + "onDelete": "set null", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + 
"checkConstraints": {} + }, + "translate_history": { + "name": "translate_history", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true, + "autoincrement": false + }, + "source_text": { + "name": "source_text", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "target_text": { + "name": "target_text", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "source_language": { + "name": "source_language", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "target_language": { + "name": "target_language", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "star": { + "name": "star", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false, + "default": false + }, + "created_at": { + "name": "created_at", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "updated_at": { + "name": "updated_at", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false + } + }, + "indexes": { + "translate_history_created_at_idx": { + "name": "translate_history_created_at_idx", + "columns": ["created_at"], + "isUnique": false + }, + "translate_history_star_created_at_idx": { + "name": "translate_history_star_created_at_idx", + "columns": ["star", "created_at"], + "isUnique": false + } + }, + "foreignKeys": { + "translate_history_source_language_translate_language_lang_code_fk": { + "name": "translate_history_source_language_translate_language_lang_code_fk", + "tableFrom": "translate_history", + "tableTo": "translate_language", + "columnsFrom": ["source_language"], + "columnsTo": ["lang_code"], + "onDelete": "set null", + "onUpdate": "no action" + }, + "translate_history_target_language_translate_language_lang_code_fk": { + "name": 
"translate_history_target_language_translate_language_lang_code_fk", + "tableFrom": "translate_history", + "tableTo": "translate_language", + "columnsFrom": ["target_language"], + "columnsTo": ["lang_code"], + "onDelete": "set null", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "checkConstraints": {} + }, + "translate_language": { + "name": "translate_language", + "columns": { + "lang_code": { + "name": "lang_code", + "type": "text", + "primaryKey": true, + "notNull": true, + "autoincrement": false + }, + "value": { + "name": "value", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "emoji": { + "name": "emoji", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "created_at": { + "name": "created_at", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "updated_at": { + "name": "updated_at", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "checkConstraints": {} + }, + "user_model": { + "name": "user_model", + "columns": { + "provider_id": { + "name": "provider_id", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "model_id": { + "name": "model_id", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "preset_model_id": { + "name": "preset_model_id", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "name": { + "name": "name", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "description": { + "name": "description", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "group": { + "name": "group", + "type": "text", + "primaryKey": false, + 
"notNull": false, + "autoincrement": false + }, + "capabilities": { + "name": "capabilities", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "input_modalities": { + "name": "input_modalities", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "output_modalities": { + "name": "output_modalities", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "endpoint_types": { + "name": "endpoint_types", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "custom_endpoint_url": { + "name": "custom_endpoint_url", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "context_window": { + "name": "context_window", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "max_output_tokens": { + "name": "max_output_tokens", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "supports_streaming": { + "name": "supports_streaming", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "reasoning": { + "name": "reasoning", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "parameters": { + "name": "parameters", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "pricing": { + "name": "pricing", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "is_enabled": { + "name": "is_enabled", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false, + "default": true + }, + "is_hidden": { + "name": "is_hidden", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false, + "default": false + }, + "is_deprecated": { + "name": "is_deprecated", + "type": "integer", + "primaryKey": 
false, + "notNull": false, + "autoincrement": false, + "default": false + }, + "sort_order": { + "name": "sort_order", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false, + "default": 0 + }, + "notes": { + "name": "notes", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "user_overrides": { + "name": "user_overrides", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "created_at": { + "name": "created_at", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "updated_at": { + "name": "updated_at", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false + } + }, + "indexes": { + "user_model_preset_idx": { + "name": "user_model_preset_idx", + "columns": ["preset_model_id"], + "isUnique": false + }, + "user_model_provider_enabled_idx": { + "name": "user_model_provider_enabled_idx", + "columns": ["provider_id", "is_enabled"], + "isUnique": false + }, + "user_model_provider_sort_idx": { + "name": "user_model_provider_sort_idx", + "columns": ["provider_id", "sort_order"], + "isUnique": false + } + }, + "foreignKeys": {}, + "compositePrimaryKeys": { + "user_model_provider_id_model_id_pk": { + "columns": ["provider_id", "model_id"], + "name": "user_model_provider_id_model_id_pk" + } + }, + "uniqueConstraints": {}, + "checkConstraints": {} + }, + "user_provider": { + "name": "user_provider", + "columns": { + "provider_id": { + "name": "provider_id", + "type": "text", + "primaryKey": true, + "notNull": true, + "autoincrement": false + }, + "preset_provider_id": { + "name": "preset_provider_id", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "name": { + "name": "name", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "endpoint_configs": { + "name": "endpoint_configs", + "type": "text", + 
"primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "default_chat_endpoint": { + "name": "default_chat_endpoint", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "api_keys": { + "name": "api_keys", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false, + "default": "'[]'" + }, + "auth_config": { + "name": "auth_config", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "api_features": { + "name": "api_features", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "provider_settings": { + "name": "provider_settings", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "websites": { + "name": "websites", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "is_enabled": { + "name": "is_enabled", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false, + "default": true + }, + "sort_order": { + "name": "sort_order", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false, + "default": 0 + }, + "created_at": { + "name": "created_at", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "updated_at": { + "name": "updated_at", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false + } + }, + "indexes": { + "user_provider_preset_idx": { + "name": "user_provider_preset_idx", + "columns": ["preset_provider_id"], + "isUnique": false + }, + "user_provider_enabled_sort_idx": { + "name": "user_provider_enabled_sort_idx", + "columns": ["is_enabled", "sort_order"], + "isUnique": false + } + }, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "checkConstraints": {} + } + }, + "views": {}, + "enums": {}, + "_meta": { + "schemas": {}, + "tables": {}, + 
"columns": {} + }, + "internal": { + "indexes": {} + } +} diff --git a/migrations/sqlite-drizzle/meta/_journal.json b/migrations/sqlite-drizzle/meta/_journal.json index 2a168c87f06..7f852a14995 100644 --- a/migrations/sqlite-drizzle/meta/_journal.json +++ b/migrations/sqlite-drizzle/meta/_journal.json @@ -53,7 +53,7 @@ { "idx": 7, "version": "6", - "when": 1774622546141, + "when": 1774697808333, "tag": "0007_steep_hedge_knight", "breakpoints": true }, @@ -63,6 +63,13 @@ "when": 1775137534101, "tag": "0008_wild_ultron", "breakpoints": true + }, + { + "idx": 9, + "version": "6", + "when": 1775543209395, + "tag": "0009_medical_giant_girl", + "breakpoints": true } ], "version": "7" diff --git a/package.json b/package.json index 7c455f0eaa3..b52cf49851f 100644 --- a/package.json +++ b/package.json @@ -94,6 +94,7 @@ "@napi-rs/system-ocr": "1.0.2", "@paymoapp/electron-shutdown-handler": "1.1.2", "cron-parser": "^5.0.8", + "drizzle-zod": "^0.8.3", "express": "5.1.0", "font-list": "2.0.0", "graceful-fs": "4.2.11", @@ -161,6 +162,7 @@ "@cherrystudio/embedjs-utils": "0.1.31", "@cherrystudio/extension-table-plus": "workspace:^", "@cherrystudio/openai": "6.15.0", + "@cherrystudio/provider-registry": "workspace:*", "@cherrystudio/ui": "workspace:*", "@codemirror/lang-json": "6.0.2", "@codemirror/lint": "6.9.5", @@ -263,8 +265,8 @@ "@types/react-dom": "^19.2.3", "@types/react-transition-group": "^4.4.12", "@types/react-window": "^1", - "@types/stream-json": "^1", "@types/semver": "^7.7.1", + "@types/stream-json": "^1", "@types/swagger-jsdoc": "^6", "@types/swagger-ui-express": "^4.1.8", "@types/tinycolor2": "^1", diff --git a/packages/provider-registry/README.md b/packages/provider-registry/README.md new file mode 100644 index 00000000000..ae6a307b1e8 --- /dev/null +++ b/packages/provider-registry/README.md @@ -0,0 +1,196 @@ +# Cherry Studio Catalog + +Comprehensive AI model catalog with provider information, pricing, capabilities, and automatic synchronization. 
+ +## Quick Start + +### 1. Setup API Keys + +Most providers require API keys to list models: + +```bash +# Copy example file +cp .env.example .env + +# Edit .env and add your API keys +# OPENAI_API_KEY=sk-... +# GROQ_API_KEY=gsk_... +# DEEPSEEK_API_KEY=... +``` + +### 2. Sync Provider Models + +**Option A: Sync all providers (batch)** +```bash +npm run sync:all +``` + +**Option B: Import authoritative sources** +```bash +# OpenRouter (360+ models) +npm run import:openrouter + +# AIHubMix (600+ models) +npm run import:aihubmix +``` + +**Option C: Use Web UI** +```bash +cd web +npm run dev +# Open http://localhost:3000/providers +# Click "Sync" button on any provider +``` + +## Features + +### Provider Management +- ✅ 51 providers configured with API endpoints +- ✅ Automatic model discovery via `models_api` +- ✅ Support for multiple API formats (OpenAI, Anthropic, Gemini) +- ✅ Custom transformers for aggregators + +### Model Catalog +- ✅ 1000+ models from various providers +- ✅ Comprehensive metadata (pricing, capabilities, limits) +- ✅ Input/output modalities +- ✅ Case-insensitive model IDs + +### Override System +- ✅ Provider-specific model overrides +- ✅ Tracks all provider-supported models (even if identical) +- ✅ Smart merging (preserves manual edits) +- ✅ Priority system (auto < 100 < manual) +- ✅ Automatic deduplication + +### Synchronization +- ✅ Batch sync all providers +- ✅ Per-provider sync via Web UI +- ✅ API key management +- ✅ Rate limiting and error handling + +## Data Files + +``` +data/ +├── models.json # Base model catalog (authoritative) +├── providers.json # Provider configurations with models_api +└── overrides.json # Provider-specific model overrides +``` + +## Scripts + +| Command | Description | +|---------|-------------| +| `npm run sync:all` | Sync all providers (except OpenRouter/AIHubMix) | +| `npm run import:openrouter` | Import models from OpenRouter | +| `npm run import:aihubmix` | Import models from AIHubMix | +| `npm run build` | 
Build TypeScript package | +| `npm run test` | Run test suite | + +## Architecture + +### Transformers + +Transform provider API responses to internal format: + +- **OpenAI-compatible** (default): Standard `/v1/models` format +- **OpenRouter**: Custom aggregator format with advanced capabilities +- **AIHubMix**: CSV-based format with type/feature parsing + +### Data Flow + +``` +Provider API → Transformer → ModelConfig[] + ↓ + Compare with models.json + ↓ + ┌──────────────────┴─────────────────┐ + ↓ ↓ + New Model Existing Model + ↓ ↓ + Add to models.json Generate Override + ↓ + Merge with existing + ↓ + Save to overrides.json +``` + +## Documentation + +- [Sync Guide](./docs/SYNC_GUIDE.md) - Detailed synchronization documentation +- [Schema Documentation](./src/schemas/README.md) - Data schemas and validation + +## Development + +### Prerequisites + +- Node.js 18+ +- Yarn 4+ + +### Setup + +```bash +# Install dependencies +yarn install + +# Run tests +npm run test + +# Build package +npm run build + +# Watch mode +npm run dev +``` + +### Adding a Provider + +1. Add provider config to `data/providers.json`: +```json +{ + "id": "new-provider", + "name": "New Provider", + "models_api": { + "endpoints": [ + { + "url": "https://api.provider.com/v1/models", + "endpoint_type": "CHAT_COMPLETIONS", + "format": "OPENAI" + } + ], + "enabled": true, + "update_frequency": "daily" + } +} +``` + +2. Add API key mapping in `scripts/sync-all-providers.ts`: +```typescript +const PROVIDER_ENV_MAP: Record<string, string> = { + // ... + 'new-provider': 'NEW_PROVIDER_API_KEY' +} +``` + +3. Add to `.env.example`: +```bash +NEW_PROVIDER_API_KEY= +``` + +4. Run sync: +```bash +npm run sync:all +``` + +### Adding a Custom Transformer + +See [Transformers Guide](./docs/SYNC_GUIDE.md#transformers) for details. + +## License + +MIT + +## Contributing + +Contributions welcome! Please read the [Sync Guide](./docs/SYNC_GUIDE.md) first. 
diff --git a/packages/provider-registry/data/models.json b/packages/provider-registry/data/models.json new file mode 100644 index 00000000000..76b864bd8a3 --- /dev/null +++ b/packages/provider-registry/data/models.json @@ -0,0 +1,37631 @@ +{ + "version": "2026.03.09", + "models": [ + { + "id": "multilingual-e5-large-instruct", + "name": "E5 Multi-Lingual Large Embeddings 0.6B", + "capabilities": ["embedding"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 512, + "maxOutputTokens": 512, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.12 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.12 + } + }, + "family": "text-embedding", + "ownedBy": "evroc", + "openWeights": true + }, + { + "id": "kimi-k2-5", + "name": "Kimi K2.5", + "description": "Kimi K2.5 is Moonshot AI's native multimodal model, delivering state-of-the-art visual coding capability and a self-directed agent swarm paradigm. Built on Kimi K2 with continued pretraining over approximately 15T mixed visual and text tokens, it delivers strong performance in general reasoning, visual coding, and agentic tool-calling.", + "capabilities": ["reasoning", "function-call", "image-recognition", "video-recognition"], + "inputModalities": ["text", "image", "video"], + "outputModalities": ["text"], + "contextWindow": 262144, + "maxOutputTokens": 262144, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.47 + }, + "output": { + "currency": "USD", + "perMillionTokens": 5.9 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.1 + } + }, + "reasoning": { + "supportedEfforts": ["none", "auto"] + }, + "family": "kimi", + "ownedBy": "moonshot", + "openWeights": true + }, + { + "id": "llama-3-3-instruct-fp8", + "name": "Llama 3.3 70B", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 131072, + "maxOutputTokens": 32768, + "pricing": { + "input": { + 
"currency": "USD", + "perMillionTokens": 1.18 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.18 + } + }, + "family": "llama", + "ownedBy": "meta", + "openWeights": true + }, + { + "id": "kb-whisper-large", + "name": "KB Whisper", + "capabilities": ["audio-recognition", "audio-transcript"], + "inputModalities": ["audio"], + "outputModalities": ["text"], + "contextWindow": 448, + "maxOutputTokens": 448, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.00236 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.00236 + } + }, + "family": "whisper", + "ownedBy": "evroc", + "openWeights": true + }, + { + "id": "phi-4-multimodal-instruct", + "name": "Phi-4 15B", + "description": "Microsoft's latest model", + "capabilities": ["function-call", "image-generation"], + "inputModalities": ["text"], + "outputModalities": ["text", "image"], + "contextWindow": 32000, + "maxOutputTokens": 32000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.24 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.47 + } + }, + "family": "phi", + "ownedBy": "microsoft", + "openWeights": true + }, + { + "id": "whisper-large-v3", + "name": "Whisper 3 Large", + "capabilities": ["audio-recognition", "audio-transcript"], + "inputModalities": ["audio"], + "outputModalities": ["text"], + "contextWindow": 448, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.00236 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.00236 + } + }, + "family": "whisper", + "ownedBy": "openai", + "openWeights": true + }, + { + "id": "gpt-oss", + "name": "GPT OSS 120B", + "description": "gpt-oss-120b is an open-weight, 117B-parameter Mixture-of-Experts (MoE) language model from OpenAI designed for high-reasoning, agentic, and general-purpose production use cases. 
It activates 5.1B parameters per forward pass and is optimized to run on a single H100 GPU with native MXFP4 quantization. The model supports configurable reasoning depth, full chain-of-thought access, and native tool use, including function calling, browsing, and structured output generation.", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 65536, + "maxOutputTokens": 65536, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.24 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.94 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.005 + } + }, + "reasoning": { + "supportedEfforts": ["low", "medium", "high"] + }, + "family": "gpt-oss", + "ownedBy": "openai", + "openWeights": true + }, + { + "id": "voxtral-small-2507", + "name": "Voxtral Small 24B", + "description": "Voxtral Small is an enhancement of Mistral Small 3, incorporating state-of-the-art audio input capabilities while retaining best-in-class text performance. It excels at speech transcription, translation and audio understanding. 
Input audio is priced at $100 per million seconds.", + "capabilities": ["audio-recognition"], + "inputModalities": ["audio", "text"], + "outputModalities": ["text"], + "contextWindow": 32000, + "maxOutputTokens": 32000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.00236 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.00236 + } + }, + "family": "voxtral", + "ownedBy": "mistral", + "openWeights": true + }, + { + "id": "magistral-small-2509", + "name": "Magistral Small 1.2 24B", + "capabilities": ["function-call", "reasoning"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 131072, + "maxOutputTokens": 131072, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.59 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2.36 + } + }, + "family": "magistral-small", + "ownedBy": "mistral", + "openWeights": true + }, + { + "id": "devstral-small-2-instruct-2512", + "name": "Devstral Small 2 24B Instruct 2512", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 32768, + "maxOutputTokens": 32768, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.12 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.47 + } + }, + "family": "devstral", + "ownedBy": "mistral", + "openWeights": true + }, + { + "id": "qwen3-embedding", + "name": "Qwen3 Embedding 8B", + "description": "The Qwen3 Embedding model series is the latest proprietary model of the Qwen family, specifically designed for text embedding and ranking tasks. This series inherits the exceptional multilingual capabilities, long-text understanding, and reasoning skills of its foundational model. 
The Qwen3 Embedding series represents significant advancements in multiple text embedding and ranking tasks, including text retrieval, code retrieval, text classification, text clustering, and bitext mining.", + "capabilities": ["function-call", "embedding", "reasoning"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 40960, + "maxOutputTokens": 40960, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.12 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.12 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 38912 + } + }, + "family": "text-embedding", + "ownedBy": "alibaba", + "openWeights": true + }, + { + "id": "qwen3-vl-a3b-instruct", + "name": "Qwen3 VL 30B", + "description": "Qwen3-VL-30B-A3B-Instruct is a multimodal model that unifies strong text generation with visual understanding for images and videos. Its Instruct variant optimizes instruction-following for general multimodal tasks. It excels in perception of real-world/synthetic categories, 2D/3D spatial grounding, and long-form visual comprehension, achieving competitive multimodal benchmark results. For agentic use, it handles multi-image multi-turn instructions, video timeline alignments, GUI automation, and visual coding from sketches to debugged UI. 
Text performance matches flagship Qwen3 models, suiting document AI, OCR, UI assistance, spatial tasks, and agent research.", + "capabilities": ["function-call", "image-recognition", "video-recognition"], + "inputModalities": ["text", "image", "video"], + "outputModalities": ["text"], + "contextWindow": 100000, + "maxOutputTokens": 100000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.24 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.94 + } + }, + "family": "qwen", + "ownedBy": "alibaba", + "openWeights": true + }, + { + "id": "qwen3-a3b-instruct-2507-fp8", + "name": "Qwen3 30B 2507", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 64000, + "maxOutputTokens": 64000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.35 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.42 + } + }, + "family": "qwen", + "ownedBy": "alibaba", + "openWeights": true + }, + { + "id": "gemma-3", + "name": "Gemma 3 27B", + "capabilities": ["function-call", "structured-output", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.15 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.3 + } + }, + "family": "gemma", + "ownedBy": "google", + "openWeights": true + }, + { + "id": "qwen3-coder-a3b", + "name": "Qwen3-Coder 30B-A3B", + "capabilities": ["function-call", "structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 32768, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.07 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.27 + } + }, + "family": "qwen", + "ownedBy": "alibaba", + "openWeights": true + }, + { + "id": "step-2-16k", + 
"name": "Step 2 (16K)", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 16384, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 5.21 + }, + "output": { + "currency": "USD", + "perMillionTokens": 16.44 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 1.04 + } + }, + "ownedBy": "stepfun", + "openWeights": false + }, + { + "id": "step-1-32k", + "name": "Step 1 (32K)", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 32768, + "maxOutputTokens": 32768, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2.05 + }, + "output": { + "currency": "USD", + "perMillionTokens": 9.59 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.41 + } + }, + "ownedBy": "stepfun", + "openWeights": false + }, + { + "id": "step-3-5-flash", + "name": "Step 3.5 Flash", + "description": "Step 3.5 Flash is StepFun's most capable open-source foundation model. Built on a sparse Mixture of Experts (MoE) architecture, it selectively activates only 11B of its 196B parameters per token. 
It is a reasoning model that is incredibly speed efficient even at long contexts.", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 256000, + "maxOutputTokens": 256000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.096 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.288 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.019 + } + }, + "family": "step", + "ownedBy": "stepfun", + "openWeights": true + }, + { + "id": "kimi-k2-0905-preview", + "name": "Kimi K2 0905", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 262144, + "maxOutputTokens": 262144, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.6 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2.5 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.15 + } + }, + "family": "kimi", + "ownedBy": "moonshot", + "openWeights": true + }, + { + "id": "kimi-k2", + "name": "Kimi K2 Thinking", + "description": "Kimi K2 Instruct is a large-scale Mixture-of-Experts (MoE) language model developed by Moonshot AI, featuring 1 trillion total parameters with 32 billion active per forward pass. It is optimized for agentic capabilities, including advanced tool use, reasoning, and code synthesis. Kimi K2 excels across a broad range of benchmarks, particularly in coding (LiveCodeBench, SWE-bench), reasoning (ZebraLogic, GPQA), and tool-use (Tau2, AceBench) tasks. 
It supports long-context inference up to 128K tokens and is designed with a novel training stack that includes the MuonClip optimizer for stable large-scale MoE training.", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 262144, + "maxOutputTokens": 262144, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.6 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2.5 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.15 + } + }, + "family": "kimi-thinking", + "ownedBy": "moonshot", + "openWeights": true + }, + { + "id": "kimi-k2-0711-preview", + "name": "Kimi K2 0711", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 131072, + "maxOutputTokens": 16384, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.6 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2.5 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.15 + } + }, + "family": "kimi", + "ownedBy": "moonshot", + "openWeights": true + }, + { + "id": "kimi-k2-turbo-preview", + "name": "Kimi K2 Turbo", + "description": "The kimi-k2-turbo-preview model is a high-speed version of kimi-k2, with the same model parameters as kimi-k2, but the output speed has been increased from 10 tokens per second to 40 tokens per second.", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 262144, + "maxOutputTokens": 262144, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2.4 + }, + "output": { + "currency": "USD", + "perMillionTokens": 10 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.6 + } + }, + "family": "kimi", + "ownedBy": "moonshot", + "openWeights": true + }, + { + "id": "kimi-k2-thinking-turbo", + "name": "Kimi K2 Thinking Turbo", + "capabilities": ["reasoning", 
"function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 262144, + "maxOutputTokens": 262144, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.15 + }, + "output": { + "currency": "USD", + "perMillionTokens": 8 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.15 + } + }, + "reasoning": { + "supportedEfforts": [] + }, + "family": "kimi-thinking", + "ownedBy": "moonshot", + "openWeights": true + }, + { + "id": "gemini-3-flash-preview", + "name": "Gemini 3 Flash Preview", + "description": "Gemini 3 Flash Preview is a high speed, high value thinking model designed for agentic workflows, multi turn chat, and coding assistance. It delivers near Pro level reasoning and tool use performance with substantially lower latency than larger Gemini variants, making it well suited for interactive development, long running agent loops, and collaborative coding tasks. Compared to Gemini 2.5 Flash, it provides broad quality improvements across reasoning, multimodal understanding, and reliability.\n\nThe model supports a 1M token context window and multimodal inputs including text, images, audio, video, and PDFs, with text output. It includes configurable reasoning via thinking levels (minimal, low, medium, high), structured output, tool use, and automatic context caching. 
Gemini 3 Flash Preview is optimized for users who want strong reasoning and agentic behavior without the cost or latency of full scale frontier models.", + "capabilities": [ + "reasoning", + "function-call", + "structured-output", + "file-input", + "image-recognition", + "audio-recognition", + "video-recognition", + "web-search" + ], + "inputModalities": ["text", "image", "video", "audio"], + "outputModalities": ["text"], + "contextWindow": 1048576, + "maxOutputTokens": 65536, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 3 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.05 + } + }, + "reasoning": { + "supportedEfforts": ["minimal", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 0, + "max": 24576 + }, + "interleaved": true + }, + "family": "gemini-flash", + "ownedBy": "google", + "openWeights": false + }, + { + "id": "claude-haiku-4-5", + "name": "Claude Haiku 4.5", + "description": "Claude Haiku 4.5 is Anthropic’s fastest and most efficient model, delivering near-frontier intelligence at a fraction of the cost and latency of larger Claude models. Matching Claude Sonnet 4’s performance across reasoning, coding, and computer-use tasks, Haiku 4.5 brings frontier-level capability to real-time and high-volume applications.\n\nIt introduces extended thinking to the Haiku line; enabling controllable reasoning depth, summarized or interleaved thought output, and tool-assisted workflows with full support for coding, bash, web search, and computer-use tools. 
Scoring >73% on SWE-bench Verified, Haiku 4.5 ranks among the world’s best coding models while maintaining exceptional responsiveness for sub-agents, parallelized execution, and scaled deployment.", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition", "web-search", "computer-use"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 64000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1 + }, + "output": { + "currency": "USD", + "perMillionTokens": 5 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.1 + }, + "cacheWrite": { + "currency": "USD", + "perMillionTokens": 1.375 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 64000 + }, + "interleaved": true + }, + "family": "claude-haiku", + "ownedBy": "anthropic", + "openWeights": false + }, + { + "id": "claude-sonnet-4-6", + "name": "Claude Sonnet 4.6", + "description": "Sonnet 4.6 is Anthropic's most capable Sonnet-class model yet, with frontier performance across coding, agents, and professional work. 
It excels at iterative development, complex codebase navigation, end-to-end project management with memory, polished document creation, and confident computer use for web QA and workflow automation.", + "capabilities": ["function-call", "file-input", "image-recognition", "reasoning", "web-search", "computer-use"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 64000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 15 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.3 + }, + "cacheWrite": { + "currency": "USD", + "perMillionTokens": 3.75 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 64000 + }, + "interleaved": true + }, + "family": "claude-sonnet", + "ownedBy": "anthropic", + "openWeights": false + }, + { + "id": "claude-opus-4-6", + "name": "Claude Opus 4.6", + "description": "Opus 4.6 is Anthropic’s strongest model for coding and long-running professional tasks. It is built for agents that operate across entire workflows rather than single prompts, making it especially effective for large codebases, complex refactors, and multi-step debugging that unfolds over time. The model shows deeper contextual understanding, stronger problem decomposition, and greater reliability on hard engineering tasks than prior generations.\n\nBeyond coding, Opus 4.6 excels at sustained knowledge work. It produces near-production-ready documents, plans, and analyses in a single pass, and maintains coherence across very long outputs and extended sessions. 
This makes it a strong default for tasks that require persistence, judgment, and follow-through, such as technical design, migration planning, and end-to-end project execution.\n\nFor users upgrading from earlier Opus versions, see our [official migration guide here](https://openrouter.ai/docs/guides/guides/model-migrations/claude-4-6-opus)\n", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition", "web-search", "computer-use"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 128000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 25 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.5 + }, + "cacheWrite": { + "currency": "USD", + "perMillionTokens": 6.25 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 32000 + }, + "interleaved": true + }, + "family": "claude-opus", + "ownedBy": "anthropic", + "openWeights": false + }, + { + "id": "claude-opus-4-5", + "name": "Claude Opus 4.5", + "description": "Claude Opus 4.5 is Anthropic’s frontier reasoning model optimized for complex software engineering, agentic workflows, and long-horizon computer use. It offers strong multimodal capabilities, competitive performance across real-world coding and reasoning benchmarks, and improved robustness to prompt injection. The model is designed to operate efficiently across varied effort levels, enabling developers to trade off speed, depth, and token usage depending on task requirements. 
It comes with a new parameter to control token efficiency, which can be accessed using the OpenRouter Verbosity parameter with low, medium, or high.\n\nOpus 4.5 supports advanced tool use, extended context management, and coordinated multi-agent setups, making it well-suited for autonomous research, debugging, multi-step planning, and spreadsheet/browser manipulation. It delivers substantial gains in structured reasoning, execution reliability, and alignment compared to prior Opus generations, while reducing token overhead and improving performance on long-running tasks.", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition", "web-search", "computer-use"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 64000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 25 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.5 + }, + "cacheWrite": { + "currency": "USD", + "perMillionTokens": 6.25 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 64000 + }, + "interleaved": true + }, + "family": "claude-opus", + "ownedBy": "anthropic", + "openWeights": false + }, + { + "id": "gemini-3-pro-preview", + "name": "Gemini 3 Pro Preview", + "description": "Gemini 3 Pro is Google’s flagship frontier model for high-precision multimodal reasoning, combining strong performance across text, image, video, audio, and code with a 1M-token context window. Reasoning Details must be preserved when using multi-turn tool calling, see our docs here: https://openrouter.ai/docs/use-cases/reasoning-tokens#preserving-reasoning-blocks. 
It delivers state-of-the-art benchmark results in general reasoning, STEM problem solving, factual QA, and multimodal understanding, including leading scores on LMArena, GPQA Diamond, MathArena Apex, MMMU-Pro, and Video-MMMU. Interactions emphasize depth and interpretability: the model is designed to infer intent with minimal prompting and produce direct, insight-focused responses.\n\nBuilt for advanced development and agentic workflows, Gemini 3 Pro provides robust tool-calling, long-horizon planning stability, and strong zero-shot generation for complex UI, visualization, and coding tasks. It excels at agentic coding (SWE-Bench Verified, Terminal-Bench 2.0), multimodal analysis, and structured long-form tasks such as research synthesis, planning, and interactive learning experiences. Suitable applications include autonomous agents, coding assistants, multimodal analytics, scientific reasoning, and high-context information processing.", + "capabilities": [ + "reasoning", + "function-call", + "structured-output", + "file-input", + "image-recognition", + "audio-recognition", + "video-recognition", + "web-search" + ], + "inputModalities": ["text", "image", "video", "audio"], + "outputModalities": ["text"], + "contextWindow": 1000000, + "maxOutputTokens": 64000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 12 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.2 + } + }, + "reasoning": { + "supportedEfforts": ["low", "high"], + "thinkingTokenLimits": { + "min": 128, + "max": 32768 + }, + "interleaved": true + }, + "family": "gemini-pro", + "ownedBy": "google", + "openWeights": false + }, + { + "id": "gemini-2-5-flash", + "name": "Gemini 2.5 Flash", + "description": "Gemini 2.5 Flash is Google's state-of-the-art workhorse model, specifically designed for advanced reasoning, coding, mathematics, and scientific tasks. 
It includes built-in \"thinking\" capabilities, enabling it to provide responses with greater accuracy and nuanced context handling. \n\nAdditionally, Gemini 2.5 Flash is configurable through the \"max tokens for reasoning\" parameter, as described in the documentation (https://openrouter.ai/docs/use-cases/reasoning-tokens#max-tokens-for-reasoning).", + "capabilities": [ + "reasoning", + "function-call", + "file-input", + "image-recognition", + "audio-recognition", + "video-recognition", + "web-search" + ], + "inputModalities": ["text", "image", "audio", "video"], + "outputModalities": ["text"], + "contextWindow": 1048576, + "maxOutputTokens": 65536, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2.5 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.075 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 0, + "max": 24576 + } + }, + "family": "gemini-flash", + "ownedBy": "google", + "openWeights": false + }, + { + "id": "claude-sonnet-4-5", + "name": "Claude Sonnet 4.5", + "description": "Claude Sonnet 4.5 is Anthropic’s most advanced Sonnet model to date, optimized for real-world agents and coding workflows. It delivers state-of-the-art performance on coding benchmarks such as SWE-bench Verified, with improvements across system design, code security, and specification adherence. The model is designed for extended autonomous operation, maintaining task continuity across sessions and providing fact-based progress tracking.\n\nSonnet 4.5 also introduces stronger agentic capabilities, including improved tool orchestration, speculative parallel execution, and more efficient context and memory management. With enhanced context tracking and awareness of token usage across tool calls, it is particularly well-suited for multi-context and long-running workflows. 
Use cases span software engineering, cybersecurity, financial analysis, research agents, and other domains requiring sustained reasoning and tool use.", + "capabilities": ["function-call", "file-input", "image-recognition", "reasoning", "web-search", "computer-use"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 64000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 15 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.3 + }, + "cacheWrite": { + "currency": "USD", + "perMillionTokens": 4.125 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 64000 + }, + "interleaved": true + }, + "family": "claude-sonnet", + "ownedBy": "anthropic", + "openWeights": false + }, + { + "id": "gpt-5-nano", + "name": "GPT-5 Nano", + "description": "GPT-5-Nano is the smallest and fastest variant in the GPT-5 system, optimized for developer tools, rapid interactions, and ultra-low latency environments. While limited in reasoning depth compared to its larger counterparts, it retains key instruction-following and safety features. 
It is the successor to GPT-4.1-nano and offers a lightweight option for cost-sensitive or real-time applications.", + "capabilities": [ + "reasoning", + "function-call", + "structured-output", + "file-input", + "image-recognition", + "web-search" + ], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 400000, + "maxOutputTokens": 128000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.05 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.4 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.01 + } + }, + "reasoning": { + "supportedEfforts": ["minimal", "low", "medium", "high"] + }, + "family": "gpt-nano", + "ownedBy": "openai", + "openWeights": false + }, + { + "id": "gpt-4o", + "name": "GPT-4o", + "description": "GPT-4o (\"o\" for \"omni\") is OpenAI's latest AI model, supporting both text and image inputs with text outputs. It maintains the intelligence level of [GPT-4 Turbo](/models/openai/gpt-4-turbo) while being twice as fast and 50% more cost-effective. 
GPT-4o also offers improved performance in processing non-English languages and enhanced visual capabilities.\n\nFor benchmarking against other models, it was briefly called [\"im-also-a-good-gpt2-chatbot\"](https://twitter.com/LiamFedus/status/1790064963966370209)\n\n#multimodal", + "capabilities": ["function-call", "structured-output", "file-input", "image-recognition", "web-search"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 16384, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2.5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 10 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 1.25 + } + }, + "family": "gpt", + "ownedBy": "openai", + "openWeights": false + }, + { + "id": "gpt-5-mini", + "name": "GPT-5 Mini", + "description": "GPT-5 Mini is a compact version of GPT-5, designed to handle lighter-weight reasoning tasks. It provides the same instruction-following and safety-tuning benefits as GPT-5, but with reduced latency and cost. GPT-5 Mini is the successor to OpenAI's o4-mini model.", + "capabilities": [ + "reasoning", + "function-call", + "structured-output", + "file-input", + "image-recognition", + "web-search" + ], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 400000, + "maxOutputTokens": 128000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.25 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.03 + } + }, + "reasoning": { + "supportedEfforts": ["minimal", "low", "medium", "high"] + }, + "family": "gpt-mini", + "ownedBy": "openai", + "openWeights": false + }, + { + "id": "gemini-2-5-pro", + "name": "Gemini 2.5 Pro", + "description": "Gemini 2.5 Pro is Google’s state-of-the-art AI model designed for advanced reasoning, coding, mathematics, and scientific tasks. 
It employs “thinking” capabilities, enabling it to reason through responses with enhanced accuracy and nuanced context handling. Gemini 2.5 Pro achieves top-tier performance on multiple benchmarks, including first-place positioning on the LMArena leaderboard, reflecting superior human-preference alignment and complex problem-solving abilities.", + "capabilities": [ + "reasoning", + "function-call", + "file-input", + "image-recognition", + "audio-recognition", + "video-recognition", + "web-search" + ], + "inputModalities": ["text", "image", "audio", "video"], + "outputModalities": ["text"], + "contextWindow": 1048576, + "maxOutputTokens": 65536, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.25 + }, + "output": { + "currency": "USD", + "perMillionTokens": 10 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.31 + } + }, + "reasoning": { + "supportedEfforts": ["low", "medium", "high"], + "thinkingTokenLimits": { + "min": 128, + "max": 32768 + } + }, + "family": "gemini-pro", + "ownedBy": "google", + "openWeights": false + }, + { + "id": "gpt-5", + "name": "GPT-5", + "description": "GPT-5 is OpenAI’s most advanced model, offering major improvements in reasoning, code quality, and user experience. It is optimized for complex tasks that require step-by-step reasoning, instruction following, and accuracy in high-stakes use cases. 
It supports test-time routing features and advanced prompt understanding, including user-specified intent like \"think hard about this.\" Improvements include reductions in hallucination, sycophancy, and better performance in coding, writing, and health-related tasks.", + "capabilities": [ + "reasoning", + "function-call", + "structured-output", + "file-input", + "image-recognition", + "web-search" + ], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 400000, + "maxOutputTokens": 128000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.25 + }, + "output": { + "currency": "USD", + "perMillionTokens": 10 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.13 + } + }, + "reasoning": { + "supportedEfforts": ["minimal", "low", "medium", "high"] + }, + "family": "gpt", + "ownedBy": "openai", + "openWeights": false + }, + { + "id": "gpt-5-2", + "name": "GPT-5.2", + "description": "GPT-5.2 is the latest frontier-grade model in the GPT-5 series, offering stronger agentic and long context performance compared to GPT-5.1. 
It uses adaptive reasoning to allocate computation dynamically, responding quickly to simple queries while spending more depth on complex tasks.\n\nBuilt for broad task coverage, GPT-5.2 delivers consistent gains across math, coding, science, and tool calling workloads, with more coherent long-form answers and improved tool-use reliability.", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition", "web-search"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 400000, + "maxOutputTokens": 128000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.75 + }, + "output": { + "currency": "USD", + "perMillionTokens": 14 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.175 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high", "max"] + }, + "family": "gpt", + "ownedBy": "openai", + "openWeights": false + }, + { + "id": "nova-2-lite-v1", + "name": "Nova 2 Lite", + "description": "Nova 2 Lite is a fast, cost-effective reasoning model for everyday workloads that can process text, images, and videos to generate text. 
\n\nNova 2 Lite demonstrates standout capabilities in processing documents, extracting information from videos, generating code, providing accurate grounded answers, and automating multi-step agentic workflows.", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition", "video-recognition"], + "inputModalities": ["text", "image", "video"], + "outputModalities": ["text"], + "contextWindow": 1000000, + "maxOutputTokens": 64000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2.5 + } + }, + "family": "nova-lite", + "ownedBy": "amazon", + "openWeights": false + }, + { + "id": "nova-2-pro-v1", + "name": "Nova 2 Pro", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition", "video-recognition"], + "inputModalities": ["text", "image", "video"], + "outputModalities": ["text"], + "contextWindow": 1000000, + "maxOutputTokens": 64000, + "family": "nova-pro", + "ownedBy": "amazon", + "openWeights": false + }, + { + "id": "nexus-coder", + "name": "LucidQuery Nexus Coder", + "capabilities": ["reasoning", "function-call", "file-input"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 250000, + "maxOutputTokens": 60000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 5 + } + }, + "family": "lucid", + "ownedBy": "lucidquery", + "openWeights": false + }, + { + "id": "rf1", + "name": "LucidNova RF1 100B", + "capabilities": ["reasoning", "function-call", "file-input"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 120000, + "maxOutputTokens": 8000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 5 + } + }, + "family": "nova", + "ownedBy": "lucidquery", + "openWeights": false + }, + { + "id": 
"gpt-4-1-nano", + "name": "gpt-4.1-nano", + "description": "For tasks that demand low latency, GPT‑4.1 nano is the fastest and cheapest model in the GPT-4.1 series. It delivers exceptional performance at a small size with its 1 million token context window, and scores 80.1% on MMLU, 50.3% on GPQA, and 9.8% on Aider polyglot coding – even higher than GPT‑4o mini. It’s ideal for tasks like classification or autocompletion.", + "capabilities": ["function-call", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 1000000, + "maxOutputTokens": 32768, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.1 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.4 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.03 + } + }, + "family": "gpt-nano", + "ownedBy": "openai", + "openWeights": false + }, + { + "id": "doubao-seed-code-preview-251028", + "name": "doubao-seed-code-preview-251028", + "capabilities": ["function-call", "file-input", "image-recognition", "reasoning"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 256000, + "maxOutputTokens": 32000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.17 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.14 + } + }, + "reasoning": { + "supportedEfforts": ["none", "high"] + }, + "ownedBy": "bytedance", + "openWeights": false + }, + { + "id": "glm-4-7", + "name": "glm-4.7", + "description": "GLM-4.7 is Z.ai’s latest flagship model, featuring upgrades in two key areas: enhanced programming capabilities and more stable multi-step reasoning/execution. 
It demonstrates significant improvements in executing complex agent tasks while delivering more natural conversational experiences and superior front-end aesthetics.", + "capabilities": ["function-call", "reasoning"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 131072, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.286 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.142 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.08 + } + }, + "reasoning": { + "supportedEfforts": ["none"], + "interleaved": true + }, + "family": "glm", + "ownedBy": "zhipu", + "openWeights": false + }, + { + "id": "grok-4-fast-non-reasoning", + "name": "grok-4-fast-non-reasoning", + "description": "Grok-4-fast is a cost-effective inference model developed by xAI that delivers cutting-edge performance with excellent token efficiency. The model features a 2 million token context window, advanced Web and X search capabilities, and a unified architecture supporting both \"inference\" and \"non-inference\" modes. 
Compared to Grok 4, it reduces thinking tokens by an average of 40% and lowers the price by 98% while achieving the same performance.", + "capabilities": ["function-call", "file-input", "image-recognition", "web-search"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 2000000, + "maxOutputTokens": 30000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.5 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.05 + } + }, + "family": "grok", + "ownedBy": "xai", + "openWeights": false + }, + { + "id": "qwen3-a22b-instruct-2507", + "name": "qwen3-235b-a22b-instruct-2507", + "description": "Qwen3-235B-A22B-Instruct-2507", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 65536, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.29 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.143 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.04 + } + }, + "family": "qwen", + "ownedBy": "alibaba", + "openWeights": false + }, + { + "id": "grok-4-fast-reasoning", + "name": "grok-4-fast-reasoning", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition", "web-search"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 2000000, + "maxOutputTokens": 30000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.5 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.05 + } + }, + "reasoning": { + "supportedEfforts": [] + }, + "family": "grok", + "ownedBy": "xai", + "openWeights": false + }, + { + "id": "mistral-large-2512", + "name": "mistral-large-2512", + "description": "Mistral Large 3 2512 is Mistral’s most capable model to 
date, featuring a sparse mixture-of-experts architecture with 41B active parameters (675B total), and released under the Apache 2.0 license.", + "capabilities": ["function-call", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 262144, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.1 + }, + "output": { + "currency": "USD", + "perMillionTokens": 3.3 + } + }, + "family": "mistral-large", + "ownedBy": "mistral", + "openWeights": false + }, + { + "id": "minimax-m2", + "name": "MiniMax-M2", + "description": "MiniMax-M2 is a compact, high-efficiency large language model optimized for end-to-end coding and agentic workflows. With 10 billion activated parameters (230 billion total), it delivers near-frontier intelligence across general reasoning, tool use, and multi-step task execution while maintaining low latency and deployment efficiency.\n\nThe model excels in code generation, multi-file editing, compile-run-fix loops, and test-validated repair, showing strong results on SWE-Bench Verified, Multi-SWE-Bench, and Terminal-Bench. It also performs competitively in agentic evaluations such as BrowseComp and GAIA, effectively handling long-horizon planning, retrieval, and recovery from execution errors.\n\nBenchmarked by [Artificial Analysis](https://artificialanalysis.ai/models/minimax-m2), MiniMax-M2 ranks among the top open-source models for composite intelligence, spanning mathematics, science, and instruction-following. Its small activation footprint enables fast inference, high concurrency, and improved unit economics, making it well-suited for large-scale agents, developer assistants, and reasoning-driven applications that require responsiveness and cost efficiency.\n\nTo avoid degrading this model's performance, MiniMax highly recommends preserving reasoning between turns. 
Learn more about using reasoning_details to pass back reasoning in our [docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#preserving-reasoning-blocks).", + "capabilities": ["function-call", "reasoning"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 1000000, + "maxOutputTokens": 128000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.33 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.32 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.03 + } + }, + "reasoning": { + "supportedEfforts": [], + "interleaved": true + }, + "family": "minimax", + "ownedBy": "minimax", + "openWeights": false + }, + { + "id": "grok-4-1-fast-reasoning", + "name": "grok-4-1-fast-reasoning", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition", "web-search"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 2000000, + "maxOutputTokens": 30000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.5 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.05 + } + }, + "reasoning": { + "supportedEfforts": [] + }, + "family": "grok", + "ownedBy": "xai", + "openWeights": false + }, + { + "id": "deepseek-v3-2", + "name": "DeepSeek-V3.2-Thinking", + "description": "DeepSeek-V3.2 is a large language model designed to harmonize high computational efficiency with strong reasoning and agentic tool-use performance. It introduces DeepSeek Sparse Attention (DSA), a fine-grained sparse attention mechanism that reduces training and inference cost while preserving quality in long-context scenarios. A scalable reinforcement learning post-training framework further improves reasoning, with reported performance in the GPT-5 class, and the model has demonstrated gold-medal results on the 2025 IMO and IOI. 
V3.2 also uses a large-scale agentic task synthesis pipeline to better integrate reasoning into tool-use settings, boosting compliance and generalization in interactive environments.\n\nUsers can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 128000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.29 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.43 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.03 + } + }, + "reasoning": { + "supportedEfforts": ["none"], + "interleaved": true + }, + "family": "deepseek", + "ownedBy": "deepseek", + "openWeights": false + }, + { + "id": "ministral-2512", + "name": "ministral-14b-2512", + "description": "The largest model in the Ministral 3 family, Ministral 3 14B offers frontier capabilities and performance comparable to its larger Mistral Small 3.2 24B counterpart. A powerful and efficient language model with vision capabilities.", + "capabilities": ["function-call", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 128000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.33 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.33 + } + }, + "ownedBy": "mistral", + "openWeights": false + }, + { + "id": "glm-4-5", + "name": "GLM-4.5", + "description": "GLM-4.5 is our latest flagship foundation model, purpose-built for agent-based applications. It leverages a Mixture-of-Experts (MoE) architecture and supports a context length of up to 128k tokens. 
GLM-4.5 delivers significantly enhanced capabilities in reasoning, code generation, and agent alignment. It supports a hybrid inference mode with two options, a \"thinking mode\" designed for complex reasoning and tool use, and a \"non-thinking mode\" optimized for instant responses. Users can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)", + "capabilities": ["function-call", "reasoning"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 98304, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.286 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.142 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.06 + } + }, + "reasoning": { + "supportedEfforts": ["none"], + "interleaved": true + }, + "family": "glm", + "ownedBy": "zhipu", + "openWeights": false + }, + { + "id": "gemini-2-5-flash-image", + "name": "gemini-2.5-flash-image", + "description": "Gemini 2.5 Flash Image, a.k.a. \"Nano Banana,\" is now generally available. It is a state of the art image generation model with contextual understanding. It is capable of image generation, edits, and multi-turn conversations. 
Aspect ratios can be controlled with the [image_config API Parameter](https://openrouter.ai/docs/features/multimodal/image-generation#image-aspect-ratio-configuration)", + "capabilities": [ + "file-input", + "image-recognition", + "function-call", + "image-generation", + "web-search", + "reasoning" + ], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 32768, + "maxOutputTokens": 32768, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 30 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.075 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 0, + "max": 24576 + } + }, + "family": "gemini-flash", + "ownedBy": "google", + "openWeights": false + }, + { + "id": "deepseek-chat", + "name": "Deepseek-Chat", + "description": "DeepSeek-V3 is the latest model from the DeepSeek team, building upon the instruction following and coding abilities of the previous versions. 
Pre-trained on nearly 15 trillion tokens, the reported evaluations reveal that the model outperforms other open-source models and rivals leading closed-source models.\n\nFor model details, please visit [the DeepSeek-V3 repo](https://github.com/deepseek-ai/DeepSeek-V3) for more information, or see the [launch announcement](https://api-docs.deepseek.com/news/news1226).", + "capabilities": ["function-call", "reasoning"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.29 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.43 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.028 + } + }, + "reasoning": { + "supportedEfforts": ["none"] + }, + "family": "deepseek", + "ownedBy": "deepseek", + "openWeights": false + }, + { + "id": "gpt-4-1-mini", + "name": "gpt-4.1-mini", + "description": "GPT-4.1 Mini is a mid-sized model delivering performance competitive with GPT-4o at substantially lower latency and cost. It retains a 1 million token context window and scores 45.1% on hard instruction evals, 35.8% on MultiChallenge, and 84.1% on IFEval. 
Mini also shows strong coding ability (e.g., 31.6% on Aider’s polyglot diff benchmark) and vision understanding, making it suitable for interactive applications with tight performance constraints.", + "capabilities": ["function-call", "file-input", "image-recognition", "web-search"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 1000000, + "maxOutputTokens": 32768, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.4 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.6 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.1 + } + }, + "family": "gpt-mini", + "ownedBy": "openai", + "openWeights": false + }, + { + "id": "gemini-2-0-flash-lite", + "name": "gemini-2.0-flash-lite", + "description": "Gemini-2.0-flash Lightweight Official Version", + "capabilities": ["file-input", "image-recognition", "function-call", "web-search"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 2000000, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.075 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.3 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.076 + } + }, + "family": "gemini-flash-lite", + "ownedBy": "google", + "openWeights": false + }, + { + "id": "claude-sonnet-4-5-20250929", + "name": "claude-sonnet-4-5-20250929-thinking", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition", "web-search", "computer-use"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 64000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 15 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.30000000000000004 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", 
"high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 64000 + } + }, + "family": "claude-sonnet", + "ownedBy": "anthropic", + "openWeights": false, + "alias": ["claude-sonnet-4-5"] + }, + { + "id": "claude-opus-4-5-20251101", + "name": "claude-opus-4-5-20251101", + "capabilities": ["function-call", "file-input", "image-recognition", "reasoning", "web-search", "computer-use"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 64000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 25 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.5 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 64000 + } + }, + "family": "claude-opus", + "ownedBy": "anthropic", + "openWeights": false, + "alias": ["claude-opus-4-5"] + }, + { + "id": "qwen3-max-2025-09-23", + "name": "qwen3-max-2025-09-23", + "capabilities": ["function-call", "reasoning"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 258048, + "maxOutputTokens": 65536, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.86 + }, + "output": { + "currency": "USD", + "perMillionTokens": 3.43 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 0, + "max": 81920 + } + }, + "ownedBy": "alibaba", + "openWeights": false + }, + { + "id": "grok-4-1", + "name": "grok-4.1", + "capabilities": ["function-call", "file-input", "image-recognition", "reasoning", "web-search"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 64000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 10 + } + }, + "ownedBy": "xai", 
+ "openWeights": false + }, + { + "id": "chatgpt-4o-latest", + "name": "chatgpt-4o-latest", + "description": "This model will point to the latest GPT-4o model used by ChatGPT.", + "capabilities": ["file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 16384, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 15 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 2.5 + } + }, + "family": "gpt", + "ownedBy": "openai", + "openWeights": false + }, + { + "id": "gpt-5-2-chat-latest", + "name": "gpt-5.2-chat-latest", + "description": "GPT-5.2Chat refers to the GPT-5.2 snapshot currently used in ChatGPT and is optimized for conversational use cases. While GPT-5.2 is recommended for most API applications, GPT-5.2Chat is ideal for testing the latest improvements in chat-based interactions.", + "capabilities": ["function-call", "file-input", "image-recognition", "web-search", "reasoning"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 16384, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.75 + }, + "output": { + "currency": "USD", + "perMillionTokens": 14 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.175 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high", "max"] + }, + "family": "gpt", + "ownedBy": "openai", + "openWeights": false + }, + { + "id": "gpt-5-1", + "name": "gpt-5.1", + "description": "GPT-5.1 is the latest frontier-grade model in the GPT-5 series, offering stronger general-purpose reasoning, improved instruction adherence, and a more natural conversational style compared to GPT-5. It uses adaptive reasoning to allocate computation dynamically, responding quickly to simple queries while spending more depth on complex tasks. 
The model produces clearer, more grounded explanations with reduced jargon, making it easier to follow even on technical or multi-step problems.\n\nBuilt for broad task coverage, GPT-5.1 delivers consistent gains across math, coding, and structured analysis workloads, with more coherent long-form answers and improved tool-use reliability. It also features refined conversational alignment, enabling warmer, more intuitive responses without compromising precision. GPT-5.1 serves as the primary full-capability successor to GPT-5", + "capabilities": ["function-call", "file-input", "image-recognition", "web-search", "reasoning"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 400000, + "maxOutputTokens": 128000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.25 + }, + "output": { + "currency": "USD", + "perMillionTokens": 10 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.13 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"] + }, + "family": "gpt", + "ownedBy": "openai", + "openWeights": false + }, + { + "id": "doubao-seed-1-6-vision-250815", + "name": "doubao-seed-1-6-vision-250815", + "description": "Doubao-Seed-1.6-vision is a visual deep-thinking model that demonstrates stronger general multimodal understanding and reasoning capabilities in scenarios such as education, image moderation, inspection and security, and AI search Q&A. 
It supports a 256K context window and an output length of up to 64K tokens.", + "capabilities": ["function-call", "file-input", "image-recognition", "reasoning"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 256000, + "maxOutputTokens": 32000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.114 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.143 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.021918 + } + }, + "ownedBy": "bytedance", + "openWeights": false + }, + { + "id": "qwen-flash", + "name": "Qwen-Flash", + "description": "The model adopts tiered pricing.", + "capabilities": ["function-call", "reasoning"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 1000000, + "maxOutputTokens": 32768, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.022 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.22 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.02 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 0, + "max": 81920 + } + }, + "family": "qwen", + "ownedBy": "alibaba", + "openWeights": false + }, + { + "id": "gemini-3-pro-image-preview", + "name": "gemini-3-pro-image-preview", + "description": "Nano Banana Pro is Google’s most advanced image-generation and editing model, built on Gemini 3 Pro. It extends the original Nano Banana with significantly improved multimodal reasoning, real-world grounding, and high-fidelity visual synthesis. The model generates context-rich graphics, from infographics and diagrams to cinematic composites, and can incorporate real-time information via Search grounding.\n\nIt offers industry-leading text rendering in images (including long passages and multilingual layouts), consistent multi-image blending, and accurate identity preservation across up to five subjects. 
Nano Banana Pro adds fine-grained creative controls such as localized edits, lighting and focus adjustments, camera transformations, and support for 2K/4K outputs and flexible aspect ratios. It is designed for professional-grade design, product visualization, storyboarding, and complex multi-element compositions while remaining efficient for general image creation workflows.", + "capabilities": ["file-input", "image-recognition", "function-call", "image-generation", "web-search"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 32768, + "maxOutputTokens": 64000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 120 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.19999999999999998 + } + }, + "ownedBy": "google", + "openWeights": false + }, + { + "id": "doubao-seed-1-6-thinking-250715", + "name": "doubao-seed-1-6-thinking-250715", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 256000, + "maxOutputTokens": 16000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.121 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.21 + } + }, + "ownedBy": "bytedance", + "openWeights": false + }, + { + "id": "gpt-4-1", + "name": "gpt-4.1", + "description": "GPT-4.1 is a flagship large language model optimized for advanced instruction following, real-world software engineering, and long-context reasoning. It supports a 1 million token context window and outperforms GPT-4o and GPT-4.5 across coding (54.6% SWE-bench Verified), instruction compliance (87.4% IFEval), and multimodal understanding benchmarks. 
It is tuned for precise code diffs, agent reliability, and high recall in large document contexts, making it ideal for agents, IDE tooling, and enterprise knowledge retrieval.", + "capabilities": ["function-call", "file-input", "image-recognition", "web-search"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 1000000, + "maxOutputTokens": 32768, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 8 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.5 + } + }, + "family": "gpt", + "ownedBy": "openai", + "openWeights": false + }, + { + "id": "qwen-plus", + "name": "Qwen-Plus", + "description": "Qwen-Plus, based on the Qwen2.5 foundation model, is a 131K context model with a balanced performance, speed, and cost combination.", + "capabilities": ["function-call", "reasoning"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 1000000, + "maxOutputTokens": 32768, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.12 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.2 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.08 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 0, + "max": 81920 + } + }, + "family": "qwen", + "ownedBy": "alibaba", + "openWeights": false + }, + { + "id": "glm-4-5v", + "name": "GLM-4.5V", + "description": "GLM-4.5V is a vision-language foundation model for multimodal agent applications. Built on a Mixture-of-Experts (MoE) architecture with 106B parameters and 12B activated parameters, it achieves state-of-the-art results in video understanding, image Q&A, OCR, and document parsing, with strong gains in front-end web coding, grounding, and spatial reasoning. 
It offers a hybrid inference mode: a \"thinking mode\" for deep reasoning and a \"non-thinking mode\" for fast responses. Reasoning behavior can be toggled via the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)", + "capabilities": ["function-call", "file-input", "image-recognition", "reasoning"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 64000, + "maxOutputTokens": 16384, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.29 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.86 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.11 + } + }, + "reasoning": { + "supportedEfforts": ["none"], + "interleaved": true + }, + "family": "glm", + "ownedBy": "zhipu", + "openWeights": false + }, + { + "id": "glm-4-6", + "name": "glm-4.6", + "description": "Compared with GLM-4.5, this generation brings several key improvements:\n\nLonger context window: The context window has been expanded from 128K to 200K tokens, enabling the model to handle more complex agentic tasks.\nSuperior coding performance: The model achieves higher scores on code benchmarks and demonstrates better real-world performance in applications such as Claude Code、Cline、Roo Code and Kilo Code, including improvements in generating visually polished front-end pages.\nAdvanced reasoning: GLM-4.6 shows a clear improvement in reasoning performance and supports tool use during inference, leading to stronger overall capability.\nMore capable agents: GLM-4.6 exhibits stronger performance in tool using and search-based agents, and integrates more effectively within agent frameworks.\nRefined writing: Better aligns with human preferences in style and readability, and performs more naturally in role-playing scenarios.", + "capabilities": ["function-call", "reasoning"], + "inputModalities": ["text"], + "outputModalities": 
["text"], + "contextWindow": 200000, + "maxOutputTokens": 131072, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.286 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.142 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.11 + } + }, + "reasoning": { + "supportedEfforts": ["none"], + "interleaved": true + }, + "family": "glm", + "ownedBy": "zhipu", + "openWeights": false + }, + { + "id": "gemini-2-5-flash-preview-09-2025", + "name": "gemini-2.5-flash-preview-09-2025", + "description": "This latest 2.5 Flash model comes with improvements in two key areas we heard consistent feedback on:\n\nBetter agentic tool use: We've improved how the model uses tools, leading to better performance in more complex, agentic and multi-step applications. This model shows noticeable improvements on key agentic benchmarks, including a 5% gain on SWE-Bench Verified, compared to our last release (48.9% → 54%). More efficient: With thinking on, the model is now significantly more cost-efficient—achieving higher quality outputs while using fewer tokens, reducing latency and cost (see charts above).", + "capabilities": ["function-call", "file-input", "image-recognition", "web-search", "reasoning"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 1000000, + "maxOutputTokens": 65536, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2.5 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.03 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 0, + "max": 24576 + } + }, + "family": "gemini-flash", + "ownedBy": "google", + "openWeights": false + }, + { + "id": "claude-opus-4-1-20250805", + "name": "claude-opus-4-1-20250805-thinking", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition", "web-search", 
"computer-use"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 32000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 15 + }, + "output": { + "currency": "USD", + "perMillionTokens": 75 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 1.5 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 32000 + } + }, + "family": "claude-opus", + "ownedBy": "anthropic", + "openWeights": false + }, + { + "id": "qwen3-a22b", + "name": "Qwen3-235B-A22B", + "description": "Qwen3-235B-A22B is a 235B parameter mixture-of-experts (MoE) model developed by Qwen, activating 22B parameters per forward pass. It supports seamless switching between a \"thinking\" mode for complex reasoning, math, and code tasks, and a \"non-thinking\" mode for general conversational efficiency. The model demonstrates strong reasoning ability, multilingual support (100+ languages and dialects), advanced instruction-following, and agent tool-calling capabilities. 
It natively handles a 32K token context window and extends up to 131K tokens using YaRN-based scaling.", + "capabilities": ["function-call", "reasoning"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 16384, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.29 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2.86 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.15 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 38912 + }, + "interleaved": true + }, + "family": "qwen", + "ownedBy": "alibaba", + "openWeights": false + }, + { + "id": "minimax-m1", + "name": "MiniMax-M1", + "description": "MiniMax-M1 is a large-scale, open-weight reasoning model designed for extended context and high-efficiency inference. It leverages a hybrid Mixture-of-Experts (MoE) architecture paired with a custom \"lightning attention\" mechanism, allowing it to process long sequences—up to 1 million tokens—while maintaining competitive FLOP efficiency. With 456 billion total parameters and 45.9B active per token, this variant is optimized for complex, multi-step reasoning tasks.\n\nTrained via a custom reinforcement learning pipeline (CISPO), M1 excels in long-context understanding, software engineering, agentic tool use, and mathematical reasoning. 
Benchmarks show strong performance across FullStackBench, SWE-bench, MATH, GPQA, and TAU-Bench, often outperforming other open models like DeepSeek R1 and Qwen3-235B.", + "capabilities": ["function-call", "reasoning"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 1000000, + "maxOutputTokens": 128000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.132 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.254 + } + }, + "family": "minimax", + "ownedBy": "minimax", + "openWeights": false + }, + { + "id": "deepseek-reasoner", + "name": "Deepseek-Reasoner", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 128000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.29 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.43 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.028 + } + }, + "family": "deepseek-thinking", + "ownedBy": "deepseek", + "openWeights": false + }, + { + "id": "qwen-max-latest", + "name": "Qwen-Max-Latest", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 131072, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.343 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.372 + } + }, + "family": "qwen", + "ownedBy": "alibaba", + "openWeights": false + }, + { + "id": "qwen3-coder-a35b-instruct", + "name": "qwen3-coder-480b-a35b-instruct", + "description": "The code generation model based on Qwen3 has powerful Coding Agent capabilities, achieving state-of-the-art performance compared to open-source models.The model adopts tiered pricing.", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 262144, + "maxOutputTokens": 65536, + 
"pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.86 + }, + "output": { + "currency": "USD", + "perMillionTokens": 3.43 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.82 + } + }, + "family": "qwen", + "ownedBy": "alibaba", + "openWeights": false + }, + { + "id": "qwen3-a3b", + "name": "Qwen3-30B-A3B", + "description": "Qwen3, the latest generation in the Qwen large language model series, features both dense and mixture-of-experts (MoE) architectures to excel in reasoning, multilingual support, and advanced agent tasks. Its unique ability to switch seamlessly between a thinking mode for complex reasoning and a non-thinking mode for efficient dialogue ensures versatile, high-quality performance.\n\nSignificantly outperforming prior models like QwQ and Qwen2.5, Qwen3 delivers superior mathematics, coding, commonsense reasoning, creative writing, and interactive dialogue capabilities. The Qwen3-30B-A3B variant includes 30.5 billion parameters (3.3 billion activated), 48 layers, 128 experts (8 activated per task), and supports up to 131K token contexts with YaRN, setting a new standard among open-source models.", + "capabilities": ["function-call", "reasoning"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.11 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.08 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.03 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 38912 + }, + "interleaved": true + }, + "family": "qwen", + "ownedBy": "alibaba", + "openWeights": false + }, + { + "id": "minimax-m2-1", + "name": "MiniMax-M2.1", + "description": "MiniMax-M2.1 is a lightweight, state-of-the-art large language model optimized for coding, agentic workflows, and modern application 
development. With only 10 billion activated parameters, it delivers a major jump in real-world capability while maintaining exceptional latency, scalability, and cost efficiency.\n\nCompared to its predecessor, M2.1 delivers cleaner, more concise outputs and faster perceived response times. It shows leading multilingual coding performance across major systems and application languages, achieving 49.4% on Multi-SWE-Bench and 72.5% on SWE-Bench Multilingual, and serves as a versatile agent “brain” for IDEs, coding tools, and general-purpose assistance.\n\nTo avoid degrading this model's performance, MiniMax highly recommends preserving reasoning between turns. Learn more about using reasoning_details to pass back reasoning in our [docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#preserving-reasoning-blocks).", + "capabilities": ["function-call", "reasoning"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 1000000, + "maxOutputTokens": 131072, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.2 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.03 + } + }, + "reasoning": { + "supportedEfforts": [], + "interleaved": true + }, + "family": "minimax", + "ownedBy": "minimax", + "openWeights": false + }, + { + "id": "gemini-2-5-flash-lite-preview-09-2025", + "name": "gemini-2.5-flash-lite-preview-09-2025", + "description": "Gemini 2.5 Flash-Lite is a lightweight reasoning model in the Gemini 2.5 family, optimized for ultra-low latency and cost efficiency. It offers improved throughput, faster token generation, and better performance across common benchmarks compared to earlier Flash models. By default, \"thinking\" (i.e. 
multi-pass reasoning) is disabled to prioritize speed, but developers can enable it via the [Reasoning API parameter](https://openrouter.ai/docs/use-cases/reasoning-tokens) to selectively trade off cost for intelligence. ", + "capabilities": ["function-call", "file-input", "image-recognition", "web-search", "reasoning"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 1000000, + "maxOutputTokens": 65536, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.1 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.4 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.01 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 512, + "max": 24576 + } + }, + "family": "gemini-flash-lite", + "ownedBy": "google", + "openWeights": false + }, + { + "id": "grok-4-1-fast-non-reasoning", + "name": "grok-4-1-fast-non-reasoning", + "description": "Grok 4.1 is a new conversational model with significant improvements in real-world usability, delivering exceptional performance in creative, emotional, and collaborative interactions. It is more perceptive to nuanced user intent, more engaging to converse with, and more coherent in personality, while fully preserving its core intelligence and reliability. 
Built on large-scale reinforcement learning infrastructure, the model is optimized for style, personality, helpfulness, and alignment, and leverages frontier agentic reasoning models as reward evaluators to autonomously assess and iterate on responses at scale, significantly enhancing overall interaction quality.", + "capabilities": ["function-call", "file-input", "image-recognition", "web-search"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 2000000, + "maxOutputTokens": 30000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.5 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.05 + } + }, + "family": "grok", + "ownedBy": "xai", + "openWeights": false + }, + { + "id": "doubao-seed-1-8-251215", + "name": "doubao-seed-1-8-251215", + "capabilities": ["function-call", "file-input", "image-recognition", "reasoning"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 224000, + "maxOutputTokens": 64000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.114 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.286 + } + }, + "reasoning": { + "supportedEfforts": ["minimal", "low", "medium", "high"] + }, + "ownedBy": "bytedance", + "openWeights": false + }, + { + "id": "gpt-5-pro", + "name": "gpt-5-pro", + "description": "GPT-5 Pro is OpenAI’s most advanced model, offering major improvements in reasoning, code quality, and user experience. It is optimized for complex tasks that require step-by-step reasoning, instruction following, and accuracy in high-stakes use cases. 
It supports test-time routing features and advanced prompt understanding, including user-specified intent like \"think hard about this.\" Improvements include reductions in hallucination, sycophancy, and better performance in coding, writing, and health-related tasks.", + "capabilities": ["function-call", "file-input", "image-recognition", "web-search", "reasoning"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 400000, + "maxOutputTokens": 272000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 15 + }, + "output": { + "currency": "USD", + "perMillionTokens": 120 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 3.5 + } + }, + "reasoning": { + "supportedEfforts": ["high"] + }, + "family": "gpt-pro", + "ownedBy": "openai", + "openWeights": false + }, + { + "id": "gpt-5-1-chat-latest", + "name": "gpt-5.1-chat-latest", + "description": "GPT-5.1 Chat refers to the GPT-5.1 snapshot currently used in ChatGPT and is optimized for conversational use cases. 
While GPT-5.1 is recommended for most API applications, GPT-5.1 Chat is ideal for testing the latest improvements in chat-based interactions.", + "capabilities": ["function-call", "file-input", "image-recognition", "web-search", "reasoning"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 16384, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.25 + }, + "output": { + "currency": "USD", + "perMillionTokens": 10 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.12500000000000003 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"] + }, + "family": "gpt", + "ownedBy": "openai", + "openWeights": false + }, + { + "id": "glm-4-6v", + "name": "GLM-4.6V", + "description": "GLM-4.6V is a large multimodal model designed for high-fidelity visual understanding and long-context reasoning across images, documents, and mixed media. It supports up to 128K tokens, processes complex page layouts and charts directly as visual inputs, and integrates native multimodal function calling to connect perception with downstream tool execution. 
The model also enables interleaved image-text generation and UI reconstruction workflows, including screenshot-to-HTML synthesis and iterative visual editing.", + "capabilities": ["function-call", "file-input", "image-recognition", "reasoning"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 32768, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.145 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.43 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.05 + } + }, + "reasoning": { + "supportedEfforts": ["none"], + "interleaved": true + }, + "family": "glm", + "ownedBy": "zhipu", + "openWeights": false + }, + { + "id": "claude-haiku-4-5-20251001", + "name": "claude-haiku-4-5-20251001", + "capabilities": ["function-call", "file-input", "image-recognition", "reasoning", "web-search", "computer-use"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 64000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1 + }, + "output": { + "currency": "USD", + "perMillionTokens": 5 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.09999999999999999 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 64000 + } + }, + "family": "claude-haiku", + "ownedBy": "anthropic", + "openWeights": false, + "alias": ["claude-haiku-4-5"] + }, + { + "id": "gpt-5-2-codex", + "name": "GPT-5.2 Codex", + "description": "GPT-5.2-Codex is an upgraded version of GPT-5.1-Codex optimized for software engineering and coding workflows. It is designed for both interactive development sessions and long, independent execution of complex engineering tasks. The model supports building projects from scratch, feature development, debugging, large-scale refactoring, and code review. 
Compared to GPT-5.1-Codex, 5.2-Codex is more steerable, adheres closely to developer instructions, and produces cleaner, higher-quality code outputs. Reasoning effort can be adjusted with the `reasoning.effort` parameter. Read the [docs here](https://openrouter.ai/docs/use-cases/reasoning-tokens#reasoning-effort-level)\n\nCodex integrates into developer environments including the CLI, IDE extensions, GitHub, and cloud tasks. It adapts reasoning effort dynamically—providing fast responses for small tasks while sustaining extended multi-hour runs for large projects. The model is trained to perform structured code reviews, catching critical flaws by reasoning over dependencies and validating behavior against tests. It also supports multimodal inputs such as images or screenshots for UI development and integrates tool use for search, dependency installation, and environment setup. Codex is intended specifically for agentic coding applications.", + "capabilities": [ + "reasoning", + "function-call", + "structured-output", + "file-input", + "image-recognition", + "web-search" + ], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 400000, + "maxOutputTokens": 128000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.14 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.14 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.175 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high", "max"] + }, + "family": "gpt-codex", + "ownedBy": "openai", + "openWeights": false + }, + { + "id": "glm-5", + "name": "GLM-5", + "description": "GLM-5 is Z.ai’s flagship open-source foundation model engineered for complex systems design and long-horizon agent workflows. Built for expert developers, it delivers production-grade performance on large-scale programming tasks, rivaling leading closed-source models. 
With advanced agentic planning, deep backend reasoning, and iterative self-correction, GLM-5 moves beyond code generation to full-system construction and autonomous execution.", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 204800, + "maxOutputTokens": 131072, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1 + }, + "output": { + "currency": "USD", + "perMillionTokens": 3.2 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.2 + } + }, + "reasoning": { + "supportedEfforts": ["none", "auto"] + }, + "family": "glm", + "ownedBy": "zhipu", + "openWeights": false + }, + { + "id": "glm-4-5-flash", + "name": "GLM-4.5-Flash", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 131072, + "maxOutputTokens": 98304, + "reasoning": { + "supportedEfforts": ["none"], + "interleaved": true + }, + "family": "glm-flash", + "ownedBy": "zhipu", + "openWeights": true + }, + { + "id": "glm-4-7-flash", + "name": "GLM-4.7-Flash", + "description": "As a 30B-class SOTA model, GLM-4.7-Flash offers a new option that balances performance and efficiency. 
It is further optimized for agentic coding use cases, strengthening coding capabilities, long-horizon task planning, and tool collaboration, and has achieved leading performance among open-source models of the same size on several current public benchmark leaderboards.", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 131072, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.07 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.4 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.01 + } + }, + "reasoning": { + "supportedEfforts": ["none"], + "interleaved": true + }, + "family": "glm-flash", + "ownedBy": "zhipu", + "openWeights": true + }, + { + "id": "glm-4-5-air", + "name": "GLM-4.5-Air", + "description": "GLM-4.5-Air is the lightweight variant of our latest flagship model family, also purpose-built for agent-centric applications. Like GLM-4.5, it adopts the Mixture-of-Experts (MoE) architecture but with a more compact parameter size. GLM-4.5-Air also supports hybrid inference modes, offering a \"thinking mode\" for advanced reasoning and tool use, and a \"non-thinking mode\" for real-time interaction. Users can control the reasoning behaviour with the `reasoning` `enabled` boolean. 
[Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 131072, + "maxOutputTokens": 98304, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.1 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.02 + } + }, + "reasoning": { + "supportedEfforts": ["none"], + "interleaved": true + }, + "family": "glm-air", + "ownedBy": "zhipu", + "openWeights": true + }, + { + "id": "glm-4-7-flashx", + "name": "GLM-4.7-FlashX", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 131072, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.07 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.4 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.01 + } + }, + "reasoning": { + "supportedEfforts": ["none"], + "interleaved": true + }, + "family": "glm-flash", + "ownedBy": "zhipu", + "openWeights": true + }, + { + "id": "qwen3-next:80b", + "name": "qwen3-next:80b", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 262144, + "maxOutputTokens": 32768, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 38912 + } + }, + "family": "qwen", + "ownedBy": "alibaba", + "openWeights": true + }, + { + "id": "cogito-2-1:671b", + "name": "cogito-2.1:671b", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 163840, + "maxOutputTokens": 32000, + "family": "cogito", + "ownedBy": "cogito", + "openWeights": true + }, + { + 
"id": "qwen3-vl:235b", + "name": "qwen3-vl:235b", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 262144, + "maxOutputTokens": 32768, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 38912 + } + }, + "family": "qwen", + "ownedBy": "alibaba", + "openWeights": true + }, + { + "id": "ministral-3:3b", + "name": "ministral-3:3b", + "capabilities": ["function-call", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 262144, + "maxOutputTokens": 128000, + "family": "ministral", + "ownedBy": "mistral", + "openWeights": true + }, + { + "id": "qwen3-vl:235b-instruct", + "name": "qwen3-vl:235b-instruct", + "capabilities": ["function-call", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 262144, + "maxOutputTokens": 131072, + "family": "qwen", + "ownedBy": "alibaba", + "openWeights": true + }, + { + "id": "devstral-small-2:24b", + "name": "devstral-small-2:24b", + "capabilities": ["function-call", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 262144, + "maxOutputTokens": 262144, + "family": "devstral", + "ownedBy": "mistral", + "openWeights": true + }, + { + "id": "qwen3-coder:480b", + "name": "qwen3-coder:480b", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 262144, + "maxOutputTokens": 65536, + "family": "qwen", + "ownedBy": "alibaba", + "openWeights": true + }, + { + "id": "gpt-oss:20b", + "name": "gpt-oss:20b", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 131072, + "maxOutputTokens": 
32768, + "reasoning": { + "supportedEfforts": ["low", "medium", "high"] + }, + "family": "gpt-oss", + "ownedBy": "openai", + "openWeights": true + }, + { + "id": "gemma3:27b", + "name": "gemma3:27b", + "capabilities": ["file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 131072, + "maxOutputTokens": 131072, + "family": "gemma", + "ownedBy": "google", + "openWeights": true + }, + { + "id": "rnj-1:8b", + "name": "rnj-1:8b", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 32768, + "maxOutputTokens": 4096, + "family": "rnj", + "ownedBy": "essentialai", + "openWeights": true + }, + { + "id": "qwen3-5:397b", + "name": "qwen3.5:397b", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 262144, + "maxOutputTokens": 81920, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 38912 + }, + "interleaved": true + }, + "family": "qwen", + "ownedBy": "alibaba", + "openWeights": true + }, + { + "id": "deepseek-v3-1:671b", + "name": "deepseek-v3.1:671b", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 163840, + "maxOutputTokens": 163840, + "reasoning": { + "supportedEfforts": ["none"] + }, + "family": "deepseek", + "ownedBy": "deepseek", + "openWeights": true + }, + { + "id": "ministral-3:8b", + "name": "ministral-3:8b", + "capabilities": ["function-call", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 262144, + "maxOutputTokens": 128000, + "family": "ministral", + "ownedBy": "mistral", + "openWeights": true + }, + { + "id": "kimi-k2:1t", + "name": "kimi-k2:1t", + "capabilities": 
["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 262144, + "maxOutputTokens": 262144, + "family": "kimi", + "ownedBy": "moonshot", + "openWeights": true + }, + { + "id": "devstral-2:123b", + "name": "devstral-2:123b", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 262144, + "maxOutputTokens": 262144, + "family": "devstral", + "ownedBy": "mistral", + "openWeights": true + }, + { + "id": "gpt-oss:120b", + "name": "gpt-oss:120b", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 131072, + "maxOutputTokens": 32768, + "reasoning": { + "supportedEfforts": ["low", "medium", "high"] + }, + "family": "gpt-oss", + "ownedBy": "openai", + "openWeights": true + }, + { + "id": "gemma3:4b", + "name": "gemma3:4b", + "capabilities": ["file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 131072, + "maxOutputTokens": 131072, + "family": "gemma", + "ownedBy": "google", + "openWeights": true + }, + { + "id": "nemotron-3-nano:30b", + "name": "nemotron-3-nano:30b", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 1048576, + "maxOutputTokens": 131072, + "family": "nemotron", + "ownedBy": "nvidia", + "openWeights": true + }, + { + "id": "ministral-3:14b", + "name": "ministral-3:14b", + "capabilities": ["function-call", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 262144, + "maxOutputTokens": 128000, + "family": "ministral", + "ownedBy": "mistral", + "openWeights": true + }, + { + "id": "mistral-large-3:675b", + "name": "mistral-large-3:675b", + "capabilities": ["function-call", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + 
"outputModalities": ["text"], + "contextWindow": 262144, + "maxOutputTokens": 262144, + "family": "mistral-large", + "ownedBy": "mistral", + "openWeights": true + }, + { + "id": "qwen3-coder-next", + "name": "qwen3-coder-next", + "description": "Qwen3-Coder-Next is an open-weight causal language model optimized for coding agents and local development workflows. It uses a sparse MoE design with 80B total parameters and only 3B activated per token, delivering performance comparable to models with 10 to 20x higher active compute, which makes it well suited for cost-sensitive, always-on agent deployment.\n\nThe model is trained with a strong agentic focus and performs reliably on long-horizon coding tasks, complex tool usage, and recovery from execution failures. With a native 256k context window, it integrates cleanly into real-world CLI and IDE environments and adapts well to common agent scaffolds used by modern coding tools. The model operates exclusively in non-thinking mode and does not emit blocks, simplifying integration for production coding agents.", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 262144, + "maxOutputTokens": 65536, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.5 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.035 + } + }, + "family": "qwen", + "ownedBy": "alibaba", + "openWeights": true + }, + { + "id": "gemma3:12b", + "name": "gemma3:12b", + "capabilities": ["file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 131072, + "maxOutputTokens": 131072, + "family": "gemma", + "ownedBy": "google", + "openWeights": true + }, + { + "id": "minimax-m2-5", + "name": "minimax-m2.5", + "description": "MiniMax-M2.5 is a SOTA large language model designed for real-world productivity. 
Trained in a diverse range of complex real-world digital working environments, M2.5 builds upon the coding expertise of M2.1 to extend into general office work, reaching fluency in generating and operating Word, Excel, and Powerpoint files, context switching between diverse software environments, and working across different agent and human teams. Scoring 80.2% on SWE-Bench Verified, 51.3% on Multi-SWE-Bench, and 76.3% on BrowseComp, M2.5 is also more token efficient than previous generations, having been trained to optimize its actions and output through planning.", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 204800, + "maxOutputTokens": 131072, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.2 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.03 + } + }, + "reasoning": { + "supportedEfforts": [], + "interleaved": true + }, + "family": "minimax", + "ownedBy": "minimax", + "openWeights": true + }, + { + "id": "mimo-v2-flash", + "name": "MiMo-V2-Flash", + "description": "MiMo-V2-Flash is an open-source foundation language model developed by Xiaomi. It is a Mixture-of-Experts model with 309B total parameters and 15B active parameters, adopting hybrid attention architecture. MiMo-V2-Flash supports a hybrid-thinking toggle and a 256K context window, and excels at reasoning, coding, and agent scenarios. On SWE-bench Verified and SWE-bench Multilingual, MiMo-V2-Flash ranks as the top #1 open-source model globally, delivering performance comparable to Claude Sonnet 4.5 while costing only about 3.5% as much.\n\nUsers can control the reasoning behaviour with the `reasoning` `enabled` boolean. 
[Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config).", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 256000, + "maxOutputTokens": 32000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.07 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.21 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.3 + } + }, + "reasoning": { + "supportedEfforts": ["none"], + "interleaved": true + }, + "family": "mimo", + "ownedBy": "xiaomi", + "openWeights": true + }, + { + "id": "qwen3-livetranslate-flash-realtime", + "name": "Qwen3-LiveTranslate Flash Realtime", + "capabilities": [ + "image-recognition", + "audio-recognition", + "audio-generation", + "video-recognition", + "function-call", + "reasoning" + ], + "inputModalities": ["text", "image", "audio", "video"], + "outputModalities": ["text", "audio"], + "contextWindow": 53248, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 10 + }, + "output": { + "currency": "USD", + "perMillionTokens": 10 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 38912 + } + }, + "family": "qwen", + "ownedBy": "alibaba", + "openWeights": false + }, + { + "id": "qwen3-asr-flash", + "name": "Qwen3-ASR Flash", + "capabilities": ["audio-recognition", "function-call", "reasoning"], + "inputModalities": ["audio"], + "outputModalities": ["text"], + "contextWindow": 53248, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.035 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.035 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 38912 + } + }, + "family": "qwen", + "ownedBy": "alibaba", 
+ "openWeights": false + }, + { + "id": "qwen-omni-turbo", + "name": "Qwen-Omni Turbo", + "capabilities": [ + "function-call", + "image-recognition", + "audio-recognition", + "audio-generation", + "video-recognition" + ], + "inputModalities": ["text", "image", "audio", "video"], + "outputModalities": ["text", "audio"], + "contextWindow": 32768, + "maxOutputTokens": 2048, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.07 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.27 + } + }, + "family": "qwen", + "ownedBy": "alibaba", + "openWeights": false + }, + { + "id": "qwen-vl-max", + "name": "Qwen-VL Max", + "description": "Qwen VL Max is a visual understanding model with 7500 tokens context length. It excels in delivering optimal performance for a broader spectrum of complex tasks.\n", + "capabilities": ["function-call", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 131072, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.8 + }, + "output": { + "currency": "USD", + "perMillionTokens": 3.2 + } + }, + "family": "qwen", + "ownedBy": "alibaba", + "openWeights": false + }, + { + "id": "qwen3-next-a3b-instruct", + "name": "Qwen3-Next 80B-A3B Instruct", + "description": "Qwen3-Next-80B-A3B-Instruct is an instruction-tuned chat model in the Qwen3-Next series optimized for fast, stable responses without “thinking” traces. It targets complex tasks across reasoning, code generation, knowledge QA, and multilingual use, while remaining robust on alignment and formatting. 
Compared with prior Qwen3 instruct variants, it focuses on higher throughput and stability on ultra-long inputs and multi-turn dialogues, making it well-suited for RAG, tool use, and agentic workflows that require consistent final answers rather than visible chain-of-thought.\n\nThe model employs scaling-efficient training and decoding to improve parameter efficiency and inference speed, and has been validated on a broad set of public benchmarks where it reaches or approaches larger Qwen3 systems in several categories while outperforming earlier mid-sized baselines. It is best used as a general assistant, code helper, and long-context task solver in production settings where deterministic, instruction-following outputs are preferred.", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 131072, + "maxOutputTokens": 32768, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.05 + } + }, + "family": "qwen", + "ownedBy": "alibaba", + "openWeights": true + }, + { + "id": "qwen-turbo", + "name": "Qwen Turbo", + "description": "Qwen-Turbo, based on Qwen2.5, is a 1M context model that provides fast speed and low cost, suitable for simple tasks.", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 1000000, + "maxOutputTokens": 16384, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.05 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.2 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.01 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 0, + "max": 38912 + } + }, + "family": "qwen", + "ownedBy": "alibaba", + "openWeights": false + }, + { + "id": 
"qwen3-vl-a22b", + "name": "Qwen3-VL 235B-A22B", + "description": "The Qwen3 series open-source models include hybrid models, thinking models, and non-thinking models, with both reasoning capabilities and general abilities reaching industry SOTA levels at the same scale.", + "capabilities": ["reasoning", "function-call", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 131072, + "maxOutputTokens": 32768, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.7 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2.8 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 38912 + } + }, + "family": "qwen", + "ownedBy": "alibaba", + "openWeights": true + }, + { + "id": "qwen3-coder-flash", + "name": "Qwen3 Coder Flash", + "description": "Qwen3 Coder Flash is Alibaba's fast and cost efficient version of their proprietary Qwen3 Coder Plus. 
It is a powerful coding agent model specializing in autonomous programming via tool calling and environment interaction, combining coding proficiency with versatile general-purpose abilities.", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 1000000, + "maxOutputTokens": 65536, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.5 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.06 + } + }, + "family": "qwen", + "ownedBy": "alibaba", + "openWeights": false + }, + { + "id": "qwen3-vl-a3b", + "name": "Qwen3-VL 30B-A3B", + "description": "The Qwen3-VL series’ second-largest MoE model Thinking version offers fast response speed, stronger multimodal understanding and reasoning, visual agent capabilities, and ultra-long context support for long videos and long documents; it features comprehensive upgrades in image/video understanding, spatial perception, and universal recognition abilities, making it capable of handling complex real-world tasks.", + "capabilities": ["reasoning", "function-call", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 131072, + "maxOutputTokens": 32768, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.8 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 38912 + } + }, + "family": "qwen", + "ownedBy": "alibaba", + "openWeights": true + }, + { + "id": "qwen3", + "name": "Qwen3 14B", + "description": "Qwen3-4B is a 4 billion parameter dense language model from the Qwen3 series, designed to support both general-purpose and reasoning-intensive tasks. 
It introduces a dual-mode architecture—thinking and non-thinking—allowing dynamic switching between high-precision logical reasoning and efficient dialogue generation. This makes it well-suited for multi-turn chat, instruction following, and complex agent workflows.", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 131072, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.35 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.4 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.01 + } + }, + "family": "qwen", + "ownedBy": "alibaba", + "openWeights": true + }, + { + "id": "qvq-max", + "name": "QVQ Max", + "capabilities": ["reasoning", "function-call", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 131072, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 4.8 + } + }, + "family": "qvq", + "ownedBy": "alibaba", + "openWeights": false + }, + { + "id": "qwen3-5-a17b", + "name": "Qwen3.5 397B-A17B", + "description": "The Qwen3.5 series 397B-A17B native vision-language model is built on a hybrid architecture that integrates a linear attention mechanism with a sparse mixture-of-experts model, achieving higher inference efficiency. It delivers state-of-the-art performance comparable to leading-edge models across a wide range of tasks, including language understanding, logical reasoning, code generation, agent-based tasks, image understanding, video understanding, and graphical user interface (GUI) interactions. 
With its robust code-generation and agent capabilities, the model exhibits strong generalization across diverse agent.", + "capabilities": ["reasoning", "function-call", "image-recognition", "video-recognition"], + "inputModalities": ["text", "image", "video"], + "outputModalities": ["text"], + "contextWindow": 262144, + "maxOutputTokens": 65536, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.6 + }, + "output": { + "currency": "USD", + "perMillionTokens": 3.6 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.15 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 38912 + }, + "interleaved": true + }, + "family": "qwen", + "ownedBy": "alibaba", + "openWeights": true + }, + { + "id": "qwen-plus-character-ja", + "name": "Qwen Plus Character (Japanese)", + "capabilities": ["function-call", "reasoning"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 8192, + "maxOutputTokens": 512, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.4 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 0, + "max": 81920 + } + }, + "family": "qwen", + "ownedBy": "alibaba", + "openWeights": false + }, + { + "id": "qwen2-5-instruct", + "name": "Qwen2.5 14B Instruct", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 131072, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.35 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.4 + } + }, + "family": "qwen", + "ownedBy": "alibaba", + "openWeights": true + }, + { + "id": "qwq-plus", + "name": "QwQ Plus", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": 
["text"], + "contextWindow": 131072, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.8 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2.4 + } + }, + "family": "qwen", + "ownedBy": "alibaba", + "openWeights": false + }, + { + "id": "qwen3-coder-a3b-instruct", + "name": "Qwen3-Coder 30B-A3B Instruct", + "description": "Qwen3-Coder-30B-A3B-Instruct is a 30.5B parameter Mixture-of-Experts (MoE) model with 128 experts (8 active per forward pass), designed for advanced code generation, repository-scale understanding, and agentic tool use. Built on the Qwen3 architecture, it supports a native context length of 256K tokens (extendable to 1M with Yarn) and performs strongly in tasks involving function calls, browser use, and structured code completion.\n\nThis model is optimized for instruction-following without “thinking mode”, and integrates well with OpenAI-compatible tool-use formats. ", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 262144, + "maxOutputTokens": 65536, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.45 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2.25 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.01 + } + }, + "family": "qwen", + "ownedBy": "alibaba", + "openWeights": true + }, + { + "id": "qwen-vl-ocr", + "name": "Qwen-VL OCR", + "capabilities": ["image-recognition", "function-call", "file-input"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 34096, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.72 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.72 + } + }, + "family": "qwen", + "ownedBy": "alibaba", + "openWeights": false + }, + { + "id": "qwen3-omni-flash", + "name": "Qwen3-Omni Flash", + "capabilities": [ + "reasoning", + "function-call", 
+ "image-recognition", + "audio-recognition", + "audio-generation", + "video-recognition" + ], + "inputModalities": ["text", "image", "audio", "video"], + "outputModalities": ["text", "audio"], + "contextWindow": 65536, + "maxOutputTokens": 16384, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.43 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.66 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 38912 + } + }, + "family": "qwen", + "ownedBy": "alibaba", + "openWeights": false + }, + { + "id": "qwen3-omni-flash-realtime", + "name": "Qwen3-Omni Flash Realtime", + "capabilities": [ + "function-call", + "image-recognition", + "audio-recognition", + "audio-generation", + "video-recognition", + "reasoning" + ], + "inputModalities": ["text", "image", "audio", "video"], + "outputModalities": ["text", "audio"], + "contextWindow": 65536, + "maxOutputTokens": 16384, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.52 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.99 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 38912 + } + }, + "family": "qwen", + "ownedBy": "alibaba", + "openWeights": false + }, + { + "id": "qwen2-5-vl-instruct", + "name": "Qwen2.5-VL 72B Instruct", + "description": "Qwen2.5-VL-32B is a multimodal vision-language model fine-tuned through reinforcement learning for enhanced mathematical reasoning, structured outputs, and visual problem-solving capabilities. It excels at visual analysis tasks, including object recognition, textual interpretation within images, and precise event localization in extended videos. 
Qwen2.5-VL-32B demonstrates state-of-the-art performance across multimodal benchmarks such as MMMU, MathVista, and VideoMME, while maintaining strong reasoning and clarity in text-based tasks like MMLU, mathematical problem-solving, and code generation.", + "capabilities": ["function-call", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 131072, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2.8 + }, + "output": { + "currency": "USD", + "perMillionTokens": 8.4 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.025 + } + }, + "family": "qwen", + "ownedBy": "alibaba", + "openWeights": true + }, + { + "id": "qwen3-vl-plus", + "name": "Qwen3-VL Plus", + "description": "The Qwen3 series visual understanding model achieves an effective fusion of thinking and non-thinking modes. Its visual agent capabilities reach world-class levels on public test sets such as OS World. 
This version features comprehensive upgrades in visual coding, spatial perception, and multimodal reasoning; visual perception and recognition abilities are greatly enhanced, supporting ultra-long video understanding.", + "capabilities": ["reasoning", "function-call", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 262144, + "maxOutputTokens": 32768, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.6 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.0274 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 38912 + } + }, + "family": "qwen", + "ownedBy": "alibaba", + "openWeights": false + }, + { + "id": "qwen2-5-omni", + "name": "Qwen2.5-Omni 7B", + "capabilities": [ + "function-call", + "image-recognition", + "audio-recognition", + "audio-generation", + "video-recognition" + ], + "inputModalities": ["text", "image", "audio", "video"], + "outputModalities": ["text", "audio"], + "contextWindow": 32768, + "maxOutputTokens": 2048, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.1 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.4 + } + }, + "family": "qwen", + "ownedBy": "alibaba", + "openWeights": true + }, + { + "id": "qwen-max", + "name": "Qwen Max", + "description": "Qwen-Max, based on Qwen2.5, provides the best inference performance among [Qwen models](/qwen), especially for complex multi-step tasks. It's a large-scale MoE model that has been pretrained on over 20 trillion tokens and further post-trained with curated Supervised Fine-Tuning (SFT) and Reinforcement Learning from Human Feedback (RLHF) methodologies. 
The parameter count is unknown.", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 32768, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.6 + }, + "output": { + "currency": "USD", + "perMillionTokens": 6.4 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.32 + } + }, + "family": "qwen", + "ownedBy": "alibaba", + "openWeights": false + }, + { + "id": "qwen-omni-turbo-realtime", + "name": "Qwen-Omni Turbo Realtime", + "capabilities": ["function-call", "image-recognition", "audio-recognition", "audio-generation"], + "inputModalities": ["text", "image", "audio"], + "outputModalities": ["text", "audio"], + "contextWindow": 32768, + "maxOutputTokens": 2048, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.27 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.07 + } + }, + "family": "qwen", + "ownedBy": "alibaba", + "openWeights": false + }, + { + "id": "qwen-mt-turbo", + "name": "Qwen-MT Turbo", + "description": "Based on the comprehensive upgrade of Qwen3, this flagship translation large model supports bidirectional translation across 92 languages. 
It offers fully enhanced model performance and translation quality, along with more stable terminology customization, format fidelity, and domain-prompting capabilities, making translations more accurate and natural.", + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 16384, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.16 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.49 + } + }, + "family": "qwen", + "ownedBy": "alibaba", + "openWeights": false + }, + { + "id": "qwen-mt-plus", + "name": "Qwen-MT Plus", + "description": "Based on the comprehensive upgrade of Qwen3, this flagship translation large model supports bidirectional translation across 92 languages. It offers fully enhanced model performance and translation quality, along with more stable terminology customization, format fidelity, and domain-prompting capabilities, making translations more accurate and natural.", + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 16384, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2.46 + }, + "output": { + "currency": "USD", + "perMillionTokens": 7.37 + } + }, + "family": "qwen", + "ownedBy": "alibaba", + "openWeights": false + }, + { + "id": "qwen3-max", + "name": "Qwen3 Max", + "description": "Qwen3-Max is an updated release built on the Qwen3 series, offering major improvements in reasoning, instruction following, multilingual support, and long-tail knowledge coverage compared to the January 2025 version. It delivers higher accuracy in math, coding, logic, and science tasks, follows complex instructions in Chinese and English more reliably, reduces hallucinations, and produces higher-quality responses for open-ended Q&A, writing, and conversation. 
The model supports over 100 languages with stronger translation and commonsense reasoning, and is optimized for retrieval-augmented generation (RAG) and tool calling, though it does not include a dedicated “thinking” mode.", + "capabilities": ["function-call", "reasoning"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 262144, + "maxOutputTokens": 65536, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 6 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.24 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 0, + "max": 81920 + } + }, + "family": "qwen", + "ownedBy": "alibaba", + "openWeights": false + }, + { + "id": "qwen3-5-plus", + "name": "Qwen3.5 Plus", + "description": "The Qwen 3.5 native vision-language Plus model is built on a hybrid architecture that integrates linear attention mechanisms with sparse mixture-of-experts models, achieving higher inference efficiency. In multiple task evaluations, the 3.5 series has demonstrated outstanding performance comparable to current leading frontier models, with leapfrog improvements over the 3 series in both pure-text and multimodal capabilities. 
This model version is functionally equivalent to the snapshot model qwen3.5-plus-2026-02-15.", + "capabilities": ["reasoning", "function-call", "image-recognition", "video-recognition"], + "inputModalities": ["text", "image", "video"], + "outputModalities": ["text"], + "contextWindow": 1000000, + "maxOutputTokens": 65536, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.4 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2.4 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.04 + }, + "cacheWrite": { + "currency": "USD", + "perMillionTokens": 0.17125 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 38912 + } + }, + "family": "qwen", + "ownedBy": "alibaba", + "openWeights": false + }, + { + "id": "qwen3-coder-plus", + "name": "Qwen3 Coder Plus", + "description": "Qwen3 Coder Plus is Alibaba's proprietary version of the Open Source Qwen3 Coder 480B A35B. 
It is a powerful coding agent model specializing in autonomous programming via tool calling and environment interaction, combining coding proficiency with versatile general-purpose abilities.", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 1048576, + "maxOutputTokens": 65536, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1 + }, + "output": { + "currency": "USD", + "perMillionTokens": 5 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.1 + } + }, + "family": "qwen", + "ownedBy": "alibaba", + "openWeights": true + }, + { + "id": "qwen3-next-a3b", + "name": "Qwen3-Next 80B-A3B (Thinking)", + "description": "Qwen3-Next-80B-A3B-Thinking is a reasoning-first chat model in the Qwen3-Next line that excels by outputting structured 'thinking' traces (Chain-of-Thought) by default.\n\nDesigned for hard, multi-step problems, it is ideal for tasks like math proofs, code synthesis, logic puzzles, and agentic planning. Compared to other Qwen3 variants, it offers greater stability during long reasoning chains and is tuned to follow complex instructions without getting repetitive or off-task.\n\nThis model is perfectly suited for agent frameworks, tool use (function calling), and benchmarks where a step-by-step breakdown is required. 
It leverages throughput-oriented techniques for fast generation of detailed, procedural outputs.", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 131072, + "maxOutputTokens": 32768, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 6 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.015 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 38912 + }, + "interleaved": true + }, + "family": "qwen", + "ownedBy": "alibaba", + "openWeights": true + }, + { + "id": "qwen-vl-plus", + "name": "Qwen-VL Plus", + "description": "Qwen's Enhanced Large Visual Language Model. Significantly upgraded for detailed recognition capabilities and text recognition abilities, supporting ultra-high pixel resolutions up to millions of pixels and extreme aspect ratios for image input. 
It delivers significant performance across a broad range of visual tasks.\n", + "capabilities": ["function-call", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 131072, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.21 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.63 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.042 + } + }, + "family": "qwen", + "ownedBy": "alibaba", + "openWeights": false + }, + { + "id": "grok-3-fast", + "name": "Grok 3 Fast", + "capabilities": ["function-call", "web-search"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 131072, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 25 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 1.25 + } + }, + "family": "grok", + "ownedBy": "xai", + "openWeights": false + }, + { + "id": "grok-4", + "name": "Grok 4", + "description": "Grok 4 is xAI's latest reasoning model with a 256k context window. It supports parallel tool calling, structured outputs, and both image and text inputs. Note that reasoning is not exposed, reasoning cannot be disabled, and the reasoning effort cannot be specified. Pricing increases once the total tokens in a given request is greater than 128k tokens. 
See more details on the [xAI docs](https://docs.x.ai/docs/models/grok-4-0709)", + "capabilities": ["reasoning", "function-call", "image-recognition", "web-search"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 256000, + "maxOutputTokens": 64000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 15 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.75 + } + }, + "family": "grok", + "ownedBy": "xai", + "openWeights": false + }, + { + "id": "grok-2-vision", + "name": "Grok 2 Vision", + "capabilities": ["function-call", "file-input", "image-recognition", "web-search"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 8192, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 10 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 2 + } + }, + "family": "grok", + "ownedBy": "xai", + "openWeights": false + }, + { + "id": "grok-code-fast-1", + "name": "Grok Code Fast 1", + "description": "Grok Code Fast 1 is a speedy and economical reasoning model that excels at agentic coding. 
With reasoning traces visible in the response, developers can steer Grok Code for high-quality work flows.", + "capabilities": ["reasoning", "function-call", "web-search"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 256000, + "maxOutputTokens": 10000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.5 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.02 + } + }, + "family": "grok", + "ownedBy": "xai", + "openWeights": false + }, + { + "id": "grok-2", + "name": "Grok 2", + "capabilities": ["function-call", "web-search"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 131072, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 10 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 2 + } + }, + "family": "grok", + "ownedBy": "xai", + "openWeights": false + }, + { + "id": "grok-3-mini-fast-latest", + "name": "Grok 3 Mini Fast Latest", + "capabilities": ["reasoning", "function-call", "web-search"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 131072, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.6 + }, + "output": { + "currency": "USD", + "perMillionTokens": 4 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.15 + } + }, + "reasoning": { + "supportedEfforts": ["low", "high"] + }, + "family": "grok", + "ownedBy": "xai", + "openWeights": false + }, + { + "id": "grok-2-vision-1212", + "name": "Grok 2 Vision (1212)", + "description": "grok-2-vision-1212 is the latest vision model in the Grok family, delivering outstanding performance on vision-based tasks and achieving state-of-the-art results in visual mathematical reasoning and document-based question answering. 
It supports a wide range of visual inputs, including documents, charts, screenshots, and real-world images, making it well-suited for advanced visual understanding and reasoning use cases.\n\nThe price of calling this model in AIhubMix is ​​10% lower than on the official website.", + "capabilities": ["function-call", "file-input", "image-recognition", "web-search"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 8192, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 10 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 2 + } + }, + "family": "grok", + "ownedBy": "xai", + "openWeights": false + }, + { + "id": "grok-3", + "name": "Grok 3", + "description": "Grok 3 is the latest model from xAI. It's their flagship model that excels at enterprise use cases like data extraction, coding, and text summarization. Possesses deep domain knowledge in finance, healthcare, law, and science.\n\n", + "capabilities": ["function-call", "web-search"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 131072, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 15 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.75 + } + }, + "family": "grok", + "ownedBy": "xai", + "openWeights": false + }, + { + "id": "grok-4-fast", + "name": "Grok 4 Fast", + "description": "Grok 4 Fast is xAI's latest multimodal model with SOTA cost-efficiency and a 2M token context window. It comes in two flavors: non-reasoning and reasoning. Read more about the model on xAI's [news post](http://x.ai/news/grok-4-fast).\n\nReasoning can be enabled/disabled using the `reasoning` `enabled` parameter in the API. 
[Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#controlling-reasoning-tokens)", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition", "web-search"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 2000000, + "maxOutputTokens": 30000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.5 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.05 + } + }, + "reasoning": { + "supportedEfforts": [] + }, + "family": "grok", + "ownedBy": "xai", + "openWeights": false + }, + { + "id": "grok-2-latest", + "name": "Grok 2 Latest", + "capabilities": ["function-call", "web-search"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 131072, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 10 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 2 + } + }, + "family": "grok", + "ownedBy": "xai", + "openWeights": false + }, + { + "id": "grok-4-1-fast", + "name": "Grok 4.1 Fast", + "description": "Grok 4.1 Fast is xAI's best agentic tool calling model that shines in real-world use cases like customer support and deep research. 2M context window.\n\nReasoning can be enabled/disabled using the `reasoning` `enabled` parameter in the API. 
[Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#controlling-reasoning-tokens)", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition", "web-search"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 2000000, + "maxOutputTokens": 30000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.5 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.05 + } + }, + "reasoning": { + "supportedEfforts": [] + }, + "family": "grok", + "ownedBy": "xai", + "openWeights": false + }, + { + "id": "grok-2-1212", + "name": "Grok 2 (1212)", + "capabilities": ["function-call", "web-search"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 131072, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 10 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 2 + } + }, + "family": "grok", + "ownedBy": "xai", + "openWeights": false + }, + { + "id": "grok-3-fast-latest", + "name": "Grok 3 Fast Latest", + "capabilities": ["function-call", "web-search"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 131072, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 25 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 1.25 + } + }, + "family": "grok", + "ownedBy": "xai", + "openWeights": false + }, + { + "id": "grok-3-latest", + "name": "Grok 3 Latest", + "capabilities": ["function-call", "web-search"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 131072, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 3 + }, + 
"output": { + "currency": "USD", + "perMillionTokens": 15 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.75 + } + }, + "family": "grok", + "ownedBy": "xai", + "openWeights": false + }, + { + "id": "grok-2-vision-latest", + "name": "Grok 2 Vision Latest", + "capabilities": ["function-call", "file-input", "image-recognition", "web-search"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 8192, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 10 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 2 + } + }, + "family": "grok", + "ownedBy": "xai", + "openWeights": false + }, + { + "id": "grok-vision-beta", + "name": "Grok Vision Beta", + "capabilities": ["function-call", "file-input", "image-recognition", "web-search"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 8192, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 15 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 5 + } + }, + "family": "grok-vision", + "ownedBy": "xai", + "openWeights": false + }, + { + "id": "grok-3-mini", + "name": "Grok 3 Mini", + "description": "A lightweight model that thinks before responding. Fast, smart, and great for logic-based tasks that do not require deep domain knowledge. 
The raw thinking traces are accessible.", + "capabilities": ["reasoning", "function-call", "web-search"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 131072, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.5 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.075 + } + }, + "reasoning": { + "supportedEfforts": ["low", "high"] + }, + "family": "grok", + "ownedBy": "xai", + "openWeights": false + }, + { + "id": "grok-beta", + "name": "Grok Beta", + "capabilities": ["function-call", "web-search"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 131072, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 15 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 5 + } + }, + "family": "grok-beta", + "ownedBy": "xai", + "openWeights": false + }, + { + "id": "grok-3-mini-latest", + "name": "Grok 3 Mini Latest", + "capabilities": ["reasoning", "function-call", "web-search"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 131072, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.5 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.075 + } + }, + "reasoning": { + "supportedEfforts": ["low", "high"] + }, + "family": "grok", + "ownedBy": "xai", + "openWeights": false + }, + { + "id": "grok-3-mini-fast", + "name": "Grok 3 Mini Fast", + "capabilities": ["reasoning", "function-call", "web-search"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 131072, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.6 + }, + 
"output": { + "currency": "USD", + "perMillionTokens": 4 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.15 + } + }, + "reasoning": { + "supportedEfforts": ["low", "high"] + }, + "family": "grok", + "ownedBy": "xai", + "openWeights": false + }, + { + "id": "deepseek-r1-distill-qwen", + "name": "DeepSeek R1 Distill Qwen 32B", + "description": "DeepSeek R1 Distill Qwen 32B is a distilled large language model based on [Qwen 2.5 32B](https://huggingface.co/Qwen/Qwen2.5-32B), using outputs from [DeepSeek R1](/deepseek/deepseek-r1). It outperforms OpenAI's o1-mini across various benchmarks, achieving new state-of-the-art results for dense models.\\n\\nOther benchmark results include:\\n\\n- AIME 2024 pass@1: 72.6\\n- MATH-500 pass@1: 94.3\\n- CodeForces Rating: 1691\\n\\nThe model leverages fine-tuning from DeepSeek R1's outputs, enabling competitive performance comparable to larger frontier models.", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 121808, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.2 + } + }, + "family": "qwen", + "ownedBy": "deepseek", + "openWeights": true + }, + { + "id": "qwen2-5-coder-instruct", + "name": "Qwen2.5 Coder 32B Instruct", + "description": "Qwen2.5-Coder-7B-Instruct is a 7B parameter instruction-tuned language model optimized for code-related tasks such as code generation, reasoning, and bug fixing. Based on the Qwen2.5 architecture, it incorporates enhancements like RoPE, SwiGLU, RMSNorm, and GQA attention with support for up to 128K tokens using YaRN-based extrapolation. 
It is trained on a large corpus of source code, synthetic data, and text-code grounding, providing robust performance across programming languages and agentic coding workflows.\n\nThis model is part of the Qwen2.5-Coder family and offers strong compatibility with tools like vLLM for efficient deployment. Released under the Apache 2.0 license.", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 12952, + "maxOutputTokens": 2048, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.2 + } + }, + "family": "qwen", + "ownedBy": "alibaba", + "openWeights": true + }, + { + "id": "kimi-k2-instruct", + "name": "Kimi K2 Instruct", + "description": "For Claude code only", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 58904, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.2 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.05 + } + }, + "family": "kimi", + "ownedBy": "moonshot", + "openWeights": true + }, + { + "id": "deepseek-r1-distill-llama", + "name": "DeepSeek R1 Distill Llama 70B", + "description": "DeepSeek R1 Distill Llama 70B is a distilled large language model based on [Llama-3.3-70B-Instruct](/meta-llama/llama-3.3-70b-instruct), using outputs from [DeepSeek R1](/deepseek/deepseek-r1). 
The model combines advanced distillation techniques to achieve high performance across multiple benchmarks, including:\n\n- AIME 2024 pass@1: 70.0\n- MATH-500 pass@1: 94.5\n- CodeForces Rating: 1633\n\nThe model leverages fine-tuning from DeepSeek R1's outputs, enabling competitive performance comparable to larger frontier models.", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 121808, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.2 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.015 + } + }, + "family": "deepseek-thinking", + "ownedBy": "deepseek", + "openWeights": true + }, + { + "id": "kimi-k2-instruct-0905", + "name": "Kimi K2 0905", + "description": "For Claude code only", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 262144, + "maxOutputTokens": 262144, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1 + }, + "output": { + "currency": "USD", + "perMillionTokens": 3 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.195 + } + }, + "family": "kimi", + "ownedBy": "moonshot", + "openWeights": true + }, + { + "id": "nemotron-nano-v2", + "name": "nvidia-nemotron-nano-9b-v2", + "description": "NVIDIA-Nemotron-Nano-9B-v2 is a large language model (LLM) trained from scratch by NVIDIA, and designed as a unified model for both reasoning and non-reasoning tasks. It responds to user queries and tasks by first generating a reasoning trace and then concluding with a final response. \n\nThe model's reasoning capabilities can be controlled via a system prompt. 
If the user prefers the model to provide its final answer without intermediate reasoning traces, it can be configured to do so.", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 131072, + "maxOutputTokens": 131072, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.04 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.16 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.007 + } + }, + "family": "nemotron", + "ownedBy": "nvidia", + "openWeights": true + }, + { + "id": "cosmos-nemotron", + "name": "Cosmos Nemotron 34B", + "capabilities": ["reasoning", "image-recognition", "video-recognition"], + "inputModalities": ["text", "image", "video"], + "outputModalities": ["text"], + "contextWindow": 131072, + "maxOutputTokens": 8192, + "family": "nemotron", + "ownedBy": "nvidia", + "openWeights": false + }, + { + "id": "llama-embed-nemotron", + "name": "Llama Embed Nemotron 8B", + "capabilities": ["embedding"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 32768, + "maxOutputTokens": 2048, + "family": "llama", + "ownedBy": "meta", + "openWeights": false + }, + { + "id": "nemotron-3-nano-a3b", + "name": "nemotron-3-nano-30b-a3b", + "description": "NVIDIA Nemotron 3 Nano 30B A3B is a small language MoE model with highest compute efficiency and accuracy for developers to build specialized agentic AI systems.\n\nThe model is fully open with open-weights, datasets and recipes so developers can easily\ncustomize, optimize, and deploy the model on their infrastructure for maximum privacy and\nsecurity.\n\nNote: For the free endpoint, all prompts and output are logged to improve the provider's model and its product and services. Please do not upload any personal, confidential, or otherwise sensitive information. This is a trial use only. 
Do not use for production or business-critical systems.", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 131072, + "maxOutputTokens": 131072, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.06 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.24 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.006 + } + }, + "family": "nemotron", + "ownedBy": "nvidia", + "openWeights": true + }, + { + "id": "parakeet-tdt-v2", + "name": "Parakeet TDT 0.6B v2", + "capabilities": ["audio-recognition"], + "inputModalities": ["audio"], + "outputModalities": ["text"], + "maxOutputTokens": 4096, + "family": "parakeet", + "ownedBy": "nvidia", + "openWeights": false + }, + { + "id": "nemoretriever-ocr-v1", + "name": "NeMo Retriever OCR v1", + "capabilities": ["image-recognition", "file-input"], + "inputModalities": ["image"], + "outputModalities": ["text"], + "maxOutputTokens": 4096, + "family": "nemoretriever", + "ownedBy": "nvidia", + "openWeights": false + }, + { + "id": "llama-3-3-nemotron-super-v1", + "name": "Llama 3.3 Nemotron Super 49b V1", + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 4096, + "ownedBy": "meta", + "openWeights": false + }, + { + "id": "llama-3-1-nemotron-instruct", + "name": "Llama 3.1 Nemotron 51b Instruct", + "description": "NVIDIA's Llama 3.1 Nemotron 70B is a language model designed for generating precise and useful responses. Leveraging [Llama 3.1 70B](/models/meta-llama/llama-3.1-70b-instruct) architecture and Reinforcement Learning from Human Feedback (RLHF), it excels in automatic alignment benchmarks. 
This model is tailored for applications requiring high accuracy in helpfulness and response generation, suitable for diverse user queries across multiple domains.\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://www.llama.com/llama3/use-policy/).", + "capabilities": ["function-call", "structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.2 + } + }, + "ownedBy": "meta", + "openWeights": false + }, + { + "id": "llama3-chatqa-1-5", + "name": "Llama3 Chatqa 1.5 70b", + "capabilities": ["function-call", "structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 4096, + "ownedBy": "meta", + "openWeights": false + }, + { + "id": "llama-3-1-nemotron-ultra-v1", + "name": "Llama-3.1-Nemotron-Ultra-253B-v1", + "description": "Llama-3.1-Nemotron-Ultra-253B-v1 is a large language model (LLM) optimized for advanced reasoning, human-interactive chat, retrieval-augmented generation (RAG), and tool-calling tasks. Derived from Meta’s Llama-3.1-405B-Instruct, it has been significantly customized using Neural Architecture Search (NAS), resulting in enhanced efficiency, reduced memory usage, and improved inference latency. The model supports a context length of up to 128K tokens and can operate efficiently on an 8x NVIDIA H100 node.\n\nNote: you must include `detailed thinking on` in the system prompt to enable reasoning. 
Please see [Usage Recommendations](https://huggingface.co/nvidia/Llama-3_1-Nemotron-Ultra-253B-v1#quick-start-and-usage-recommendations) for more.", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 131072, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.6 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.8 + } + }, + "family": "llama", + "ownedBy": "meta", + "openWeights": false + }, + { + "id": "nemotron-4-instruct", + "name": "Nemotron 4 340b Instruct", + "capabilities": ["function-call", "structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 4096, + "ownedBy": "nvidia", + "openWeights": false + }, + { + "id": "llama-3-3-nemotron-super-v1-5", + "name": "Llama 3.3 Nemotron Super 49b V1.5", + "description": "Llama-3.3-Nemotron-Super-49B-v1.5 is a 49B-parameter, English-centric reasoning/chat model derived from Meta’s Llama-3.3-70B-Instruct with a 128K context. It’s post-trained for agentic workflows (RAG, tool calling) via SFT across math, code, science, and multi-turn chat, followed by multiple RL stages; Reward-aware Preference Optimization (RPO) for alignment, RL with Verifiable Rewards (RLVR) for step-wise reasoning, and iterative DPO to refine tool-use behavior. A distillation-driven Neural Architecture Search (“Puzzle”) replaces some attention blocks and varies FFN widths to shrink memory footprint and improve throughput, enabling single-GPU (H100/H200) deployment while preserving instruction following and CoT quality.\n\nIn internal evaluations (NeMo-Skills, up to 16 runs, temp = 0.6, top_p = 0.95), the model reports strong reasoning/coding results, e.g., MATH500 pass@1 = 97.4, AIME-2024 = 87.5, AIME-2025 = 82.71, GPQA = 71.97, LiveCodeBench (24.10–25.02) = 73.58, and MMLU-Pro (CoT) = 79.53. 
The model targets practical inference efficiency (high tokens/s, reduced VRAM) with Transformers/vLLM support and explicit “reasoning on/off” modes (chat-first defaults, greedy recommended when disabled). Suitable for building agents, assistants, and long-context retrieval systems where balanced accuracy-to-cost and reliable tool use matter.\n", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.1 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.4 + } + }, + "ownedBy": "meta", + "openWeights": false + }, + { + "id": "gemma-3n-e2b-it", + "name": "Gemma 3n E2b It", + "description": "Gemma 3n E2B IT is a multimodal, instruction-tuned model developed by Google DeepMind, designed to operate efficiently at an effective parameter size of 2B while leveraging a 6B architecture. Based on the MatFormer architecture, it supports nested submodels and modular composition via the Mix-and-Match framework. Gemma 3n models are optimized for low-resource deployment, offering 32K context length and strong multilingual and reasoning performance across common benchmarks. 
This variant is trained on a diverse corpus including code, math, web, and multimodal data.", + "capabilities": ["function-call", "structured-output", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 4096, + "family": "gemma", + "ownedBy": "google", + "openWeights": true + }, + { + "id": "codegemma-1-1", + "name": "Codegemma 1.1 7b", + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 4096, + "ownedBy": "nvidia", + "openWeights": true + }, + { + "id": "gemma-3n-e4b-it", + "name": "Gemma 3n E4b It", + "description": "Gemma 3n E4B-it is optimized for efficient execution on mobile and low-resource devices, such as phones, laptops, and tablets. It supports multimodal inputs—including text, visual data, and audio—enabling diverse tasks such as text generation, speech recognition, translation, and image analysis. Leveraging innovations like Per-Layer Embedding (PLE) caching and the MatFormer architecture, Gemma 3n dynamically manages memory usage and computational load by selectively activating model parameters, significantly reducing runtime resource requirements.\n\nThis model supports a wide linguistic range (trained in over 140 languages) and features a flexible 32K token context window. Gemma 3n can selectively load parameters, optimizing memory and computational efficiency based on the task or device capabilities, making it well-suited for privacy-focused, offline-capable applications and on-device AI solutions. 
[Read more in the blog post](https://developers.googleblog.com/en/introducing-gemma-3n/)", + "capabilities": ["function-call", "structured-output", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.02 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.04 + } + }, + "family": "gemma", + "ownedBy": "google", + "openWeights": true + }, + { + "id": "gemma-2-it", + "name": "Gemma 2 2b It", + "description": "Gemma 2 27B by Google is an open model built from the same research and technology used to create the [Gemini models](/models?q=gemini).\n\nGemma models are well-suited for a variety of text generation tasks, including question answering, summarization, and reasoning.\n\nSee the [launch announcement](https://blog.google/technology/developers/google-gemma-2/) for more details. Usage of Gemma is subject to Google's [Gemma Terms of Use](https://ai.google.dev/gemma/terms).", + "capabilities": ["function-call", "structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.02 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.06 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.002 + } + }, + "family": "gemma", + "ownedBy": "google", + "openWeights": true + }, + { + "id": "gemma-3-it", + "name": "Gemma 3 12b It", + "description": "Gemma 3 introduces multimodality, supporting vision-language input and text outputs. 
It handles context windows up to 128k tokens, understands over 140 languages, and offers improved math, reasoning, and chat capabilities, including structured outputs and function calling.", + "capabilities": ["function-call", "structured-output", "image-recognition"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.1 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.3 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.01 + } + }, + "family": "gemma", + "ownedBy": "google", + "openWeights": true + }, + { + "id": "codegemma", + "name": "Codegemma 7b", + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 4096, + "ownedBy": "nvidia", + "openWeights": true + }, + { + "id": "phi-3-medium-128k-instruct", + "name": "Phi 3 Medium 128k Instruct", + "capabilities": ["function-call", "structured-output", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.17 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.68 + } + }, + "family": "phi", + "ownedBy": "microsoft", + "openWeights": true + }, + { + "id": "phi-3-small-128k-instruct", + "name": "Phi 3 Small 128k Instruct", + "capabilities": ["function-call", "structured-output", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.15 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.6 + } + }, + "family": "phi", + "ownedBy": "microsoft", + "openWeights": true + }, + { + "id": "phi-3-5-vision-instruct", + "name": "Phi 
3.5 Vision Instruct", + "capabilities": ["function-call", "structured-output", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.4 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.6 + } + }, + "family": "phi", + "ownedBy": "microsoft", + "openWeights": true + }, + { + "id": "phi-3-small-8k-instruct", + "name": "Phi 3 Small 8k Instruct", + "capabilities": ["function-call", "structured-output", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 8000, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.15 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.6 + } + }, + "family": "phi", + "ownedBy": "microsoft", + "openWeights": true + }, + { + "id": "phi-3-5-moe-instruct", + "name": "Phi 3.5 Moe Instruct", + "capabilities": ["function-call", "structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.16 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.64 + } + }, + "family": "phi", + "ownedBy": "microsoft", + "openWeights": true + }, + { + "id": "phi-4-mini-instruct", + "name": "Phi-4-Mini", + "description": "Microsoft's latest model", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition", "audio-recognition"], + "inputModalities": ["text", "image", "audio"], + "outputModalities": ["text"], + "contextWindow": 131072, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.08 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.35 + } + }, + "family": "phi", + "ownedBy": "microsoft", + "openWeights": false 
+ }, + { + "id": "phi-3-medium-4k-instruct", + "name": "Phi 3 Medium 4k Instruct", + "capabilities": ["function-call", "structured-output", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 4000, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.17 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.68 + } + }, + "family": "phi", + "ownedBy": "microsoft", + "openWeights": true + }, + { + "id": "phi-3-vision-128k-instruct", + "name": "Phi 3 Vision 128k Instruct", + "capabilities": ["function-call", "structured-output", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 4096, + "ownedBy": "microsoft", + "openWeights": true + }, + { + "id": "glm4-7", + "name": "GLM-4.7", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 204800, + "maxOutputTokens": 131072, + "family": "glm", + "ownedBy": "zhipu", + "openWeights": true + }, + { + "id": "glm5", + "name": "GLM5", + "capabilities": ["reasoning", "function-call", "structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 202752, + "maxOutputTokens": 131000, + "family": "glm", + "ownedBy": "zhipu", + "openWeights": true + }, + { + "id": "qwq", + "name": "Qwq 32b", + "description": "QwQ is the reasoning model of the Qwen series. Compared with conventional instruction-tuned models, QwQ, which is capable of thinking and reasoning, can achieve significantly enhanced performance in downstream tasks, especially hard problems. 
QwQ-32B is the medium-sized reasoning model, which is capable of achieving competitive performance against state-of-the-art reasoning models, e.g., DeepSeek-R1, o1-mini.", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.4 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.4 + } + }, + "family": "qwen", + "ownedBy": "alibaba", + "openWeights": true + }, + { + "id": "devstral-2-instruct-2512", + "name": "Devstral-2-123B-Instruct-2512", + "capabilities": ["reasoning", "function-call", "structured-output", "file-input"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 262144, + "maxOutputTokens": 262144, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.05 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.22 + } + }, + "family": "devstral", + "ownedBy": "mistral", + "openWeights": true + }, + { + "id": "mistral-large-3-instruct-2512", + "name": "Mistral Large 3 675B Instruct 2512", + "capabilities": ["function-call", "structured-output", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 262144, + "maxOutputTokens": 262144, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2 + } + }, + "family": "mistral-large", + "ownedBy": "mistral", + "openWeights": true + }, + { + "id": "ministral-instruct-2512", + "name": "Ministral 3 14B Instruct 2512", + "capabilities": ["function-call", "structured-output", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 262144, + "maxOutputTokens": 262144, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1 + }, + 
"output": { + "currency": "USD", + "perMillionTokens": 2 + } + }, + "family": "ministral", + "ownedBy": "mistral", + "openWeights": true + }, + { + "id": "mamba-codestral-v0-1", + "name": "Mamba Codestral 7b V0.1", + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 4096, + "ownedBy": "nvidia", + "openWeights": true + }, + { + "id": "mistral-large-2-instruct", + "name": "Mistral Large 2 Instruct", + "capabilities": ["function-call", "structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 4096, + "ownedBy": "mistral", + "openWeights": true + }, + { + "id": "codestral-instruct-v0-1", + "name": "Codestral 22b Instruct V0.1", + "capabilities": ["function-call", "structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 4096, + "ownedBy": "mistral", + "openWeights": true + }, + { + "id": "mistral-small-3-1-instruct-2503", + "name": "Mistral Small 3.1 24b Instruct 2503", + "description": "Mistral's latest open-source small model; provided by chutes.ai.", + "capabilities": ["function-call", "structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.03 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.11 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.015 + } + }, + "family": "chutesai", + "ownedBy": "mistral", + "openWeights": true + }, + { + "id": "llama-3-2-vision-instruct", + "name": "Llama 3.2 11b Vision Instruct", + "description": "Llama 3.2 11B Vision is a multimodal model with 11 billion parameters, designed to handle tasks combining visual and textual data. 
It excels in tasks such as image captioning and visual question answering, bridging the gap between language generation and visual reasoning. Pre-trained on a massive dataset of image-text pairs, it performs well in complex, high-accuracy image analysis.\n\nIts ability to integrate visual understanding with language processing makes it an ideal solution for industries requiring comprehensive visual-linguistic AI applications, such as content creation, AI-driven customer service, and research.\n\nClick here for the [original model card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/MODEL_CARD_VISION.md).\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://www.llama.com/llama3/use-policy/).", + "capabilities": ["function-call", "structured-output", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.37 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.37 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.175 + } + }, + "family": "llama", + "ownedBy": "meta", + "openWeights": true + }, + { + "id": "llama3-instruct", + "name": "Llama3 70b Instruct", + "capabilities": ["function-call", "structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 4096, + "ownedBy": "meta", + "openWeights": true + }, + { + "id": "llama-3-3-instruct", + "name": "Llama 3.3 70b Instruct", + "description": "The Meta Llama 3.3 multilingual large language model (LLM) is a pretrained and instruction tuned generative model in 70B (text in/text out). 
The Llama 3.3 instruction tuned text only model is optimized for multilingual dialogue use cases and outperforms many of the available open source and closed chat models on common industry benchmarks.\n\nSupported languages: English, German, French, Italian, Portuguese, Hindi, Spanish, and Thai.\n\n[Model Card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_3/MODEL_CARD.md)", + "capabilities": ["function-call", "structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2.92 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2.92 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.013 + } + }, + "family": "llama", + "ownedBy": "meta", + "openWeights": true + }, + { + "id": "llama-3-2-instruct", + "name": "Llama 3.2 1b Instruct", + "description": "Llama 3.2 3B is a 3-billion-parameter multilingual large language model, optimized for advanced natural language processing tasks like dialogue generation, reasoning, and summarization. Designed with the latest transformer architecture, it supports eight languages, including English, Spanish, and Hindi, and is adaptable for additional languages.\n\nTrained on 9 trillion tokens, the Llama 3.2 3B model excels in instruction-following, complex reasoning, and tool use. 
Its balanced performance makes it ideal for applications needing accuracy and efficiency in text generation across multilingual settings.\n\nClick here for the [original model card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/MODEL_CARD.md).\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://www.llama.com/llama3/use-policy/).", + "capabilities": ["function-call", "structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.01 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.01 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.005 + } + }, + "family": "unsloth", + "ownedBy": "meta", + "openWeights": true + }, + { + "id": "llama-4-scout-16e-instruct", + "name": "Llama 4 Scout 17b 16e Instruct", + "capabilities": ["function-call", "structured-output", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.11 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.34 + } + }, + "family": "llama", + "ownedBy": "meta", + "openWeights": true + }, + { + "id": "llama-4-maverick-128e-instruct", + "name": "Llama 4 Maverick 17b 128e Instruct", + "capabilities": ["function-call", "structured-output", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.6 + } + }, + "family": "llama", + "ownedBy": "meta", + "openWeights": true + }, + { + "id": "codellama", + "name": "Codellama 70b", + "inputModalities": ["text"], + 
"outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 4096, + "ownedBy": "nvidia", + "openWeights": true + }, + { + "id": "llama-3-1-instruct", + "name": "Llama 3.1 405b Instruct", + "description": "Meta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This 8B instruct-tuned version is fast and efficient.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3-1/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", + "capabilities": ["function-call", "structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.02 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.05 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.002 + } + }, + "family": "llama", + "ownedBy": "meta", + "openWeights": true + }, + { + "id": "deepseek-r1-0528", + "name": "Deepseek R1 0528", + "description": "May 28th update to the [original DeepSeek R1](/deepseek/deepseek-r1) Performance on par with [OpenAI o1](/openai/o1), but open-sourced and with fully open reasoning tokens. 
It's 671B parameters in size, with 37B active in an inference pass.\n\nFully open-source model.", + "capabilities": ["reasoning", "function-call", "structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.7 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2.5 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.08 + } + }, + "family": "deepseek-thinking", + "ownedBy": "deepseek", + "openWeights": true + }, + { + "id": "deepseek-r1", + "name": "Deepseek R1", + "description": "DeepSeek R1 is here: Performance on par with [OpenAI o1](/openai/o1), but open-sourced and with fully open reasoning tokens. It's 671B parameters in size, with 37B active in an inference pass.\n\nFully open-source model & [technical report](https://api-docs.deepseek.com/news/news250120).\n\nMIT licensed: Distill & commercialize freely!", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 7 + } + }, + "family": "deepseek-thinking", + "ownedBy": "deepseek", + "openWeights": true + }, + { + "id": "deepseek-v3-1-terminus", + "name": "DeepSeek V3.1 Terminus", + "description": "DeepSeek-V3.1 Terminus is an update to [DeepSeek V3.1](/deepseek/deepseek-chat-v3.1) that maintains the model's original capabilities while addressing issues reported by users, including language consistency and agent capabilities, further optimizing the model's performance in coding and search agents. It is a large hybrid reasoning model (671B parameters, 37B active) that supports both thinking and non-thinking modes. 
It extends the DeepSeek-V3 base with a two-phase long-context training process, reaching up to 128K tokens, and uses FP8 microscaling for efficient inference. Users can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)\n\nThe model improves tool use, code generation, and reasoning efficiency, achieving performance comparable to DeepSeek-R1 on difficult benchmarks while responding more quickly. It supports structured tool calling, code agents, and search agents, making it suitable for research, coding, and agentic workflows. ", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.27 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.135 + } + }, + "reasoning": { + "supportedEfforts": ["none"], + "interleaved": true + }, + "family": "deepseek", + "ownedBy": "deepseek", + "openWeights": false + }, + { + "id": "deepseek-v3-1", + "name": "DeepSeek V3.1", + "description": "Thinking mode of DeepSeek-V3.1; \nDeepSeek V3.1 is a text generation model provided by DeepSeek, featuring a hybrid reasoning architecture that achieves an effective integration of thinking and non-thinking modes.", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.27 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.135 + } + }, + "reasoning": { + "supportedEfforts": ["none"], + "interleaved": true + }, + "family": "deepseek", + 
"ownedBy": "deepseek", + "openWeights": false + }, + { + "id": "deepseek-coder-instruct", + "name": "Deepseek Coder 6.7b Instruct", + "capabilities": ["function-call", "structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 4096, + "ownedBy": "deepseek", + "openWeights": true + }, + { + "id": "flux.1-dev", + "name": "FLUX.1-dev", + "capabilities": ["image-generation"], + "inputModalities": ["text"], + "outputModalities": ["image"], + "contextWindow": 4096, + "family": "flux", + "ownedBy": "bfl", + "openWeights": false + }, + { + "id": "command-a-translate-08-2025", + "name": "Command A Translate", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 8000, + "maxOutputTokens": 8000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2.5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 10 + } + }, + "family": "command-a", + "ownedBy": "cohere", + "openWeights": true + }, + { + "id": "command-r7b-arabic-02-2025", + "name": "Command R7B Arabic", + "capabilities": ["function-call", "reasoning"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 4000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.0375 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.15 + } + }, + "family": "command-r", + "ownedBy": "cohere", + "openWeights": true + }, + { + "id": "command-a-03-2025", + "name": "Command A", + "description": "Command A is Cohere most performant model to date, excelling at tool use, agents, retrieval augmented generation (RAG), and multilingual use cases. 
Command A has a context length of 256K, only requires two GPUs to run, and has 150% higher throughput compared to Command R+ 08-2024.", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 256000, + "maxOutputTokens": 8000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2.5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 10 + } + }, + "family": "command-a", + "ownedBy": "cohere", + "openWeights": true + }, + { + "id": "command-r-08-2024", + "name": "Command R", + "description": "command-r-08-2024 is an update of the [Command R](/models/cohere/command-r) with improved performance for multilingual retrieval-augmented generation (RAG) and tool use. More broadly, it is better at math, code and reasoning and is competitive with the previous version of the larger Command R+ model.\n\nRead the launch post [here](https://docs.cohere.com/changelog/command-gets-refreshed).\n\nUse of this model is subject to Cohere's [Usage Policy](https://docs.cohere.com/docs/usage-policy) and [SaaS Agreement](https://cohere.com/saas-agreement).", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 4000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.15 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.6 + } + }, + "family": "command-r", + "ownedBy": "cohere", + "openWeights": true + }, + { + "id": "command-r-plus-08-2024", + "name": "Command R+", + "description": "command-r-plus-08-2024 is an update of the [Command R+](/models/cohere/command-r-plus) with roughly 50% higher throughput and 25% lower latencies as compared to the previous Command R+ version, while keeping the hardware footprint the same.\n\nRead the launch post [here](https://docs.cohere.com/changelog/command-gets-refreshed).\n\nUse of this model is subject to 
Cohere's [Usage Policy](https://docs.cohere.com/docs/usage-policy) and [SaaS Agreement](https://cohere.com/saas-agreement).", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 4000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2.5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 10 + } + }, + "family": "command-r", + "ownedBy": "cohere", + "openWeights": true + }, + { + "id": "c4ai-aya-vision", + "name": "Aya Vision 32B", + "capabilities": ["function-call", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 16000, + "maxOutputTokens": 4000, + "ownedBy": "cohere", + "openWeights": true + }, + { + "id": "command-r7b-12-2024", + "name": "Command R7B", + "description": "Command R7B (12-2024) is a small, fast update of the Command R+ model, delivered in December 2024. It excels at RAG, tool use, agents, and similar tasks requiring complex reasoning and multiple steps.\n\nUse of this model is subject to Cohere's [Usage Policy](https://docs.cohere.com/docs/usage-policy) and [SaaS Agreement](https://cohere.com/saas-agreement).", + "capabilities": ["function-call", "reasoning"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 4000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.0375 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.15 + } + }, + "family": "command-r", + "ownedBy": "cohere", + "openWeights": true + }, + { + "id": "c4ai-aya-expanse", + "name": "Aya Expanse 32B", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 4000, + "ownedBy": "cohere", + "openWeights": true + }, + { + "id": "command-a-reasoning-08-2025", + "name": "Command A Reasoning", 
+ "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 256000, + "maxOutputTokens": 32000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2.5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 10 + } + }, + "family": "command-a", + "ownedBy": "cohere", + "openWeights": true + }, + { + "id": "command-a-vision-07-2025", + "name": "Command A Vision", + "capabilities": ["image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 8000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2.5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 10 + } + }, + "family": "command-a", + "ownedBy": "cohere", + "openWeights": true + }, + { + "id": "solar-mini", + "name": "solar-mini", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 32768, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.15 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.15 + } + }, + "family": "solar-mini", + "ownedBy": "upstageai", + "openWeights": false + }, + { + "id": "solar-pro3", + "name": "solar-pro3", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 131072, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.25 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.25 + } + }, + "family": "solar-pro", + "ownedBy": "upstageai", + "openWeights": false + }, + { + "id": "solar-pro2", + "name": "solar-pro2", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 65536, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": 
"USD", + "perMillionTokens": 0.25 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.25 + } + }, + "family": "solar-pro", + "ownedBy": "upstageai", + "openWeights": false + }, + { + "id": "llama-3-1-instant", + "name": "Llama 3.1 8B Instant", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 131072, + "maxOutputTokens": 131072, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.05 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.08 + } + }, + "family": "llama", + "ownedBy": "meta", + "openWeights": true + }, + { + "id": "mistral-saba", + "name": "Mistral Saba 24B", + "description": "Mistral Saba is a 24B-parameter language model specifically designed for the Middle East and South Asia, delivering accurate and contextually relevant responses while maintaining efficient performance. Trained on curated regional datasets, it supports multiple Indian-origin languages—including Tamil and Malayalam—alongside Arabic. This makes it a versatile option for a range of regional and multilingual applications. 
Read more at the blog post [here](https://mistral.ai/en/news/mistral-saba)", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 32768, + "maxOutputTokens": 32768, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.79 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.79 + } + }, + "family": "mistral", + "ownedBy": "mistral", + "openWeights": false + }, + { + "id": "llama3-8192", + "name": "Llama 3 8B", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 8192, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.05 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.08 + } + }, + "family": "llama", + "ownedBy": "meta", + "openWeights": true + }, + { + "id": "qwen-qwq", + "name": "Qwen QwQ 32B", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 131072, + "maxOutputTokens": 16384, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.29 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.39 + } + }, + "family": "qwen", + "ownedBy": "alibaba", + "openWeights": true + }, + { + "id": "llama-guard-3", + "name": "Llama Guard 3 8B", + "description": "Llama Guard 3 is a Llama-3.1-8B pretrained model, fine-tuned for content safety classification. Similar to previous versions, it can be used to classify content in both LLM inputs (prompt classification) and in LLM responses (response classification). It acts as an LLM – it generates text in its output that indicates whether a given prompt or response is safe or unsafe, and if unsafe, it also lists the content categories violated.\n\nLlama Guard 3 was aligned to safeguard against the MLCommons standardized hazards taxonomy and designed to support Llama 3.1 capabilities. 
Specifically, it provides content moderation in 8 languages, and was optimized to support safety and security for search and code interpreter tool calls.\n", + "capabilities": ["structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 8192, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.2 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.002 + } + }, + "family": "llama", + "ownedBy": "meta", + "openWeights": true + }, + { + "id": "gemma2-it", + "name": "Gemma 2 9B", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 8192, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.2 + } + }, + "family": "gemma", + "ownedBy": "google", + "openWeights": true + }, + { + "id": "llama-3-3-versatile", + "name": "Llama 3.3 70B Versatile", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 131072, + "maxOutputTokens": 32768, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.59 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.79 + } + }, + "family": "llama", + "ownedBy": "meta", + "openWeights": true + }, + { + "id": "llama-guard-4", + "name": "Llama Guard 4 12B", + "description": "Llama Guard 4 is a Llama 4 Scout-derived multimodal pretrained model, fine-tuned for content safety classification. Similar to previous versions, it can be used to classify content in both LLM inputs (prompt classification) and in LLM responses (response classification). 
It acts as an LLM—generating text in its output that indicates whether a given prompt or response is safe or unsafe, and if unsafe, it also lists the content categories violated.\n\nLlama Guard 4 was aligned to safeguard against the standardized MLCommons hazards taxonomy and designed to support multimodal Llama 4 capabilities. Specifically, it combines features from previous Llama Guard models, providing content moderation for English and multiple supported languages, along with enhanced capabilities to handle mixed text-and-image prompts, including multiple images. Additionally, Llama Guard 4 is integrated into the Llama Moderations API, extending robust safety classification to text and images.", + "capabilities": ["image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 131072, + "maxOutputTokens": 1024, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.2 + } + }, + "family": "llama", + "ownedBy": "meta", + "openWeights": true + }, + { + "id": "ling-1t", + "name": "Ling-1T", + "description": "Ling-1T is the first flagship non-thinking model in the “Ling 2.0” series, featuring 1 trillion total parameters and approximately 50 billion active parameters per token. Built on the Ling 2.0 architecture, Ling-1T is designed to push the limits of efficient inference and scalable cognition. Ling-1T-base was pretrained on over 20 trillion high-quality, reasoning-intensive tokens, supports up to a 128K context length, and incorporates an Evolutionary Chain of Thought (Evo-CoT) process during mid-stage and post-stage training. 
This training regimen greatly enhances the model’s efficiency and depth of reasoning, enabling Ling-1T to achieve top performance across multiple complex reasoning benchmarks, balancing accuracy and efficiency.", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 32000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.57 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2.29 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.11 + } + }, + "family": "ling", + "ownedBy": "bailing", + "openWeights": true + }, + { + "id": "ring-1t", + "name": "Ring-1T", + "description": "Ring-1T is an open-source idea model with a trillion parameters released by the Bailing team. It is based on the Ling 2.0 architecture and the Ling-1T-base foundational model for training, with a total parameter count of 1 trillion, an active parameter count of 50 billion, and supports up to a 128K context window. The model is trained via large-scale verifiable reward reinforcement learning (RLVR), combined with the self-developed Icepop reinforcement learning stabilization method and the efficient ASystem reinforcement learning system, significantly improving the model’s deep reasoning and natural language reasoning capabilities. 
Ring-1T achieves leading performance among open-source models on high-difficulty reasoning benchmarks such as mathematics competitions (e.g., IMO 2025), code generation (e.g., ICPC World Finals 2025), and logical reasoning.", + "capabilities": ["reasoning"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 32000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.57 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2.29 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.11 + } + }, + "family": "ring", + "ownedBy": "bailing", + "openWeights": true + }, + { + "id": "gpt-5-1-codex", + "name": "GPT-5.1-Codex", + "description": "GPT-5.1-Codex is a specialized version of GPT-5.1 optimized for software engineering and coding workflows. It is designed for both interactive development sessions and long, independent execution of complex engineering tasks. The model supports building projects from scratch, feature development, debugging, large-scale refactoring, and code review. Compared to GPT-5.1, Codex is more steerable, adheres closely to developer instructions, and produces cleaner, higher-quality code outputs. Reasoning effort can be adjusted with the `reasoning.effort` parameter. Read the [docs here](https://openrouter.ai/docs/use-cases/reasoning-tokens#reasoning-effort-level)\n\nCodex integrates into developer environments including the CLI, IDE extensions, GitHub, and cloud tasks. It adapts reasoning effort dynamically—providing fast responses for small tasks while sustaining extended multi-hour runs for large projects. The model is trained to perform structured code reviews, catching critical flaws by reasoning over dependencies and validating behavior against tests. It also supports multimodal inputs such as images or screenshots for UI development and integrates tool use for search, dependency installation, and environment setup. 
Codex is intended specifically for agentic coding applications.", + "capabilities": ["reasoning", "function-call", "image-recognition", "web-search"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 128000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.125 + }, + "output": { + "currency": "USD", + "perMillionTokens": 9 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.13 + } + }, + "reasoning": { + "supportedEfforts": ["none", "medium", "high"] + }, + "family": "gpt-codex", + "ownedBy": "openai", + "openWeights": false + }, + { + "id": "gpt-5-1-codex-mini", + "name": "GPT-5.1-Codex-mini", + "description": "GPT-5.1-Codex-Mini is a smaller and faster version of GPT-5.1-Codex", + "capabilities": ["reasoning", "function-call", "image-recognition", "web-search"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 128000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.225 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.8 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.03 + } + }, + "reasoning": { + "supportedEfforts": ["none", "medium", "high"] + }, + "family": "gpt-codex", + "ownedBy": "openai", + "openWeights": false + }, + { + "id": "claude-opus-41", + "name": "Claude Opus 4.1", + "capabilities": ["reasoning", "file-input", "image-recognition", "function-call", "computer-use"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 80000, + "maxOutputTokens": 16000, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"] + }, + "family": "claude-opus", + "ownedBy": "anthropic", + "openWeights": false + }, + { + "id": "gpt-5-1-codex-max", + "name": "GPT-5.1-Codex-max", + "description": "GPT-5.1-Codex-Max is OpenAI’s latest agentic coding model, designed for long-running, 
high-context software development tasks. It is based on an updated version of the 5.1 reasoning stack and trained on agentic workflows spanning software engineering, mathematics, and research. \nGPT-5.1-Codex-Max delivers faster performance, improved reasoning, and higher token efficiency across the development lifecycle. ", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition", "web-search"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 128000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.125 + }, + "output": { + "currency": "USD", + "perMillionTokens": 9 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.13 + } + }, + "reasoning": { + "supportedEfforts": ["none", "medium", "high", "max"] + }, + "family": "gpt-codex", + "ownedBy": "openai", + "openWeights": false + }, + { + "id": "gemini-3-1-pro-preview", + "name": "Gemini 3.1 Pro Preview", + "description": "Gemini 3.1 Pro Preview is Google’s frontier reasoning model, delivering enhanced software engineering performance, improved agentic reliability, and more efficient token usage across complex workflows. Building on the multimodal foundation of the Gemini 3 series, it combines high-precision reasoning across text, image, video, audio, and code with a 1M-token context window. Reasoning Details must be preserved when using multi-turn tool calling, see our docs here: https://openrouter.ai/docs/use-cases/reasoning-tokens#preserving-reasoning. The 3.1 update introduces measurable gains in SWE benchmarks and real-world coding environments, along with stronger autonomous task execution in structured domains such as finance and spreadsheet-based workflows.\n\nDesigned for advanced development and agentic systems, Gemini 3.1 Pro Preview improves long-horizon stability and tool orchestration while increasing token efficiency. 
It introduces a new medium thinking level to better balance cost, speed, and performance. The model excels in agentic coding, structured planning, multimodal analysis, and workflow automation, making it well-suited for autonomous agents, financial modeling, spreadsheet automation, and high-context enterprise tasks.", + "capabilities": ["reasoning", "function-call", "structured-output", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 64000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2.5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 15 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.5 + } + }, + "reasoning": { + "supportedEfforts": ["low", "medium", "high"], + "thinkingTokenLimits": { + "min": 128, + "max": 32768 + } + }, + "family": "gemini-pro", + "ownedBy": "google", + "openWeights": false + }, + { + "id": "claude-sonnet-4", + "name": "Claude Sonnet 4", + "description": "Claude Sonnet 4 significantly enhances the capabilities of its predecessor, Sonnet 3.7, excelling in both coding and reasoning tasks with improved precision and controllability. Achieving state-of-the-art performance on SWE-bench (72.7%), Sonnet 4 balances capability and computational efficiency, making it suitable for a broad range of applications from routine coding tasks to complex software development projects. Key enhancements include improved autonomous codebase navigation, reduced error rates in agent-driven workflows, and increased reliability in following intricate instructions. 
Sonnet 4 is optimized for practical everyday use, providing advanced reasoning capabilities while maintaining efficiency and responsiveness in diverse internal and external scenarios.\n\nRead more at the [blog post here](https://www.anthropic.com/news/claude-4)", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition", "web-search", "computer-use"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 16000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 15 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.3 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 64000 + } + }, + "family": "claude-sonnet", + "ownedBy": "anthropic", + "openWeights": false + }, + { + "id": "gemini-2-5-flash-preview-05-20", + "name": "gemini-2.5-flash-preview-05-20", + "description": "Gemini-2.5-flash-preview-05-20 is enabled by default for thinking; to disable it, request the name gemini-2.5-flash-preview-05-20-nothink.Only OpenAI-compatible format calls are supported; Gemini SDK is not supported. 
For the native Gemini SDK, please set the parameter budget=0 directly.", + "capabilities": [ + "function-call", + "structured-output", + "file-input", + "image-recognition", + "audio-recognition", + "video-recognition", + "web-search", + "reasoning" + ], + "inputModalities": ["text", "image", "video", "audio"], + "outputModalities": ["text"], + "contextWindow": 1048576, + "maxOutputTokens": 200000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.135 + }, + "output": { + "currency": "USD", + "perMillionTokens": 3.15 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.0375 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 0, + "max": 24576 + } + }, + "family": "gemini-flash", + "ownedBy": "google", + "openWeights": false + }, + { + "id": "gemini-2-5-flash-lite", + "name": "gemini-2.5-flash-lite", + "description": "Gemini 2.5 Flash-Lite is a lightweight reasoning model in the Gemini 2.5 family, optimized for ultra-low latency and cost efficiency. It offers improved throughput, faster token generation, and better performance across common benchmarks compared to earlier Flash models. By default, \"thinking\" (i.e. multi-pass reasoning) is disabled to prioritize speed, but developers can enable it via the [Reasoning API parameter](https://openrouter.ai/docs/use-cases/reasoning-tokens) to selectively trade off cost for intelligence. 
", + "capabilities": [ + "function-call", + "structured-output", + "file-input", + "image-recognition", + "audio-recognition", + "video-recognition", + "web-search", + "reasoning" + ], + "inputModalities": ["text", "image", "video", "audio"], + "outputModalities": ["text"], + "contextWindow": 1048576, + "maxOutputTokens": 65535, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.09 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.36 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.01 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 512, + "max": 24576 + } + }, + "family": "gemini-flash-lite", + "ownedBy": "google", + "openWeights": false + }, + { + "id": "gpt-5-2-pro", + "name": "gpt-5.2-pro", + "description": "GPT-5.2 Pro is OpenAI’s most advanced model, offering major improvements in agentic coding and long context performance over GPT-5 Pro. It is optimized for complex tasks that require step-by-step reasoning, instruction following, and accuracy in high-stakes use cases. 
It supports test-time routing features and advanced prompt understanding, including user-specified intent like \"think hard about this.\" Improvements include reductions in hallucination, sycophancy, and better performance in coding, writing, and health-related tasks.", + "capabilities": [ + "reasoning", + "function-call", + "structured-output", + "file-input", + "image-recognition", + "web-search" + ], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 400000, + "maxOutputTokens": 128000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 18.9 + }, + "output": { + "currency": "USD", + "perMillionTokens": 151.2 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 2.1 + } + }, + "reasoning": { + "supportedEfforts": ["medium", "high", "max"] + }, + "family": "gpt-pro", + "ownedBy": "openai", + "openWeights": false + }, + { + "id": "gemini-2-5-pro-preview-06-05", + "name": "gemini-2.5-pro-preview-06-05", + "description": "Integrated with Google's official search function.", + "capabilities": [ + "function-call", + "structured-output", + "file-input", + "image-recognition", + "audio-recognition", + "video-recognition", + "web-search", + "reasoning" + ], + "inputModalities": ["text", "image", "video", "audio"], + "outputModalities": ["text"], + "contextWindow": 1048576, + "maxOutputTokens": 200000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.125 + }, + "output": { + "currency": "USD", + "perMillionTokens": 9 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.31 + } + }, + "reasoning": { + "supportedEfforts": ["low", "medium", "high"], + "thinkingTokenLimits": { + "min": 128, + "max": 32768 + } + }, + "family": "gemini-pro", + "ownedBy": "google", + "openWeights": false + }, + { + "id": "gemini-2-5-flash-lite-preview-06-17", + "name": "gemini-2.5-flash-lite-preview-06-17", + "capabilities": [ + "function-call", + "structured-output", + "file-input", + 
"image-recognition", + "audio-recognition", + "video-recognition", + "web-search", + "reasoning" + ], + "inputModalities": ["text", "video", "image", "audio"], + "outputModalities": ["text"], + "contextWindow": 1048576, + "maxOutputTokens": 65535, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.09 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.36 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.025 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 512, + "max": 24576 + } + }, + "family": "gemini-flash-lite", + "ownedBy": "google", + "openWeights": false + }, + { + "id": "grok-4-0709", + "name": "grok-4-0709", + "capabilities": [ + "function-call", + "structured-output", + "file-input", + "image-recognition", + "reasoning", + "web-search" + ], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 256000, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2.7 + }, + "output": { + "currency": "USD", + "perMillionTokens": 13.5 + } + }, + "family": "grok", + "ownedBy": "xai", + "openWeights": false + }, + { + "id": "o3-mini", + "name": "o3-mini", + "description": "OpenAI o3-mini is a cost-efficient language model optimized for STEM reasoning tasks, particularly excelling in science, mathematics, and coding.\n\nThis model supports the `reasoning_effort` parameter, which can be set to \"high\", \"medium\", or \"low\" to control the thinking time of the model. The default is \"medium\". 
OpenRouter also offers the model slug `openai/o3-mini-high` to default the parameter to \"high\".\n\nThe model features three adjustable reasoning effort levels and supports key developer capabilities including function calling, structured outputs, and streaming, though it does not include vision processing capabilities.\n\nThe model demonstrates significant improvements over its predecessor, with expert testers preferring its responses 56% of the time and noting a 39% reduction in major errors on complex questions. With medium reasoning effort settings, o3-mini matches the performance of the larger o1 model on challenging reasoning evaluations like AIME and GPQA, while maintaining lower latency and cost.", + "capabilities": [ + "function-call", + "structured-output", + "file-input", + "image-recognition", + "reasoning", + "web-search" + ], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 131072, + "maxOutputTokens": 131072, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.1 + }, + "output": { + "currency": "USD", + "perMillionTokens": 4.4 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.55 + } + }, + "reasoning": { + "supportedEfforts": ["low", "medium", "high"] + }, + "family": "o", + "ownedBy": "openai", + "openWeights": false + }, + { + "id": "gpt-5-codex", + "name": "gpt-5-codex", + "description": "GPT-5-Codex is a specialized version of GPT-5 optimized for software engineering and coding workflows. It is designed for both interactive development sessions and long, independent execution of complex engineering tasks. The model supports building projects from scratch, feature development, debugging, large-scale refactoring, and code review. Compared to GPT-5, Codex is more steerable, adheres closely to developer instructions, and produces cleaner, higher-quality code outputs. Reasoning effort can be adjusted with the `reasoning.effort` parameter. 
Read the [docs here](https://openrouter.ai/docs/use-cases/reasoning-tokens#reasoning-effort-level)\n\nCodex integrates into developer environments including the CLI, IDE extensions, GitHub, and cloud tasks. It adapts reasoning effort dynamically—providing fast responses for small tasks while sustaining extended multi-hour runs for large projects. The model is trained to perform structured code reviews, catching critical flaws by reasoning over dependencies and validating behavior against tests. It also supports multimodal inputs such as images or screenshots for UI development and integrates tool use for search, dependency installation, and environment setup. Codex is intended specifically for agentic coding applications.", + "capabilities": [ + "function-call", + "structured-output", + "file-input", + "image-recognition", + "web-search", + "reasoning" + ], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 400000, + "maxOutputTokens": 128000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.125 + }, + "output": { + "currency": "USD", + "perMillionTokens": 9 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.125 + } + }, + "reasoning": { + "supportedEfforts": ["low", "medium", "high"] + }, + "family": "gpt-codex", + "ownedBy": "openai", + "openWeights": false + }, + { + "id": "claude-sonnet-4-20250514", + "name": "claude-sonnet-4-20250514", + "capabilities": [ + "function-call", + "structured-output", + "file-input", + "image-recognition", + "reasoning", + "web-search", + "computer-use" + ], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 64000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2.7 + }, + "output": { + "currency": "USD", + "perMillionTokens": 13.5 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.3 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", 
"medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 64000 + } + }, + "family": "claude-sonnet", + "ownedBy": "anthropic", + "openWeights": false, + "alias": ["claude-sonnet-4", "claude-sonnet-4-0"] + }, + { + "id": "o4-mini", + "name": "o4-mini", + "description": "OpenAI o4-mini is a compact reasoning model in the o-series, optimized for fast, cost-efficient performance while retaining strong multimodal and agentic capabilities. It supports tool use and demonstrates competitive reasoning and coding performance across benchmarks like AIME (99.5% with Python) and SWE-bench, outperforming its predecessor o3-mini and even approaching o3 in some domains.\n\nDespite its smaller size, o4-mini exhibits high accuracy in STEM tasks, visual problem solving (e.g., MathVista, MMMU), and code editing. It is especially well-suited for high-throughput scenarios where latency or cost is critical. Thanks to its efficient architecture and refined reinforcement learning training, o4-mini can chain tools, generate structured outputs, and solve multi-step tasks with minimal delay—often in under a minute.", + "capabilities": [ + "function-call", + "structured-output", + "file-input", + "image-recognition", + "reasoning", + "web-search" + ], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 100000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.1 + }, + "output": { + "currency": "USD", + "perMillionTokens": 4.4 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.28 + } + }, + "reasoning": { + "supportedEfforts": ["low", "medium", "high"] + }, + "family": "o", + "ownedBy": "openai", + "openWeights": false + }, + { + "id": "claude-opus-4-20250514", + "name": "claude-opus-4-20250514", + "capabilities": [ + "function-call", + "structured-output", + "file-input", + "image-recognition", + "reasoning", + "web-search", + "computer-use" + ], + "inputModalities": ["text", 
"image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 32000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 13.5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 67.5 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 1.5 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 32000 + } + }, + "family": "claude-opus", + "ownedBy": "anthropic", + "openWeights": false, + "alias": ["claude-opus-4", "claude-opus-4-0"] + }, + { + "id": "o3", + "name": "o3", + "description": "o3 is a well-rounded and powerful model across domains. It sets a new standard for math, science, coding, and visual reasoning tasks. It also excels at technical writing and instruction-following. Use it to think through multi-step problems that involve analysis across text, code, and images. ", + "capabilities": [ + "function-call", + "structured-output", + "file-input", + "image-recognition", + "reasoning", + "web-search" + ], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 131072, + "maxOutputTokens": 131072, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 10 + }, + "output": { + "currency": "USD", + "perMillionTokens": 40 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.5 + } + }, + "reasoning": { + "supportedEfforts": ["low", "medium", "high"] + }, + "family": "o", + "ownedBy": "openai", + "openWeights": false + }, + { + "id": "gpt-5-chat-latest", + "name": "gpt-5-chat-latest", + "description": "GPT-5 Chat points to the GPT-5 snapshot currently used in ChatGPT. GPT-5 is our next-generation, high-intelligence flagship model. 
It accepts both text and image inputs, and produces text outputs.", + "capabilities": [ + "function-call", + "structured-output", + "file-input", + "image-recognition", + "web-search", + "reasoning" + ], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 400000, + "maxOutputTokens": 128000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.125 + }, + "output": { + "currency": "USD", + "perMillionTokens": 9 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.12500000000000003 + } + }, + "reasoning": { + "supportedEfforts": ["minimal", "low", "medium", "high"] + }, + "family": "gpt", + "ownedBy": "openai", + "openWeights": false + }, + { + "id": "kimi-k2-0905", + "name": "Kimi K2 0905", + "description": "Kimi K2 0905 is the September update of [Kimi K2 0711](moonshotai/kimi-k2). It is a large-scale Mixture-of-Experts (MoE) language model developed by Moonshot AI, featuring 1 trillion total parameters with 32 billion active per forward pass. It supports long-context inference up to 256k tokens, extended from the previous 128k.\n\nThis update improves agentic coding with higher accuracy and better generalization across scaffolds, and enhances frontend coding with more aesthetic and functional outputs for web, 3D, and related tasks. Kimi K2 is optimized for agentic capabilities, including advanced tool use, reasoning, and code synthesis. It excels across coding (LiveCodeBench, SWE-bench), reasoning (ZebraLogic, GPQA), and tool-use (Tau2, AceBench) benchmarks. 
The model is trained with a novel stack incorporating the MuonClip optimizer for stable large-scale MoE training.", + "capabilities": ["function-call", "structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 262144, + "maxOutputTokens": 262144, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.6 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2.5 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.39999999999999997 + } + }, + "family": "kimi", + "ownedBy": "moonshot", + "openWeights": true + }, + { + "id": "deepseek-v3-0324", + "name": "DeepSeek V3 0324", + "capabilities": ["function-call", "structured-output", "reasoning"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 163840, + "maxOutputTokens": 163840, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.28 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.14 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.05 + } + }, + "reasoning": { + "supportedEfforts": ["none"] + }, + "family": "deepseek", + "ownedBy": "deepseek", + "openWeights": true + }, + { + "id": "minimax-m1-80k", + "name": "MiniMax M1", + "description": "MiniMax-M1 is an open-source large-scale hybrid attention model with 456B total parameters (45.9B activated per token). It natively supports 1M-token context and reduces FLOPs by 75% versus DeepSeek R1 in 100K-token generation tasks via lightning attention. 
Built on MoE architecture and optimized by CISPO algorithm, it achieves state-of-the-art performance in long-context reasoning and real-world software engineering scenarios.", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 1000000, + "maxOutputTokens": 40000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.55 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2.2 + } + }, + "family": "minimax", + "ownedBy": "minimax", + "openWeights": true + }, + { + "id": "ernie-4-5-a47b-paddle", + "name": "ERNIE 4.5 300B A47B", + "capabilities": ["function-call", "structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 123000, + "maxOutputTokens": 12000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.28 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.1 + } + }, + "family": "ernie", + "ownedBy": "baidu", + "openWeights": true + }, + { + "id": "ernie-4-5-vl-a47b", + "name": "ERNIE 4.5 VL 424B A47B", + "description": "ERNIE-4.5-VL-424B-A47B is a multimodal Mixture-of-Experts (MoE) model from Baidu’s ERNIE 4.5 series, featuring 424B total parameters with 47B active per token. It is trained jointly on text and image data using a heterogeneous MoE architecture and modality-isolated routing to enable high-fidelity cross-modal reasoning, image understanding, and long-context generation (up to 131k tokens). Fine-tuned with techniques like SFT, DPO, UPO, and RLVR, this model supports both “thinking” and non-thinking inference modes. 
Designed for vision-language tasks in English and Chinese, it is optimized for efficient scaling and can operate under 4-bit/8-bit quantization.", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 123000, + "maxOutputTokens": 16000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.42 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.25 + } + }, + "family": "ernie", + "ownedBy": "baidu", + "openWeights": true + }, + { + "id": "qwen3-a22b-thinking-2507", + "name": "Qwen3 235B A22b Thinking 2507", + "description": "Qwen3-235B-A22B-Thinking-2507 is a high-performance, open-weight Mixture-of-Experts (MoE) language model optimized for complex reasoning tasks. It activates 22B of its 235B parameters per forward pass and natively supports up to 262,144 tokens of context. This \"thinking-only\" variant enhances structured logical reasoning, mathematics, science, and long-form generation, showing strong benchmark performance across AIME, SuperGPQA, LiveCodeBench, and MMLU-Redux. It enforces a special reasoning mode () and is designed for high-token outputs (up to 81,920 tokens) in challenging domains.\n\nThe model is instruction-tuned and excels at step-by-step reasoning, tool use, agentic workflows, and multilingual tasks. 
This release represents the most capable open-source variant in the Qwen3-235B series, surpassing many closed models in structured reasoning use cases.", + "capabilities": ["reasoning", "function-call", "structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 131072, + "maxOutputTokens": 131072, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 3 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.055 + } + }, + "reasoning": { + "supportedEfforts": ["low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 38912 + }, + "interleaved": true + }, + "family": "qwen", + "ownedBy": "alibaba", + "openWeights": true + }, + { + "id": "qwen3-a3b-fp8", + "name": "Qwen3 30B A3B", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 40960, + "maxOutputTokens": 20000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.09 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.45 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 38912 + } + }, + "family": "qwen", + "ownedBy": "alibaba", + "openWeights": true + }, + { + "id": "qwen3-fp8", + "name": "Qwen3 32B", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 40960, + "maxOutputTokens": 20000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.1 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.45 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 38912 + } + }, + "family": "qwen", + "ownedBy": "alibaba", + "openWeights": true + }, + { + "id": "qwen3-a22b-fp8", + "name": "Qwen3 
235B A22B", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 40960, + "maxOutputTokens": 20000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.8 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 38912 + } + }, + "family": "qwen", + "ownedBy": "alibaba", + "openWeights": true + }, + { + "id": "devstral-medium-2507", + "name": "Devstral Medium", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 128000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.4 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2 + } + }, + "family": "devstral", + "ownedBy": "mistral", + "openWeights": true + }, + { + "id": "open-mixtral-8x22b", + "name": "Mixtral 8x22B", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 64000, + "maxOutputTokens": 64000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 6 + } + }, + "family": "mixtral", + "ownedBy": "mistral", + "openWeights": true + }, + { + "id": "ministral-latest", + "name": "Ministral 8B", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 128000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.1 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.1 + } + }, + "family": "ministral", + "ownedBy": "mistral", + "openWeights": true + }, + { + "id": "pixtral-large-latest", + "name": "Pixtral Large", + "capabilities": ["function-call", "file-input", "image-recognition"], + 
"inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 128000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 6 + } + }, + "family": "pixtral", + "ownedBy": "mistral", + "openWeights": true + }, + { + "id": "mistral-small-2506", + "name": "Mistral Small 3.2", + "capabilities": ["function-call", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 16384, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.1 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.3 + } + }, + "family": "mistral-small", + "ownedBy": "mistral", + "openWeights": true + }, + { + "id": "devstral-2512", + "name": "Devstral 2", + "description": "Devstral 2 is a state-of-the-art open-source model by Mistral AI specializing in agentic coding. It is a 123B-parameter dense transformer model supporting a 256K context window.\n\nDevstral 2 supports exploring codebases and orchestrating changes across multiple files while maintaining architecture-level context. It tracks framework dependencies, detects failures, and retries with corrections—solving challenges like bug fixing and modernizing legacy systems. The model can be fine-tuned to prioritize specific languages or optimize for large enterprise codebases. 
It is available under a modified MIT license.", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 262144, + "maxOutputTokens": 262144, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.4 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.025 + } + }, + "family": "devstral", + "ownedBy": "mistral", + "openWeights": true + }, + { + "id": "pixtral", + "name": "Pixtral 12B", + "capabilities": ["function-call", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 128000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.15 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.15 + } + }, + "family": "pixtral", + "ownedBy": "mistral", + "openWeights": true + }, + { + "id": "mistral-medium-2505", + "name": "Mistral Medium 3", + "capabilities": ["function-call", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 131072, + "maxOutputTokens": 131072, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.4 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2 + } + }, + "family": "mistral-medium", + "ownedBy": "mistral", + "openWeights": false + }, + { + "id": "labs-devstral-small-2512", + "name": "Devstral Small 2", + "capabilities": ["function-call", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 256000, + "maxOutputTokens": 256000, + "family": "devstral", + "ownedBy": "mistral", + "openWeights": true + }, + { + "id": "devstral-medium-latest", + "name": "Devstral 2", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 262144, + 
"maxOutputTokens": 262144, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.4 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2 + } + }, + "family": "devstral", + "ownedBy": "mistral", + "openWeights": true + }, + { + "id": "devstral-small-2505", + "name": "Devstral Small 2505", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 128000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.1 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.3 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.025 + } + }, + "family": "devstral", + "ownedBy": "mistral", + "openWeights": true + }, + { + "id": "mistral-medium-2508", + "name": "Mistral Medium 3.1", + "capabilities": ["function-call", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 262144, + "maxOutputTokens": 262144, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.4 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2 + } + }, + "family": "mistral-medium", + "ownedBy": "mistral", + "openWeights": false + }, + { + "id": "mistral-embed", + "name": "Mistral Embed", + "capabilities": ["embedding"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 8000, + "maxOutputTokens": 3072, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.1 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "family": "mistral-embed", + "ownedBy": "mistral", + "openWeights": false + }, + { + "id": "mistral-small-latest", + "name": "Mistral Small", + "capabilities": ["function-call", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 16384, + "pricing": { + "input": { + "currency": "USD", + 
"perMillionTokens": 0.1 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.3 + } + }, + "family": "mistral-small", + "ownedBy": "mistral", + "openWeights": true + }, + { + "id": "magistral-small", + "name": "Magistral Small", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 128000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.5 + } + }, + "family": "magistral-small", + "ownedBy": "mistral", + "openWeights": true + }, + { + "id": "devstral-small-2507", + "name": "Devstral Small", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 128000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.1 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.3 + } + }, + "family": "devstral", + "ownedBy": "mistral", + "openWeights": true + }, + { + "id": "codestral-latest", + "name": "Codestral", + "description": "Mistral has launched a new code model - Codestral 25.01; https://mistral.ai/news/codestral-2501/", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 256000, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.9 + } + }, + "family": "codestral", + "ownedBy": "mistral", + "openWeights": true + }, + { + "id": "open-mixtral-8x7b", + "name": "Mixtral 8x7B", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 32000, + "maxOutputTokens": 32000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.7 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.7 + } + }, 
+ "family": "mixtral", + "ownedBy": "mistral", + "openWeights": true + }, + { + "id": "mistral-nemo", + "name": "Mistral Nemo", + "description": "A 12B parameter model with a 128k token context length built by Mistral in collaboration with NVIDIA.\n\nThe model is multilingual, supporting English, French, German, Spanish, Italian, Portuguese, Chinese, Japanese, Korean, Arabic, and Hindi.\n\nIt supports function calling and is released under the Apache 2.0 license.", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 128000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.15 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.15 + } + }, + "family": "mistral-nemo", + "ownedBy": "mistral", + "openWeights": true + }, + { + "id": "open-mistral", + "name": "Mistral 7B", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 8000, + "maxOutputTokens": 8000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.25 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.25 + } + }, + "family": "mistral", + "ownedBy": "mistral", + "openWeights": true + }, + { + "id": "mistral-large-latest", + "name": "Mistral Large", + "capabilities": ["function-call", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 262144, + "maxOutputTokens": 262144, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.5 + } + }, + "family": "mistral-large", + "ownedBy": "mistral", + "openWeights": true + }, + { + "id": "mistral-medium-latest", + "name": "Mistral Medium", + "capabilities": ["function-call", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 
128000, + "maxOutputTokens": 16384, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.4 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2 + } + }, + "family": "mistral-medium", + "ownedBy": "mistral", + "openWeights": true + }, + { + "id": "mistral-large-2411", + "name": "Mistral Large 2.1", + "description": "Mistral Large 2 2411 is an update of [Mistral Large 2](/mistralai/mistral-large) released together with [Pixtral Large 2411](/mistralai/pixtral-large-2411)\n\nIt provides a significant upgrade on the previous [Mistral Large 24.07](/mistralai/mistral-large-2407), with notable improvements in long context understanding, a new system prompt, and more accurate function calling.", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 131072, + "maxOutputTokens": 16384, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 6 + } + }, + "family": "mistral-large", + "ownedBy": "mistral", + "openWeights": true + }, + { + "id": "magistral-medium-latest", + "name": "Magistral Medium", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 16384, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 5 + } + }, + "family": "magistral-medium", + "ownedBy": "mistral", + "openWeights": true + }, + { + "id": "bielik-v3-0-instruct", + "name": "Bielik 11B v3.0 Instruct", + "capabilities": ["function-call", "structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 32000, + "maxOutputTokens": 32000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.67 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.67 + } + }, + "ownedBy": 
"cloudferro-sherlock", + "openWeights": true + }, + { + "id": "bielik-v2-6-instruct", + "name": "Bielik 11B v2.6 Instruct", + "capabilities": ["function-call", "structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 32000, + "maxOutputTokens": 32000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.67 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.67 + } + }, + "ownedBy": "cloudferro-sherlock", + "openWeights": true + }, + { + "id": "gemini-2-0-flash-001", + "name": "Gemini 2.0 Flash", + "description": "Gemini Flash 2.0 offers a significantly faster time to first token (TTFT) compared to [Gemini Flash 1.5](/google/gemini-flash-1.5), while maintaining quality on par with larger models like [Gemini Pro 1.5](/google/gemini-pro-1.5). It introduces notable enhancements in multimodal understanding, coding capabilities, complex instruction following, and function calling. These advancements come together to deliver more seamless and robust agentic experiences.", + "capabilities": [ + "function-call", + "file-input", + "image-recognition", + "audio-recognition", + "video-recognition", + "web-search" + ], + "inputModalities": ["text", "image", "audio", "video"], + "outputModalities": ["text"], + "contextWindow": 1000000, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.1 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.4 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.025 + } + }, + "family": "gemini-flash", + "ownedBy": "google", + "openWeights": false + }, + { + "id": "route-llm", + "name": "Route LLM", + "capabilities": ["function-call", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 16384, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.5 + }, + "output": { + 
"currency": "USD", + "perMillionTokens": 1.5 + } + }, + "family": "gpt", + "ownedBy": "abacus", + "openWeights": false + }, + { + "id": "qwen-2-5-coder", + "name": "Qwen 2.5 Coder 32B", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.79 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.79 + } + }, + "family": "qwen", + "ownedBy": "alibaba", + "openWeights": true + }, + { + "id": "gemini-2-0-pro-exp-02-05", + "name": "Gemini 2.0 Pro Exp", + "description": "Integrated with Google's official search and internet connectivity features.", + "capabilities": [ + "function-call", + "file-input", + "image-recognition", + "audio-recognition", + "video-recognition", + "web-search" + ], + "inputModalities": ["text", "image", "audio", "video"], + "outputModalities": ["text"], + "contextWindow": 2000000, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.25 + }, + "output": { + "currency": "USD", + "perMillionTokens": 5 + } + }, + "family": "gemini-pro", + "ownedBy": "google", + "openWeights": false + }, + { + "id": "o3-pro", + "name": "o3-pro", + "description": "The o-series of models are trained with reinforcement learning to think before they answer and perform complex reasoning. The o3-pro model uses more compute to think harder and provide consistently better answers.\n\nNote that BYOK is required for this model. 
Set up here: https://openrouter.ai/settings/integrations", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition", "web-search"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 100000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 20 + }, + "output": { + "currency": "USD", + "perMillionTokens": 80 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 20 + } + }, + "reasoning": { + "supportedEfforts": ["low", "medium", "high"] + }, + "family": "o-pro", + "ownedBy": "openai", + "openWeights": false + }, + { + "id": "claude-3-7-sonnet-20250219", + "name": "Claude Sonnet 3.7", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition", "web-search", "computer-use"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 64000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 15 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.3 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 64000 + } + }, + "family": "claude-sonnet", + "ownedBy": "anthropic", + "openWeights": false, + "alias": ["claude-3-7-sonnet", "claude-3-7-sonnet-latest"] + }, + { + "id": "gpt-4o-2024-11-20", + "name": "GPT-4o (2024-11-20)", + "description": "The 2024-11-20 version of GPT-4o offers a leveled-up creative writing ability with more natural, engaging, and tailored writing to improve relevance & readability. It’s also better at working with uploaded files, providing deeper insights & more thorough responses.\n\nGPT-4o (\"o\" for \"omni\") is OpenAI's latest AI model, supporting both text and image inputs with text outputs. 
It maintains the intelligence level of [GPT-4 Turbo](/models/openai/gpt-4-turbo) while being twice as fast and 50% more cost-effective. GPT-4o also offers improved performance in processing non-English languages and enhanced visual capabilities.", + "capabilities": ["function-call", "file-input", "image-recognition", "audio-recognition", "web-search"], + "inputModalities": ["text", "image", "audio"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 16384, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2.5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 10 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 1.25 + } + }, + "family": "gpt", + "ownedBy": "openai", + "openWeights": false + }, + { + "id": "gpt-4o-mini", + "name": "GPT-4o Mini", + "description": "GPT-4o mini is OpenAI's newest model after [GPT-4 Omni](/models/openai/gpt-4o), supporting both text and image inputs with text outputs.\n\nAs their most advanced small model, it is many multiples more affordable than other recent frontier models, and more than 60% cheaper than [GPT-3.5 Turbo](/models/openai/gpt-3.5-turbo). 
It maintains SOTA intelligence, while being significantly more cost-effective.\n\nGPT-4o mini achieves an 82% score on MMLU and presently ranks higher than GPT-4 on chat preferences [common leaderboards](https://arena.lmsys.org/).\n\nCheck out the [launch announcement](https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/) to learn more.\n\n#multimodal", + "capabilities": ["function-call", "file-input", "image-recognition", "web-search"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 16384, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.15 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.6 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.08 + } + }, + "family": "gpt", + "ownedBy": "openai", + "openWeights": false + }, + { + "id": "llama-3-1-instruct-turbo", + "name": "Llama 3.1 405B Instruct Turbo", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 3.5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 3.5 + } + }, + "family": "llama", + "ownedBy": "meta", + "openWeights": true + }, + { + "id": "llama-4-maverick-128e-instruct-fp8", + "name": "Llama 4 Maverick 17B 128E Instruct FP8", + "capabilities": ["function-call", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 1000000, + "maxOutputTokens": 32768, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.14 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.59 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.075 + } + }, + "family": "llama", + "ownedBy": "meta", + "openWeights": true + }, + { + "id": "kimi-k2-turbo", + "name": "Kimi K2 Turbo", + 
"capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 256000, + "maxOutputTokens": 16384, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2.4 + }, + "output": { + "currency": "USD", + "perMillionTokens": 10 + } + }, + "family": "kimi", + "ownedBy": "moonshot", + "openWeights": false + }, + { + "id": "qwen3-coder", + "name": "Qwen3 Coder 480B A35B Instruct", + "description": "Qwen3-Coder-480B-A35B-Instruct is a Mixture-of-Experts (MoE) code generation model developed by the Qwen team. It is optimized for agentic coding tasks such as function calling, tool use, and long-context reasoning over repositories. The model features 480 billion total parameters, with 35 billion active per forward pass (8 out of 160 experts).\n\nPricing for the Alibaba endpoints varies by context length. Once a request is greater than 128k input tokens, the higher pricing is used.", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 262144, + "maxOutputTokens": 66536, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.38 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.53 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.022 + } + }, + "family": "qwen", + "ownedBy": "alibaba", + "openWeights": false + }, + { + "id": "qwen-3", + "name": "Qwen 3.32B", + "description": "cerebras", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 40960, + "maxOutputTokens": 16384, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.1 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.3 + } + }, + "family": "qwen", + "ownedBy": "alibaba", + "openWeights": false + }, + { + "id": "qwen3-vl-instruct", + "name": "Qwen3 VL Instruct", + "description": "Qwen3-VL-32B-Instruct is a large-scale multimodal 
vision-language model designed for high-precision understanding and reasoning across text, images, and video. With 32 billion parameters, it combines deep visual perception with advanced text comprehension, enabling fine-grained spatial reasoning, document and scene analysis, and long-horizon video understanding.Robust OCR in 32 languages, and enhanced multimodal fusion through Interleaved-MRoPE and DeepStack architectures. Optimized for agentic interaction and visual tool use, Qwen3-VL-32B delivers state-of-the-art performance for complex real-world multimodal tasks.", + "capabilities": ["function-call", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 131072, + "maxOutputTokens": 129024, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.7 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2.8 + } + }, + "family": "qwen", + "ownedBy": "alibaba", + "openWeights": true + }, + { + "id": "qwen3-vl", + "name": "Qwen3 VL Thinking", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 131072, + "maxOutputTokens": 129024, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.7 + }, + "output": { + "currency": "USD", + "perMillionTokens": 8.4 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 38912 + } + }, + "family": "qwen", + "ownedBy": "alibaba", + "openWeights": true + }, + { + "id": "qwen3-max-preview", + "name": "Qwen3 Max Preview", + "description": "Qwen3-Max-Preview is the latest preview model in the Qwen3 series. This version is functionally equivalent to Qwen3-Max-Thinking — simply set extra_body={\"enable_thinking\": True} to enable the thinking mode. 
Compared to the Qwen2.5 series, it delivers significant improvements in overall general capabilities, including English–Chinese text understanding, complex instruction following, open-ended reasoning, multilingual processing, and tool-use proficiency. The model also exhibits fewer hallucinations and stronger overall reliability.", + "capabilities": ["function-call", "reasoning"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 262144, + "maxOutputTokens": 32768, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 6 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.24 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 0, + "max": 81920 + } + }, + "family": "qwen", + "ownedBy": "alibaba", + "openWeights": false + }, + { + "id": "longcat-flash", + "name": "LongCat Flash Thinking", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.15 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.5 + } + }, + "family": "longcat", + "ownedBy": "meituan", + "openWeights": false + }, + { + "id": "longcat-flash-chat", + "name": "LongCat Flash Chat", + "description": "LongCat-Flash-Chat is a large-scale Mixture-of-Experts (MoE) model with 560B total parameters, of which 18.6B–31.3B (≈27B on average) are dynamically activated per input. 
It introduces a shortcut-connected MoE design to reduce communication overhead and achieve high throughput while maintaining training stability through advanced scaling strategies such as hyperparameter transfer, deterministic computation, and multi-stage optimization.\n\nThis release, LongCat-Flash-Chat, is a non-thinking foundation model optimized for conversational and agentic tasks. It supports long context windows up to 128K tokens and shows competitive performance across reasoning, coding, instruction following, and domain benchmarks, with particular strengths in tool use and complex multi-step interactions.", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.8 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.2 + } + }, + "family": "longcat", + "ownedBy": "meituan", + "openWeights": false + }, + { + "id": "grok-imagine-image", + "name": "Grok Imagine Image", + "capabilities": ["image-generation", "web-search"], + "inputModalities": ["text"], + "outputModalities": ["text", "image"], + "family": "grok", + "ownedBy": "xai", + "openWeights": false + }, + { + "id": "grok-imagine-image-pro", + "name": "Grok Imagine Image Pro", + "capabilities": ["image-generation", "web-search"], + "inputModalities": ["text"], + "outputModalities": ["text", "image"], + "family": "grok", + "ownedBy": "xai", + "openWeights": false + }, + { + "id": "nemotron-nano-v2-vl", + "name": "Nvidia Nemotron Nano 12B V2 VL", + "description": "NVIDIA Nemotron Nano 2 VL is a 12-billion-parameter open multimodal reasoning model designed for video understanding and document intelligence. 
It introduces a hybrid Transformer-Mamba architecture, combining transformer-level accuracy with Mamba’s memory-efficient sequence modeling for significantly higher throughput and lower latency.\n\nThe model supports inputs of text and multi-image documents, producing natural-language outputs. It is trained on high-quality NVIDIA-curated synthetic datasets optimized for optical-character recognition, chart reasoning, and multimodal comprehension.\n\nNemotron Nano 2 VL achieves leading results on OCRBench v2 and scores ≈ 74 average across MMMU, MathVista, AI2D, OCRBench, OCR-Reasoning, ChartQA, DocVQA, and Video-MME—surpassing prior open VL baselines. With Efficient Video Sampling (EVS), it handles long-form videos while reducing inference cost.\n\nOpen-weights, training data, and fine-tuning recipes are released under a permissive NVIDIA open license, with deployment supported across NeMo, NIM, and major inference runtimes.", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 131072, + "maxOutputTokens": 131072, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.6 + } + }, + "family": "nemotron", + "ownedBy": "nvidia", + "openWeights": false + }, + { + "id": "embed-v4-0", + "name": "Embed v4.0", + "capabilities": ["embedding"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 8192, + "maxOutputTokens": 1536, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.12 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "family": "cohere-embed", + "ownedBy": "cohere", + "openWeights": false + }, + { + "id": "command-a", + "name": "Command A", + "description": "Command A is an open-weights 111B parameter model with a 256k context window focused on delivering great performance across 
agentic, multilingual, and coding use cases.\nCompared to other leading proprietary and open-weights models Command A delivers maximum performance with minimum hardware costs, excelling on business-critical agentic and multilingual tasks.", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 256000, + "maxOutputTokens": 8000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2.5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 10 + } + }, + "family": "command", + "ownedBy": "cohere", + "openWeights": false + }, + { + "id": "kat-coder-pro-v1", + "name": "KAT-Coder-Pro V1", + "capabilities": ["reasoning"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 256000, + "maxOutputTokens": 32000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.2 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.06 + } + }, + "family": "kat-coder", + "ownedBy": "streamlake", + "openWeights": false + }, + { + "id": "mistral", + "name": "Mistral Medium 3.1", + "description": "Mistral Medium 3 is a SOTA & versatile model designed for a wide range of tasks, including programming, mathematical reasoning, understanding long documents, summarization, and dialogue.\n\nIt boasts multi-modal capabilities, enabling it to process visual inputs, and supports dozens of languages, including over 80 coding languages. Additionally, it features function calling and agentic workflows.\n\nMistral Medium 3 is optimized for single-node inference, particularly for long-context applications. 
Its size allows it to achieve high throughput on a single node.", + "capabilities": ["function-call", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 64000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.4 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2 + } + }, + "family": "mistral-medium", + "ownedBy": "mistral", + "openWeights": false + }, + { + "id": "codestral-embed", + "name": "Codestral Embed", + "capabilities": ["embedding"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 8192, + "maxOutputTokens": 1536, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.15 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "family": "codestral-embed", + "ownedBy": "mistral", + "openWeights": false + }, + { + "id": "devstral-2", + "name": "Devstral 2", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 256000, + "maxOutputTokens": 256000, + "family": "devstral", + "ownedBy": "mistral", + "openWeights": false + }, + { + "id": "devstral-small", + "name": "Devstral Small 1.1", + "description": "Devstral Small 1.1 is a 24B parameter open-weight language model for software engineering agents, developed by Mistral AI in collaboration with All Hands AI. Finetuned from Mistral Small 3.1 and released under the Apache 2.0 license, it features a 128k token context window and supports both Mistral-style function calling and XML output formats.\n\nDesigned for agentic coding workflows, Devstral Small 1.1 is optimized for tasks such as codebase exploration, multi-file edits, and integration into autonomous development agents like OpenHands and Cline. 
It achieves 53.6% on SWE-Bench Verified, surpassing all other open models on this benchmark, while remaining lightweight enough to run on a single 4090 GPU or Apple silicon machine. The model uses a Tekken tokenizer with a 131k vocabulary and is deployable via vLLM, Transformers, Ollama, LM Studio, and other OpenAI-compatible runtimes.\n", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 64000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.1 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.3 + } + }, + "family": "devstral", + "ownedBy": "mistral", + "openWeights": false + }, + { + "id": "mistral-large-3", + "name": "Mistral Large 3", + "description": "Mistral Large 3 is a MoE model with 67.5B total parameters and 41B active parameters, supporting a 256K-token context window. Trained from scratch on 3,000 NVIDIA H200 GPUs, it is one of the strongest permissively licensed open-weight models available.\n\nDesigned for advanced reasoning and long-context understanding, Mistral Large 3 delivers performance on par with the best instruction-tuned open-weight models for general-purpose tasks, while also offering image understanding capabilities. Its multilingual strengths are particularly notable for non-English/Chinese languages, making it well-suited for global applications.\n\nTypical use cases include enterprise assistants, multilingual customer support, content generation and editing, data analysis over long documents, code assistance, and research workflows that require handling large corpora or complex instructions. 
With its MoE architecture, Mistral Large 3 balances strong performance with efficient inference, providing a versatile backbone for building reliable, production-grade AI systems.", + "capabilities": ["file-input", "image-recognition", "function-call"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 256000, + "maxOutputTokens": 256000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.5 + } + }, + "family": "mistral-large", + "ownedBy": "mistral", + "openWeights": false + }, + { + "id": "ministral", + "name": "Ministral 14B", + "capabilities": ["file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 256000, + "maxOutputTokens": 256000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.2 + } + }, + "family": "ministral", + "ownedBy": "mistral", + "openWeights": false + }, + { + "id": "devstral-small-2", + "name": "Devstral Small 2", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 256000, + "maxOutputTokens": 256000, + "family": "devstral", + "ownedBy": "mistral", + "openWeights": false + }, + { + "id": "codestral", + "name": "Codestral", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 256000, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.9 + } + }, + "family": "codestral", + "ownedBy": "mistral", + "openWeights": true + }, + { + "id": "magistral", + "name": "Magistral Medium", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + 
"maxOutputTokens": 16384, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 5 + } + }, + "family": "magistral-medium", + "ownedBy": "mistral", + "openWeights": true + }, + { + "id": "pixtral-large", + "name": "Pixtral Large", + "capabilities": ["function-call", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 128000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 6 + } + }, + "family": "pixtral", + "ownedBy": "mistral", + "openWeights": true + }, + { + "id": "mistral-small", + "name": "Mistral Small", + "capabilities": ["function-call", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 16384, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.1 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.3 + } + }, + "family": "mistral-small", + "ownedBy": "mistral", + "openWeights": true + }, + { + "id": "mixtral-8x22b-instruct", + "name": "Mixtral 8x22B", + "description": "Mistral's official instruct fine-tuned version of [Mixtral 8x22B](/models/mistralai/mixtral-8x22b). It uses 39B active parameters out of 141B, offering unparalleled cost efficiency for its size. 
Its strengths include:\n- strong math, coding, and reasoning\n- large context length (64k)\n- fluency in English, French, Italian, German, and Spanish\n\nSee benchmarks on the launch announcement [here](https://mistral.ai/news/mixtral-8x22b/).\n#moe", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 64000, + "maxOutputTokens": 64000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 6 + } + }, + "family": "mixtral", + "ownedBy": "mistral", + "openWeights": true + }, + { + "id": "v0-1-0-md", + "name": "v0-1.0-md", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 32000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 15 + } + }, + "family": "v0", + "ownedBy": "vercel", + "openWeights": false + }, + { + "id": "v0-1-5-md", + "name": "v0-1.5-md", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 32000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 15 + } + }, + "family": "v0", + "ownedBy": "vercel", + "openWeights": false + }, + { + "id": "deepseek-v3", + "name": "DeepSeek V3 0324", + "description": "It has been automatically upgraded to the latest released version, 250324.\nAutomatically upgraded to the latest released version 250324.", + "capabilities": ["function-call", "reasoning"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 163840, + "maxOutputTokens": 16384, + "pricing": { + "input": { + 
"currency": "USD", + "perMillionTokens": 0.77 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.77 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.07 + } + }, + "reasoning": { + "supportedEfforts": ["none"] + }, + "family": "deepseek", + "ownedBy": "deepseek", + "openWeights": false + }, + { + "id": "deepseek-v3-2-exp", + "name": "DeepSeek V3.2 Exp", + "description": "DeepSeek-V3.2-Exp is an experimental large language model released by DeepSeek as an intermediate step between V3.1 and future architectures. It introduces DeepSeek Sparse Attention (DSA), a fine-grained sparse attention mechanism designed to improve training and inference efficiency in long-context scenarios while maintaining output quality. Users can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)\n\nThe model was trained under conditions aligned with V3.1-Terminus to enable direct comparison. Benchmarking shows performance roughly on par with V3.1 across reasoning, coding, and agentic tool-use tasks, with minor tradeoffs and gains depending on the domain. 
This release focuses on validating architectural optimizations for extended context lengths rather than advancing raw task accuracy, making it primarily a research-oriented model for exploring efficient transformer designs.", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 163840, + "maxOutputTokens": 163840, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.27 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.4 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.0274 + } + }, + "reasoning": { + "supportedEfforts": ["none"] + }, + "family": "deepseek", + "ownedBy": "deepseek", + "openWeights": false + }, + { + "id": "recraft-v3", + "name": "Recraft V3", + "capabilities": ["image-generation"], + "inputModalities": ["text"], + "outputModalities": ["image"], + "contextWindow": 512, + "family": "recraft", + "ownedBy": "recraft", + "openWeights": false + }, + { + "id": "recraft-v2", + "name": "Recraft V2", + "capabilities": ["image-generation"], + "inputModalities": ["text"], + "outputModalities": ["image"], + "contextWindow": 512, + "family": "recraft", + "ownedBy": "recraft", + "openWeights": false + }, + { + "id": "flux-kontext-pro", + "name": "FLUX.1 Kontext Pro", + "capabilities": ["image-generation"], + "inputModalities": ["text"], + "outputModalities": ["image"], + "contextWindow": 512, + "family": "flux", + "ownedBy": "bfl", + "openWeights": false + }, + { + "id": "flux-kontext-max", + "name": "FLUX.1 Kontext Max", + "capabilities": ["image-generation"], + "inputModalities": ["text"], + "outputModalities": ["image"], + "contextWindow": 512, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "family": "flux", + "ownedBy": "bfl", + "openWeights": false + }, + { + "id": "flux-pro-1-1", + "name": "FLUX1.1 [pro]", + "capabilities": 
["image-generation"], + "inputModalities": ["text"], + "outputModalities": ["image"], + "contextWindow": 512, + "family": "flux", + "ownedBy": "bfl", + "openWeights": false + }, + { + "id": "flux-pro-1-1-ultra", + "name": "FLUX1.1 [pro] Ultra", + "capabilities": ["image-generation"], + "inputModalities": ["text"], + "outputModalities": ["image"], + "contextWindow": 512, + "family": "flux", + "ownedBy": "bfl", + "openWeights": false + }, + { + "id": "flux-pro-1-0-fill", + "name": "FLUX.1 Fill [pro]", + "capabilities": ["image-generation"], + "inputModalities": ["text"], + "outputModalities": ["image"], + "contextWindow": 512, + "family": "flux", + "ownedBy": "bfl", + "openWeights": false + }, + { + "id": "trinity-mini", + "name": "Trinity Mini", + "description": "Trinity Mini is a 26B-parameter (3B active) sparse mixture-of-experts language model featuring 128 experts with 8 active per token. Engineered for efficient reasoning over long contexts (131k) with robust function calling and multi-step agent workflows.", + "capabilities": ["function-call", "structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 131072, + "maxOutputTokens": 131072, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.05 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.15 + } + }, + "family": "trinity", + "ownedBy": "arceeai", + "openWeights": false + }, + { + "id": "trinity-large-preview", + "name": "Trinity Large Preview", + "description": "Trinity-Large-Preview is a frontier-scale open-weight language model from Arcee, built as a 400B-parameter sparse Mixture-of-Experts with 13B active parameters per token using 4-of-256 expert routing. \n\nIt excels in creative writing, storytelling, role-play, chat scenarios, and real-time voice assistance, better than your average reasoning model usually can. But we’re also introducing some of our newer agentic performance. 
It was trained to navigate well in agent harnesses like OpenCode, Cline, and Kilo Code, and to handle complex toolchains and long, constraint-filled prompts. \n\nThe architecture natively supports very long context windows up to 512k tokens, with the Preview API currently served at 128k context using 8-bit quantization for practical deployment. Trinity-Large-Preview reflects Arcee’s efficiency-first design philosophy, offering a production-oriented frontier model with open weights and permissive licensing suitable for real-world applications and experimentation.", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 131000, + "maxOutputTokens": 131000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.25 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1 + } + }, + "family": "trinity", + "ownedBy": "arceeai", + "openWeights": false + }, + { + "id": "minimax-m2-1-lightning", + "name": "MiniMax M2.1 Lightning", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 204800, + "maxOutputTokens": 131072, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2.4 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.03 + } + }, + "reasoning": { + "supportedEfforts": [], + "interleaved": true + }, + "family": "minimax", + "ownedBy": "minimax", + "openWeights": false + }, + { + "id": "seed-1-6", + "name": "Seed 1.6", + "description": "Seed 1.6 is a general-purpose model released by the ByteDance Seed team. 
It incorporates multimodal capabilities and adaptive deep thinking with a 256K context window.", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 256000, + "maxOutputTokens": 32000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.25 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.05 + } + }, + "family": "seed", + "ownedBy": "bytedance", + "openWeights": false + }, + { + "id": "seed-1-8", + "name": "Seed 1.8", + "capabilities": ["reasoning", "function-call", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 256000, + "maxOutputTokens": 64000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.25 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.05 + } + }, + "family": "seed", + "ownedBy": "bytedance", + "openWeights": false + }, + { + "id": "voyage-code-2", + "name": "voyage-code-2", + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 8192, + "maxOutputTokens": 1536, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.12 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "family": "voyage", + "ownedBy": "voyage", + "openWeights": false + }, + { + "id": "voyage-3-5-lite", + "name": "voyage-3.5-lite", + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 8192, + "maxOutputTokens": 1536, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.02 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "family": "voyage", + "ownedBy": "voyage", + "openWeights": false + }, + { + "id": "voyage-3-5", + "name": "voyage-3.5", + "inputModalities": ["text"], + "outputModalities": ["text"], + 
"contextWindow": 8192, + "maxOutputTokens": 1536, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.06 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "family": "voyage", + "ownedBy": "voyage", + "openWeights": false + }, + { + "id": "voyage-finance-2", + "name": "voyage-finance-2", + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 8192, + "maxOutputTokens": 1536, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.12 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "family": "voyage", + "ownedBy": "voyage", + "openWeights": false + }, + { + "id": "voyage-law-2", + "name": "voyage-law-2", + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 8192, + "maxOutputTokens": 1536, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.12 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "family": "voyage", + "ownedBy": "voyage", + "openWeights": false + }, + { + "id": "voyage-code-3", + "name": "voyage-code-3", + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 8192, + "maxOutputTokens": 1536, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.18 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "family": "voyage", + "ownedBy": "voyage", + "openWeights": false + }, + { + "id": "voyage-3-large", + "name": "voyage-3-large", + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 8192, + "maxOutputTokens": 1536, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.18 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "family": "voyage", + "ownedBy": "voyage", + "openWeights": false + }, + { + "id": "gemini-embedding-001", + "name": "Gemini Embedding 001", + "description": "gemini-embedding-001 provides a unified cutting edge 
experience across domains, including science, legal, finance, and coding. This embedding model has consistently held a top spot on the Massive Text Embedding Benchmark (MTEB) Multilingual leaderboard since the experimental launch in March.", + "capabilities": ["embedding"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 8192, + "maxOutputTokens": 1536, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.15 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "family": "gemini-embedding", + "ownedBy": "google", + "openWeights": false + }, + { + "id": "imagen-4-0-ultra-generate-001", + "name": "Imagen 4 Ultra", + "capabilities": ["image-generation"], + "inputModalities": ["text"], + "outputModalities": ["image"], + "contextWindow": 480, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2 + } + }, + "family": "imagen", + "ownedBy": "google", + "openWeights": false + }, + { + "id": "imagen-4-0-fast-generate-001", + "name": "Imagen 4 Fast", + "description": "Imagen 4 is a new-generation image generation model designed to balance high-quality output, inference efficiency, and content safety. It supports image generation, digital watermarking with authenticity verification, user-configurable safety settings, and prompt enhancement via the Prompt Rewriter, while also delivering reliable person generation capabilities. 
The model ID is imagen-4.0-generate-001, making it suitable for professional creation, design workflows, and various generative AI applications.", + "capabilities": ["image-generation"], + "inputModalities": ["text"], + "outputModalities": ["image"], + "contextWindow": 480, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2 + } + }, + "family": "imagen", + "ownedBy": "google", + "openWeights": false + }, + { + "id": "imagen-4-0-generate-001", + "name": "Imagen 4", + "description": "Imagen 4 is a new-generation image generation model designed to balance high-quality output, inference efficiency, and content safety. It supports image generation, digital watermarking with authenticity verification, user-configurable safety settings, and prompt enhancement via the Prompt Rewriter, while also delivering reliable person generation capabilities. The model ID is imagen-4.0-generate-001, making it suitable for professional creation, design workflows, and various generative AI applications.", + "capabilities": ["image-generation"], + "inputModalities": ["text"], + "outputModalities": ["image"], + "contextWindow": 480, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2 + } + }, + "family": "imagen", + "ownedBy": "google", + "openWeights": false + }, + { + "id": "text-multilingual-embedding-002", + "name": "Text Multilingual Embedding 002", + "capabilities": ["embedding"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 8192, + "maxOutputTokens": 1536, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.03 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "family": "text-embedding", + "ownedBy": "google", + "openWeights": false + }, + { + "id": "gemini-3-pro-image", + "name": "Nano Banana Pro (Gemini 3 Pro Image)", + 
"capabilities": ["image-generation", "function-call", "image-recognition", "web-search"], + "inputModalities": ["text"], + "outputModalities": ["text", "image"], + "contextWindow": 65536, + "maxOutputTokens": 32768, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 120 + } + }, + "family": "gemini-pro", + "ownedBy": "google", + "openWeights": false + }, + { + "id": "text-embedding-005", + "name": "Text Embedding 005", + "capabilities": ["embedding"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 8192, + "maxOutputTokens": 1536, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.03 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "family": "text-embedding", + "ownedBy": "google", + "openWeights": false + }, + { + "id": "gemini-2-5-flash-image-preview", + "name": "Nano Banana Preview (Gemini 2.5 Flash Image Preview)", + "description": "Aihubmix supports the gemini-2.5-flash-image-preview model; you can add extra parameters modalities=[\"text\", \"image\"] through the OpenAI-compatible chat interface; https://docs.aihubmix.com/en/api/Gemini-Guides#gemini-2-5-flash%3A-quick-task-support", + "capabilities": ["image-generation", "function-call", "image-recognition", "reasoning"], + "inputModalities": ["text"], + "outputModalities": ["text", "image"], + "contextWindow": 32768, + "maxOutputTokens": 32768, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2.5 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.075 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 0, + "max": 24576 + } + }, + "family": "gemini-flash", + "ownedBy": "google", + "openWeights": false + }, + { + "id": "gemini-3-flash", + "name": "Gemini 3 Flash", + 
"capabilities": ["reasoning", "function-call", "file-input", "image-recognition", "web-search"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 1000000, + "maxOutputTokens": 64000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 3 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.05 + } + }, + "reasoning": { + "supportedEfforts": ["minimal", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 0, + "max": 24576 + } + }, + "family": "gemini-flash", + "ownedBy": "google", + "openWeights": false + }, + { + "id": "gemini-2-0-flash", + "name": "Gemini 2.0 Flash", + "description": "gemini-2.0-flash-free is the free, publicly available version of gemini-2.0-flash, offering the same model capabilities with usage limits in place to ensure service stability. Limits include up to 5 requests per minute, a maximum of 500 requests per day, and a daily quota of 1,000,000 tokens. Free usage is based on shared capacity and is limited in availability. 
This version is intended for testing and light usage; for consistent and reliable access, please switch to the paid model.", + "capabilities": [ + "function-call", + "structured-output", + "file-input", + "image-recognition", + "audio-recognition", + "video-recognition", + "web-search" + ], + "inputModalities": ["text", "image", "audio", "video"], + "outputModalities": ["text"], + "contextWindow": 1048576, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.1 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.4 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.025 + } + }, + "family": "gemini-flash", + "ownedBy": "google", + "openWeights": false + }, + { + "id": "intellect-3", + "name": "INTELLECT 3", + "description": "INTELLECT-3 is a 106B-parameter Mixture-of-Experts model (12B active) post-trained from GLM-4.5-Air-Base using supervised fine-tuning (SFT) followed by large-scale reinforcement learning (RL). It offers state-of-the-art performance for its size across math, code, science, and general reasoning, consistently outperforming many larger frontier models. 
Designed for strong multi-step problem solving, it maintains high accuracy on structured tasks while remaining efficient at inference thanks to its MoE architecture.", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 131072, + "maxOutputTokens": 131072, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.1 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.02 + } + }, + "family": "intellect", + "ownedBy": "vercel", + "openWeights": false + }, + { + "id": "mercury-coder-small", + "name": "Mercury Coder Small Beta", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 32000, + "maxOutputTokens": 16384, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.25 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1 + } + }, + "family": "mercury", + "ownedBy": "inception", + "openWeights": false + }, + { + "id": "text-embedding-3-small", + "name": "text-embedding-3-small", + "description": " text-embedding-3-small is OpenAI's improved, more performant version of the ada embedding model. Embeddings are a numerical representation of text that can be used to measure the relatedness between two pieces of text. 
Embeddings are useful for search, clustering, recommendations, anomaly detection, and classification tasks.", + "capabilities": ["embedding"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 8192, + "maxOutputTokens": 1536, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.02 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "family": "text-embedding", + "ownedBy": "openai", + "openWeights": false + }, + { + "id": "gpt-5-2-chat", + "name": "GPT-5.2 Chat", + "description": "GPT-5.2 Chat (AKA Instant) is the fast, lightweight member of the 5.2 family, optimized for low-latency chat while retaining strong general intelligence. It uses adaptive reasoning to selectively “think” on harder queries, improving accuracy on math, coding, and multi-step tasks without slowing down typical conversations. The model is warmer and more conversational by default, with better instruction following and more stable short-form reasoning. GPT-5.2 Chat is designed for high-throughput, interactive workloads where responsiveness and consistency matter more than deep deliberation.", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition", "web-search"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 16384, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.75 + }, + "output": { + "currency": "USD", + "perMillionTokens": 14 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.18 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high", "max"] + }, + "family": "gpt", + "ownedBy": "openai", + "openWeights": false + }, + { + "id": "gpt-oss-safeguard", + "name": "gpt-oss-safeguard-20b", + "description": "gpt-oss-safeguard-20b is a safety reasoning model from OpenAI built upon gpt-oss-20b. 
This open-weight, 21B-parameter Mixture-of-Experts (MoE) model offers lower latency for safety tasks like content classification, LLM filtering, and trust & safety labeling.\n\nLearn more about this model in OpenAI's gpt-oss-safeguard [user guide](https://cookbook.openai.com/articles/gpt-oss-safeguard-guide).", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 131072, + "maxOutputTokens": 65536, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.08 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.3 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.04 + } + }, + "reasoning": { + "supportedEfforts": ["low", "medium", "high"] + }, + "family": "gpt-oss", + "ownedBy": "openai", + "openWeights": false + }, + { + "id": "gpt-5-chat", + "name": "GPT-5 Chat", + "description": "GPT-5 Chat is designed for advanced, natural, multimodal, and context-aware conversations for enterprise applications.", + "capabilities": [ + "reasoning", + "function-call", + "file-input", + "image-recognition", + "image-generation", + "web-search" + ], + "inputModalities": ["text", "image"], + "outputModalities": ["text", "image"], + "contextWindow": 128000, + "maxOutputTokens": 16384, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.25 + }, + "output": { + "currency": "USD", + "perMillionTokens": 10 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.13 + } + }, + "reasoning": { + "supportedEfforts": ["minimal", "low", "medium", "high"] + }, + "family": "gpt", + "ownedBy": "openai", + "openWeights": false + }, + { + "id": "o3-deep-research", + "name": "o3-deep-research", + "description": "o3-deep-research is OpenAI's advanced model for deep research, designed to tackle complex, multi-step research tasks.\n\nNote: This model always uses the 'web_search' tool which adds additional cost.", + "capabilities": ["reasoning", 
"function-call", "file-input", "image-recognition", "web-search"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 100000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 10 + }, + "output": { + "currency": "USD", + "perMillionTokens": 40 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 2.5 + } + }, + "reasoning": { + "supportedEfforts": ["medium"] + }, + "family": "o", + "ownedBy": "openai", + "openWeights": false + }, + { + "id": "gpt-3-5-turbo", + "name": "GPT-3.5 Turbo", + "description": "GPT-3.5 Turbo is OpenAI's fastest model. It can understand and generate natural language or code, and is optimized for chat and traditional completion tasks.\n\nTraining data up to Sep 2021.", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 16385, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.5 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 1.25 + } + }, + "family": "gpt", + "ownedBy": "openai", + "openWeights": false + }, + { + "id": "text-embedding-3-large", + "name": "text-embedding-3-large", + "description": "text-embedding-3-large is OpenAI's most capable embedding model for both english and non-english tasks. Embeddings are a numerical representation of text that can be used to measure the relatedness between two pieces of text. 
Embeddings are useful for search, clustering, recommendations, anomaly detection, and classification tasks.", + "capabilities": ["embedding"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 8192, + "maxOutputTokens": 1536, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.13 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "family": "text-embedding", + "ownedBy": "openai", + "openWeights": false + }, + { + "id": "gpt-5-1-instant", + "name": "GPT-5.1 Instant", + "capabilities": [ + "reasoning", + "function-call", + "file-input", + "image-recognition", + "image-generation", + "web-search" + ], + "inputModalities": ["text", "image"], + "outputModalities": ["text", "image"], + "contextWindow": 128000, + "maxOutputTokens": 16384, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.25 + }, + "output": { + "currency": "USD", + "perMillionTokens": 10 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.13 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"] + }, + "family": "gpt", + "ownedBy": "openai", + "openWeights": false + }, + { + "id": "text-embedding-ada-002", + "name": "text-embedding-ada-002", + "description": "text-embedding-ada-002 is OpenAI's legacy text embedding model.", + "capabilities": ["embedding"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 8192, + "maxOutputTokens": 1536, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.1 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "family": "text-embedding", + "ownedBy": "openai", + "openWeights": false + }, + { + "id": "gpt-3-5-turbo-instruct", + "name": "GPT-3.5 Turbo Instruct", + "description": "This model is a variant of GPT-3.5 Turbo tuned for instructional prompts and omitting chat-related optimizations. 
Training data: up to Sep 2021.", + "capabilities": ["function-call", "file-input", "image-recognition"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 8192, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2 + } + }, + "family": "gpt", + "ownedBy": "openai", + "openWeights": false + }, + { + "id": "codex-mini", + "name": "Codex Mini", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 100000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 6 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.38 + } + }, + "family": "gpt-codex-mini", + "ownedBy": "vercel", + "openWeights": false + }, + { + "id": "gpt-4o-mini-search-preview", + "name": "GPT 4o Mini Search Preview", + "description": "GPT-4o mini Search Preview is a specialized model for web search in Chat Completions. It is trained to understand and execute web search queries.", + "capabilities": ["function-call", "image-recognition", "web-search"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 16384, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.15 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.6 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.075 + } + }, + "family": "gpt-mini", + "ownedBy": "openai", + "openWeights": false + }, + { + "id": "o1", + "name": "o1", + "description": "The latest and strongest model family from OpenAI, o1 is designed to spend more time thinking before responding. 
The o1 model series is trained with large-scale reinforcement learning to reason using chain of thought. \n\nThe o1 models are optimized for math, science, programming, and other STEM-related tasks. They consistently exhibit PhD-level accuracy on benchmarks in physics, chemistry, and biology. Learn more in the [launch announcement](https://openai.com/o1).\n", + "capabilities": ["reasoning", "function-call", "structured-output", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 100000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 15 + }, + "output": { + "currency": "USD", + "perMillionTokens": 60 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 7.5 + } + }, + "reasoning": { + "supportedEfforts": ["low", "medium", "high"] + }, + "family": "o", + "ownedBy": "openai", + "openWeights": false + }, + { + "id": "gpt-4-turbo", + "name": "GPT-4 Turbo", + "description": "The latest GPT-4 Turbo model with vision capabilities. 
Vision requests can now use JSON mode and function calling.\n\nTraining data: up to December 2023.", + "capabilities": ["function-call", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 10 + }, + "output": { + "currency": "USD", + "perMillionTokens": 30 + } + }, + "family": "gpt", + "ownedBy": "openai", + "openWeights": false + }, + { + "id": "sonar-reasoning", + "name": "Sonar Reasoning", + "capabilities": ["reasoning", "web-search"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 127000, + "maxOutputTokens": 8000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1 + }, + "output": { + "currency": "USD", + "perMillionTokens": 5 + } + }, + "family": "sonar-reasoning", + "ownedBy": "perplexity", + "openWeights": false + }, + { + "id": "sonar", + "name": "Sonar", + "description": "Sonar is lightweight, affordable, fast, and simple to use — now featuring citations and the ability to customize sources. It is designed for companies seeking to integrate lightweight question-and-answer features optimized for speed.", + "capabilities": ["function-call", "file-input", "image-recognition", "web-search"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 127000, + "maxOutputTokens": 8000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1 + } + }, + "family": "sonar", + "ownedBy": "perplexity", + "openWeights": false + }, + { + "id": "sonar-pro", + "name": "Sonar Pro", + "description": "Note: Sonar Pro pricing includes Perplexity search pricing. 
See [details here](https://docs.perplexity.ai/guides/pricing#detailed-pricing-breakdown-for-sonar-reasoning-pro-and-sonar-pro)\n\nFor enterprises seeking more advanced capabilities, the Sonar Pro API can handle in-depth, multi-step queries with added extensibility, like double the number of citations per search as Sonar on average. Plus, with a larger context window, it can handle longer and more nuanced searches and follow-up questions. ", + "capabilities": ["function-call", "file-input", "image-recognition", "web-search"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 8000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 15 + } + }, + "family": "sonar-pro", + "ownedBy": "perplexity", + "openWeights": false + }, + { + "id": "sonar-reasoning-pro", + "name": "Sonar Reasoning Pro", + "description": "Note: Sonar Pro pricing includes Perplexity search pricing. See [details here](https://docs.perplexity.ai/guides/pricing#detailed-pricing-breakdown-for-sonar-reasoning-pro-and-sonar-pro)\n\nSonar Reasoning Pro is a premier reasoning model powered by DeepSeek R1 with Chain of Thought (CoT). 
Designed for advanced use cases, it supports in-depth, multi-step queries with a larger context window and can surface more citations per search, enabling more comprehensive and extensible responses.", + "capabilities": ["reasoning", "web-search"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 127000, + "maxOutputTokens": 8000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 8 + } + }, + "family": "sonar-reasoning", + "ownedBy": "perplexity", + "openWeights": false + }, + { + "id": "glm-4-6v-flash", + "name": "GLM-4.6V-Flash", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 24000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.02 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.21 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.0043 + } + }, + "reasoning": { + "supportedEfforts": ["none"], + "interleaved": true + }, + "family": "glm", + "ownedBy": "zhipu", + "openWeights": false + }, + { + "id": "nova-2-lite", + "name": "Nova 2 Lite", + "capabilities": ["reasoning", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 1000000, + "maxOutputTokens": 1000000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2.5 + } + }, + "family": "nova", + "ownedBy": "amazon", + "openWeights": false + }, + { + "id": "titan-embed-text-v2", + "name": "Titan Text Embeddings V2", + "capabilities": ["embedding"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 8192, + "maxOutputTokens": 1536, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 
0.02 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "family": "titan-embed", + "ownedBy": "vercel", + "openWeights": false + }, + { + "id": "nova-micro", + "name": "Nova Micro", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.035 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.14 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.00875 + } + }, + "family": "nova-micro", + "ownedBy": "amazon", + "openWeights": false + }, + { + "id": "nova-pro", + "name": "Nova Pro", + "capabilities": ["function-call", "file-input", "image-recognition", "video-recognition"], + "inputModalities": ["text", "image", "video"], + "outputModalities": ["text"], + "contextWindow": 300000, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.8 + }, + "output": { + "currency": "USD", + "perMillionTokens": 3.2 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.2 + } + }, + "family": "nova-pro", + "ownedBy": "amazon", + "openWeights": false + }, + { + "id": "nova-lite", + "name": "Nova Lite", + "capabilities": ["function-call", "file-input", "image-recognition", "video-recognition"], + "inputModalities": ["text", "image", "video"], + "outputModalities": ["text"], + "contextWindow": 300000, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.06 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.24 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.015 + } + }, + "family": "nova-lite", + "ownedBy": "amazon", + "openWeights": false + }, + { + "id": "morph-v3-fast", + "name": "Morph v3 Fast", + "description": "Morph's fastest apply model for code edits. 
~10,500 tokens/sec with 96% accuracy for rapid code transformations.\n\nThe model requires the prompt to be in the following format: \n{instruction}\n{initial_code}\n{edit_snippet}\n\nZero Data Retention is enabled for Morph. Learn more about this model in their [documentation](https://docs.morphllm.com/quickstart)", + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 16000, + "maxOutputTokens": 16000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.8 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.2 + } + }, + "family": "morph", + "ownedBy": "morph", + "openWeights": false + }, + { + "id": "morph-v3-large", + "name": "Morph v3 Large", + "description": "Morph's high-accuracy apply model for complex code edits. ~4,500 tokens/sec with 98% accuracy for precise code transformations.\n\nThe model requires the prompt to be in the following format: \n{instruction}\n{initial_code}\n{edit_snippet}\n\nZero Data Retention is enabled for Morph. Learn more about this model in their [documentation](https://docs.morphllm.com/quickstart)", + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 32000, + "maxOutputTokens": 32000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.9 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.9 + } + }, + "family": "morph", + "ownedBy": "morph", + "openWeights": false + }, + { + "id": "llama-3-1", + "name": "Llama 3.1 8B Instruct", + "description": "Meta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This is the base 405B pre-trained version.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). 
Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 131072, + "maxOutputTokens": 16384, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.03 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.05 + } + }, + "family": "llama", + "ownedBy": "meta", + "openWeights": false + }, + { + "id": "llama-3-2", + "name": "Llama 3.2 90B Vision Instruct", + "capabilities": ["function-call", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.72 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.72 + } + }, + "family": "llama", + "ownedBy": "meta", + "openWeights": false + }, + { + "id": "llama-4-scout", + "name": "Llama-4-Scout-17B-16E-Instruct-FP8", + "description": "Llama 4 Scout 17B Instruct (16E) is a mixture-of-experts (MoE) language model developed by Meta, activating 17 billion parameters out of a total of 109B. It supports native multimodal input (text and image) and multilingual output (text and code) across 12 supported languages. Designed for assistant-style interaction and visual reasoning, Scout uses 16 experts per forward pass and features a context length of 10 million tokens, with a training corpus of ~40 trillion tokens.\n\nBuilt for high efficiency and local or commercial deployment, Llama 4 Scout incorporates early fusion for seamless modality integration. It is instruction-tuned for use in multilingual chat, captioning, and image understanding tasks. 
Released under the Llama 4 Community License, it was last trained on data up to August 2024 and launched publicly on April 5, 2025.", + "capabilities": ["function-call", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.08 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.3 + } + }, + "family": "llama", + "ownedBy": "meta", + "openWeights": true + }, + { + "id": "llama-3-3", + "name": "Llama-3.3-70B-Instruct", + "description": "The Meta Llama 3.3 multilingual large language model (LLM) is a pretrained and instruction tuned generative model in 70B (text in/text out). The Llama 3.3 instruction tuned text only model is optimized for multilingual dialogue use cases and outperforms many of the available open source and closed chat models on common industry benchmarks.", + "capabilities": ["function-call", "file-input"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.7 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2.8 + } + }, + "family": "llama", + "ownedBy": "meta", + "openWeights": true + }, + { + "id": "llama-4-maverick", + "name": "Llama-4-Maverick-17B-128E-Instruct-FP8", + "description": "Llama 4 Maverick 17B Instruct (128E) is a high-capacity multimodal language model from Meta, built on a mixture-of-experts (MoE) architecture with 128 experts and 17 billion active parameters per forward pass (400B total). It supports multilingual text and image input, and produces multilingual text and code output across 12 supported languages. 
Optimized for vision-language tasks, Maverick is instruction-tuned for assistant-like behavior, image reasoning, and general-purpose multimodal interaction.\n\nMaverick features early fusion for native multimodality and a 1 million token context window. It was trained on a curated mixture of public, licensed, and Meta-platform data, covering ~22 trillion tokens, with a knowledge cutoff in August 2024. Released on April 5, 2025 under the Llama 4 Community License, Maverick is suited for research and commercial applications requiring advanced multimodal understanding and high model throughput.", + "capabilities": ["function-call", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.15 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.6 + } + }, + "family": "llama", + "ownedBy": "meta", + "openWeights": true + }, + { + "id": "claude-3-5-sonnet-20240620", + "name": "Claude 3.5 Sonnet (2024-06-20)", + "description": "Claude 3.5 Sonnet delivers performance superior to Opus and speeds faster than its predecessor, all at the same price point. 
Its core strengths include:\n\nCoding: Autonomously writes, edits, and executes code with advanced reasoning and troubleshooting.\nData Science: Augments human expertise by analyzing unstructured data and using multiple tools to generate insights.\nVisual Processing: Excels at interpreting charts, graphs, and images, accurately transcribing text to derive high-level insights.\nAgentic Tasks: Exceptional tool use makes it highly effective for complex, multi-step agentic workflows that interact with other systems.", + "capabilities": ["function-call", "file-input", "image-recognition", "web-search", "computer-use"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 15 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.3 + } + }, + "family": "claude-sonnet", + "ownedBy": "anthropic", + "openWeights": false, + "alias": ["claude-3-5-sonnet-v1"] + }, + { + "id": "claude-3-5-haiku", + "name": "Claude Haiku 3.5", + "description": "Claude 3.5 Haiku features offers enhanced capabilities in speed, coding accuracy, and tool use. 
Engineered to excel in real-time applications, it delivers quick response times that are essential for dynamic tasks such as chat interactions and immediate coding suggestions.\n\nThis makes it highly suitable for environments that demand both speed and precision, such as software development, customer service bots, and data management systems.\n\nThis model is currently pointing to [Claude 3.5 Haiku (2024-10-22)](/anthropic/claude-3-5-haiku-20241022).", + "capabilities": ["function-call", "file-input", "image-recognition", "web-search"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.8 + }, + "output": { + "currency": "USD", + "perMillionTokens": 4 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.08 + } + }, + "family": "claude-haiku", + "ownedBy": "anthropic", + "openWeights": false + }, + { + "id": "claude-3-7-sonnet", + "name": "Claude Sonnet 3.7", + "description": "Claude 3.7 Sonnet is an advanced large language model with improved reasoning, coding, and problem-solving capabilities. It introduces a hybrid reasoning approach, allowing users to choose between rapid responses and extended, step-by-step processing for complex tasks. The model demonstrates notable improvements in coding, particularly in front-end development and full-stack updates, and excels in agentic workflows, where it can autonomously navigate multi-step processes. 
\n\nClaude 3.7 Sonnet maintains performance parity with its predecessor in standard mode while offering an extended reasoning mode for enhanced accuracy in math, coding, and instruction-following tasks.\n\nRead more at the [blog post here](https://www.anthropic.com/news/claude-3-7-sonnet)", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition", "web-search", "computer-use"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 64000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 15 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.3 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 64000 + } + }, + "family": "claude-sonnet", + "ownedBy": "anthropic", + "openWeights": false + }, + { + "id": "claude-opus-4-1", + "name": "Claude Opus 4", + "description": "Claude Opus 4.1 is an updated version of Anthropic’s flagship model, offering improved performance in coding, reasoning, and agentic tasks. It achieves 74.5% on SWE-bench Verified and shows notable gains in multi-file code refactoring, debugging precision, and detail-oriented reasoning. 
The model supports extended thinking up to 64K tokens and is optimized for tasks involving research, data analysis, and tool-assisted reasoning.", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition", "web-search", "computer-use"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 32000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 15 + }, + "output": { + "currency": "USD", + "perMillionTokens": 75 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 1.5 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 32000 + } + }, + "family": "claude-opus", + "ownedBy": "anthropic", + "openWeights": false + }, + { + "id": "claude-3-5-sonnet", + "name": "Claude Sonnet 3.5 v2", + "description": "New Claude 3.5 Sonnet delivers better-than-Opus capabilities, faster-than-Sonnet speeds, at the same Sonnet prices. Sonnet is particularly good at:\n\n- Coding: Scores ~49% on SWE-Bench Verified, higher than the last best score, and without any fancy prompt scaffolding\n- Data science: Augments human data science expertise; navigates unstructured data while using multiple tools for insights\n- Visual processing: excelling at interpreting charts, graphs, and images, accurately transcribing text to derive insights beyond just the text alone\n- Agentic tasks: exceptional tool use, making it great at agentic tasks (i.e. 
complex, multi-step problem solving tasks that require engaging with other systems)\n\n#multimodal", + "capabilities": ["function-call", "file-input", "image-recognition", "web-search", "computer-use"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 15 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.3 + } + }, + "family": "claude-sonnet", + "ownedBy": "anthropic", + "openWeights": false + }, + { + "id": "claude-3-opus", + "name": "Claude Opus 3", + "capabilities": ["function-call", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 15 + }, + "output": { + "currency": "USD", + "perMillionTokens": 75 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 1.5 + } + }, + "family": "claude-opus", + "ownedBy": "anthropic", + "openWeights": false + }, + { + "id": "claude-3-haiku", + "name": "Claude Haiku 3", + "description": "Claude 3 Haiku is Anthropic's fastest and most compact model for\nnear-instant responsiveness. 
Quick and accurate targeted performance.\n\nSee the launch announcement and benchmark results [here](https://www.anthropic.com/news/claude-3-haiku)\n\n#multimodal", + "capabilities": ["function-call", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.25 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.25 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.03 + } + }, + "family": "claude-haiku", + "ownedBy": "anthropic", + "openWeights": false + }, + { + "id": "claude-opus-4", + "name": "Claude Opus 4", + "description": "Claude Opus 4 is benchmarked as the world’s best coding model, at time of release, bringing sustained performance on complex, long-running tasks and agent workflows. It sets new benchmarks in software engineering, achieving leading results on SWE-bench (72.5%) and Terminal-bench (43.2%). Opus 4 supports extended, agentic workflows, handling thousands of task steps continuously for hours without degradation. 
\n\nRead more at the [blog post here](https://www.anthropic.com/news/claude-4)", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition", "web-search", "computer-use"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 32000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 15 + }, + "output": { + "currency": "USD", + "perMillionTokens": 75 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 1.5 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 32000 + } + }, + "family": "claude-opus", + "ownedBy": "anthropic", + "openWeights": false + }, + { + "id": "hermes-4", + "name": "Hermes-4-70B", + "description": "Hermes 4 70B is a hybrid reasoning model from Nous Research, built on Meta-Llama-3.1-70B. It introduces the same hybrid mode as the larger 405B release, allowing the model to either respond directly or generate explicit ... reasoning traces before answering. Users can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)\n\nThis 70B variant is trained with the expanded post-training corpus (~60B tokens) emphasizing verified reasoning data, leading to improvements in mathematics, coding, STEM, logic, and structured outputs while maintaining general assistant performance. 
It supports JSON mode, schema adherence, function calling, and tool use, and is designed for greater steerability with reduced refusal rates.", + "capabilities": ["reasoning", "function-call", "structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.13 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.4 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.013 + } + }, + "family": "nousresearch", + "ownedBy": "nousresearch", + "openWeights": true + }, + { + "id": "e5-mistral-instruct", + "name": "e5-mistral-7b-instruct", + "capabilities": ["embedding"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 32768, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.01 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "family": "text-embedding", + "ownedBy": "nebius", + "openWeights": true + }, + { + "id": "llama-3_1-nemotron-ultra-v1", + "name": "Llama-3.1-Nemotron-Ultra-253B-v1", + "description": "Llama-3.1-Nemotron-Ultra-253B is a 253 billion parameter reasoning-focused language model optimized for efficiency that excels at math, coding, and general instruction-following tasks while running on a single 8xH100 node.", + "capabilities": ["function-call", "structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.6 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.8 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.06 + } + }, + "ownedBy": "meta", + "openWeights": true + }, + { + "id": "bge-en-icl", + "name": "BGE-ICL", + "capabilities": ["embedding"], + "inputModalities": ["text"], + "outputModalities": 
["text"], + "contextWindow": 32768, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.01 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "family": "text-embedding", + "ownedBy": "baai", + "openWeights": true + }, + { + "id": "bge-multilingual-gemma2", + "name": "bge-multilingual-gemma2", + "capabilities": ["embedding"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 8192, + "maxOutputTokens": 3072, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.01 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "family": "text-embedding", + "ownedBy": "baai", + "openWeights": true + }, + { + "id": "gemma-2-it-fast", + "name": "Gemma-2-9b-it (Fast)", + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 8192, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.03 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.09 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.003 + } + }, + "ownedBy": "google", + "openWeights": true + }, + { + "id": "gemma-3-it-fast", + "name": "Gemma-3-27b-it (Fast)", + "capabilities": ["function-call", "structured-output", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.6 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.02 + } + }, + "ownedBy": "google", + "openWeights": true + }, + { + "id": "qwen3-fast", + "name": "Qwen3-32B (Fast)", + "capabilities": ["function-call", "structured-output", "reasoning"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 8192, + "pricing": { + "input": { + 
"currency": "USD", + "perMillionTokens": 0.2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.6 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.02 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 38912 + } + }, + "ownedBy": "alibaba", + "openWeights": true + }, + { + "id": "qwen3-a3b-thinking-2507", + "name": "Qwen3-30B-A3B-Thinking-2507", + "description": "Qwen3-30B-A3B-Thinking-2507 is a 30B parameter Mixture-of-Experts reasoning model optimized for complex tasks requiring extended multi-step thinking. The model is designed specifically for “thinking mode,” where internal reasoning traces are separated from final answers.\n\nCompared to earlier Qwen3-30B releases, this version improves performance across logical reasoning, mathematics, science, coding, and multilingual benchmarks. It also demonstrates stronger instruction following, tool use, and alignment with human preferences. 
With higher reasoning efficiency and extended output budgets, it is best suited for advanced research, competitive problem solving, and agentic applications requiring structured long-context reasoning.", + "capabilities": ["reasoning", "function-call", "structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 16384, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.1 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.3 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.01 + } + }, + "reasoning": { + "supportedEfforts": ["low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 38912 + }, + "interleaved": true + }, + "family": "qwen", + "ownedBy": "alibaba", + "openWeights": true + }, + { + "id": "qwen3-a3b-instruct-2507", + "name": "Qwen3-30B-A3B-Instruct-2507", + "description": "Qwen3-30B-A3B-Instruct-2507 is a 30.5B-parameter mixture-of-experts language model from Qwen, with 3.3B active parameters per inference. It operates in non-thinking mode and is designed for high-quality instruction following, multilingual understanding, and agentic tool use. Post-trained on instruction data, it demonstrates competitive performance across reasoning (AIME, ZebraLogic), coding (MultiPL-E, LiveCodeBench), and alignment (IFEval, WritingBench) benchmarks. 
It outperforms its non-instruct variant on subjective and open-ended tasks while retaining strong factual and coding performance.", + "capabilities": ["function-call", "structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.1 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.3 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.01 + } + }, + "family": "qwen", + "ownedBy": "alibaba", + "openWeights": true + }, + { + "id": "qwen2-5-coder-fast", + "name": "Qwen2.5-Coder-7B (Fast)", + "capabilities": ["function-call", "structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.03 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.09 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.003 + } + }, + "family": "qwen", + "ownedBy": "alibaba", + "openWeights": true + }, + { + "id": "llama-3-1-instruct-fast", + "name": "Meta-Llama-3.1-8B-Instruct (Fast)", + "capabilities": ["function-call", "structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.03 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.09 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.003 + } + }, + "ownedBy": "meta", + "openWeights": true + }, + { + "id": "llama-3-3-instruct-fast", + "name": "Llama-3.3-70B-Instruct (Fast)", + "capabilities": ["function-call", "structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 
0.25 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.75 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.025 + } + }, + "ownedBy": "meta", + "openWeights": true + }, + { + "id": "glm-4-7-fp8", + "name": "GLM-4.7 (FP8)", + "capabilities": ["function-call", "structured-output", "reasoning"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.4 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.04 + } + }, + "reasoning": { + "supportedEfforts": ["none"], + "interleaved": true + }, + "ownedBy": "zhipu", + "openWeights": false + }, + { + "id": "deepseek-r1-0528-fast", + "name": "DeepSeek R1 0528 Fast", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 131072, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 6 + } + }, + "family": "deepseek", + "ownedBy": "deepseek", + "openWeights": true + }, + { + "id": "deepseek-v3-0324-fast", + "name": "DeepSeek-V3-0324 (Fast)", + "capabilities": ["function-call", "structured-output", "reasoning"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.75 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2.25 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.075 + } + }, + "reasoning": { + "supportedEfforts": ["none"] + }, + "ownedBy": "deepseek", + "openWeights": true + }, + { + "id": "flux-dev", + "name": "FLUX.1-dev", + "capabilities": ["image-generation"], + "inputModalities": ["text"], + "outputModalities": ["image"], + 
"contextWindow": 77, + "ownedBy": "bfl", + "openWeights": true + }, + { + "id": "flux-schnell", + "name": "FLUX.1-schnell", + "capabilities": ["image-generation"], + "inputModalities": ["text"], + "outputModalities": ["image"], + "contextWindow": 77, + "ownedBy": "bfl", + "openWeights": true + }, + { + "id": "qwen-plus-character", + "name": "Qwen Plus Character", + "capabilities": ["function-call", "reasoning"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 32768, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.115 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.287 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 0, + "max": 81920 + } + }, + "family": "qwen", + "ownedBy": "alibaba", + "openWeights": false + }, + { + "id": "qwen-math-plus", + "name": "Qwen Math Plus", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 4096, + "maxOutputTokens": 3072, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.574 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.721 + } + }, + "family": "qwen", + "ownedBy": "alibaba", + "openWeights": false + }, + { + "id": "qwen-doc-turbo", + "name": "Qwen Doc Turbo", + "capabilities": ["function-call", "file-input"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 131072, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.087 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.144 + } + }, + "family": "qwen", + "ownedBy": "alibaba", + "openWeights": false + }, + { + "id": "qwen-deep-research", + "name": "Qwen Deep Research", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 1000000, + "maxOutputTokens": 
32768, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 7.742 + }, + "output": { + "currency": "USD", + "perMillionTokens": 23.367 + } + }, + "family": "qwen", + "ownedBy": "alibaba", + "openWeights": false + }, + { + "id": "qwen-long", + "name": "Qwen Long", + "capabilities": ["function-call", "file-input"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 10000000, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.072 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.287 + } + }, + "family": "qwen", + "ownedBy": "alibaba", + "openWeights": false + }, + { + "id": "qwen2-5-math-instruct", + "name": "Qwen2.5-Math 72B Instruct", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 4096, + "maxOutputTokens": 3072, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.574 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.721 + } + }, + "family": "qwen", + "ownedBy": "alibaba", + "openWeights": true + }, + { + "id": "moonshot-kimi-k2-instruct", + "name": "Moonshot Kimi K2 Instruct", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 131072, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.574 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2.294 + } + }, + "family": "kimi", + "ownedBy": "moonshot", + "openWeights": true + }, + { + "id": "tongyi-intent-detect-v3", + "name": "Tongyi Intent Detect V3", + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 8192, + "maxOutputTokens": 1024, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.058 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.144 + } + }, + "family": "yi", + "ownedBy": "alibaba-cn", + "openWeights": 
false + }, + { + "id": "qwen-math-turbo", + "name": "Qwen Math Turbo", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 4096, + "maxOutputTokens": 3072, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.287 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.861 + } + }, + "family": "qwen", + "ownedBy": "alibaba", + "openWeights": false + }, + { + "id": "deepseek-r1-distill-qwen-1", + "name": "DeepSeek R1 Distill Qwen 1.5B", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 32768, + "maxOutputTokens": 16384, + "family": "qwen", + "ownedBy": "deepseek", + "openWeights": false + }, + { + "id": "claude-sonnet-4-6@default", + "name": "Claude Sonnet 4.6", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition", "web-search", "computer-use"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 64000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 15 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.3 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 64000 + } + }, + "family": "claude-sonnet", + "ownedBy": "anthropic", + "openWeights": false + }, + { + "id": "claude-opus-4-5@20251101", + "name": "Claude Opus 4.5", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition", "web-search", "computer-use"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 64000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 25 + }, + "cacheRead": { + 
"currency": "USD", + "perMillionTokens": 0.5 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 64000 + } + }, + "family": "claude-opus", + "ownedBy": "anthropic", + "openWeights": false + }, + { + "id": "claude-3-5-sonnet@20241022", + "name": "Claude Sonnet 3.5 v2", + "capabilities": ["function-call", "file-input", "image-recognition", "web-search", "computer-use"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 15 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.3 + } + }, + "family": "claude-sonnet", + "ownedBy": "anthropic", + "openWeights": false + }, + { + "id": "claude-3-5-haiku@20241022", + "name": "Claude Haiku 3.5", + "capabilities": ["function-call", "file-input", "image-recognition", "web-search"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.8 + }, + "output": { + "currency": "USD", + "perMillionTokens": 4 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.08 + } + }, + "family": "claude-haiku", + "ownedBy": "anthropic", + "openWeights": false + }, + { + "id": "claude-sonnet-4@20250514", + "name": "Claude Sonnet 4", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition", "web-search", "computer-use"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 64000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 15 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.3 + } + }, + 
"reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 64000 + } + }, + "family": "claude-sonnet", + "ownedBy": "anthropic", + "openWeights": false + }, + { + "id": "claude-sonnet-4-5@20250929", + "name": "Claude Sonnet 4.5", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition", "web-search", "computer-use"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 64000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 15 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.3 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 64000 + } + }, + "family": "claude-sonnet", + "ownedBy": "anthropic", + "openWeights": false + }, + { + "id": "claude-opus-4-1@20250805", + "name": "Claude Opus 4.1", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition", "web-search", "computer-use"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 32000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 15 + }, + "output": { + "currency": "USD", + "perMillionTokens": 75 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 1.5 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 32000 + } + }, + "family": "claude-opus", + "ownedBy": "anthropic", + "openWeights": false + }, + { + "id": "claude-haiku-4-5@20251001", + "name": "Claude Haiku 4.5", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition", "web-search", "computer-use"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + 
"contextWindow": 200000, + "maxOutputTokens": 64000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1 + }, + "output": { + "currency": "USD", + "perMillionTokens": 5 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.1 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 64000 + } + }, + "family": "claude-haiku", + "ownedBy": "anthropic", + "openWeights": false + }, + { + "id": "claude-3-7-sonnet@20250219", + "name": "Claude Sonnet 3.7", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition", "web-search", "computer-use"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 64000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 15 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.3 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 64000 + } + }, + "family": "claude-sonnet", + "ownedBy": "anthropic", + "openWeights": false + }, + { + "id": "claude-opus-4-6@default", + "name": "Claude Opus 4.6", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition", "web-search", "computer-use"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 1000000, + "maxOutputTokens": 128000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 25 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.5 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 32000 + } + }, + "family": "claude-opus", + "ownedBy": "anthropic", + "openWeights": false + }, + { + 
"id": "claude-opus-4@20250514", + "name": "Claude Opus 4", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition", "web-search", "computer-use"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 32000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 15 + }, + "output": { + "currency": "USD", + "perMillionTokens": 75 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 1.5 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 32000 + } + }, + "family": "claude-opus", + "ownedBy": "anthropic", + "openWeights": false + }, + { + "id": "baichuan-m2", + "name": "baichuan-m2-32b", + "capabilities": ["reasoning"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 131072, + "maxOutputTokens": 131072, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.07 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.07 + } + }, + "family": "baichuan", + "ownedBy": "baichuan", + "openWeights": true + }, + { + "id": "hermes-2-pro-llama-3", + "name": "Hermes 2 Pro Llama 3 8B", + "description": "Hermes 2 Pro is an upgraded, retrained version of Nous Hermes 2, consisting of an updated and cleaned version of the OpenHermes 2.5 Dataset, as well as a newly introduced Function Calling and JSON Mode dataset developed in-house.", + "capabilities": ["structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 8192, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.14 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.14 + } + }, + "family": "llama", + "ownedBy": "nousresearch", + "openWeights": true + }, + { + "id": "paddleocr-vl", + "name": "PaddleOCR-VL", + "description": "PaddleOCR-VL is an advanced and efficient 
document parsing model specifically designed for element recognition within documents. Its core component, PaddleOCR-VL-0.9B, is a compact yet powerful vision-language model (VLM) composed of a NaViT-style dynamic resolution visual encoder and the ERNIE-4.5-0.3B language model, enabling precise element recognition. This model supports 109 languages and excels at recognizing complex elements such as text, tables, formulas, and charts while maintaining extremely low resource consumption. Through comprehensive evaluations on widely used public benchmarks and internal benchmarks, PaddleOCR-VL achieves state-of-the-art (SOTA) performance in both page-level document parsing and element-level recognition. It significantly outperforms existing pipeline-based solutions, multimodal document parsing approaches, and advanced general-purpose multimodal large models, while also offering faster inference speed.", + "capabilities": ["file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 16384, + "maxOutputTokens": 16384, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.02 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.02 + } + }, + "ownedBy": "novita-ai", + "openWeights": true + }, + { + "id": "kat-coder", + "name": "KAT-Coder-Pro V1(Free)", + "capabilities": ["function-call", "structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 256000, + "maxOutputTokens": 32000, + "ownedBy": "streamlake", + "openWeights": true + }, + { + "id": "kat-coder-pro", + "name": "Kat Coder Pro", + "description": "KAT-Coder-Pro V1 is KwaiKAT's most advanced agentic coding model in the KAT-Coder series. Designed specifically for agentic coding tasks, it excels in real-world software engineering scenarios, achieving 73.4% solve rate on the SWE-Bench Verified benchmark. 
\n\nThe model has been optimized for tool-use capability, multi-turn interaction, instruction following, generalization, and comprehensive capabilities through a multi-stage training process, including mid-training, supervised fine-tuning (SFT), reinforcement fine-tuning (RFT), and scalable agentic RL.", + "capabilities": ["function-call", "structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 256000, + "maxOutputTokens": 128000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.2 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.06 + } + }, + "family": "kat-coder", + "ownedBy": "streamlake", + "openWeights": true + }, + { + "id": "deepseek-ocr-2", + "name": "deepseek/deepseek-ocr-2", + "capabilities": ["file-input", "image-recognition", "function-call"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 8192, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.03 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.03 + } + }, + "ownedBy": "deepseek", + "openWeights": true + }, + { + "id": "deepseek-prover-v2", + "name": "Deepseek Prover V2 671B", + "description": "Provided by chutes.ai\nDeepSeek Prover V2 is a 671B parameter model, speculated to be geared towards logic and mathematics. 
Likely an upgrade from DeepSeek-Prover-V1.5 Not much is known about the model yet, as DeepSeek released it on Hugging Face without an announcement or description.", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 160000, + "maxOutputTokens": 160000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.7 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2.5 + } + }, + "ownedBy": "deepseek", + "openWeights": true + }, + { + "id": "deepseek-r1-0528-qwen3", + "name": "DeepSeek R1 0528 Qwen3 8B", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 32000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.06 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.09 + } + }, + "family": "qwen", + "ownedBy": "deepseek", + "openWeights": true + }, + { + "id": "deepseek-r1-turbo", + "name": "DeepSeek R1 (Turbo)\t", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 64000, + "maxOutputTokens": 16000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.7 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2.5 + } + }, + "ownedBy": "deepseek", + "openWeights": true + }, + { + "id": "deepseek-ocr", + "name": "DeepSeek-OCR", + "description": "DeepSeek-OCR is a vision-language model launched by DeepSeek AI, focusing on optical character recognition (OCR) and “contextual optical compression.” The model is designed to explore the limits of compressing contextual information from images, efficiently processing documents and converting them into structured text formats such as Markdown. 
The model requires an image as input.", + "capabilities": ["structured-output", "file-input", "image-recognition", "function-call"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 8192, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.03 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.03 + } + }, + "ownedBy": "deepseek", + "openWeights": true + }, + { + "id": "deepseek-v3-turbo", + "name": "DeepSeek V3 (Turbo)\t", + "capabilities": ["function-call", "reasoning"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 64000, + "maxOutputTokens": 16000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.4 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.3 + } + }, + "reasoning": { + "supportedEfforts": ["none"] + }, + "ownedBy": "deepseek", + "openWeights": true + }, + { + "id": "l3-lunaris", + "name": "Sao10k L3 8B Lunaris\t", + "description": "Lunaris 8B is a versatile generalist and roleplaying model based on Llama 3. 
It's a strategic merge of multiple models, designed to balance creativity with improved logic and general knowledge.\n\nCreated by [Sao10k](https://huggingface.co/Sao10k), this model aims to offer an improved experience over Stheno v3.2, with enhanced creativity and logical reasoning.\n\nFor best results, use with Llama 3 Instruct context template, temperature 1.4, and min_p 0.1.", + "capabilities": ["structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 8192, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.05 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.05 + } + }, + "ownedBy": "novita-ai", + "openWeights": true + }, + { + "id": "l3-stheno-v3-2", + "name": "L3 8B Stheno V3.2", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 8192, + "maxOutputTokens": 32000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.05 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.05 + } + }, + "ownedBy": "novita-ai", + "openWeights": true + }, + { + "id": "l31-euryale-v2-2", + "name": "L31 70B Euryale V2.2", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 8192, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.48 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.48 + } + }, + "ownedBy": "novita-ai", + "openWeights": true + }, + { + "id": "l3-euryale-v2-1", + "name": "L3 70B Euryale V2.1\t", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 8192, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.48 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.48 + } + }, + "ownedBy": "novita-ai", + 
"openWeights": true + }, + { + "id": "r1v4-lite", + "name": "Skywork R1V4-Lite", + "capabilities": ["structured-output", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 262144, + "maxOutputTokens": 65536, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.6 + } + }, + "family": "skywork", + "ownedBy": "novita-ai", + "openWeights": true + }, + { + "id": "wizardlm-2-8x22b", + "name": "Wizardlm 2 8x22B", + "description": "WizardLM-2 8x22B is Microsoft AI's most advanced Wizard model. It demonstrates highly competitive performance compared to leading proprietary models, and it consistently outperforms all existing state-of-the-art opensource models.\n\nIt is an instruct finetune of [Mixtral 8x22B](/models/mistralai/mixtral-8x22b).\n\nTo read more about the model release, [click here](https://wizardlm.github.io/WizardLM2/).\n\n#moe", + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 65535, + "maxOutputTokens": 8000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.62 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.62 + } + }, + "ownedBy": "novita-ai", + "openWeights": true + }, + { + "id": "mythomax-l2", + "name": "Mythomax L2 13B", + "description": "One of the highest performing and most popular fine-tunes of Llama 2 13B, with rich descriptions and roleplay. 
#merge", + "capabilities": ["structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 4096, + "maxOutputTokens": 3200, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.09 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.09 + } + }, + "ownedBy": "novita-ai", + "openWeights": true + }, + { + "id": "ernie-4-5-vl-a3b", + "name": "ERNIE-4.5-VL-28B-A3B-Thinking", + "description": "A powerful multimodal Mixture-of-Experts chat model featuring 28B total parameters with 3B activated per token, delivering exceptional text and vision understanding through its innovative heterogeneous MoE structure with modality-isolated routing. Built with scaling-efficient infrastructure for high-throughput training and inference, the model leverages advanced post-training techniques including SFT, DPO, and UPO for optimized performance, while supporting an impressive 131K context length and RLVR alignment for superior cross-modal reasoning and generation capabilities.", + "capabilities": [ + "reasoning", + "function-call", + "structured-output", + "file-input", + "image-recognition", + "video-recognition" + ], + "inputModalities": ["text", "image", "video"], + "outputModalities": ["text"], + "contextWindow": 131072, + "maxOutputTokens": 65536, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.39 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.39 + } + }, + "ownedBy": "baidu", + "openWeights": true + }, + { + "id": "ernie-4-5-a3b", + "name": "ERNIE 4.5 21B A3B", + "description": "A sophisticated text-based Mixture-of-Experts (MoE) model featuring 21B total parameters with 3B activated per token, delivering exceptional multimodal understanding and generation through heterogeneous MoE structures and modality-isolated routing. 
Supporting an extensive 131K token context length, the model achieves efficient inference via multi-expert parallel collaboration and quantization, while advanced post-training techniques including SFT, DPO, and UPO ensure optimized performance across diverse applications with specialized routing and balancing losses for superior task handling.", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 120000, + "maxOutputTokens": 8000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.07 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.28 + } + }, + "family": "ernie", + "ownedBy": "baidu", + "openWeights": true + }, + { + "id": "qwen3-omni-a3b", + "name": "Qwen3 Omni 30B A3B Thinking", + "capabilities": [ + "reasoning", + "function-call", + "structured-output", + "file-input", + "image-recognition", + "audio-recognition", + "video-recognition" + ], + "inputModalities": ["text", "audio", "video", "image"], + "outputModalities": ["text"], + "contextWindow": 65536, + "maxOutputTokens": 16384, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.25 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.97 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 38912 + } + }, + "family": "qwen", + "ownedBy": "alibaba", + "openWeights": true + }, + { + "id": "qwen3-omni-a3b-instruct", + "name": "Qwen3 Omni 30B A3B Instruct", + "capabilities": [ + "function-call", + "structured-output", + "file-input", + "image-recognition", + "audio-recognition", + "audio-generation", + "video-recognition" + ], + "inputModalities": ["text", "video", "audio", "image"], + "outputModalities": ["text", "audio"], + "contextWindow": 65536, + "maxOutputTokens": 16384, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.25 + }, + "output": { + "currency": "USD", + 
"perMillionTokens": 0.97 + } + }, + "family": "qwen", + "ownedBy": "alibaba", + "openWeights": true + }, + { + "id": "qwen-2-5-instruct", + "name": "Qwen 2.5 72B Instruct", + "description": "Qwen2.5 7B is the latest series of Qwen large language models. Qwen2.5 brings the following improvements upon Qwen2:\n\n- Significantly more knowledge and has greatly improved capabilities in coding and mathematics, thanks to our specialized expert models in these domains.\n\n- Significant improvements in instruction following, generating long texts (over 8K tokens), understanding structured data (e.g, tables), and generating structured outputs especially JSON. More resilient to the diversity of system prompts, enhancing role-play implementation and condition-setting for chatbots.\n\n- Long-context Support up to 128K tokens and can generate up to 8K tokens.\n\n- Multilingual support for over 29 languages, including Chinese, English, French, Spanish, Portuguese, German, Italian, Russian, Japanese, Korean, Vietnamese, Thai, Arabic, and more.\n\nUsage of this model is subject to [Tongyi Qianwen LICENSE AGREEMENT](https://huggingface.co/Qwen/Qwen1.5-110B-Chat/blob/main/LICENSE).", + "capabilities": ["function-call", "structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 32000, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.38 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.4 + } + }, + "family": "qwen", + "ownedBy": "alibaba", + "openWeights": true + }, + { + "id": "qwen3-vl-a22b-instruct", + "name": "Qwen3 VL 235B A22B Instruct", + "description": "Qwen3-VL-235B-A22B Instruct is an open-weight multimodal model that unifies strong text generation with visual understanding across images and video. The Instruct model targets general vision-language use (VQA, document parsing, chart/table extraction, multilingual OCR). 
The series emphasizes robust perception (recognition of diverse real-world and synthetic categories), spatial understanding (2D/3D grounding), and long-form visual comprehension, with competitive results on public multimodal benchmarks for both perception and reasoning.\n\nBeyond analysis, Qwen3-VL supports agentic interaction and tool use: it can follow complex instructions over multi-image, multi-turn dialogues; align text to video timelines for precise temporal queries; and operate GUI elements for automation tasks. The models also enable visual coding workflows—turning sketches or mockups into code and assisting with UI debugging—while maintaining strong text-only performance comparable to the flagship Qwen3 language models. This makes Qwen3-VL suitable for production scenarios spanning document AI, multilingual OCR, software/UI assistance, spatial/embodied tasks, and research on vision-language agents.", + "capabilities": ["function-call", "structured-output", "file-input", "image-recognition", "video-recognition"], + "inputModalities": ["text", "image", "video"], + "outputModalities": ["text"], + "contextWindow": 131072, + "maxOutputTokens": 32768, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.5 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.11 + } + }, + "family": "qwen", + "ownedBy": "alibaba", + "openWeights": true + }, + { + "id": "llama-3-instruct", + "name": "Llama3 70B Instruct", + "description": "Meta's latest class of model (Llama 3) launched with a variety of sizes & flavors. This 70B instruct-tuned version was optimized for high quality dialogue usecases.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). 
Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", + "capabilities": ["structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 8192, + "maxOutputTokens": 8000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.51 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.74 + } + }, + "family": "llama", + "ownedBy": "meta", + "openWeights": true + }, + { + "id": "autoglm-phone-multilingual", + "name": "AutoGLM-Phone-9B-Multilingual", + "capabilities": ["file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 65536, + "maxOutputTokens": 65536, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.035 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.138 + } + }, + "ownedBy": "novita-ai", + "openWeights": true + }, + { + "id": "grok-41-fast", + "name": "Grok 4.1 Fast", + "capabilities": [ + "reasoning", + "function-call", + "structured-output", + "file-input", + "image-recognition", + "web-search" + ], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 256000, + "maxOutputTokens": 64000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.25 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.125 + } + }, + "family": "grok", + "ownedBy": "xai", + "openWeights": false + }, + { + "id": "claude-opus-45", + "name": "Claude Opus 4.5", + "capabilities": [ + "reasoning", + "function-call", + "structured-output", + "file-input", + "image-recognition", + "computer-use" + ], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 198000, + "maxOutputTokens": 49500, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 6 + }, + "output": { + "currency": 
"USD", + "perMillionTokens": 30 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.6 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"] + }, + "family": "claude-opus", + "ownedBy": "anthropic", + "openWeights": false + }, + { + "id": "mistral-31", + "name": "Venice Medium", + "capabilities": ["function-call", "structured-output", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 32000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2 + } + }, + "family": "mistral", + "ownedBy": "mistral", + "openWeights": true + }, + { + "id": "venice-uncensored", + "name": "Venice Uncensored 1.1", + "capabilities": ["structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 32000, + "maxOutputTokens": 8000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.9 + } + }, + "family": "venice", + "ownedBy": "venice", + "openWeights": true + }, + { + "id": "gpt-52", + "name": "GPT-5.2", + "capabilities": ["reasoning", "function-call", "structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 256000, + "maxOutputTokens": 64000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2.19 + }, + "output": { + "currency": "USD", + "perMillionTokens": 17.5 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.219 + } + }, + "reasoning": { + "supportedEfforts": ["minimal", "low", "medium", "high"] + }, + "family": "gpt", + "ownedBy": "openai", + "openWeights": false + }, + { + "id": "minimax-m25", + "name": "MiniMax M2.5", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + 
"contextWindow": 198000, + "maxOutputTokens": 32000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.4 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.6 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.04 + } + }, + "reasoning": { + "supportedEfforts": [], + "interleaved": true + }, + "family": "minimax", + "ownedBy": "minimax", + "openWeights": true + }, + { + "id": "claude-sonnet-45", + "name": "Claude Sonnet 4.5", + "capabilities": [ + "reasoning", + "function-call", + "structured-output", + "file-input", + "image-recognition", + "computer-use" + ], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 198000, + "maxOutputTokens": 49500, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 3.75 + }, + "output": { + "currency": "USD", + "perMillionTokens": 18.75 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.375 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "interleaved": true + }, + "family": "claude-sonnet", + "ownedBy": "anthropic", + "openWeights": false + }, + { + "id": "google-gemma-3-it", + "name": "Google Gemma 3 27B Instruct", + "capabilities": ["function-call", "structured-output", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 198000, + "maxOutputTokens": 49500, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.12 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.2 + } + }, + "family": "gemma", + "ownedBy": "venice", + "openWeights": true + }, + { + "id": "hermes-3-llama-3-1", + "name": "Hermes 3 Llama 3.1 405b", + "description": "Hermes 3 is a generalist language model with many improvements over [Hermes 2](/models/nousresearch/nous-hermes-2-mistral-7b-dpo), including advanced agentic capabilities, much better roleplaying, reasoning, multi-turn conversation, long 
context coherence, and improvements across the board.\n\nHermes 3 70B is a competitive, if not superior finetune of the [Llama-3.1 70B foundation model](/models/meta-llama/llama-3.1-70b-instruct), focused on aligning LLMs to the user, with powerful steering capabilities and control given to the end user.\n\nThe Hermes 3 series builds and expands on the Hermes 2 set of capabilities, including more powerful and reliable function calling and structured output capabilities, generalist assistant capabilities, and improved code generation skills.", + "capabilities": ["reasoning"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 32000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.1 + }, + "output": { + "currency": "USD", + "perMillionTokens": 3 + } + }, + "family": "hermes", + "ownedBy": "nousresearch", + "openWeights": true + }, + { + "id": "olafangensan-glm-4-7-flash-heretic", + "name": "GLM 4.7 Flash Heretic", + "capabilities": ["reasoning", "function-call", "structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 32000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.14 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.8 + } + }, + "family": "glm-flash", + "ownedBy": "venice", + "openWeights": true + }, + { + "id": "minimax-m21", + "name": "MiniMax M2.1", + "capabilities": ["reasoning", "function-call", "structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 198000, + "maxOutputTokens": 49500, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.4 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.6 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.04 + } + }, + "reasoning": { + "supportedEfforts": [], + "interleaved": true + }, + "family": "minimax", + "ownedBy": 
"minimax", + "openWeights": true + }, + { + "id": "qwen3-next", + "name": "Qwen 3 Next 80b", + "capabilities": ["function-call", "structured-output", "reasoning"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 256000, + "maxOutputTokens": 64000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.35 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.9 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 38912 + } + }, + "family": "qwen", + "ownedBy": "alibaba", + "openWeights": true + }, + { + "id": "gpt-52-codex", + "name": "GPT-5.2 Codex", + "capabilities": ["reasoning", "function-call", "structured-output", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 256000, + "maxOutputTokens": 64000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2.19 + }, + "output": { + "currency": "USD", + "perMillionTokens": 17.5 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.219 + } + }, + "reasoning": { + "supportedEfforts": ["minimal", "low", "medium", "high"] + }, + "family": "gpt-codex", + "ownedBy": "openai", + "openWeights": false + }, + { + "id": "ring-flash-2-0", + "name": "inclusionAI/Ring-flash-2.0", + "description": "Ring-flash-2.0 is a high-performance thinking model deeply optimized based on the Ling-flash-2.0-base. It uses a mixture-of-experts (MoE) architecture with a total of 100 billion parameters, but only activates 6.1 billion parameters per inference. The model employs the original Icepop algorithm to solve the instability issues of large MoE models during reinforcement learning (RL) training, enabling its complex reasoning capabilities to continuously improve over long training cycles. 
Ring-flash-2.0 has achieved significant breakthroughs on multiple high-difficulty benchmarks, including mathematics competitions, code generation, and logical reasoning. Its performance not only surpasses top dense models under 40 billion parameters but also rivals larger open-source MoE models and closed-source high-performance thinking models. Although the model focuses on complex reasoning, it also performs exceptionally well on creative writing tasks. Furthermore, thanks to its efficient architecture, Ring-flash-2.0 delivers high performance with low-latency inference, significantly reducing deployment costs in high-concurrency scenarios.", + "capabilities": ["reasoning", "function-call", "structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 131000, + "maxOutputTokens": 131000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.14 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.57 + } + }, + "family": "ring", + "ownedBy": "bailing", + "openWeights": false + }, + { + "id": "ling-flash-2-0", + "name": "inclusionAI/Ling-flash-2.0", + "description": "Ling-flash-2.0 is a language model from inclusionAI with a total of 100 billion parameters, of which 6.1 billion are activated per token (4.8 billion non-embedding). As part of the Ling 2.0 architecture series, it is designed as a lightweight yet powerful Mixture-of-Experts (MoE) model. It aims to deliver performance comparable to or even exceeding that of 40B-level dense models and other larger MoE models, but with a significantly smaller active parameter count. 
The model represents a strategy focused on achieving high performance and efficiency through extreme architectural design and training methods.", + "capabilities": ["function-call", "structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 131000, + "maxOutputTokens": 131000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.14 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.57 + } + }, + "family": "ling", + "ownedBy": "bailing", + "openWeights": false + }, + { + "id": "ling-mini-2-0", + "name": "inclusionAI/Ling-mini-2.0", + "description": "Ling-mini-2.0 is a small-sized, high-performance large language model based on the MoE architecture. It has a total of 16 billion parameters, but only activates 1.4 billion parameters per token (non-embedding 789 million), achieving extremely high generation speed. Thanks to the efficient MoE design and large-scale high-quality training data, despite activating only 1.4 billion parameters, Ling-mini-2.0 still demonstrates top-tier performance on downstream tasks comparable to dense LLMs under 10 billion parameters and even larger-scale MoE models.", + "capabilities": ["function-call", "structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 131000, + "maxOutputTokens": 131000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.07 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.28 + } + }, + "family": "ling", + "ownedBy": "bailing", + "openWeights": false + }, + { + "id": "kat-dev", + "name": "Kwaipilot/KAT-Dev", + "description": "KAT-Dev (32B) is an open-source 32B parameter model specifically designed for software engineering tasks. It achieved a 62.4% resolution rate on the SWE-Bench Verified benchmark, ranking fifth among all open-source models of various scales. 
The model is optimized through multiple stages, including intermediate training, supervised fine-tuning (SFT) and reinforcement fine-tuning (RFT), as well as large-scale agent reinforcement learning (RL). Based on Qwen3-32B, its training process lays the foundation for subsequent fine-tuning and reinforcement learning stages by enhancing fundamental abilities such as tool usage, multi-turn interaction, and instruction following. During the fine-tuning phase, the model not only learns eight carefully curated task types and programming scenarios but also innovatively introduces a reinforcement fine-tuning (RFT) stage guided by human engineer-annotated “teacher trajectories.” The final agent reinforcement learning phase addresses scalability challenges through multi-level prefix caching, entropy-based trajectory pruning, and efficient architecture.", + "capabilities": ["function-call", "structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 128000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.6 + } + }, + "family": "kat-coder", + "ownedBy": "streamlake", + "openWeights": false + }, + { + "id": "hunyuan-a13b-instruct", + "name": "tencent/Hunyuan-A13B-Instruct", + "description": "Hunyuan-A13B is a 13B active parameter Mixture-of-Experts (MoE) language model developed by Tencent, with a total parameter count of 80B and support for reasoning via Chain-of-Thought. 
It offers competitive benchmark performance across mathematics, science, coding, and multi-turn reasoning tasks, while maintaining high inference efficiency via Grouped Query Attention (GQA) and quantization support (FP8, GPTQ, etc.).", + "capabilities": ["function-call", "structured-output", "reasoning"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 131000, + "maxOutputTokens": 131000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.14 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.57 + } + }, + "reasoning": { + "supportedEfforts": ["none"] + }, + "family": "hunyuan", + "ownedBy": "tencent", + "openWeights": false + }, + { + "id": "hunyuan-mt", + "name": "tencent/Hunyuan-MT-7B", + "description": "Hunyuan-MT-7B is a lightweight translation model with 7 billion parameters, designed to translate source text into target languages. The model supports translation among 33 languages as well as 5 Chinese minority languages. In the WMT25 International Machine Translation Competition, Hunyuan-MT-7B achieved first place in 30 out of 31 language categories it participated in, demonstrating its exceptional translation capabilities. For translation scenarios, Tencent Hunyuan proposed a complete training paradigm from pre-training to supervised fine-tuning, followed by translation reinforcement and ensemble reinforcement, enabling it to achieve industry-leading performance among models of similar scale. 
The model is computationally efficient, easy to deploy, and suitable for various application scenarios.", + "capabilities": ["function-call", "structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 33000, + "maxOutputTokens": 33000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.2 + } + }, + "family": "hunyuan", + "ownedBy": "tencent", + "openWeights": false + }, + { + "id": "paddleocr-vl-1-5", + "name": "PaddlePaddle/PaddleOCR-VL-1.5", + "capabilities": ["file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 16384, + "maxOutputTokens": 16384, + "ownedBy": "siliconflow-cn", + "openWeights": true + }, + { + "id": "glm-z1-0414", + "name": "THUDM/GLM-Z1-32B-0414", + "description": "GLM-Z1-32B-0414 is a reasoning-focused AI model built on GLM-4-32B-0414. It has been enhanced through cold-start methods and reinforcement learning, with a strong emphasis on math, coding, and logic tasks. Despite having only 32B parameters, it performs comparably to the 671B DeepSeek-R1 on some benchmarks. It excels in complex reasoning tasks, as shown in evaluations like AIME 24/25, LiveCodeBench, and GPQA.", + "capabilities": ["reasoning", "function-call", "structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 131000, + "maxOutputTokens": 131000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.14 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.57 + } + }, + "family": "glm-z", + "ownedBy": "zhipu", + "openWeights": false + }, + { + "id": "glm-4-0414", + "name": "THUDM/GLM-4-9B-0414", + "description": "GLM-4-32B-0414 is a next-generation open-source model with 32 billion parameters, delivering performance comparable to OpenAI’s GPT series and DeepSeek V3/R1. 
It supports smooth local deployment.\n\nThe base model was pre-trained on 15T of high-quality data, including a large amount of reasoning-focused synthetic content, setting the stage for advanced reinforcement learning.\n\nIn the post-training phase, techniques like human preference alignment, rejection sampling, and reinforcement learning were used to improve the model’s ability to follow instructions, generate code, and handle function calls—core skills needed for agent-style tasks.\n\nGLM-4-32B-0414 has shown strong results in engineering code, artifact generation, function calling, search-based QA, and report writing—sometimes matching or even surpassing larger models like GPT-4o and DeepSeek-V3 (671B) on specific benchmarks.", + "capabilities": ["function-call", "structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 33000, + "maxOutputTokens": 33000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.086 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.086 + } + }, + "family": "glm", + "ownedBy": "zhipu", + "openWeights": false + }, + { + "id": "pangu-pro-moe", + "name": "ascend-tribe/pangu-pro-moe", + "capabilities": ["reasoning", "structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 128000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.6 + } + }, + "family": "pangu", + "ownedBy": "siliconflow-cn", + "openWeights": false + }, + { + "id": "ernie-4-5-a47b", + "name": "baidu/ERNIE-4.5-300B-A47B", + "description": "ERNIE-4.5-300B-A47B is a 300B parameter Mixture-of-Experts (MoE) language model developed by Baidu as part of the ERNIE 4.5 series. It activates 47B parameters per token and supports text generation in both English and Chinese. 
Optimized for high-throughput inference and efficient scaling, it uses a heterogeneous MoE structure with advanced routing and quantization strategies, including FP8 and 2-bit formats. This version is fine-tuned for language-only tasks and supports reasoning, tool parameters, and extended context lengths up to 131k tokens. Suitable for general-purpose LLM applications with high reasoning and throughput demands.", + "capabilities": ["function-call", "structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 131000, + "maxOutputTokens": 131000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.28 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.1 + } + }, + "family": "ernie", + "ownedBy": "baidu", + "openWeights": false + }, + { + "id": "seed-oss-instruct", + "name": "ByteDance-Seed/Seed-OSS-36B-Instruct", + "description": "Seed-OSS is a series of open-source large language models developed by ByteDance's Seed team, designed specifically for powerful long-context processing, reasoning, agents, and general capabilities. Among this series, Seed-OSS-36B-Instruct is an instruction-tuned model with 36 billion parameters that natively supports ultra-long context lengths, enabling it to process massive documents or complex codebases in a single pass. This model is specially optimized for reasoning, code generation, and agent tasks (such as tool usage), while maintaining balanced and excellent general capabilities. 
A notable feature of this model is the \"Thinking Budget\" functionality, which allows users to flexibly adjust the inference length as needed, thereby effectively improving inference efficiency in practical applications.", + "capabilities": ["function-call", "structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 262000, + "maxOutputTokens": 262000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.21 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.57 + } + }, + "family": "seed", + "ownedBy": "bytedance", + "openWeights": false + }, + { + "id": "qwen2-5-instruct-128k", + "name": "Qwen/Qwen2.5-72B-Instruct-128K", + "capabilities": ["function-call", "structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 131000, + "maxOutputTokens": 4000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.59 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.59 + } + }, + "family": "qwen", + "ownedBy": "alibaba", + "openWeights": false + }, + { + "id": "qwen3-omni-a3b-captioner", + "name": "Qwen/Qwen3-Omni-30B-A3B-Captioner", + "capabilities": [ + "function-call", + "structured-output", + "file-input", + "audio-recognition", + "image-recognition", + "reasoning" + ], + "inputModalities": ["audio"], + "outputModalities": ["text"], + "contextWindow": 66000, + "maxOutputTokens": 66000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.1 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.4 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 38912 + } + }, + "family": "qwen", + "ownedBy": "alibaba", + "openWeights": false + }, + { + "id": "deepseek-vl2", + "name": "deepseek-ai/deepseek-vl2", + "capabilities": ["function-call", "structured-output", "file-input", "image-recognition"], + "inputModalities": 
["text", "image"], + "outputModalities": ["text"], + "contextWindow": 4000, + "maxOutputTokens": 4000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.15 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.15 + } + }, + "family": "deepseek", + "ownedBy": "deepseek", + "openWeights": false + }, + { + "id": "hermes-4-3", + "name": "Hermes 4.3 36B", + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 32768, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.1 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.39 + } + }, + "family": "nousresearch", + "ownedBy": "nousresearch", + "openWeights": true + }, + { + "id": "hermes-4-fp8", + "name": "Hermes 4 405B FP8 TEE", + "capabilities": ["reasoning", "function-call", "structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 131072, + "maxOutputTokens": 65536, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.2 + } + }, + "family": "nousresearch", + "ownedBy": "nousresearch", + "openWeights": true + }, + { + "id": "deephermes-3-mistral-preview", + "name": "DeepHermes 3 Mistral 24B Preview", + "capabilities": ["function-call", "structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 32768, + "maxOutputTokens": 32768, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.02 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.1 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.01 + } + }, + "family": "nousresearch", + "ownedBy": "nousresearch", + "openWeights": true + }, + { + "id": "dots.ocr", + "name": "dots.ocr", + "capabilities": ["structured-output", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 131072, 
+ "maxOutputTokens": 131072, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.01 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.01 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.005 + } + }, + "family": "rednote", + "ownedBy": "chutes", + "openWeights": true + }, + { + "id": "kimi-k2-thinking", + "name": "Kimi K2 Thinking TEE", + "description": "Kimi K2 Thinking is Moonshot AI’s most advanced open reasoning model to date, extending the K2 series into agentic, long-horizon reasoning. Built on the trillion-parameter Mixture-of-Experts (MoE) architecture introduced in Kimi K2, it activates 32 billion parameters per forward pass and supports 256 k-token context windows. The model is optimized for persistent step-by-step thought, dynamic tool invocation, and complex reasoning workflows that span hundreds of turns. It interleaves step-by-step reasoning with tool use, enabling autonomous research, coding, and writing that can persist for hundreds of sequential actions without drift.\n\nIt sets new open-source benchmarks on HLE, BrowseComp, SWE-Multilingual, and LiveCodeBench, while maintaining stable multi-agent behavior through 200–300 tool calls. 
Built on a large-scale MoE architecture with MuonClip optimization, it combines strong reasoning depth with high inference efficiency for demanding agentic and analytical tasks.", + "capabilities": ["reasoning", "function-call", "structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 262144, + "maxOutputTokens": 65535, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.4 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.75 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.14100000000000001 + } + }, + "reasoning": { + "supportedEfforts": [], + "interleaved": true + }, + "family": "kimi-thinking", + "ownedBy": "moonshot", + "openWeights": true + }, + { + "id": "nemotron-3-nano-a3b-bf16", + "name": "NVIDIA Nemotron 3 Nano 30B A3B BF16", + "capabilities": ["function-call", "structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 262144, + "maxOutputTokens": 262144, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.06 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.24 + } + }, + "family": "nemotron", + "ownedBy": "nvidia", + "openWeights": true + }, + { + "id": "tng-r1t-chimera-turbo", + "name": "TNG R1T Chimera Turbo", + "capabilities": ["reasoning", "function-call", "structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 163840, + "maxOutputTokens": 65536, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.22 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.6 + } + }, + "ownedBy": "chutes", + "openWeights": true + }, + { + "id": "deepseek-r1t-chimera", + "name": "DeepSeek R1T Chimera", + "description": "Provided by chutes.ai\nDeepSeek-R1T-Chimera merges DeepSeek-R1’s reasoning strengths with DeepSeek-V3 (0324)’s token-efficiency improvements into a MoE Transformer optimized for general text 
generation. It integrates pretrained weights from both models and is released under the MIT license for research and commercial use.\n", + "capabilities": ["reasoning", "structured-output", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 163840, + "maxOutputTokens": 163840, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.2 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.15 + } + }, + "family": "tngtech", + "ownedBy": "deepseek", + "openWeights": true + }, + { + "id": "deepseek-tng-r1t2-chimera", + "name": "DeepSeek TNG R1T2 Chimera", + "capabilities": ["reasoning", "function-call", "structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 163840, + "maxOutputTokens": 163840, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.25 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.85 + } + }, + "family": "tngtech", + "ownedBy": "deepseek", + "openWeights": true + }, + { + "id": "tng-r1t-chimera", + "name": "TNG R1T Chimera TEE", + "capabilities": ["reasoning", "function-call", "structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 163840, + "maxOutputTokens": 65536, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.25 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.85 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.125 + } + }, + "family": "tngtech", + "ownedBy": "chutes", + "openWeights": true + }, + { + "id": "internvl3", + "name": "InternVL3 78B TEE", + "description": "The InternVL3 series is an advanced multimodal large language model (MLLM). Compared to InternVL 2.5, InternVL3 demonstrates stronger multimodal perception and reasoning capabilities. 
\n\nIn addition, InternVL3 is benchmarked against the Qwen2.5 Chat models, whose pre-trained base models serve as the initialization for its language component. Benefiting from Native Multimodal Pre-Training, the InternVL3 series surpasses the Qwen2.5 series in overall text performance.", + "capabilities": ["structured-output", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 32768, + "maxOutputTokens": 32768, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.1 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.39 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.075 + } + }, + "family": "opengvlab", + "ownedBy": "intern", + "openWeights": true + }, + { + "id": "mistral-small-3-2-instruct-2506", + "name": "Mistral Small 3.2 24B Instruct 2506", + "capabilities": ["function-call", "structured-output", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 131072, + "maxOutputTokens": 131072, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.06 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.18 + } + }, + "family": "chutesai", + "ownedBy": "mistral", + "openWeights": true + }, + { + "id": "mistral-nemo-instruct-2407", + "name": "Mistral Nemo Instruct 2407", + "capabilities": ["structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 131072, + "maxOutputTokens": 131072, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.02 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.04 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.01 + } + }, + "family": "unsloth", + "ownedBy": "mistral", + "openWeights": true + }, + { + "id": "mistral-small-instruct-2501", + "name": "Mistral Small 24B Instruct 2501", + "description": "Mistral Small 3 is a 24B-parameter language model 
optimized for low-latency performance across common AI tasks. Released under the Apache 2.0 license, it features both pre-trained and instruction-tuned versions designed for efficient local deployment.\n\nThe model achieves 81% accuracy on the MMLU benchmark and performs competitively with larger models like Llama 3.3 70B and Qwen 32B, while operating at three times the speed on equivalent hardware. [Read the blog post about the model here.](https://mistral.ai/news/mistral-small-3/)", + "capabilities": ["function-call", "structured-output", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 32768, + "maxOutputTokens": 32768, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.03 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.11 + } + }, + "family": "unsloth", + "ownedBy": "mistral", + "openWeights": true + }, + { + "id": "qwen3guard-gen", + "name": "Qwen3Guard Gen 0.6B", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 32768, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.01 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.01 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.005 + } + }, + "family": "qwen", + "ownedBy": "alibaba", + "openWeights": true + }, + { + "id": "qwen3-coder-a35b-instruct-fp8", + "name": "Qwen3 Coder 480B A35B Instruct FP8 TEE", + "capabilities": ["function-call", "structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 262144, + "maxOutputTokens": 262144, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.22 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.95 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.11 + } + }, + "family": "qwen", + "ownedBy": "alibaba", + "openWeights": true + }, + 
{ + "id": "mirothinker-v1-5", + "name": "MiroThinker V1.5 235B", + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 262144, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.2 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.15 + } + }, + "ownedBy": "chutes", + "openWeights": true + }, + { + "id": "glm-4-6-fp8", + "name": "GLM 4.6 FP8", + "capabilities": ["reasoning", "function-call", "structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 202752, + "maxOutputTokens": 65535, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.2 + } + }, + "reasoning": { + "supportedEfforts": ["none"], + "interleaved": true + }, + "ownedBy": "zhipu", + "openWeights": true + }, + { + "id": "glm-4-5-fp8", + "name": "GLM 4.5 FP8", + "capabilities": ["reasoning", "function-call", "structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 131072, + "maxOutputTokens": 65536, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.2 + } + }, + "reasoning": { + "supportedEfforts": ["none"], + "interleaved": true + }, + "family": "glm", + "ownedBy": "zhipu", + "openWeights": true + }, + { + "id": "deepseek-v3-2-speciale", + "name": "DeepSeek V3.2 Speciale TEE", + "description": "DeepSeek-V3.2-Speciale is a high-compute variant of DeepSeek-V3.2 optimized for maximum reasoning and agentic performance. It builds on DeepSeek Sparse Attention (DSA) for efficient long-context processing, then scales post-training reinforcement learning to push capability beyond the base model. 
Reported evaluations place Speciale ahead of GPT-5 on difficult reasoning workloads, with proficiency comparable to Gemini-3.0-Pro, while retaining strong coding and tool-use reliability. Like V3.2, it benefits from a large-scale agentic task synthesis pipeline that improves compliance and generalization in interactive environments.", + "capabilities": ["reasoning", "structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 163840, + "maxOutputTokens": 65536, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.27 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.41 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.135 + } + }, + "reasoning": { + "supportedEfforts": ["none"], + "interleaved": true + }, + "family": "deepseek", + "ownedBy": "deepseek", + "openWeights": true + }, + { + "id": "k2-5", + "name": "Kimi K2.5", + "capabilities": ["reasoning", "function-call", "structured-output", "image-recognition", "video-recognition"], + "inputModalities": ["text", "image", "video"], + "outputModalities": ["text"], + "contextWindow": 262144, + "maxOutputTokens": 32768, + "family": "kimi-thinking", + "ownedBy": "kimi-for-coding", + "openWeights": true + }, + { + "id": "nova-pro-v1", + "name": "Nova Pro 1.0", + "description": "Amazon Nova Pro 1.0 is a capable multimodal model from Amazon focused on providing a combination of accuracy, speed, and cost for a wide range of tasks. 
As of December 2024, it achieves state-of-the-art performance on key benchmarks including visual question answering (TextVQA) and video understanding (VATEX).\n\nAmazon Nova Pro demonstrates strong capabilities in processing both visual and textual information and at analyzing financial documents.\n\n**NOTE**: Video input is not supported at this time.", + "capabilities": ["function-call", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 300000, + "maxOutputTokens": 5000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.016 + }, + "output": { + "currency": "USD", + "perMillionTokens": 4.061 + } + }, + "family": "nova-pro", + "ownedBy": "amazon", + "openWeights": false + }, + { + "id": "claude-4-5-sonnet", + "name": "Claude 4.5 Sonnet", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 200000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 3.259 + }, + "output": { + "currency": "USD", + "perMillionTokens": 16.296 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.30000000000000004 + } + }, + "family": "claude-sonnet", + "ownedBy": "anthropic", + "openWeights": false + }, + { + "id": "devstral-small-2512", + "name": "Devstral Small 2 2512", + "capabilities": ["function-call", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 262000, + "maxOutputTokens": 262000, + "ownedBy": "mistral", + "openWeights": true + }, + { + "id": "jais-chat", + "name": "JAIS 30b Chat", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 8192, + "maxOutputTokens": 2048, + "family": "jais", + "ownedBy": "github-models", + "openWeights": true + }, + { + "id": "command-r", + 
"name": "Cohere Command R", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.64 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.92 + } + }, + "family": "command-r", + "ownedBy": "cohere", + "openWeights": false + }, + { + "id": "command-r-plus", + "name": "Cohere Command R+", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 3.84 + }, + "output": { + "currency": "USD", + "perMillionTokens": 19.2 + } + }, + "family": "command-r", + "ownedBy": "cohere", + "openWeights": false + }, + { + "id": "codestral-2501", + "name": "Codestral 25.01", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 32000, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.9 + } + }, + "family": "codestral", + "ownedBy": "mistral", + "openWeights": false + }, + { + "id": "mistral-small-2503", + "name": "Mistral Small 3.1", + "capabilities": ["reasoning", "function-call", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 32768, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.1 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.3 + } + }, + "family": "mistral-small", + "ownedBy": "mistral", + "openWeights": false + }, + { + "id": "phi-3-mini-4k-instruct", + "name": "Phi-3-mini instruct (4k)", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + 
"outputModalities": ["text"], + "contextWindow": 4096, + "maxOutputTokens": 1024, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.13 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.52 + } + }, + "family": "phi", + "ownedBy": "microsoft", + "openWeights": true + }, + { + "id": "phi-4", + "name": "Phi-4", + "description": "[Microsoft Research](/microsoft) Phi-4 is designed to perform well in complex reasoning tasks and can operate efficiently in situations with limited memory or where quick responses are needed. \n\nAt 14 billion parameters, it was trained on a mix of high-quality synthetic datasets, data from curated websites, and academic materials. It has undergone careful improvement to follow instructions accurately and maintain strong safety standards. It works best with English language inputs.\n\nFor more information, please see [Phi-4 Technical Report](https://arxiv.org/pdf/2412.08905)\n", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 16000, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.125 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.5 + } + }, + "family": "phi", + "ownedBy": "microsoft", + "openWeights": true + }, + { + "id": "phi-4-mini-reasoning", + "name": "Phi-4-mini-reasoning", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.075 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.3 + } + }, + "family": "phi", + "ownedBy": "microsoft", + "openWeights": true + }, + { + "id": "phi-3-5-mini-instruct", + "name": "Phi-3.5-mini instruct (128k)", + "description": "Phi-3.5-mini is a lightweight, state-of-the-art open model built upon the dataset used for 
Phi-3—which includes synthetic data and carefully curated publicly available websites—focusing on very high-quality, reasoning-intensive data. This model is part of the Phi-3 model family and supports a context length of 128K tokens.", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.13 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.52 + } + }, + "family": "phi", + "ownedBy": "microsoft", + "openWeights": true + }, + { + "id": "phi-3-mini-128k-instruct", + "name": "Phi-3-mini instruct (128k)", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.13 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.52 + } + }, + "family": "phi", + "ownedBy": "microsoft", + "openWeights": true + }, + { + "id": "phi-4-reasoning", + "name": "Phi-4-Reasoning", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.125 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.5 + } + }, + "family": "phi", + "ownedBy": "microsoft", + "openWeights": true + }, + { + "id": "mai-ds-r1", + "name": "MAI-DS-R1", + "description": "MAI-DS-R1 is a refined version of DeepSeek-R1 by Microsoft AI, designed to improve responsiveness to previously blocked topics while enhancing safety. It integrates 110k Tulu-3 SFT samples and 350k multilingual safety-alignment examples. The model retains strong reasoning and coding abilities, surpasses R1-1776 in handling sensitive queries, and reduces harmful content leakage. 
Based on a transformer MoE architecture, it suits general-purpose tasks—excluding legal, medical, or autonomous systems.", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 65536, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.35 + }, + "output": { + "currency": "USD", + "perMillionTokens": 5.4 + } + }, + "family": "mai", + "ownedBy": "github-models", + "openWeights": false + }, + { + "id": "o1-preview", + "name": "OpenAI o1-preview", + "description": "The latest and most powerful inference model from OpenAI; AiHubMix uses both OpenAI and Microsoft Azure OpenAI channels simultaneously to achieve high-concurrency load balancing.", + "capabilities": ["reasoning"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 32768, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 16.5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 66 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 8.25 + } + }, + "reasoning": { + "supportedEfforts": ["low", "medium", "high"] + }, + "family": "o", + "ownedBy": "openai", + "openWeights": false + }, + { + "id": "o1-mini", + "name": "OpenAI o1-mini", + "description": "o1-mini is faster and 80% cheaper, and is competitive with o1-preview on coding tasks. 
AiHubMix uses both OpenAI and Microsoft Azure OpenAI channels simultaneously.", + "capabilities": ["reasoning"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 65536, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.1 + }, + "output": { + "currency": "USD", + "perMillionTokens": 4.4 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.55 + } + }, + "reasoning": { + "supportedEfforts": ["low", "medium", "high"] + }, + "family": "o-mini", + "ownedBy": "openai", + "openWeights": false + }, + { + "id": "jamba-1-5-large", + "name": "AI21 Jamba 1.5 Large", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 256000, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2.2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 8.8 + } + }, + "family": "jamba", + "ownedBy": "ai21", + "openWeights": false + }, + { + "id": "jamba-1-5-mini", + "name": "AI21 Jamba 1.5 Mini", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 256000, + "maxOutputTokens": 4096, + "family": "jamba", + "ownedBy": "ai21", + "openWeights": false + }, + { + "id": "rnj-1-instruct", + "name": "Rnj-1 Instruct", + "description": "Rnj-1 is an 8B-parameter, dense, open-weight model family developed by Essential AI and trained from scratch with a focus on programming, math, and scientific reasoning. The model demonstrates strong performance across multiple programming languages, tool-use workflows, and agentic execution environments (e.g., mini-SWE-agent). 
", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 32768, + "maxOutputTokens": 32768, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.15 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.15 + } + }, + "family": "rnj", + "ownedBy": "essentialai", + "openWeights": true + }, + { + "id": "llama-3-3-instruct-turbo", + "name": "Llama 3.3 70B", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 131072, + "maxOutputTokens": 131072, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.88 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.88 + } + }, + "family": "llama", + "ownedBy": "meta", + "openWeights": true + }, + { + "id": "qwen3-coder-next-fp8", + "name": "Qwen3 Coder Next FP8", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 262144, + "maxOutputTokens": 262144, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.2 + } + }, + "family": "qwen", + "ownedBy": "alibaba", + "openWeights": true + }, + { + "id": "qwen3-a22b-instruct-2507-tput", + "name": "Qwen3 235B A22B Instruct 2507 FP8", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 262144, + "maxOutputTokens": 262144, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.6 + } + }, + "family": "qwen", + "ownedBy": "alibaba", + "openWeights": true + }, + { + "id": "gpt-4", + "name": "GPT-4", + "description": "OpenAI's flagship model, GPT-4 is a large-scale multimodal language model capable of solving difficult problems with greater accuracy than previous models due to its 
broader general knowledge and advanced reasoning capabilities. Training data: up to Sep 2021.", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 8192, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 60 + }, + "output": { + "currency": "USD", + "perMillionTokens": 120 + } + }, + "family": "gpt", + "ownedBy": "openai", + "openWeights": false + }, + { + "id": "embed-v-4-0", + "name": "Embed v4", + "capabilities": ["file-input", "image-recognition", "embedding"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 1536, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.12 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "family": "cohere-embed", + "ownedBy": "cohere", + "openWeights": true + }, + { + "id": "embed-v3-multilingual", + "name": "Embed v3 Multilingual", + "capabilities": ["embedding"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 512, + "maxOutputTokens": 1024, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.1 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "family": "cohere-embed", + "ownedBy": "cohere", + "openWeights": true + }, + { + "id": "phi-4-mini", + "name": "Phi-4-mini", + "description": "Phi-4-mini-reasoning is a lightweight open model designed for advanced mathematical reasoning and logic-intensive problem-solving. It is particularly well-suited for tasks such as formal proofs, symbolic computation, and solving multi-step word problems. 
With its efficient architecture, the model balances high-quality reasoning performance with cost-effective deployment, making it ideal for educational applications, embedded tutoring, and lightweight edge or mobile systems.\n\nPhi-4-mini-reasoning supports a 128K token context length, enabling it to process and reason over long mathematical problems and proofs. Built on synthetic and high-quality math datasets, the model leverages advanced fine-tuning techniques such as supervised fine-tuning and preference modeling to enhance reasoning capabilities. Its training incorporates safety and alignment protocols, ensuring robust and reliable performance across supported use cases.", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.075 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.3 + } + }, + "family": "phi", + "ownedBy": "microsoft", + "openWeights": true + }, + { + "id": "gpt-4-32k", + "name": "GPT-4 32K", + "description": "The smartest version of GPT-4; OpenAI no longer offers it officially. 
All the 32k versions on this site are provided by Microsoft, deployed on Azure OpenAI by the official Microsoft service.", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 32768, + "maxOutputTokens": 32768, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 60 + }, + "output": { + "currency": "USD", + "perMillionTokens": 120 + } + }, + "family": "gpt", + "ownedBy": "openai", + "openWeights": false + }, + { + "id": "gpt-3-5-turbo-0125", + "name": "GPT-3.5 Turbo 0125", + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 16384, + "maxOutputTokens": 16384, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.5 + } + }, + "family": "gpt", + "ownedBy": "openai", + "openWeights": false + }, + { + "id": "gpt-3-5-turbo-0613", + "name": "GPT-3.5 Turbo 0613", + "description": "GPT-3.5 Turbo is OpenAI's fastest model. 
It can understand and generate natural language or code, and is optimized for chat and traditional completion tasks.\n\nTraining data up to Sep 2021.", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 16384, + "maxOutputTokens": 16384, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 4 + } + }, + "family": "gpt", + "ownedBy": "openai", + "openWeights": false + }, + { + "id": "model-router", + "name": "Model Router", + "capabilities": ["function-call", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 16384, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.14 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "family": "model-router", + "ownedBy": "azure", + "openWeights": false + }, + { + "id": "gpt-3-5-turbo-0301", + "name": "GPT-3.5 Turbo 0301", + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 4096, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2 + } + }, + "family": "gpt", + "ownedBy": "openai", + "openWeights": false + }, + { + "id": "phi-4-multimodal", + "name": "Phi-4-multimodal", + "capabilities": ["file-input", "image-recognition", "audio-recognition"], + "inputModalities": ["text", "image", "audio"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.08 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.32 + } + }, + "family": "phi", + "ownedBy": "microsoft", + "openWeights": true + }, + { + "id": "gpt-5-1-chat", + "name": "GPT-5.1 Chat", + "description": "GPT-5.1 Chat (AKA Instant is 
the fast, lightweight member of the 5.1 family, optimized for low-latency chat while retaining strong general intelligence. It uses adaptive reasoning to selectively “think” on harder queries, improving accuracy on math, coding, and multi-step tasks without slowing down typical conversations. The model is warmer and more conversational by default, with better instruction following and more stable short-form reasoning. GPT-5.1 Chat is designed for high-throughput, interactive workloads where responsiveness and consistency matter more than deep deliberation.\n", + "capabilities": [ + "reasoning", + "function-call", + "file-input", + "image-recognition", + "image-generation", + "audio-recognition", + "audio-generation", + "web-search" + ], + "inputModalities": ["text", "image", "audio"], + "outputModalities": ["text", "image", "audio"], + "contextWindow": 128000, + "maxOutputTokens": 16384, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.25 + }, + "output": { + "currency": "USD", + "perMillionTokens": 10 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.125 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"] + }, + "family": "gpt-codex", + "ownedBy": "openai", + "openWeights": false + }, + { + "id": "embed-v3-english", + "name": "Embed v3 English", + "capabilities": ["embedding"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 512, + "maxOutputTokens": 1024, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.1 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "family": "cohere-embed", + "ownedBy": "cohere", + "openWeights": true + }, + { + "id": "gpt-4-turbo-vision", + "name": "GPT-4 Turbo Vision", + "capabilities": ["function-call", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 4096, + "pricing": { + 
"input": { + "currency": "USD", + "perMillionTokens": 10 + }, + "output": { + "currency": "USD", + "perMillionTokens": 30 + } + }, + "family": "gpt", + "ownedBy": "openai", + "openWeights": false + }, + { + "id": "phi-4-reasoning-plus", + "name": "Phi-4-reasoning-plus", + "capabilities": ["reasoning"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 32000, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.125 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.5 + } + }, + "family": "phi", + "ownedBy": "microsoft", + "openWeights": true + }, + { + "id": "gpt-3-5-turbo-1106", + "name": "GPT-3.5 Turbo 1106", + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 16384, + "maxOutputTokens": 16384, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2 + } + }, + "family": "gpt", + "ownedBy": "openai", + "openWeights": false + }, + { + "id": "deepseek-v3-1-nex-n1", + "name": "nex-agi/DeepSeek-V3.1-Nex-N1", + "description": "DeepSeek V3.1 Nex-N1 is the flagship release of the Nex-N1 series — a post-trained model designed to highlight agent autonomy, tool use, and real-world productivity. 
\n\nNex-N1 demonstrates competitive performance across all evaluation scenarios, showing particularly strong results in practical coding and HTML generation tasks.", + "capabilities": ["reasoning", "function-call", "structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 131000, + "maxOutputTokens": 131000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2 + } + }, + "reasoning": { + "supportedEfforts": ["none"] + }, + "family": "deepseek", + "ownedBy": "deepseek", + "openWeights": false + }, + { + "id": "llama-prompt-guard-2-86m", + "name": "Meta Llama Prompt Guard 2 86M", + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 512, + "maxOutputTokens": 2, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.01 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.01 + } + }, + "family": "llama", + "ownedBy": "meta", + "openWeights": false + }, + { + "id": "claude-4-5-haiku", + "name": "Anthropic: Claude 4.5 Haiku", + "capabilities": ["function-call", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1 + }, + "output": { + "currency": "USD", + "perMillionTokens": 5 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.09999999999999999 + } + }, + "family": "claude-haiku", + "ownedBy": "anthropic", + "openWeights": false + }, + { + "id": "gpt-4-1-mini-2025-04-14", + "name": "OpenAI GPT-4.1 Mini", + "capabilities": ["function-call", "image-recognition", "web-search"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 1047576, + "maxOutputTokens": 32768, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.39999999999999997 + }, + "output": 
{ + "currency": "USD", + "perMillionTokens": 1.5999999999999999 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.09999999999999999 + } + }, + "family": "gpt-mini", + "ownedBy": "openai", + "openWeights": false + }, + { + "id": "llama-prompt-guard-2-22m", + "name": "Meta Llama Prompt Guard 2 22M", + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 512, + "maxOutputTokens": 2, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.01 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.01 + } + }, + "family": "llama", + "ownedBy": "meta", + "openWeights": false + }, + { + "id": "claude-3-5-sonnet-v2", + "name": "Anthropic: Claude 3.5 Sonnet v2", + "capabilities": ["function-call", "image-recognition", "web-search", "computer-use"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 15 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.30000000000000004 + } + }, + "family": "claude-sonnet", + "ownedBy": "anthropic", + "openWeights": false + }, + { + "id": "sonar-deep-research", + "name": "Perplexity Sonar Deep Research", + "description": "Sonar Deep Research is a research-focused model designed for multi-step retrieval, synthesis, and reasoning across complex topics. It autonomously searches, reads, and evaluates sources, refining its approach as it gathers information. 
This enables comprehensive report generation across domains like finance, technology, health, and current events.\n\nNotes on Pricing ([Source](https://docs.perplexity.ai/guides/pricing#detailed-pricing-breakdown-for-sonar-deep-research)) \n- Input tokens comprise of Prompt tokens (user prompt) + Citation tokens (these are processed tokens from running searches)\n- Deep Research runs multiple searches to conduct exhaustive research. Searches are priced at $5/1000 searches. A request that does 30 searches will cost $0.15 in this step.\n- Reasoning is a distinct step in Deep Research since it does extensive automated reasoning through all the material it gathers during its research phase. Reasoning tokens here are a bit different than the CoTs in the answer - these are tokens that we use to reason through the research material prior to generating the outputs via the CoTs. Reasoning tokens are priced at $3/1M tokens", + "capabilities": ["reasoning", "web-search"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 127000, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 8 + } + }, + "reasoning": { + "supportedEfforts": ["low", "medium", "high"] + }, + "family": "sonar-deep-research", + "ownedBy": "perplexity", + "openWeights": false + }, + { + "id": "kimi-k2-0711", + "name": "Kimi K2 (07/11)", + "description": "Kimi-K2 is a MoE architecture foundational model with extremely powerful coding and agent capabilities, featuring a total of 1 trillion parameters and activating 32 billion parameters. 
In benchmark performance tests across major categories such as general knowledge reasoning, programming, mathematics, and agents, the K2 model outperforms other mainstream open-source models.\nThe Kimi-K2 model supports a context length of 128k tokens.\nIt does not support visual capabilities.", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 131072, + "maxOutputTokens": 16384, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.5700000000000001 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2.3 + } + }, + "family": "kimi", + "ownedBy": "moonshot", + "openWeights": false + }, + { + "id": "codex-mini-latest", + "name": "OpenAI Codex Mini Latest", + "description": "Only supports v1/responses API calls.https://docs.aihubmix.com/en/api/Responses-API\ncodex-mini-latest is a fine-tuned version of o4-mini specifically for use in Codex CLI. For direct use in the API, we recommend starting with gpt-4.1.", + "capabilities": ["function-call", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 100000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 6 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.375 + } + }, + "family": "gpt-codex-mini", + "ownedBy": "helicone", + "openWeights": false + }, + { + "id": "claude-4-5-opus", + "name": "Anthropic: Claude Opus 4.5", + "capabilities": ["reasoning", "function-call", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 64000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 25 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.5 + } + }, + 
"family": "claude-opus", + "ownedBy": "anthropic", + "openWeights": false + }, + { + "id": "claude-3-haiku-20240307", + "name": "Anthropic: Claude 3 Haiku", + "capabilities": ["function-call", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.25 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.25 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.03 + } + }, + "family": "claude-haiku", + "ownedBy": "anthropic", + "openWeights": false + }, + { + "id": "gemini-3-pro", + "name": "Gemini 3 Pro", + "capabilities": [ + "reasoning", + "function-call", + "structured-output", + "file-input", + "image-recognition", + "audio-recognition", + "video-recognition", + "web-search" + ], + "inputModalities": ["text", "image", "video", "audio"], + "outputModalities": ["text"], + "contextWindow": 1048576, + "maxOutputTokens": 65536, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 12 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.2 + } + }, + "reasoning": { + "supportedEfforts": ["low", "high"], + "thinkingTokenLimits": { + "min": 128, + "max": 32768 + } + }, + "family": "gemini-pro", + "ownedBy": "google", + "openWeights": false + }, + { + "id": "gemini-3-1-pro", + "name": "Gemini 3.1 Pro Preview", + "capabilities": [ + "reasoning", + "function-call", + "structured-output", + "file-input", + "image-recognition", + "audio-recognition", + "video-recognition" + ], + "inputModalities": ["text", "image", "video", "audio"], + "outputModalities": ["text"], + "contextWindow": 1048576, + "maxOutputTokens": 65536, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 12 + }, + "cacheRead": { + "currency": "USD", 
+ "perMillionTokens": 0.2 + } + }, + "reasoning": { + "supportedEfforts": ["low", "medium", "high"], + "thinkingTokenLimits": { + "min": 128, + "max": 32768 + } + }, + "family": "gemini-pro", + "ownedBy": "google", + "openWeights": false + }, + { + "id": "big-pickle", + "name": "Big Pickle", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 128000, + "family": "big-pickle", + "ownedBy": "opencode", + "openWeights": false + }, + { + "id": "grok-code", + "name": "Grok Code Fast 1", + "capabilities": ["reasoning", "function-call", "file-input", "web-search"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 256000, + "maxOutputTokens": 256000, + "family": "grok", + "ownedBy": "xai", + "openWeights": false + }, + { + "id": "minimax-m2-5-highspeed", + "name": "MiniMax-M2.5-highspeed", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 204800, + "maxOutputTokens": 131072, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.6 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2.4 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.06 + } + }, + "reasoning": { + "supportedEfforts": [], + "interleaved": true + }, + "family": "minimax", + "ownedBy": "minimax", + "openWeights": true + }, + { + "id": "gemini-flash-lite-latest", + "name": "Gemini Flash-Lite Latest", + "capabilities": [ + "reasoning", + "function-call", + "structured-output", + "file-input", + "image-recognition", + "audio-recognition", + "video-recognition" + ], + "inputModalities": ["text", "image", "audio", "video"], + "outputModalities": ["text"], + "contextWindow": 1048576, + "maxOutputTokens": 65536, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.1 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.4 
+ }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.025 + } + }, + "family": "gemini-flash-lite", + "ownedBy": "google", + "openWeights": false + }, + { + "id": "gemini-flash-latest", + "name": "Gemini Flash Latest", + "capabilities": [ + "reasoning", + "function-call", + "structured-output", + "file-input", + "image-recognition", + "audio-recognition", + "video-recognition" + ], + "inputModalities": ["text", "image", "audio", "video"], + "outputModalities": ["text"], + "contextWindow": 1048576, + "maxOutputTokens": 65536, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2.5 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.075 + } + }, + "family": "gemini-flash", + "ownedBy": "google", + "openWeights": false + }, + { + "id": "gemini-2-5-pro-preview-05-06", + "name": "Gemini 2.5 Pro Preview 05-06", + "description": "Gemini 2.5 Pro is Google’s state-of-the-art AI model designed for advanced reasoning, coding, mathematics, and scientific tasks. It employs “thinking” capabilities, enabling it to reason through responses with enhanced accuracy and nuanced context handling. 
Gemini 2.5 Pro achieves top-tier performance on multiple benchmarks, including first-place positioning on the LMArena leaderboard, reflecting superior human-preference alignment and complex problem-solving abilities.", + "capabilities": [ + "reasoning", + "function-call", + "structured-output", + "file-input", + "image-recognition", + "audio-recognition", + "video-recognition", + "web-search" + ], + "inputModalities": ["text", "image", "audio", "video"], + "outputModalities": ["text"], + "contextWindow": 1048576, + "maxOutputTokens": 65536, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.25 + }, + "output": { + "currency": "USD", + "perMillionTokens": 10 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.31 + } + }, + "reasoning": { + "supportedEfforts": ["low", "medium", "high"], + "thinkingTokenLimits": { + "min": 128, + "max": 32768 + } + }, + "family": "gemini-pro", + "ownedBy": "google", + "openWeights": false + }, + { + "id": "gemini-2-5-flash-preview-tts", + "name": "Gemini 2.5 Flash Preview TTS", + "description": "Gemini 2.5 Flash Preview TTS is a lightweight, low-latency text-to-speech model designed for real-time voice generation. It produces natural, expressive speech with accurate control over tone, style, and pacing, while dynamically adjusting speaking speed based on context and instructions. 
The model also maintains consistent and distinguishable voices across multi-turn and multi-speaker conversations, making it well-suited for interactive and conversational applications that require stable, high-quality audio output.", + "capabilities": ["audio-generation", "function-call", "image-recognition", "web-search", "reasoning"], + "inputModalities": ["text"], + "outputModalities": ["audio"], + "contextWindow": 8000, + "maxOutputTokens": 16000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 10 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 0, + "max": 24576 + } + }, + "family": "gemini-flash", + "ownedBy": "google", + "openWeights": false + }, + { + "id": "gemini-live-2-5-flash-preview-native-audio", + "name": "Gemini Live 2.5 Flash Preview Native Audio", + "capabilities": ["reasoning", "function-call", "audio-recognition", "audio-generation", "video-recognition"], + "inputModalities": ["text", "audio", "video"], + "outputModalities": ["text", "audio"], + "contextWindow": 131072, + "maxOutputTokens": 65536, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2 + } + }, + "family": "gemini-flash", + "ownedBy": "google", + "openWeights": false + }, + { + "id": "gemini-3-1-pro-preview-customtools", + "name": "Gemini 3.1 Pro Preview Custom Tools", + "description": "gemini-3.1-pro-preview-customtools\n\nFor users who build applications mixing bash and custom tools, the Gemini 3.1 Pro preview provides a separate endpoint accessible via the API call gemini-3.1-pro-preview-customtools. 
This endpoint is better at prioritizing your custom tools (for example, view_file or search_code).\n\nPlease note that while gemini-3.1-pro-preview-customtools is optimized for agent workflows that use custom tools and Bash, you may experience quality fluctuations in some use cases that cannot benefit from these tools.", + "capabilities": [ + "reasoning", + "function-call", + "structured-output", + "file-input", + "image-recognition", + "audio-recognition", + "video-recognition" + ], + "inputModalities": ["text", "image", "video", "audio"], + "outputModalities": ["text"], + "contextWindow": 1048576, + "maxOutputTokens": 65536, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 12 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.2 + } + }, + "reasoning": { + "supportedEfforts": ["low", "medium", "high"], + "thinkingTokenLimits": { + "min": 128, + "max": 32768 + } + }, + "family": "gemini-pro", + "ownedBy": "google", + "openWeights": false + }, + { + "id": "gemini-live-2-5-flash", + "name": "Gemini Live 2.5 Flash", + "capabilities": [ + "reasoning", + "function-call", + "file-input", + "image-recognition", + "audio-recognition", + "audio-generation", + "video-recognition" + ], + "inputModalities": ["text", "image", "audio", "video"], + "outputModalities": ["text", "audio"], + "contextWindow": 128000, + "maxOutputTokens": 8000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2 + } + }, + "family": "gemini-flash", + "ownedBy": "google", + "openWeights": false + }, + { + "id": "gemini-2-5-flash-preview-04-17", + "name": "Gemini 2.5 Flash Preview 04-17", + "capabilities": [ + "reasoning", + "function-call", + "file-input", + "image-recognition", + "audio-recognition", + "video-recognition", + "web-search" + ], + "inputModalities": ["text", "image", "audio", "video"], + 
"outputModalities": ["text"], + "contextWindow": 1048576, + "maxOutputTokens": 65536, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.15 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.6 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.0375 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 0, + "max": 24576 + } + }, + "family": "gemini-flash", + "ownedBy": "google", + "openWeights": false + }, + { + "id": "gemini-2-5-pro-preview-tts", + "name": "Gemini 2.5 Pro Preview TTS", + "description": "Gemini 2.5 Pro Preview TTS is a high-fidelity text-to-speech model designed for premium voice experiences and complex speech generation scenarios. It delivers highly natural and expressive audio with precise control over tone, style, and emotional nuance, while maintaining smooth, context-aware pacing across long-form content. The model excels in multi-speaker and dialogue-heavy use cases, preserving consistent character voices and conversational coherence, making it well-suited for narration, storytelling, and advanced conversational AI applications.", + "capabilities": ["audio-generation", "function-call", "image-recognition", "web-search", "reasoning"], + "inputModalities": ["text"], + "outputModalities": ["audio"], + "contextWindow": 8000, + "maxOutputTokens": 16000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1 + }, + "output": { + "currency": "USD", + "perMillionTokens": 20 + } + }, + "reasoning": { + "supportedEfforts": ["low", "medium", "high"], + "thinkingTokenLimits": { + "min": 128, + "max": 32768 + } + }, + "family": "gemini-flash", + "ownedBy": "google", + "openWeights": false + }, + { + "id": "gemini-1-5-flash", + "name": "Gemini 1.5 Flash", + "capabilities": ["function-call", "file-input", "image-recognition", "audio-recognition", "video-recognition"], + "inputModalities": ["text", "image", "audio", "video"], + 
"outputModalities": ["text"], + "contextWindow": 1000000, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.075 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.3 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.01875 + } + }, + "family": "gemini-flash", + "ownedBy": "google", + "openWeights": false + }, + { + "id": "gemini-1-5-pro", + "name": "Gemini 1.5 Pro", + "capabilities": ["function-call", "file-input", "image-recognition", "audio-recognition", "video-recognition"], + "inputModalities": ["text", "image", "audio", "video"], + "outputModalities": ["text"], + "contextWindow": 1000000, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.25 + }, + "output": { + "currency": "USD", + "perMillionTokens": 5 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.3125 + } + }, + "family": "gemini-pro", + "ownedBy": "google", + "openWeights": false + }, + { + "id": "gpt-oss-maas", + "name": "GPT OSS 120B", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 131072, + "maxOutputTokens": 32768, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.09 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.36 + } + }, + "reasoning": { + "supportedEfforts": ["low", "medium", "high"] + }, + "family": "gpt-oss", + "ownedBy": "openai", + "openWeights": true + }, + { + "id": "qwen3-a22b-instruct-2507-maas", + "name": "Qwen3 235B A22B Instruct", + "capabilities": ["reasoning", "function-call", "structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 262144, + "maxOutputTokens": 16384, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.22 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.88 + } + }, + "family": "qwen", + "ownedBy": "alibaba", + 
"openWeights": true + }, + { + "id": "llama-4-maverick-128e-instruct-maas", + "name": "Llama 4 Maverick 17B 128E Instruct", + "capabilities": ["function-call", "structured-output", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 524288, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.35 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.15 + } + }, + "family": "llama", + "ownedBy": "meta", + "openWeights": true + }, + { + "id": "llama-3-3-instruct-maas", + "name": "Llama 3.3 70B Instruct", + "capabilities": ["function-call", "structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.72 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.72 + } + }, + "family": "llama", + "ownedBy": "meta", + "openWeights": true + }, + { + "id": "glm-4-7-maas", + "name": "GLM-4.7", + "capabilities": ["reasoning", "function-call", "structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 128000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.6 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2.2 + } + }, + "reasoning": { + "supportedEfforts": ["none"], + "interleaved": true + }, + "family": "glm", + "ownedBy": "zhipu", + "openWeights": true + }, + { + "id": "glm-5-maas", + "name": "GLM-5", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 204800, + "maxOutputTokens": 131072, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1 + }, + "output": { + "currency": "USD", + "perMillionTokens": 3.2 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.1 + } + }, 
+ "reasoning": { + "supportedEfforts": ["none", "auto"] + }, + "family": "glm", + "ownedBy": "zhipu", + "openWeights": true + }, + { + "id": "deepseek-v3-1-maas", + "name": "DeepSeek V3.1", + "capabilities": ["reasoning", "function-call", "structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 163840, + "maxOutputTokens": 32768, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.6 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.7 + } + }, + "reasoning": { + "supportedEfforts": ["none"] + }, + "family": "deepseek", + "ownedBy": "deepseek", + "openWeights": true + }, + { + "id": "granite-4-0-h-micro", + "name": "IBM Granite 4.0 H Micro", + "description": "Granite-4.0-H-Micro is a 3B parameter from the Granite 4 family of models. These models are the latest in a series of models released by IBM. They are fine-tuned for long context tool calling. ", + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 16384, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.017 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.11 + } + }, + "family": "granite", + "ownedBy": "cloudflare-workers-ai", + "openWeights": false + }, + { + "id": "bart-large-cnn", + "name": "BART Large CNN", + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 16384, + "family": "bart", + "ownedBy": "cloudflare-workers-ai", + "openWeights": false + }, + { + "id": "mistral-instruct-v0-1", + "name": "Mistral 7B Instruct v0.1", + "description": "A 7.3B parameter model that outperforms Llama 2 13B on all benchmarks, with optimizations for speed and context length.", + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 16384, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.11 + }, + "output": { + 
"currency": "USD", + "perMillionTokens": 0.19 + } + }, + "family": "mistral", + "ownedBy": "mistral", + "openWeights": false + }, + { + "id": "distilbert-sst-2-int8", + "name": "DistilBERT SST-2 INT8", + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 16384, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.026 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "family": "distilbert", + "ownedBy": "cloudflare-workers-ai", + "openWeights": false + }, + { + "id": "melotts", + "name": "MyShell MeloTTS", + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 16384, + "family": "melotts", + "ownedBy": "cloudflare-workers-ai", + "openWeights": false + }, + { + "id": "plamo-embedding", + "name": "PLaMo Embedding 1B", + "capabilities": ["embedding"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 16384, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.019 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "family": "plamo", + "ownedBy": "cloudflare-workers-ai", + "openWeights": false + }, + { + "id": "indictrans2-en-indic", + "name": "IndicTrans2 EN-Indic 1B", + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 16384, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.34 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.34 + } + }, + "family": "indictrans", + "ownedBy": "cloudflare-workers-ai", + "openWeights": false + }, + { + "id": "smart-turn-v2", + "name": "Pipecat Smart Turn v2", + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 16384, + "family": "smart-turn", + "ownedBy": "cloudflare-workers-ai", + "openWeights": false + }, + { + "id": 
"mistral-small-3-1-instruct", + "name": "Mistral Small 3.1 24B Instruct", + "description": "Mistral Small 3.1 24B Instruct is an upgraded variant of Mistral Small 3 (2501), featuring 24 billion parameters with advanced multimodal capabilities. It provides state-of-the-art performance in text-based reasoning and vision tasks, including image analysis, programming, mathematical reasoning, and multilingual support across dozens of languages. Equipped with an extensive 128k token context window and optimized for efficient local inference, it supports use cases such as conversational agents, function calling, long-document comprehension, and privacy-sensitive deployments. The updated version is [Mistral Small 3.2](mistralai/mistral-small-3.2-24b-instruct)", + "capabilities": ["function-call", "structured-output", "file-input", "image-recognition"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 16384, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.35 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.56 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.015 + } + }, + "family": "mistral-small", + "ownedBy": "mistral", + "openWeights": false + }, + { + "id": "aura-2-es", + "name": "Deepgram Aura 2 (ES)", + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 16384, + "family": "aura", + "ownedBy": "cloudflare-workers-ai", + "openWeights": false + }, + { + "id": "aura-2-en", + "name": "Deepgram Aura 2 (EN)", + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 16384, + "family": "aura", + "ownedBy": "cloudflare-workers-ai", + "openWeights": false + }, + { + "id": "nova-3", + "name": "Deepgram Nova 3", + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 16384, + "family": "nova", + 
"ownedBy": "amazon", + "openWeights": false + }, + { + "id": "gemma-sea-lion-v4-it", + "name": "Gemma SEA-LION v4 27B IT", + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 16384, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.35 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.56 + } + }, + "family": "gemma", + "ownedBy": "google", + "openWeights": false + }, + { + "id": "llama-3-1-instruct-fp8", + "name": "Llama 3.1 8B Instruct FP8", + "capabilities": ["function-call", "structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 16384, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.15 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.29 + } + }, + "family": "llama", + "ownedBy": "meta", + "openWeights": false + }, + { + "id": "llama-2-chat-fp16", + "name": "Llama 2 7B Chat FP16", + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 16384, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.56 + }, + "output": { + "currency": "USD", + "perMillionTokens": 6.67 + } + }, + "family": "llama", + "ownedBy": "meta", + "openWeights": false + }, + { + "id": "m2m100", + "name": "M2M100 1.2B", + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 16384, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.34 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.34 + } + }, + "family": "m2m", + "ownedBy": "cloudflare-workers-ai", + "openWeights": false + }, + { + "id": "llama-3-3-instruct-fp8-fast", + "name": "Llama 3.3 70B Instruct FP8 Fast", + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 16384, + "pricing": { + "input": { + "currency": 
"USD", + "perMillionTokens": 0.29 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2.25 + } + }, + "family": "llama", + "ownedBy": "meta", + "openWeights": false + }, + { + "id": "llama-3-instruct-awq", + "name": "Llama 3 8B Instruct AWQ", + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 16384, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.12 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.27 + } + }, + "family": "llama", + "ownedBy": "meta", + "openWeights": false + }, + { + "id": "llama-3-1-instruct-awq", + "name": "Llama 3.1 8B Instruct AWQ", + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 16384, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.12 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.27 + } + }, + "family": "llama", + "ownedBy": "meta", + "openWeights": false + }, + { + "id": "bge-m3", + "name": "BGE M3", + "description": "The bge-m3 embedding model encodes sentences, paragraphs, and long documents into a 1024-dimensional dense vector space, delivering high-quality semantic embeddings optimized for multilingual retrieval, semantic search, and large-context applications.", + "capabilities": ["embedding"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 16384, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.012 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "family": "bge", + "ownedBy": "baai", + "openWeights": false + }, + { + "id": "bge-base-en-v1-5", + "name": "BGE Base EN v1.5", + "description": "The bge-base-en-v1.5 embedding model converts English sentences and paragraphs into 768-dimensional dense vectors, delivering efficient, high-quality semantic embeddings optimized for retrieval, semantic search, and 
document-matching workflows. This version (v1.5) features improved similarity-score distribution and stronger retrieval performance out of the box.", + "capabilities": ["embedding"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 16384, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.067 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "family": "bge", + "ownedBy": "baai", + "openWeights": false + }, + { + "id": "bge-large-en-v1-5", + "name": "BGE Large EN v1.5", + "description": "The bge-large-en-v1.5 embedding model maps English sentences, paragraphs, and documents into a 1024-dimensional dense vector space, delivering high-fidelity semantic embeddings optimized for semantic search, document retrieval, and downstream NLP tasks in English.", + "capabilities": ["embedding"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 16384, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "family": "bge", + "ownedBy": "baai", + "openWeights": false + }, + { + "id": "bge-reranker-base", + "name": "BGE Reranker Base", + "capabilities": ["embedding", "rerank"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 16384, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.0031 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "family": "bge", + "ownedBy": "baai", + "openWeights": false + }, + { + "id": "bge-small-en-v1-5", + "name": "BGE Small EN v1.5", + "capabilities": ["embedding"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 16384, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.02 + }, + "output": { + 
"currency": "USD", + "perMillionTokens": 0 + } + }, + "family": "bge", + "ownedBy": "baai", + "openWeights": false + }, + { + "id": "mercury-coder", + "name": "Mercury Coder", + "description": "Mercury Coder is the first diffusion large language model (dLLM). Applying a breakthrough discrete diffusion approach, the model runs 5-10x faster than even speed optimized models like Claude 3.5 Haiku and GPT-4o Mini while matching their performance. Mercury Coder's speed means that developers can stay in the flow while coding, enjoying rapid chat-based iteration and responsive code completion suggestions. On Copilot Arena, Mercury Coder ranks 1st in speed and ties for 2nd in quality. Read more in the [blog post here](https://www.inceptionlabs.ai/blog/introducing-mercury).", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 16384, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.25 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.25 + } + }, + "family": "mercury", + "ownedBy": "inception", + "openWeights": false + }, + { + "id": "mercury", + "name": "Mercury", + "description": "Mercury is the first diffusion large language model (dLLM). Applying a breakthrough discrete diffusion approach, the model runs 5-10x faster than even speed optimized models like GPT-4.1 Nano and Claude 3.5 Haiku while matching their performance. Mercury's speed enables developers to provide responsive user experiences, including with voice agents, search interfaces, and chatbots. Read more in the [blog post]\n(https://www.inceptionlabs.ai/blog/introducing-mercury) here. 
", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 16384, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.25 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.25 + } + }, + "family": "mercury", + "ownedBy": "inception", + "openWeights": false + }, + { + "id": "claude-3-sonnet", + "name": "Claude Sonnet 3", + "capabilities": ["function-call", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 15 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.3 + } + }, + "family": "claude-sonnet", + "ownedBy": "anthropic", + "openWeights": false + }, + { + "id": "gpt-5-3-codex-spark", + "name": "GPT-5.3 Codex Spark", + "capabilities": [ + "reasoning", + "function-call", + "structured-output", + "file-input", + "image-recognition", + "web-search" + ], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 32000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.75 + }, + "output": { + "currency": "USD", + "perMillionTokens": 14 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.175 + } + }, + "family": "gpt-codex-spark", + "ownedBy": "openai", + "openWeights": false + }, + { + "id": "o1-pro", + "name": "o1-pro", + "description": "The o1 series of models are trained with reinforcement learning to think before they answer and perform complex reasoning. 
The o1-pro model uses more compute to think harder and provide consistently better answers.", + "capabilities": ["reasoning", "function-call", "structured-output", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 100000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 150 + }, + "output": { + "currency": "USD", + "perMillionTokens": 600 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 170 + } + }, + "reasoning": { + "supportedEfforts": ["low", "medium", "high"] + }, + "family": "o-pro", + "ownedBy": "openai", + "openWeights": false + }, + { + "id": "gpt-4o-2024-05-13", + "name": "GPT-4o (2024-05-13)", + "description": "GPT-4o (\"o\" for \"omni\") is OpenAI's latest AI model, supporting both text and image inputs with text outputs. It maintains the intelligence level of [GPT-4 Turbo](/models/openai/gpt-4-turbo) while being twice as fast and 50% more cost-effective. 
GPT-4o also offers improved performance in processing non-English languages and enhanced visual capabilities.\n\nFor benchmarking against other models, it was briefly called [\"im-also-a-good-gpt2-chatbot\"](https://twitter.com/LiamFedus/status/1790064963966370209)\n\n#multimodal", + "capabilities": ["function-call", "structured-output", "file-input", "image-recognition", "web-search"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 15 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 5 + } + }, + "family": "gpt", + "ownedBy": "openai", + "openWeights": false + }, + { + "id": "gpt-4o-2024-08-06", + "name": "GPT-4o (2024-08-06)", + "description": "The 2024-08-06 version of GPT-4o offers improved performance in structured outputs, with the ability to supply a JSON schema in the respone_format. Read more [here](https://openai.com/index/introducing-structured-outputs-in-the-api/).\n\nGPT-4o (\"o\" for \"omni\") is OpenAI's latest AI model, supporting both text and image inputs with text outputs. It maintains the intelligence level of [GPT-4 Turbo](/models/openai/gpt-4-turbo) while being twice as fast and 50% more cost-effective. 
GPT-4o also offers improved performance in processing non-English languages and enhanced visual capabilities.\n\nFor benchmarking against other models, it was briefly called [\"im-also-a-good-gpt2-chatbot\"](https://twitter.com/LiamFedus/status/1790064963966370209)", + "capabilities": ["function-call", "structured-output", "file-input", "image-recognition", "web-search"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 16384, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2.5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 10 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 1.25 + } + }, + "family": "gpt", + "ownedBy": "openai", + "openWeights": false + }, + { + "id": "gpt-5-3-codex", + "name": "GPT-5.3 Codex", + "capabilities": [ + "reasoning", + "function-call", + "structured-output", + "file-input", + "image-recognition", + "web-search" + ], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 400000, + "maxOutputTokens": 128000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.75 + }, + "output": { + "currency": "USD", + "perMillionTokens": 14 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.175 + } + }, + "family": "gpt-codex", + "ownedBy": "openai", + "openWeights": false + }, + { + "id": "o4-mini-deep-research", + "name": "o4-mini-deep-research", + "description": "o4-mini-deep-research is OpenAI's faster, more affordable deep research model—ideal for tackling complex, multi-step research tasks.\n\nNote: This model always uses the 'web_search' tool which adds additional cost.", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition", "web-search"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 100000, + "pricing": { + "input": { + "currency": "USD", + 
"perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 8 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.5 + } + }, + "reasoning": { + "supportedEfforts": ["medium"] + }, + "family": "o-mini", + "ownedBy": "openai", + "openWeights": false + }, + { + "id": "kimi-dev", + "name": "Kimi Dev 72b (free)", + "description": "Kimi-Dev-72B is a new generation open-source programming large model that achieved a leading performance of 60.4% on SWE-bench Verified. Through large-scale reinforcement learning optimization, it can automatically fix code in real Docker environments, receiving rewards only when passing the complete test suite, thereby ensuring the correctness and robustness of solutions and aligning more closely with real software development standards.", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 131072, + "maxOutputTokens": 131072, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.32 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.28 + } + }, + "family": "kimi", + "ownedBy": "moonshot", + "openWeights": true + }, + { + "id": "glm-z1", + "name": "GLM Z1 32B (free)", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 32768, + "maxOutputTokens": 32768, + "family": "glm-z", + "ownedBy": "zhipu", + "openWeights": true + }, + { + "id": "deephermes-3-llama-3-preview", + "name": "DeepHermes 3 Llama 3 8B Preview", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 131072, + "maxOutputTokens": 8192, + "family": "llama", + "ownedBy": "nousresearch", + "openWeights": true + }, + { + "id": "grok-3-beta", + "name": "Grok 3 Beta", + "description": "Grok 3 is the latest model from xAI. 
It's their flagship model that excels at enterprise use cases like data extraction, coding, and text summarization. Possesses deep domain knowledge in finance, healthcare, law, and science.\n\nExcels in structured tasks and benchmarks like GPQA, LCB, and MMLU-Pro where it outperforms Grok 3 Mini even on high thinking. \n\nNote: That there are two xAI endpoints for this model. By default when using this model we will always route you to the base endpoint. If you want the fast endpoint you can add `provider: { sort: throughput}`, to sort by throughput instead. \n", + "capabilities": ["function-call", "web-search"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 131072, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 15 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.75 + } + }, + "family": "grok", + "ownedBy": "xai", + "openWeights": false + }, + { + "id": "grok-3-mini-beta", + "name": "Grok 3 Mini Beta", + "description": "Grok 3 Mini is a lightweight, smaller thinking model. Unlike traditional models that generate answers immediately, Grok 3 Mini thinks before responding. It’s ideal for reasoning-heavy tasks that don’t demand extensive domain knowledge, and shines in math-specific and quantitative use cases, such as solving challenging puzzles or math problems.\n\nTransparent \"thinking\" traces accessible. Defaults to low reasoning, can boost with setting `reasoning: { effort: \"high\" }`\n\nNote: That there are two xAI endpoints for this model. By default when using this model we will always route you to the base endpoint. If you want the fast endpoint you can add `provider: { sort: throughput}`, to sort by throughput instead. 
\n", + "capabilities": ["reasoning", "function-call", "web-search"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 131072, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.5 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.075 + } + }, + "reasoning": { + "supportedEfforts": ["low", "high"] + }, + "family": "grok", + "ownedBy": "xai", + "openWeights": false + }, + { + "id": "dolphin-mistral-venice-edition", + "name": "Uncensored (free)", + "description": "Venice Uncensored Dolphin Mistral 24B Venice Edition is a fine-tuned variant of Mistral-Small-24B-Instruct-2501, developed by dphn.ai in collaboration with Venice.ai. This model is designed as an “uncensored” instruct-tuned LLM, preserving user control over alignment, system prompts, and behavior. Intended for advanced and unrestricted use cases, Venice Uncensored emphasizes steerability and transparent behavior, removing default safety and alignment layers typically found in mainstream assistant models.", + "capabilities": ["structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 32768, + "maxOutputTokens": 32768, + "family": "mistral", + "ownedBy": "dolphinai", + "openWeights": true + }, + { + "id": "dolphin3-0-mistral", + "name": "Dolphin3.0 Mistral 24B", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 32768, + "maxOutputTokens": 8192, + "family": "mistral", + "ownedBy": "dolphinai", + "openWeights": true + }, + { + "id": "dolphin3-0-r1-mistral", + "name": "Dolphin3.0 R1 Mistral 24B", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 32768, + "maxOutputTokens": 8192, + "family": "mistral", + "ownedBy": "dolphinai", + "openWeights": true + }, + { + 
"id": "riverflow-v2-max-preview", + "name": "Riverflow V2 Max Preview", + "capabilities": ["image-recognition", "image-generation"], + "inputModalities": ["text", "image"], + "outputModalities": ["image"], + "contextWindow": 8192, + "maxOutputTokens": 8192, + "family": "sourceful", + "ownedBy": "sourceful", + "openWeights": true + }, + { + "id": "riverflow-v2-fast-preview", + "name": "Riverflow V2 Fast Preview", + "capabilities": ["image-recognition", "image-generation"], + "inputModalities": ["text", "image"], + "outputModalities": ["image"], + "contextWindow": 8192, + "maxOutputTokens": 8192, + "family": "sourceful", + "ownedBy": "sourceful", + "openWeights": true + }, + { + "id": "riverflow-v2-standard-preview", + "name": "Riverflow V2 Standard Preview", + "capabilities": ["image-recognition", "image-generation"], + "inputModalities": ["text", "image"], + "outputModalities": ["image"], + "contextWindow": 8192, + "maxOutputTokens": 8192, + "family": "sourceful", + "ownedBy": "sourceful", + "openWeights": true + }, + { + "id": "deepseek-chat-v3-1", + "name": "DeepSeek-V3.1", + "description": "DeepSeek-V3.1 is a large hybrid reasoning model (671B parameters, 37B active) that supports both thinking and non-thinking modes via prompt templates. It extends the DeepSeek-V3 base with a two-phase long-context training process, reaching up to 128K tokens, and uses FP8 microscaling for efficient inference. Users can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)\n\nThe model improves tool use, code generation, and reasoning efficiency, achieving performance comparable to DeepSeek-R1 on difficult benchmarks while responding more quickly. It supports structured tool calling, code agents, and search agents, making it suitable for research, coding, and agentic workflows. 
\n\nIt succeeds the [DeepSeek V3-0324](/deepseek/deepseek-chat-v3-0324) model and performs well on a variety of tasks.", + "capabilities": ["reasoning", "function-call", "structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 163840, + "maxOutputTokens": 163840, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.8 + } + }, + "reasoning": { + "supportedEfforts": ["none"] + }, + "family": "deepseek", + "ownedBy": "deepseek", + "openWeights": true + }, + { + "id": "deepseek-v3-base", + "name": "DeepSeek V3 Base (free)", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 163840, + "maxOutputTokens": 163840, + "reasoning": { + "supportedEfforts": ["none"] + }, + "family": "deepseek", + "ownedBy": "deepseek", + "openWeights": true + }, + { + "id": "deepseek-chat-v3-0324", + "name": "DeepSeek V3 0324", + "description": "DeepSeek V3, a 685B-parameter, mixture-of-experts model, is the latest iteration of the flagship chat model family from the DeepSeek team.\n\nIt succeeds the [DeepSeek V3](/deepseek/deepseek-chat-v3) model and performs really well on a variety of tasks.", + "capabilities": ["structured-output", "reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 16384, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.19 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.87 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.095 + } + }, + "reasoning": { + "supportedEfforts": ["none"] + }, + "family": "deepseek", + "ownedBy": "deepseek", + "openWeights": true + }, + { + "id": "qwerky", + "name": "Qwerky 72B", + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 32768, + "maxOutputTokens": 8192, 
+ "family": "qwerky", + "ownedBy": "openrouter", + "openWeights": true + }, + { + "id": "deepseek-r1t2-chimera", + "name": "DeepSeek R1T2 Chimera (free)", + "description": "DeepSeek-TNG-R1T2-Chimera is the second-generation Chimera model from TNG Tech. It is a 671 B-parameter mixture-of-experts text-generation model assembled from DeepSeek-AI’s R1-0528, R1, and V3-0324 checkpoints with an Assembly-of-Experts merge. The tri-parent design yields strong reasoning performance while running roughly 20 % faster than the original R1 and more than 2× faster than R1-0528 under vLLM, giving a favorable cost-to-intelligence trade-off. The checkpoint supports contexts up to 60 k tokens in standard use (tested to ~130 k) and maintains consistent token behaviour, making it suitable for long-context analysis, dialogue and other open-ended generation tasks.", + "capabilities": ["reasoning", "structured-output", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 163840, + "maxOutputTokens": 163840, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.25 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.85 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.125 + } + }, + "family": "deepseek-thinking", + "ownedBy": "deepseek", + "openWeights": true + }, + { + "id": "molmo-2", + "name": "Molmo2 8B (free)", + "description": "Molmo2-8B is an open vision-language model developed by the Allen Institute for AI (Ai2) as part of the Molmo2 family, supporting image, video, and multi-image understanding and grounding. 
It is based on Qwen3-8B and uses SigLIP 2 as its vision backbone, outperforming other open-weight, open-data models on short videos, counting, and captioning, while remaining competitive on long-video tasks.", + "capabilities": ["reasoning", "image-recognition", "video-recognition"], + "inputModalities": ["text", "image", "video"], + "outputModalities": ["text"], + "contextWindow": 36864, + "maxOutputTokens": 36864, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.2 + } + }, + "family": "allenai", + "ownedBy": "ai2", + "openWeights": true + }, + { + "id": "seedream-4-5", + "name": "Seedream 4.5", + "capabilities": ["image-recognition", "image-generation"], + "inputModalities": ["image", "text"], + "outputModalities": ["image"], + "contextWindow": 4096, + "maxOutputTokens": 4096, + "family": "seed", + "ownedBy": "bytedance", + "openWeights": true + }, + { + "id": "lfm-2-5-thinking", + "name": "LFM2.5-1.2B-Thinking (free)", + "description": "LFM2.5-1.2B-Thinking is a lightweight reasoning-focused model optimized for agentic tasks, data extraction, and RAG—while still running comfortably on edge devices. It supports long context (up to 32K tokens) and is designed to provide higher-quality “thinking” responses in a small 1.2B model.", + "capabilities": ["reasoning"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 131072, + "maxOutputTokens": 32768, + "family": "liquid", + "ownedBy": "liquidai", + "openWeights": true + }, + { + "id": "lfm-2-5-instruct", + "name": "LFM2.5-1.2B-Instruct (free)", + "description": "LFM2.5-1.2B-Instruct is a compact, high-performance instruction-tuned model built for fast on-device AI. 
It delivers strong chat quality in a 1.2B parameter footprint, with efficient edge inference and broad runtime support.", + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 131072, + "maxOutputTokens": 32768, + "family": "liquid", + "ownedBy": "liquidai", + "openWeights": true + }, + { + "id": "minimax-01", + "name": "MiniMax-01", + "description": "MiniMax-01 is a combines MiniMax-Text-01 for text generation and MiniMax-VL-01 for image understanding. It has 456 billion parameters, with 45.9 billion parameters activated per inference, and can handle a context of up to 4 million tokens.\n\nThe text model adopts a hybrid architecture that combines Lightning Attention, Softmax Attention, and Mixture-of-Experts (MoE). The image model adopts the “ViT-MLP-LLM” framework and is trained on top of the text model.\n\nTo read more about the release, see: https://www.minimaxi.com/en/news/minimax-01-series-2", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 1000000, + "maxOutputTokens": 1000000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.1 + } + }, + "family": "minimax", + "ownedBy": "minimax", + "openWeights": true + }, + { + "id": "gemini-2-0-flash-exp", + "name": "Gemini 2.0 Flash Experimental (free)", + "description": "https://doc.aihubmix.com/en/api/Gemini%20%E5%9B%BE%E7%89%87%E7%94%9F%E6%88%90%E5%92%8C%E7%BC%96%E8%BE%91\nInstructions:\n\nNeed to add parameters to experience new features: \"modalities\":[\"text\",\"image\"]\nImages are passed and output in Base64 encoding\nAs an experimental model, it's recommended to explicitly specify \"output image\", otherwise it might only output text\nDefault height for output images is 1024px\nPython calls require the latest OpenAI SDK, run pip install -U openai first", + 
"capabilities": ["function-call", "file-input", "image-recognition", "web-search"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 1048576, + "maxOutputTokens": 1048576, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.02 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.08 + } + }, + "family": "gemini-flash", + "ownedBy": "google", + "openWeights": false + }, + { + "id": "gpt-5-image", + "name": "GPT-5 Image", + "description": "[GPT-5](https://openrouter.ai/openai/gpt-5) Image combines OpenAI's GPT-5 model with state-of-the-art image generation capabilities. It offers major improvements in reasoning, code quality, and user experience while incorporating GPT Image 1's superior instruction following, text rendering, and detailed image editing.", + "capabilities": [ + "reasoning", + "function-call", + "structured-output", + "file-input", + "image-recognition", + "image-generation", + "web-search" + ], + "inputModalities": ["text", "image"], + "outputModalities": ["text", "image"], + "contextWindow": 400000, + "maxOutputTokens": 128000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 10 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 1.25 + } + }, + "reasoning": { + "supportedEfforts": ["minimal", "low", "medium", "high"] + }, + "family": "gpt", + "ownedBy": "openai", + "openWeights": false + }, + { + "id": "sherlock-think-alpha", + "name": "Sherlock Think Alpha", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 1840000, + "family": "sherlock", + "ownedBy": "openrouter", + "openWeights": false + }, + { + "id": "sherlock-dash-alpha", + "name": "Sherlock Dash Alpha", + "capabilities": ["function-call", "file-input", "image-recognition"], + "inputModalities": 
["text", "image"], + "outputModalities": ["text"], + "contextWindow": 1840000, + "family": "sherlock", + "ownedBy": "openrouter", + "openWeights": false + }, + { + "id": "aurora-alpha", + "name": "Aurora Alpha", + "capabilities": ["reasoning", "function-call", "structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 50000, + "ownedBy": "openrouter", + "openWeights": false + }, + { + "id": "qwen-2-5-coder-instruct", + "name": "Qwen2.5 Coder 32B Instruct", + "description": "Qwen2.5-Coder is the latest series of Code-Specific Qwen large language models (formerly known as CodeQwen). Qwen2.5-Coder brings the following improvements upon CodeQwen1.5:\n\n- Significantly improvements in **code generation**, **code reasoning** and **code fixing**. \n- A more comprehensive foundation for real-world applications such as **Code Agents**. Not only enhancing coding capabilities but also maintaining its strengths in mathematics and general competencies.\n\nTo read more about its evaluation results, check out [Qwen 2.5 Coder's blog](https://qwenlm.github.io/blog/qwen2.5-coder-family/).", + "capabilities": ["structured-output", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 32768, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.03 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.11 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.015 + } + }, + "family": "qwen", + "ownedBy": "alibaba", + "openWeights": true + }, + { + "id": "qwen3-5-plus-02-15", + "name": "Qwen3.5 Plus 2026-02-15", + "description": "The Qwen3.5 native vision-language series Plus models are built on a hybrid architecture that integrates linear attention mechanisms with sparse mixture-of-experts models, achieving higher inference efficiency. 
In a variety of task evaluations, the 3.5 series consistently demonstrates performance on par with state-of-the-art leading models. Compared to the 3 series, these models show a leap forward in both pure-text and multimodal capabilities.", + "capabilities": [ + "reasoning", + "function-call", + "structured-output", + "file-input", + "image-recognition", + "video-recognition" + ], + "inputModalities": ["text", "image", "video"], + "outputModalities": ["text"], + "contextWindow": 1000000, + "maxOutputTokens": 65536, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.4 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2.4 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 38912 + } + }, + "family": "qwen", + "ownedBy": "alibaba", + "openWeights": false + }, + { + "id": "qwen-2-5-vl-instruct", + "name": "Qwen2.5-VL 7B Instruct (free)", + "description": "Qwen2.5 VL 7B is a multimodal LLM from the Qwen Team with the following key enhancements:\n\n- SoTA understanding of images of various resolution & ratio: Qwen2.5-VL achieves state-of-the-art performance on visual understanding benchmarks, including MathVista, DocVQA, RealWorldQA, MTVQA, etc.\n\n- Understanding videos of 20min+: Qwen2.5-VL can understand videos over 20 minutes for high-quality video-based question answering, dialog, content creation, etc.\n\n- Agent that can operate your mobiles, robots, etc.: with the abilities of complex reasoning and decision making, Qwen2.5-VL can be integrated with devices like mobile phones, robots, etc., for automatic operation based on visual environment and text instructions.\n\n- Multilingual Support: to serve global users, besides English and Chinese, Qwen2.5-VL now supports the understanding of texts in different languages inside images, including most European languages, Japanese, Korean, Arabic, Vietnamese, etc.\n\nFor more details, see this [blog 
post](https://qwenlm.github.io/blog/qwen2-vl/) and [GitHub repo](https://github.com/QwenLM/Qwen2-VL).\n\nUsage of this model is subject to [Tongyi Qianwen LICENSE AGREEMENT](https://huggingface.co/Qwen/Qwen1.5-110B-Chat/blob/main/LICENSE).", + "capabilities": ["function-call", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 32768, + "maxOutputTokens": 32768, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.2 + } + }, + "family": "qwen", + "ownedBy": "alibaba", + "openWeights": true + }, + { + "id": "qwen3-a22b-07-25", + "name": "Qwen3 235B A22B Instruct 2507 (free)", + "capabilities": ["function-call", "reasoning"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 262144, + "maxOutputTokens": 131072, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.15 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.85 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 38912 + } + }, + "family": "qwen", + "ownedBy": "alibaba", + "openWeights": true + }, + { + "id": "codestral-2508", + "name": "Codestral 2508", + "description": "Mistral's cutting-edge language model for coding released end of July 2025. 
Codestral specializes in low-latency, high-frequency tasks such as fill-in-the-middle (FIM), code correction and test generation.\n\n[Blog Post](https://mistral.ai/news/codestral-25-08)", + "capabilities": ["function-call", "structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 256000, + "maxOutputTokens": 256000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.9 + } + }, + "family": "codestral", + "ownedBy": "mistral", + "openWeights": true + }, + { + "id": "mistral-instruct", + "name": "Mistral 7B Instruct (free)", + "description": "A high-performing, industry-standard 7.3B parameter model, with optimizations for speed and context length.\n\n*Mistral 7B Instruct has multiple version variants, and this is intended to be the latest version.*", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 32768, + "maxOutputTokens": 32768, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.2 + } + }, + "family": "mistral", + "ownedBy": "mistral", + "openWeights": true + }, + { + "id": "mistral-small-3-2-instruct", + "name": "Mistral Small 3.2 24B Instruct", + "description": "Mistral-Small-3.2-24B-Instruct-2506 is an updated 24B parameter model from Mistral optimized for instruction following, repetition reduction, and improved function calling. 
Compared to the 3.1 release, version 3.2 significantly improves accuracy on WildBench and Arena Hard, reduces infinite generations, and delivers gains in tool use and structured output tasks.\n\nIt supports image and text inputs with structured outputs, function/tool calling, and strong performance across coding (HumanEval+, MBPP), STEM (MMLU, MATH, GPQA), and vision benchmarks (ChartQA, DocVQA).", + "capabilities": ["function-call", "structured-output", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 96000, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.06 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.18 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.03 + } + }, + "family": "mistral-small", + "ownedBy": "mistral", + "openWeights": true + }, + { + "id": "mistral-medium-3", + "name": "Mistral Medium 3", + "description": "Mistral Medium 3 is a high-performance enterprise-grade language model designed to deliver frontier-level capabilities at significantly reduced operational cost. It balances state-of-the-art reasoning and multimodal performance with 8× lower cost compared to traditional large models, making it suitable for scalable deployments across professional and industrial use cases.\n\nThe model excels in domains such as coding, STEM reasoning, and enterprise adaptation. It supports hybrid, on-prem, and in-VPC deployments and is optimized for integration into custom workflows. 
Mistral Medium 3 offers competitive accuracy relative to larger models like Claude Sonnet 3.5/3.7, Llama 4 Maverick, and Command R+, while maintaining broad compatibility across cloud environments.", + "capabilities": ["function-call", "structured-output", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 131072, + "maxOutputTokens": 131072, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.4 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2 + } + }, + "family": "mistral-medium", + "ownedBy": "mistral", + "openWeights": false + }, + { + "id": "mistral-medium-3-1", + "name": "Mistral Medium 3.1", + "description": "Mistral Medium 3.1 is an updated version of Mistral Medium 3, which is a high-performance enterprise-grade language model designed to deliver frontier-level capabilities at significantly reduced operational cost. It balances state-of-the-art reasoning and multimodal performance with 8× lower cost compared to traditional large models, making it suitable for scalable deployments across professional and industrial use cases.\n\nThe model excels in domains such as coding, STEM reasoning, and enterprise adaptation. It supports hybrid, on-prem, and in-VPC deployments and is optimized for integration into custom workflows. 
Mistral Medium 3.1 offers competitive accuracy relative to larger models like Claude Sonnet 3.5/3.7, Llama 4 Maverick, and Command R+, while maintaining broad compatibility across cloud environments.", + "capabilities": ["function-call", "structured-output", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 262144, + "maxOutputTokens": 262144, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.4 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2 + } + }, + "family": "mistral-medium", + "ownedBy": "mistral", + "openWeights": false + }, + { + "id": "reka-flash-3", + "name": "Reka Flash 3", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 32768, + "maxOutputTokens": 8192, + "family": "reka", + "ownedBy": "openrouter", + "openWeights": true + }, + { + "id": "sarvam-m", + "name": "Sarvam-M (free)", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 32768, + "maxOutputTokens": 32768, + "family": "sarvam", + "ownedBy": "openrouter", + "openWeights": true + }, + { + "id": "flux.2-klein", + "name": "FLUX.2 Klein 4B", + "capabilities": ["image-recognition", "image-generation"], + "inputModalities": ["image", "text"], + "outputModalities": ["image"], + "contextWindow": 40960, + "maxOutputTokens": 40960, + "family": "flux", + "ownedBy": "bfl", + "openWeights": true + }, + { + "id": "flux.2-max", + "name": "FLUX.2 Max", + "capabilities": ["image-recognition", "image-generation"], + "inputModalities": ["image", "text"], + "outputModalities": ["image"], + "contextWindow": 46864, + "maxOutputTokens": 46864, + "family": "flux", + "ownedBy": "bfl", + "openWeights": false + }, + { + "id": "flux.2-pro", + "name": "FLUX.2 Pro", + "capabilities": ["image-recognition", "image-generation"], + "inputModalities": 
["image", "text"], + "outputModalities": ["image"], + "contextWindow": 46864, + "maxOutputTokens": 46864, + "family": "flux", + "ownedBy": "bfl", + "openWeights": false + }, + { + "id": "flux.2-flex", + "name": "FLUX.2 Flex", + "capabilities": ["image-recognition", "image-generation"], + "inputModalities": ["image", "text"], + "outputModalities": ["image"], + "contextWindow": 67344, + "maxOutputTokens": 67344, + "family": "flux", + "ownedBy": "bfl", + "openWeights": false + }, + { + "id": "step-3", + "name": "Step-3", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition"], + "inputModalities": ["image", "text"], + "outputModalities": ["text"], + "contextWindow": 65536, + "maxOutputTokens": 64000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.21 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.57 + } + }, + "ownedBy": "stepfun", + "openWeights": false + }, + { + "id": "minimax-m2-5-lightning", + "name": "MiniMax: MiniMax M2.5 highspeed", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 204800, + "maxOutputTokens": 131072, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.6 + }, + "output": { + "currency": "USD", + "perMillionTokens": 4.8 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.06 + } + }, + "reasoning": { + "supportedEfforts": [], + "interleaved": true + }, + "ownedBy": "minimax", + "openWeights": false + }, + { + "id": "doubao-seed-code", + "name": "Doubao-Seed-Code", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 256000, + "maxOutputTokens": 64000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.17 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.12 + }, + "cacheRead": { + "currency": "USD", + 
"perMillionTokens": 0.03 + } + }, + "reasoning": { + "supportedEfforts": ["none", "high"] + }, + "ownedBy": "bytedance", + "openWeights": false + }, + { + "id": "doubao-seed-2-0-mini", + "name": "Doubao-Seed-2.0-mini", + "description": "Doubao 2.0 series is designed for low-latency, high-concurrency, and cost-sensitive scenarios, emphasizing fast responses and flexible inference deployment. Model performance is comparable to Doubao-Seed-1.6. It supports a 256k context window, four levels of thinking length, and multimodal understanding, making it suitable for lightweight tasks that prioritize cost and speed.", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition", "video-recognition"], + "inputModalities": ["text", "image", "video"], + "outputModalities": ["text"], + "contextWindow": 256000, + "maxOutputTokens": 64000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.03 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.28 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.01 + } + }, + "reasoning": { + "supportedEfforts": ["minimal", "low", "medium", "high"] + }, + "ownedBy": "bytedance", + "openWeights": false + }, + { + "id": "doubao-seed-2-0-pro", + "name": "Doubao-Seed-2.0-pro", + "description": "Doubao flagship all-purpose general model, targeting complex reasoning and long-chain task execution scenarios in the Agent era. It emphasizes multimodal understanding, long-context reasoning, structured generation, and tool-augmented execution. 
It excels at handling complex instructions and multi-constraint execution, reliably addressing multi-step complex planning, intricate image-text reasoning, video content understanding, and high-difficulty analysis scenarios.", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition", "video-recognition"], + "inputModalities": ["text", "image", "video"], + "outputModalities": ["text"], + "contextWindow": 256000, + "maxOutputTokens": 64000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.45 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2.24 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.09 + } + }, + "reasoning": { + "supportedEfforts": ["minimal", "low", "medium", "high"] + }, + "ownedBy": "bytedance", + "openWeights": false + }, + { + "id": "doubao-seed-1-8", + "name": "Doubao-Seed-1.8", + "description": "Doubao's strongest multimodal Agent model Seed1.8 has powerful multimodal capabilities, supports image and text input, and can efficiently and accurately complete tasks in scenarios such as information retrieval, code generation, GUI interaction, and complex workflows, meeting increasingly diverse technical demands.", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition", "video-recognition"], + "inputModalities": ["text", "image", "video"], + "outputModalities": ["text"], + "contextWindow": 256000, + "maxOutputTokens": 64000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.11 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.28 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.02 + } + }, + "reasoning": { + "supportedEfforts": ["minimal", "low", "medium", "high"] + }, + "ownedBy": "bytedance", + "openWeights": false + }, + { + "id": "doubao-seed-2-0-lite", + "name": "Doubao-Seed-2.0-lite", + "description": "The Doubao 2.0 series is a balanced family of models targeted at high-frequency 
enterprise scenarios, balancing performance and cost, with overall capabilities surpassing the previous generation Doubao-Seed-1.8. They are suited for production tasks such as unstructured information processing, content creation, search and recommendation, and data analysis. The models support long-context processing, multi-source information fusion, multi-step instruction execution, and high-fidelity structured output. While ensuring stable performance, they significantly optimize costs.", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition", "video-recognition"], + "inputModalities": ["text", "image", "video"], + "outputModalities": ["text"], + "contextWindow": 256000, + "maxOutputTokens": 64000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.09 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.51 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.02 + } + }, + "reasoning": { + "supportedEfforts": ["minimal", "low", "medium", "high"] + }, + "ownedBy": "bytedance", + "openWeights": false + }, + { + "id": "ernie-5-0-thinking-preview", + "name": "ERNIE 5.0", + "description": "The new generation Wenxin model, Wenxin 5.0, is a native full-modal large model that adopts native full-modal unified modeling technology, jointly modeling text, images, audio, and video, possessing comprehensive full-modal capabilities. 
Wenxin 5.0's basic abilities are comprehensively upgraded, performing excellently on benchmark test sets, especially in multimodal understanding, instruction compliance, creative writing, factual accuracy, intelligent agent planning, and tool application.", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition", "video-recognition"], + "inputModalities": ["text", "image", "video"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 64000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.84 + }, + "output": { + "currency": "USD", + "perMillionTokens": 3.37 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.822 + } + }, + "ownedBy": "baidu", + "openWeights": false + }, + { + "id": "mixtral-8x7b-instruct-v0-1", + "name": "Mixtral-8x7B-Instruct-v0.1", + "capabilities": ["structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 32768, + "maxOutputTokens": 32768, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.7 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.7 + } + }, + "ownedBy": "mistral", + "openWeights": true + }, + { + "id": "mistral-instruct-v0-3", + "name": "Mistral-7B-Instruct-v0.3", + "description": "A high-performing, industry-standard 7.3B parameter model, with optimizations for speed and context length.\n\nAn improved version of [Mistral 7B Instruct v0.2](/models/mistralai/mistral-7b-instruct-v0.2), with the following changes:\n\n- Extended vocabulary to 32768\n- Supports v3 Tokenizer\n- Supports function calling\n\nNOTE: Support for function calling depends on the provider.", + "capabilities": ["function-call", "structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 65536, + "maxOutputTokens": 65536, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.11 + }, + "output": { + "currency": "USD", + 
"perMillionTokens": 0.11 + } + }, + "ownedBy": "mistral", + "openWeights": true + }, + { + "id": "llama-3_3-instruct", + "name": "Meta-Llama-3_3-70B-Instruct", + "capabilities": ["function-call", "structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 131072, + "maxOutputTokens": 131072, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.74 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.74 + } + }, + "ownedBy": "meta", + "openWeights": true + }, + { + "id": "v0-1-5-lg", + "name": "v0-1.5-lg", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 512000, + "maxOutputTokens": 32000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 15 + }, + "output": { + "currency": "USD", + "perMillionTokens": 75 + } + }, + "family": "v0", + "ownedBy": "v0", + "openWeights": false + }, + { + "id": "qwen3-a22b-instruct", + "name": "Qwen3-235B-A22B-Instruct", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 256000, + "maxOutputTokens": 64000, + "family": "qwen", + "ownedBy": "alibaba", + "openWeights": true + }, + { + "id": "kimi-k2-5-nvfp4", + "name": "Kimi K2.5 (NVFP4)", + "capabilities": ["reasoning", "function-call", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 262144, + "maxOutputTokens": 65536, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.55 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2.19 + } + }, + "reasoning": { + "supportedEfforts": ["none", "auto"] + }, + "family": "kimi", + "ownedBy": "moonshot", + "openWeights": true + }, + { + "id": "qwen3-coder-a35b-instruct-turbo", + "name": "Qwen3 Coder 480B A35B Instruct Turbo", + "capabilities": ["function-call"], + 
"inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 262144, + "maxOutputTokens": 66536, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.2 + } + }, + "family": "qwen", + "ownedBy": "alibaba", + "openWeights": true + }, + { + "id": "claude-4-opus", + "name": "Claude Opus 4", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 32000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 16.5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 82.5 + } + }, + "family": "claude-opus", + "ownedBy": "anthropic", + "openWeights": false + }, + { + "id": "claude-3-7-sonnet-latest", + "name": "Claude Sonnet 3.7 (Latest)", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition", "web-search", "computer-use"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 64000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 3.3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 16.5 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.33 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 64000 + } + }, + "family": "claude-sonnet", + "ownedBy": "anthropic", + "openWeights": false + }, + { + "id": "palmyra-x5", + "name": "Writer: Palmyra X5", + "description": "Palmyra X5 is Writer's most advanced model, purpose-built for building and scaling AI agents across the enterprise. It delivers industry-leading speed and efficiency on context windows up to 1 million tokens, powered by a novel transformer architecture and hybrid attention mechanisms. 
This enables faster inference and expanded memory for processing large volumes of enterprise data, critical for scaling AI agents.", + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 1040000, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.6 + }, + "output": { + "currency": "USD", + "perMillionTokens": 6 + } + }, + "ownedBy": "google", + "openWeights": false + }, + { + "id": "gemini-2-5-pro-preview", + "name": "Google: Gemini 2.5 Pro Preview 06-05", + "description": "Gemini 2.5 Pro is Google’s state-of-the-art AI model designed for advanced reasoning, coding, mathematics, and scientific tasks. It employs “thinking” capabilities, enabling it to reason through responses with enhanced accuracy and nuanced context handling. Gemini 2.5 Pro achieves top-tier performance on multiple benchmarks, including first-place positioning on the LMArena leaderboard, reflecting superior human-preference alignment and complex problem-solving abilities.\n", + "capabilities": [ + "reasoning", + "function-call", + "file-input", + "image-recognition", + "audio-recognition", + "web-search" + ], + "inputModalities": ["image", "text", "audio"], + "outputModalities": ["text"], + "contextWindow": 1048576, + "maxOutputTokens": 65536, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.25 + }, + "output": { + "currency": "USD", + "perMillionTokens": 10 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.125 + } + }, + "reasoning": { + "supportedEfforts": ["low", "medium", "high"], + "thinkingTokenLimits": { + "min": 128, + "max": 32768 + } + }, + "ownedBy": "google", + "openWeights": false + }, + { + "id": "gemini-2-0-flash-lite-001", + "name": "Google: Gemini 2.0 Flash Lite", + "description": "Gemini 2.0 Flash Lite offers a significantly faster time to first token (TTFT) compared to [Gemini Flash 1.5](/google/gemini-flash-1.5), while maintaining quality on par with larger 
models like [Gemini Pro 1.5](/google/gemini-pro-1.5), all at extremely economical token prices.", + "capabilities": [ + "function-call", + "file-input", + "image-recognition", + "audio-recognition", + "video-recognition", + "web-search" + ], + "inputModalities": ["text", "image", "audio", "video"], + "outputModalities": ["text"], + "contextWindow": 1048576, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.075 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.3 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.076 + } + }, + "ownedBy": "google", + "openWeights": false + }, + { + "id": "auto", + "name": "Kilo: Auto", + "description": "Your prompt will be processed by a meta-model and routed to one of dozens of models (see below), optimizing for the best possible output.\n\nTo see which model was used, visit [Activity](/activity), or read the `model` attribute of the response. Your response will be priced at the same rate as the routed model.\n\nLearn more, including how to customize the models for routing, in our [docs](/docs/guides/routing/routers/auto-router).\n\nRequests will be routed to the following models:\n- [anthropic/claude-haiku-4.5](/anthropic/claude-haiku-4.5)\n- [anthropic/claude-opus-4.6](/anthropic/claude-opus-4.6)\n- [anthropic/claude-sonnet-4.5](/anthropic/claude-sonnet-4.5)\n- [deepseek/deepseek-r1](/deepseek/deepseek-r1)\n- [google/gemini-2.5-flash-lite](/google/gemini-2.5-flash-lite)\n- [google/gemini-3-flash-preview](/google/gemini-3-flash-preview)\n- [google/gemini-3-pro-preview](/google/gemini-3-pro-preview)\n- [meta-llama/llama-3.3-70b-instruct](/meta-llama/llama-3.3-70b-instruct)\n- [mistralai/codestral-2508](/mistralai/codestral-2508)\n- [mistralai/mistral-large](/mistralai/mistral-large)\n- [mistralai/mistral-medium-3.1](/mistralai/mistral-medium-3.1)\n- [mistralai/mistral-small-3.2-24b-instruct-2506](/mistralai/mistral-small-3.2-24b-instruct-2506)\n- 
[moonshotai/kimi-k2-thinking](/moonshotai/kimi-k2-thinking)\n- [moonshotai/kimi-k2.5](/moonshotai/kimi-k2.5)\n- [openai/gpt-5](/openai/gpt-5)\n- [openai/gpt-5-mini](/openai/gpt-5-mini)\n- [openai/gpt-5-nano](/openai/gpt-5-nano)\n- [openai/gpt-5.1](/openai/gpt-5.1)\n- [openai/gpt-5.2](/openai/gpt-5.2)\n- [openai/gpt-5.2-pro](/openai/gpt-5.2-pro)\n- [openai/gpt-oss-120b](/openai/gpt-oss-120b)\n- [perplexity/sonar](/perplexity/sonar)\n- [qwen/qwen3-235b-a22b](/qwen/qwen3-235b-a22b)\n- [x-ai/grok-3](/x-ai/grok-3)\n- [x-ai/grok-3-mini](/x-ai/grok-3-mini)\n- [x-ai/grok-4](/x-ai/grok-4)", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 64000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1 + } + }, + "family": "auto", + "ownedBy": "kilo", + "openWeights": false + }, + { + "id": "qwen3-a22b-2507", + "name": "Qwen: Qwen3 235B A22B Instruct 2507", + "description": "Qwen3-235B-A22B-Instruct-2507 is a multilingual, instruction-tuned mixture-of-experts language model based on the Qwen3-235B architecture, with 22B active parameters per forward pass. It is optimized for general-purpose text generation, including instruction following, logical reasoning, math, code, and tool usage. The model supports a native 262K context length and does not implement \"thinking mode\" ( blocks).\n\nCompared to its base variant, this version delivers significant gains in knowledge coverage, long-context reasoning, coding benchmarks, and alignment with open-ended tasks. 
It is particularly strong on multilingual understanding, math reasoning (e.g., AIME, HMMT), and alignment evaluations like Arena-Hard and WritingBench.", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 262144, + "maxOutputTokens": 52429, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.071 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.1 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 38912 + } + }, + "ownedBy": "alibaba", + "openWeights": true + }, + { + "id": "devstral", + "name": "Mistral: Devstral Medium", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 131072, + "maxOutputTokens": 26215, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.4 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2 + } + }, + "ownedBy": "mistral", + "openWeights": true + }, + { + "id": "mistral-large", + "name": "Mistral Large", + "description": "This is Mistral AI's flagship model, Mistral Large 2 (version `mistral-large-2407`). It's a proprietary weights-available model and excels at reasoning, code, JSON, chat, and more. Read the launch announcement [here](https://mistral.ai/news/mistral-large-2407/).\n\nIt supports dozens of languages including French, German, Spanish, Italian, Portuguese, Arabic, Hindi, Russian, Chinese, Japanese, and Korean, along with 80+ coding languages including Python, Java, C, C++, JavaScript, and Bash. 
Its long context window allows precise information recall from large documents.", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 25600, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 6 + } + }, + "ownedBy": "mistral", + "openWeights": true + }, + { + "id": "llama-3_3-nemotron-super-v1_5", + "name": "Llama 3 3 Nemotron Super 49B V1 5", + "capabilities": ["function-call", "structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2 + } + }, + "family": "llama", + "ownedBy": "meta", + "openWeights": true + }, + { + "id": "minimax-m2-5-official", + "name": "MiniMax M2.5", + "capabilities": ["reasoning", "function-call", "structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 204800, + "maxOutputTokens": 131072, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.2 + } + }, + "reasoning": { + "supportedEfforts": [], + "interleaved": true + }, + "family": "minimax", + "ownedBy": "minimax", + "openWeights": false + }, + { + "id": "glm-5-original", + "name": "GLM 5 Original Thinking", + "capabilities": ["reasoning", "function-call", "structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 128000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.8 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2.56 + } + }, + "reasoning": { + "supportedEfforts": ["none", "auto"] + }, + "family": "glm", + "ownedBy": "zhipu", + "openWeights": true + 
}, + { + "id": "multilingual-e5-large", + "name": "Multilingual-E5-large", + "description": "The multilingual-e5-large embedding model encodes sentences, paragraphs, and documents across over 90 languages into a 1024-dimensional dense vector space, delivering robust semantic embeddings optimized for multilingual retrieval, cross-language similarity, and large-scale data search.", + "capabilities": ["embedding"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 512, + "maxOutputTokens": 1024, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.02 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "family": "text-embedding", + "ownedBy": "berget", + "openWeights": true + }, + { + "id": "bge-reranker-v2-m3", + "name": "bge-reranker-v2-m3", + "description": "BAAI/bge-reranker-v2-m3 is a lightweight multilingual reranking model. It is developed based on the bge-m3 model, offering strong multilingual capabilities, easy deployment, and fast inference. The model takes a query and documents as input and directly outputs similarity scores instead of embedding vectors. 
It is suitable for multilingual scenarios and performs particularly well in both Chinese and English processing.", + "capabilities": ["embedding", "rerank"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 512, + "maxOutputTokens": 512, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.1 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.1 + } + }, + "family": "bge", + "ownedBy": "baai", + "openWeights": true + }, + { + "id": "mistral-nemo-instruct-2407-fp8", + "name": "Mistral Nemo", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.49 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.71 + } + }, + "family": "mistral", + "ownedBy": "mistral", + "openWeights": true + }, + { + "id": "llama-3-3-instruct-fp8-dynamic", + "name": "Llama 3.3 70B", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.49 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.71 + } + }, + "family": "llama", + "ownedBy": "meta", + "openWeights": true + }, + { + "id": "qwen3-vl-embedding", + "name": "Qwen3-VL Embedding 8B", + "capabilities": ["file-input", "image-recognition", "function-call", "embedding", "reasoning"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 32000, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.09 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.09 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 38912 + } + }, + "family": "qwen", + 
"ownedBy": "alibaba", + "openWeights": true + }, + { + "id": "qwen3-vl-a22b-instruct-fp8", + "name": "Qwen3-VL 235B", + "capabilities": ["function-call", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 218000, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.64 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.91 + } + }, + "family": "qwen", + "ownedBy": "alibaba", + "openWeights": true + }, + { + "id": "mistral-nemo-instruct", + "name": "Mistral Nemo 12B Instruct", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 16000, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.038 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.1 + } + }, + "family": "mistral-nemo", + "ownedBy": "mistral", + "openWeights": true + }, + { + "id": "osmosis-structure", + "name": "Osmosis Structure 0.6B", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 4000, + "maxOutputTokens": 2048, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.1 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.5 + } + }, + "family": "osmosis", + "ownedBy": "inference", + "openWeights": true + }, + { + "id": "qwen-2-5-vision-instruct", + "name": "Qwen 2.5 7B Vision Instruct", + "capabilities": ["function-call", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 125000, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.2 + } + }, + "family": "qwen", + "ownedBy": "alibaba", + "openWeights": true + }, + { + "id": "gemini-3-0-flash-preview", + "name": 
"Gemini 3.0 Flash Preview", + "capabilities": [ + "reasoning", + "function-call", + "file-input", + "image-recognition", + "audio-recognition", + "video-recognition" + ], + "inputModalities": ["text", "image", "audio", "video"], + "outputModalities": ["text"], + "contextWindow": 1000000, + "maxOutputTokens": 64000, + "ownedBy": "google", + "openWeights": false + }, + { + "id": "qwen-max-2025-01-25", + "name": "Qwen2.5-Max-2025-01-25", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 4096, + "ownedBy": "alibaba", + "openWeights": false + }, + { + "id": "claude-4-1-opus", + "name": "Claude 4.1 Opus", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 32000, + "ownedBy": "anthropic", + "openWeights": false + }, + { + "id": "doubao-1-5-thinking-pro", + "name": "Doubao 1.5 Thinking Pro", + "description": "Doubao-1.5 is a brand-new deep thinking model that excels in specialized fields such as mathematics, programming, scientific reasoning, and general tasks like creative writing. It achieves or approaches the top-tier industry level on multiple authoritative benchmarks including AIME 2024, Codeforces, and GPQA. 
It supports a 128k context window and 16k output.", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 16000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.62 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2.48 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.62 + } + }, + "reasoning": { + "supportedEfforts": ["none", "high"] + }, + "ownedBy": "bytedance", + "openWeights": false + }, + { + "id": "kling-v2-6", + "name": "Kling-V2 6", + "capabilities": ["file-input", "image-recognition", "video-recognition", "video-generation"], + "inputModalities": ["text", "image", "video"], + "outputModalities": ["video"], + "contextWindow": 99999999, + "maxOutputTokens": 99999999, + "ownedBy": "kling", + "openWeights": false + }, + { + "id": "gemini-3-0-pro-preview", + "name": "Gemini 3.0 Pro Preview", + "capabilities": [ + "reasoning", + "function-call", + "file-input", + "image-recognition", + "audio-recognition", + "video-recognition" + ], + "inputModalities": ["text", "image", "video", "audio"], + "outputModalities": ["text"], + "contextWindow": 1000000, + "maxOutputTokens": 64000, + "ownedBy": "google", + "openWeights": false + }, + { + "id": "doubao-seed-1-6-flash", + "name": "Doubao-Seed 1.6 Flash", + "description": "Doubao-Seed-1.6-flash is an extremely fast multimodal deep thinking model, with TPOT requiring only 10ms. It supports both text and visual understanding, with its text comprehension skills surpassing the previous generation lite model and its visual understanding on par with competitor's pro series models. 
It supports a 256k context window and an output length of up to 16k tokens.", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition", "video-recognition"], + "inputModalities": ["text", "image", "video"], + "outputModalities": ["text"], + "contextWindow": 256000, + "maxOutputTokens": 32000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.044 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.44 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.0088 + } + }, + "ownedBy": "bytedance", + "openWeights": false + }, + { + "id": "claude-4-0-opus", + "name": "Claude 4.0 Opus", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 32000, + "ownedBy": "anthropic", + "openWeights": false + }, + { + "id": "doubao-seed-1-6", + "name": "Doubao-Seed 1.6", + "description": "The Doubao-Seed-1.6-thinking model has significantly enhanced reasoning capabilities. Compared with Doubao-1.5-thinking-pro, it has further improvements in fundamental abilities such as coding, mathematics, and logical reasoning, and now also supports visual understanding. 
It supports a 256k context window, with output length supporting up to 16k tokens.", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition", "video-recognition"], + "inputModalities": ["text", "image", "video"], + "outputModalities": ["text"], + "contextWindow": 256000, + "maxOutputTokens": 32000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.18 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.8 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.036 + } + }, + "reasoning": { + "supportedEfforts": ["none", "high"] + }, + "ownedBy": "bytedance", + "openWeights": false + }, + { + "id": "doubao-1-5-pro-32k", + "name": "Doubao 1.5 Pro 32k", + "description": "Doubao-1.5-pro, a brand-new generation of flagship model, features comprehensive performance upgrades and excels in knowledge, coding, reasoning, and other aspects. It supports a 32k context window and an output length of up to 12k tokens.", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 12000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.134 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.335 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.0268 + } + }, + "ownedBy": "bytedance", + "openWeights": false + }, + { + "id": "gemini-3-0-pro-image-preview", + "name": "Gemini 3.0 Pro Image Preview", + "capabilities": ["file-input", "image-recognition", "image-generation", "function-call"], + "inputModalities": ["text", "image"], + "outputModalities": ["text", "image"], + "contextWindow": 32768, + "maxOutputTokens": 8192, + "ownedBy": "google", + "openWeights": false + }, + { + "id": "claude-4-0-sonnet", + "name": "Claude 4.0 Sonnet", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": 
["text"], + "contextWindow": 200000, + "maxOutputTokens": 64000, + "ownedBy": "anthropic", + "openWeights": false + }, + { + "id": "doubao-1-5-vision-pro", + "name": "Doubao 1.5 Vision Pro", + "capabilities": ["file-input", "image-recognition", "video-recognition"], + "inputModalities": ["text", "image", "video"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 16000, + "ownedBy": "bytedance", + "openWeights": false + }, + { + "id": "qwen-vl-max-2025-01-25", + "name": "Qwen VL-MAX-2025-01-25", + "capabilities": ["function-call", "file-input", "image-recognition", "audio-recognition", "video-recognition"], + "inputModalities": ["text", "image", "audio", "video"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 4096, + "ownedBy": "alibaba", + "openWeights": false + }, + { + "id": "deepseek-math-v2", + "name": "Deepseek/Deepseek-Math-V2", + "description": "The mathematical reasoning of large language models has shifted from pursuing correct answers to ensuring rigorous processes. Research proposes a new paradigm of \"self-verification,\" training specialized verifiers to evaluate proof steps and using this to train generators for self-error correction. The two co-evolve, pushing the boundaries of capability. 
Ultimately, the model achieves gold medal level in top competitions like the IMO, demonstrating the great potential of deep reasoning.", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 160000, + "maxOutputTokens": 160000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.492 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.968 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.0984 + } + }, + "ownedBy": "deepseek", + "openWeights": false + }, + { + "id": "deepseek-v3-2-251201", + "name": "Deepseek/DeepSeek-V3.2", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 32000, + "reasoning": { + "supportedEfforts": ["none"] + }, + "ownedBy": "deepseek", + "openWeights": false + }, + { + "id": "gelab-zero-preview", + "name": "Stepfun-Ai/Gelab Zero 4b Preview", + "capabilities": ["function-call", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 8192, + "maxOutputTokens": 4096, + "ownedBy": "qiniu-ai", + "openWeights": false + }, + { + "id": "autoglm-phone", + "name": "Z-Ai/Autoglm Phone 9b", + "capabilities": ["function-call", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 12800, + "maxOutputTokens": 4096, + "ownedBy": "qiniu-ai", + "openWeights": false + }, + { + "id": "qwen3-a3b-2507", + "name": "Qwen3 30B A3B 2507", + "capabilities": ["function-call", "reasoning"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 262144, + "maxOutputTokens": 16384, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 38912 + } + }, + "family": "qwen", + "ownedBy": "alibaba", + 
"openWeights": true + }, + { + "id": "k-exaone-a23b", + "name": "K EXAONE 236B A23B", + "capabilities": ["reasoning", "function-call", "structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 262144, + "maxOutputTokens": 262144, + "family": "exaone", + "ownedBy": "friendli", + "openWeights": true + }, + { + "id": "exaone-4-0-1", + "name": "EXAONE 4.0.1 32B", + "capabilities": ["reasoning", "function-call", "structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 131072, + "maxOutputTokens": 131072, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.6 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1 + } + }, + "family": "exaone", + "ownedBy": "friendli", + "openWeights": true + }, + { + "id": "anthropic--claude-4-5-opus", + "name": "anthropic--claude-4.5-opus", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 64000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 25 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.5 + } + }, + "family": "claude-opus", + "ownedBy": "sap-ai-core", + "openWeights": false + }, + { + "id": "anthropic--claude-3-5-sonnet", + "name": "anthropic--claude-3.5-sonnet", + "capabilities": ["function-call", "file-input", "image-recognition", "web-search", "computer-use"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 15 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.3 + } + }, + "family": "claude-sonnet", + "ownedBy": "sap-ai-core", 
+ "openWeights": false + }, + { + "id": "anthropic--claude-4-5-haiku", + "name": "anthropic--claude-4.5-haiku", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 64000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1 + }, + "output": { + "currency": "USD", + "perMillionTokens": 5 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.1 + } + }, + "family": "claude-haiku", + "ownedBy": "sap-ai-core", + "openWeights": false + }, + { + "id": "anthropic--claude-4-opus", + "name": "anthropic--claude-4-opus", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 32000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 15 + }, + "output": { + "currency": "USD", + "perMillionTokens": 75 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 1.5 + } + }, + "family": "claude-opus", + "ownedBy": "sap-ai-core", + "openWeights": false + }, + { + "id": "anthropic--claude-3-haiku", + "name": "anthropic--claude-3-haiku", + "capabilities": ["function-call", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.25 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.25 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.03 + } + }, + "family": "claude-haiku", + "ownedBy": "sap-ai-core", + "openWeights": false + }, + { + "id": "anthropic--claude-3-sonnet", + "name": "anthropic--claude-3-sonnet", + "capabilities": ["function-call", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + 
"outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 15 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.3 + } + }, + "family": "claude-sonnet", + "ownedBy": "sap-ai-core", + "openWeights": false + }, + { + "id": "anthropic--claude-3-7-sonnet", + "name": "anthropic--claude-3.7-sonnet", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition", "web-search", "computer-use"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 64000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 15 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.3 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 64000 + } + }, + "family": "claude-sonnet", + "ownedBy": "sap-ai-core", + "openWeights": false + }, + { + "id": "anthropic--claude-4-5-sonnet", + "name": "anthropic--claude-4.5-sonnet", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 64000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 15 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.3 + } + }, + "family": "claude-sonnet", + "ownedBy": "sap-ai-core", + "openWeights": false + }, + { + "id": "anthropic--claude-3-opus", + "name": "anthropic--claude-3-opus", + "capabilities": ["function-call", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 
200000, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 15 + }, + "output": { + "currency": "USD", + "perMillionTokens": 75 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 1.5 + } + }, + "family": "claude-opus", + "ownedBy": "sap-ai-core", + "openWeights": false + }, + { + "id": "anthropic--claude-4-sonnet", + "name": "anthropic--claude-4-sonnet", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 64000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 15 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.3 + } + }, + "family": "claude-sonnet", + "ownedBy": "sap-ai-core", + "openWeights": false + }, + { + "id": "claude-opus-4-0", + "name": "Claude Opus 4 (latest)", + "description": "Alias \nclaude-opus-4-20250514", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition", "web-search", "computer-use"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 32000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 15 + }, + "output": { + "currency": "USD", + "perMillionTokens": 75 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 1.5 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 32000 + } + }, + "family": "claude-opus", + "ownedBy": "anthropic", + "openWeights": false + }, + { + "id": "claude-3-5-sonnet-20241022", + "name": "Claude Sonnet 3.5 v2", + "capabilities": ["function-call", "file-input", "image-recognition", "web-search", "computer-use"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 
200000, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 15 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.3 + } + }, + "family": "claude-sonnet", + "ownedBy": "anthropic", + "openWeights": false, + "alias": ["claude-3-5-sonnet", "claude-3-5-sonnet-latest"] + }, + { + "id": "claude-3-5-haiku-latest", + "name": "Claude Haiku 3.5 (latest)", + "capabilities": ["function-call", "file-input", "image-recognition", "web-search"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.8 + }, + "output": { + "currency": "USD", + "perMillionTokens": 4 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.08 + } + }, + "family": "claude-haiku", + "ownedBy": "anthropic", + "openWeights": false + }, + { + "id": "claude-3-opus-20240229", + "name": "Claude Opus 3", + "description": "Claude’s previous generation strongest model", + "capabilities": ["function-call", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 15 + }, + "output": { + "currency": "USD", + "perMillionTokens": 75 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 1.5 + } + }, + "family": "claude-opus", + "ownedBy": "anthropic", + "openWeights": false + }, + { + "id": "claude-3-5-haiku-20241022", + "name": "Claude Haiku 3.5", + "capabilities": ["function-call", "file-input", "image-recognition", "web-search"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.8 + }, + "output": { + 
"currency": "USD", + "perMillionTokens": 4 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.08 + } + }, + "family": "claude-haiku", + "ownedBy": "anthropic", + "openWeights": false, + "alias": ["claude-3-5-haiku", "claude-3-5-haiku-latest"] + }, + { + "id": "claude-sonnet-4-0", + "name": "Claude Sonnet 4 (latest)", + "description": "Claude Sonnet 4 is a significant upgrade to Sonnet 3.7, delivering superior performance in coding and reasoning with enhanced precision and control. Achieving a state-of-the-art 72.7% on SWE-bench, the model expertly balances advanced capability with computational efficiency. Key improvements include more reliable codebase navigation and complex instruction following, making it ideal for a wide range of applications, from routine coding to complex software development projects.", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition", "web-search", "computer-use"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 64000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 15 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.3 + }, + "cacheWrite": { + "currency": "USD", + "perMillionTokens": 4.125 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 64000 + } + }, + "family": "claude-sonnet", + "ownedBy": "anthropic", + "openWeights": false + }, + { + "id": "claude-3-sonnet-20240229", + "name": "Claude Sonnet 3", + "capabilities": ["function-call", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 15 + 
}, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.3 + } + }, + "family": "claude-sonnet", + "ownedBy": "anthropic", + "openWeights": false + }, + { + "id": "deepseek-v3-2-fast", + "name": "DeepSeek-V3.2-Fast", + "description": "SophNet's exclusively developed DeepSeek V3.2 Fast is the high-TPS, high-speed version of DeepSeek V3.2, achieving up to 100t/s with faster response!", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 128000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.1 + }, + "output": { + "currency": "USD", + "perMillionTokens": 3.29 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 1.096 + } + }, + "reasoning": { + "supportedEfforts": ["none"] + }, + "family": "deepseek", + "ownedBy": "deepseek", + "openWeights": true + }, + { + "id": "qwen3-max-2026-01-23", + "name": "Qwen3 Max", + "description": "The snapshot version of the Tongyi Qianwen 3 series Max model is from January 23, 2026. By default, it does not require thinking, but thinking mode can be enabled through the enable_thinking parameter, as detailed in the code example. (After enabling thinking by passing parameters, it becomes: Qwen3-Max-Thinking). 
This model has a total parameter count exceeding one trillion (1T) and a pre-training data volume of up to 36T Tokens, making it the largest and most powerful reasoning model from Alibaba to date.", + "capabilities": ["function-call", "reasoning"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 262144, + "maxOutputTokens": 65536, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.34 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.37 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.34246 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 0, + "max": 81920 + } + }, + "family": "qwen", + "ownedBy": "alibaba", + "openWeights": false + }, + { + "id": "duo-chat-gpt-5-1", + "name": "Agentic Chat (GPT-5.1)", + "capabilities": [ + "reasoning", + "function-call", + "structured-output", + "file-input", + "image-recognition", + "web-search" + ], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 400000, + "maxOutputTokens": 128000, + "family": "gpt", + "ownedBy": "gitlab", + "openWeights": false + }, + { + "id": "duo-chat-sonnet-4-6", + "name": "Agentic Chat (Claude Sonnet 4.6)", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 64000, + "family": "claude-sonnet", + "ownedBy": "gitlab", + "openWeights": false + }, + { + "id": "duo-chat-opus-4-5", + "name": "Agentic Chat (Claude Opus 4.5)", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 64000, + "family": "claude-opus", + "ownedBy": "gitlab", + "openWeights": false + }, + { + "id": "duo-chat-sonnet-4-5", + "name": "Agentic 
Chat (Claude Sonnet 4.5)", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 64000, + "family": "claude-sonnet", + "ownedBy": "gitlab", + "openWeights": false + }, + { + "id": "duo-chat-gpt-5-mini", + "name": "Agentic Chat (GPT-5 Mini)", + "capabilities": [ + "reasoning", + "function-call", + "structured-output", + "file-input", + "image-recognition", + "web-search" + ], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 400000, + "maxOutputTokens": 128000, + "family": "gpt-mini", + "ownedBy": "gitlab", + "openWeights": false + }, + { + "id": "duo-chat-opus-4-6", + "name": "Agentic Chat (Claude Opus 4.6)", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 64000, + "family": "claude-opus", + "ownedBy": "gitlab", + "openWeights": false + }, + { + "id": "duo-chat-gpt-5-2", + "name": "Agentic Chat (GPT-5.2)", + "capabilities": [ + "reasoning", + "function-call", + "structured-output", + "file-input", + "image-recognition", + "web-search" + ], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 400000, + "maxOutputTokens": 128000, + "family": "gpt", + "ownedBy": "gitlab", + "openWeights": false + }, + { + "id": "duo-chat-gpt-5-2-codex", + "name": "Agentic Chat (GPT-5.2 Codex)", + "capabilities": [ + "reasoning", + "function-call", + "structured-output", + "file-input", + "image-recognition", + "web-search" + ], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 400000, + "maxOutputTokens": 128000, + "family": "gpt-codex", + "ownedBy": "gitlab", + "openWeights": false + }, + { + "id": "duo-chat-gpt-5-codex", + "name": "Agentic Chat (GPT-5 Codex)", + 
"capabilities": ["reasoning", "function-call", "structured-output", "image-recognition", "web-search"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 400000, + "maxOutputTokens": 128000, + "family": "gpt-codex", + "ownedBy": "gitlab", + "openWeights": false + }, + { + "id": "duo-chat-haiku-4-5", + "name": "Agentic Chat (Claude Haiku 4.5)", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 64000, + "family": "claude-haiku", + "ownedBy": "gitlab", + "openWeights": false + }, + { + "id": "magistral-small-2506", + "name": "Magistral Small 2506", + "capabilities": ["function-call", "reasoning"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.5 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.25 + } + }, + "family": "magistral-small", + "ownedBy": "mistral", + "openWeights": false + }, + { + "id": "mistral-large-instruct-2411", + "name": "Mistral Large Instruct 2411", + "capabilities": ["function-call", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 6 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 1 + } + }, + "family": "mistral-large", + "ownedBy": "mistral", + "openWeights": false + }, + { + "id": "qwen3-coder-a35b-instruct-int4-mixed-ar", + "name": "Qwen 3 Coder 480B", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 106000, + 
"maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.22 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.95 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.11 + } + }, + "family": "qwen", + "ownedBy": "alibaba", + "openWeights": true + }, + { + "id": "llama-4-scout-16e-instruct-fp8", + "name": "Llama-4-Scout-17B-16E-Instruct-FP8", + "capabilities": ["function-call", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 4096, + "family": "llama", + "ownedBy": "meta", + "openWeights": true + }, + { + "id": "cerebras-llama-4-scout-16e-instruct", + "name": "Cerebras-Llama-4-Scout-17B-16E-Instruct", + "capabilities": ["function-call", "file-input", "image-recognition"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 4096, + "family": "llama", + "ownedBy": "llama", + "openWeights": true + }, + { + "id": "cerebras-llama-4-maverick-128e-instruct", + "name": "Cerebras-Llama-4-Maverick-17B-128E-Instruct", + "capabilities": ["function-call", "file-input", "image-recognition"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 4096, + "family": "llama", + "ownedBy": "llama", + "openWeights": true + }, + { + "id": "pixtral-2409", + "name": "Pixtral 12B 2409", + "capabilities": ["function-call", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.2 + } + }, + "family": "pixtral", + "ownedBy": "mistral", + "openWeights": true + }, + { + "id": "us.anthropic.claude-sonnet-4-5-20250929-v1:0", + "name": "Claude Sonnet 4.5 (US)", + 
"capabilities": ["reasoning", "function-call", "file-input", "image-recognition", "web-search", "computer-use"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 64000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 15 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.3 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 64000 + } + }, + "family": "claude-sonnet", + "ownedBy": "amazon-bedrock", + "openWeights": false + }, + { + "id": "cohere.command-r-plus-v1:0", + "name": "Command R+", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 15 + } + }, + "family": "command-r", + "ownedBy": "amazon-bedrock", + "openWeights": true + }, + { + "id": "anthropic.claude-v2", + "name": "Claude 2", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 100000, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 8 + }, + "output": { + "currency": "USD", + "perMillionTokens": 24 + } + }, + "family": "claude", + "ownedBy": "amazon-bedrock", + "openWeights": false + }, + { + "id": "anthropic.claude-3-7-sonnet-20250219-v1:0", + "name": "Claude Sonnet 3.7", + "capabilities": ["function-call", "file-input", "image-recognition", "reasoning", "web-search", "computer-use"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 3 + }, + "output": { + 
"currency": "USD", + "perMillionTokens": 15 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.3 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 64000 + } + }, + "family": "claude-sonnet", + "ownedBy": "amazon-bedrock", + "openWeights": false + }, + { + "id": "anthropic.claude-sonnet-4-20250514-v1:0", + "name": "Claude Sonnet 4", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition", "web-search", "computer-use"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 64000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 15 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.3 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 64000 + } + }, + "family": "claude-sonnet", + "ownedBy": "amazon-bedrock", + "openWeights": false + }, + { + "id": "qwen.qwen3-coder-a3b-v1:0", + "name": "Qwen3 Coder 30B A3B Instruct", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 262144, + "maxOutputTokens": 131072, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.15 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.6 + } + }, + "family": "qwen", + "ownedBy": "alibaba", + "openWeights": false + }, + { + "id": "google.gemma-3-it", + "name": "Gemma 3 4B IT", + "capabilities": ["function-call", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.04 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.08 + } + }, + "family": "gemma", 
+ "ownedBy": "amazon-bedrock", + "openWeights": false + }, + { + "id": "minimax.minimax-m2", + "name": "MiniMax M2", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 204608, + "maxOutputTokens": 128000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.2 + } + }, + "family": "minimax", + "ownedBy": "minimax", + "openWeights": true + }, + { + "id": "zai.glm-4-7", + "name": "GLM-4.7", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 204800, + "maxOutputTokens": 131072, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.6 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2.2 + } + }, + "family": "glm", + "ownedBy": "amazon-bedrock", + "openWeights": true + }, + { + "id": "meta.llama3-2-instruct-v1:0", + "name": "Llama 3.2 11B Instruct", + "capabilities": ["function-call", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.16 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.16 + } + }, + "family": "llama", + "ownedBy": "amazon-bedrock", + "openWeights": true + }, + { + "id": "qwen.qwen3-next-a3b", + "name": "Qwen/Qwen3-Next-80B-A3B-Instruct", + "capabilities": ["function-call", "structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 262000, + "maxOutputTokens": 262000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.14 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.4 + } + }, + "family": "qwen", + "ownedBy": "alibaba", + "openWeights": false + }, + { + "id": "anthropic.claude-3-haiku-20240307-v1:0", + 
"name": "Claude Haiku 3", + "capabilities": ["function-call", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.25 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.25 + } + }, + "family": "claude-haiku", + "ownedBy": "amazon-bedrock", + "openWeights": false + }, + { + "id": "qwen.qwen3-vl-a22b", + "name": "Qwen/Qwen3-VL-235B-A22B-Instruct", + "capabilities": ["function-call", "structured-output", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 262000, + "maxOutputTokens": 262000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.5 + } + }, + "family": "qwen", + "ownedBy": "alibaba", + "openWeights": false + }, + { + "id": "us.anthropic.claude-opus-4-6-v1", + "name": "Claude Opus 4.6 (US)", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition", "web-search", "computer-use"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 1000000, + "maxOutputTokens": 128000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 25 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.5 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 32000 + } + }, + "family": "claude-opus", + "ownedBy": "amazon-bedrock", + "openWeights": false + }, + { + "id": "anthropic.claude-v2:1", + "name": "Claude 2.1", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 4096, + "pricing": { + "input": { + 
"currency": "USD", + "perMillionTokens": 8 + }, + "output": { + "currency": "USD", + "perMillionTokens": 24 + } + }, + "family": "claude", + "ownedBy": "amazon-bedrock", + "openWeights": false + }, + { + "id": "deepseek.v3-v1:0", + "name": "DeepSeek-V3.1", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 163840, + "maxOutputTokens": 81920, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.58 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.68 + } + }, + "family": "deepseek", + "ownedBy": "deepseek", + "openWeights": true + }, + { + "id": "anthropic.claude-opus-4-5-20251101-v1:0", + "name": "Claude Opus 4.5", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition", "web-search", "computer-use"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 64000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 25 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.5 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 64000 + } + }, + "family": "claude-opus", + "ownedBy": "amazon-bedrock", + "openWeights": false + }, + { + "id": "cohere.command-light-text-v14", + "name": "Command Light", + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 4096, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.6 + } + }, + "family": "command-light", + "ownedBy": "amazon-bedrock", + "openWeights": true + }, + { + "id": "mistral.mistral-large-2402-v1:0", + "name": "Mistral Large (24.02)", + "capabilities": ["function-call"], + "inputModalities": ["text"], + 
"outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.5 + } + }, + "family": "mistral-large", + "ownedBy": "mistral", + "openWeights": false + }, + { + "id": "anthropic.claude-sonnet-4-6", + "name": "Claude Sonnet 4.6", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition", "web-search", "computer-use"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 64000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 15 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.3 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 64000 + } + }, + "family": "claude-sonnet", + "ownedBy": "amazon-bedrock", + "openWeights": false + }, + { + "id": "nvidia.nemotron-nano-v2", + "name": "NVIDIA Nemotron Nano 12B v2 VL BF16", + "capabilities": ["function-call", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.6 + } + }, + "family": "nemotron", + "ownedBy": "nvidia", + "openWeights": false + }, + { + "id": "ai21.jamba-1-5-large-v1:0", + "name": "Jamba 1.5 Large", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 256000, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 8 + } + }, + "family": "jamba", + "ownedBy": "amazon-bedrock", + 
"openWeights": true + }, + { + "id": "eu.anthropic.claude-haiku-4-5-20251001-v1:0", + "name": "Claude Haiku 4.5 (EU)", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition", "web-search", "computer-use"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 64000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1 + }, + "output": { + "currency": "USD", + "perMillionTokens": 5 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.1 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 64000 + } + }, + "family": "claude-haiku", + "ownedBy": "amazon-bedrock", + "openWeights": false + }, + { + "id": "us.anthropic.claude-sonnet-4-20250514-v1:0", + "name": "Claude Sonnet 4 (US)", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition", "web-search", "computer-use"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 64000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 15 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.3 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 64000 + } + }, + "family": "claude-sonnet", + "ownedBy": "amazon-bedrock", + "openWeights": false + }, + { + "id": "global.anthropic.claude-sonnet-4-20250514-v1:0", + "name": "Claude Sonnet 4 (Global)", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition", "web-search", "computer-use"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 64000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 3 + 
}, + "output": { + "currency": "USD", + "perMillionTokens": 15 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.3 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 64000 + } + }, + "family": "claude-sonnet", + "ownedBy": "amazon-bedrock", + "openWeights": false + }, + { + "id": "global.anthropic.claude-haiku-4-5-20251001-v1:0", + "name": "Claude Haiku 4.5 (Global)", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition", "web-search", "computer-use"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 64000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1 + }, + "output": { + "currency": "USD", + "perMillionTokens": 5 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.1 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 64000 + } + }, + "family": "claude-haiku", + "ownedBy": "amazon-bedrock", + "openWeights": false + }, + { + "id": "meta.llama3-3-instruct-v1:0", + "name": "Llama 3.3 70B Instruct", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.72 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.72 + } + }, + "family": "llama", + "ownedBy": "amazon-bedrock", + "openWeights": true + }, + { + "id": "us.anthropic.claude-opus-4-5-20251101-v1:0", + "name": "Claude Opus 4.5 (US)", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition", "web-search", "computer-use"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 64000, + "pricing": { + "input": { + "currency": 
"USD", + "perMillionTokens": 5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 25 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.5 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 64000 + } + }, + "family": "claude-opus", + "ownedBy": "amazon-bedrock", + "openWeights": false + }, + { + "id": "anthropic.claude-opus-4-6-v1", + "name": "Claude Opus 4.6", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition", "web-search", "computer-use"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 1000000, + "maxOutputTokens": 128000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 25 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.5 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 32000 + } + }, + "family": "claude-opus", + "ownedBy": "amazon-bedrock", + "openWeights": false + }, + { + "id": "anthropic.claude-3-opus-20240229-v1:0", + "name": "Claude Opus 3", + "capabilities": ["function-call", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 15 + }, + "output": { + "currency": "USD", + "perMillionTokens": 75 + } + }, + "family": "claude-opus", + "ownedBy": "amazon-bedrock", + "openWeights": false + }, + { + "id": "writer.palmyra-x4-v1:0", + "name": "Palmyra X4", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 122880, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2.5 + }, + "output": { + 
"currency": "USD", + "perMillionTokens": 10 + } + }, + "family": "palmyra", + "ownedBy": "amazon-bedrock", + "openWeights": false + }, + { + "id": "moonshotai.kimi-k2-5", + "name": "Kimi K2.5", + "capabilities": ["reasoning", "function-call", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 256000, + "maxOutputTokens": 256000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.6 + }, + "output": { + "currency": "USD", + "perMillionTokens": 3 + } + }, + "reasoning": { + "supportedEfforts": ["none", "auto"] + }, + "ownedBy": "moonshot", + "openWeights": true + }, + { + "id": "amazon.nova-pro-v1:0", + "name": "Nova Pro", + "capabilities": ["function-call", "file-input", "image-recognition", "video-recognition"], + "inputModalities": ["text", "image", "video"], + "outputModalities": ["text"], + "contextWindow": 300000, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.8 + }, + "output": { + "currency": "USD", + "perMillionTokens": 3.2 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.2 + } + }, + "family": "nova-pro", + "ownedBy": "amazon-bedrock", + "openWeights": false + }, + { + "id": "eu.anthropic.claude-sonnet-4-6", + "name": "Claude Sonnet 4.6 (EU)", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition", "web-search", "computer-use"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 64000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 15 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.3 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 64000 + } + }, + "family": "claude-sonnet", + "ownedBy": "amazon-bedrock", + "openWeights": 
false + }, + { + "id": "global.anthropic.claude-sonnet-4-6", + "name": "Claude Sonnet 4.6 (Global)", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition", "web-search", "computer-use"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 64000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 15 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.3 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 64000 + } + }, + "family": "claude-sonnet", + "ownedBy": "amazon-bedrock", + "openWeights": false + }, + { + "id": "meta.llama3-1-instruct-v1:0", + "name": "Llama 3.1 8B Instruct", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.22 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.22 + } + }, + "family": "llama", + "ownedBy": "amazon-bedrock", + "openWeights": true + }, + { + "id": "us.anthropic.claude-opus-4-1-20250805-v1:0", + "name": "Claude Opus 4.1 (US)", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition", "web-search", "computer-use"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 32000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 15 + }, + "output": { + "currency": "USD", + "perMillionTokens": 75 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 1.5 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 32000 + } + }, + "family": "claude-opus", + "ownedBy": "amazon-bedrock", 
+ "openWeights": false + }, + { + "id": "openai.gpt-oss-1:0", + "name": "gpt-oss-120b", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.15 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.6 + } + }, + "family": "gpt-oss", + "ownedBy": "amazon-bedrock", + "openWeights": false + }, + { + "id": "qwen.qwen3-v1:0", + "name": "Qwen3 32B (dense)", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 16384, + "maxOutputTokens": 16384, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.15 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.6 + } + }, + "family": "qwen", + "ownedBy": "alibaba", + "openWeights": true + }, + { + "id": "anthropic.claude-3-5-sonnet-20240620-v1:0", + "name": "Claude Sonnet 3.5", + "capabilities": ["function-call", "file-input", "image-recognition", "web-search", "computer-use"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 15 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.3 + } + }, + "family": "claude-sonnet", + "ownedBy": "amazon-bedrock", + "openWeights": false + }, + { + "id": "anthropic.claude-haiku-4-5-20251001-v1:0", + "name": "Claude Haiku 4.5", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition", "web-search", "computer-use"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 64000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1 + }, + "output": { + "currency": "USD", + 
"perMillionTokens": 5 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.1 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 64000 + } + }, + "family": "claude-haiku", + "ownedBy": "amazon-bedrock", + "openWeights": false + }, + { + "id": "cohere.command-r-v1:0", + "name": "Command R", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.5 + } + }, + "family": "command-r", + "ownedBy": "amazon-bedrock", + "openWeights": true + }, + { + "id": "mistral.voxtral-small-2507", + "name": "Voxtral Small 24B 2507", + "capabilities": ["function-call", "file-input", "audio-recognition"], + "inputModalities": ["text", "audio"], + "outputModalities": ["text"], + "contextWindow": 32000, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.15 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.35 + } + }, + "family": "mistral", + "ownedBy": "mistral", + "openWeights": true + }, + { + "id": "amazon.nova-micro-v1:0", + "name": "Nova Micro", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.035 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.14 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.00875 + } + }, + "family": "nova-micro", + "ownedBy": "amazon-bedrock", + "openWeights": false + }, + { + "id": "meta.llama3-instruct-v1:0", + "name": "Llama 3 70B Instruct", + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 8192, + "maxOutputTokens": 2048, + 
"pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2.65 + }, + "output": { + "currency": "USD", + "perMillionTokens": 3.5 + } + }, + "family": "llama", + "ownedBy": "amazon-bedrock", + "openWeights": true + }, + { + "id": "global.anthropic.claude-opus-4-6-v1", + "name": "Claude Opus 4.6 (Global)", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition", "web-search", "computer-use"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 1000000, + "maxOutputTokens": 128000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 25 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.5 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 32000 + } + }, + "family": "claude-opus", + "ownedBy": "amazon-bedrock", + "openWeights": false + }, + { + "id": "deepseek.r1-v1:0", + "name": "DeepSeek-R1", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 32768, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.35 + }, + "output": { + "currency": "USD", + "perMillionTokens": 5.4 + } + }, + "family": "deepseek-thinking", + "ownedBy": "deepseek", + "openWeights": false + }, + { + "id": "anthropic.claude-3-5-sonnet-20241022-v2:0", + "name": "Claude Sonnet 3.5 v2", + "capabilities": ["function-call", "file-input", "image-recognition", "web-search", "computer-use"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 15 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.3 + } + }, 
+ "family": "claude-sonnet", + "ownedBy": "amazon-bedrock", + "openWeights": false + }, + { + "id": "minimax.minimax-m2-1", + "name": "MiniMax M2.1", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 204800, + "maxOutputTokens": 131072, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.2 + } + }, + "family": "minimax", + "ownedBy": "minimax", + "openWeights": true + }, + { + "id": "mistral.ministral-3-instruct", + "name": "Ministral 3 8B", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.15 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.15 + } + }, + "family": "ministral", + "ownedBy": "mistral", + "openWeights": false + }, + { + "id": "writer.palmyra-x5-v1:0", + "name": "Palmyra X5", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 1040000, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.6 + }, + "output": { + "currency": "USD", + "perMillionTokens": 6 + } + }, + "family": "palmyra", + "ownedBy": "amazon-bedrock", + "openWeights": false + }, + { + "id": "cohere.command-text-v14", + "name": "Command", + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 4096, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2 + } + }, + "family": "command", + "ownedBy": "amazon-bedrock", + "openWeights": true + }, + { + "id": "us.anthropic.claude-haiku-4-5-20251001-v1:0", + "name": "Claude Haiku 4.5 (US)", + "capabilities": ["reasoning", 
"function-call", "file-input", "image-recognition", "web-search", "computer-use"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 64000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1 + }, + "output": { + "currency": "USD", + "perMillionTokens": 5 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.1 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 64000 + } + }, + "family": "claude-haiku", + "ownedBy": "amazon-bedrock", + "openWeights": false + }, + { + "id": "anthropic.claude-opus-4-20250514-v1:0", + "name": "Claude Opus 4", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition", "web-search", "computer-use"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 32000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 15 + }, + "output": { + "currency": "USD", + "perMillionTokens": 75 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 1.5 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 32000 + } + }, + "family": "claude-opus", + "ownedBy": "amazon-bedrock", + "openWeights": false + }, + { + "id": "mistral.voxtral-mini-2507", + "name": "Voxtral Mini 3B 2507", + "capabilities": ["function-call", "audio-recognition"], + "inputModalities": ["audio", "text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.04 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.04 + } + }, + "family": "mistral", + "ownedBy": "mistral", + "openWeights": false + }, + { + "id": "global.anthropic.claude-sonnet-4-5-20250929-v1:0", + "name": "Claude Sonnet 4.5 
(Global)", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition", "web-search", "computer-use"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 64000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 15 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.3 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 64000 + } + }, + "family": "claude-sonnet", + "ownedBy": "amazon-bedrock", + "openWeights": false + }, + { + "id": "global.anthropic.claude-opus-4-5-20251101-v1:0", + "name": "Claude Opus 4.5 (Global)", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition", "web-search", "computer-use"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 64000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 25 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.5 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 64000 + } + }, + "family": "claude-opus", + "ownedBy": "amazon-bedrock", + "openWeights": false + }, + { + "id": "amazon.nova-2-lite-v1:0", + "name": "Nova 2 Lite", + "capabilities": ["function-call", "image-recognition", "video-recognition"], + "inputModalities": ["text", "image", "video"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.33 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2.75 + } + }, + "family": "nova", + "ownedBy": "amazon-bedrock", + "openWeights": false + }, + { + 
"id": "qwen.qwen3-coder-a35b-v1:0", + "name": "Qwen3 Coder 480B A35B Instruct", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 131072, + "maxOutputTokens": 65536, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.22 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.8 + } + }, + "family": "qwen", + "ownedBy": "alibaba", + "openWeights": true + }, + { + "id": "anthropic.claude-sonnet-4-5-20250929-v1:0", + "name": "Claude Sonnet 4.5", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition", "web-search", "computer-use"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 64000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 15 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.3 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 64000 + } + }, + "family": "claude-sonnet", + "ownedBy": "amazon-bedrock", + "openWeights": false + }, + { + "id": "openai.gpt-oss-safeguard", + "name": "GPT OSS Safeguard 20B", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.07 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.2 + } + }, + "family": "gpt-oss", + "ownedBy": "amazon-bedrock", + "openWeights": false + }, + { + "id": "anthropic.claude-instant-v1", + "name": "Claude Instant", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 100000, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.8 + }, + 
"output": { + "currency": "USD", + "perMillionTokens": 2.4 + } + }, + "family": "claude", + "ownedBy": "amazon-bedrock", + "openWeights": false + }, + { + "id": "amazon.nova-premier-v1:0", + "name": "Nova Premier", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition", "video-recognition"], + "inputModalities": ["text", "image", "video"], + "outputModalities": ["text"], + "contextWindow": 1000000, + "maxOutputTokens": 16384, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2.5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 12.5 + } + }, + "family": "nova", + "ownedBy": "amazon-bedrock", + "openWeights": false + }, + { + "id": "mistral.mistral-instruct-v0:2", + "name": "Mistral-7B-Instruct-v0.3", + "capabilities": ["function-call", "structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 127000, + "maxOutputTokens": 127000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.11 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.11 + } + }, + "family": "mistral", + "ownedBy": "mistral", + "openWeights": true + }, + { + "id": "eu.anthropic.claude-sonnet-4-20250514-v1:0", + "name": "Claude Sonnet 4 (EU)", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition", "web-search", "computer-use"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 64000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 15 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.3 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 64000 + } + }, + "family": "claude-sonnet", + "ownedBy": "amazon-bedrock", + "openWeights": false + }, + { + "id": 
"mistral.mixtral-8x7b-instruct-v0:1", + "name": "Mixtral-8x7B-Instruct-v0.1", + "capabilities": ["structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 32000, + "maxOutputTokens": 32000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.7 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.7 + } + }, + "family": "mixtral", + "ownedBy": "mistral", + "openWeights": true + }, + { + "id": "anthropic.claude-opus-4-1-20250805-v1:0", + "name": "Claude Opus 4.1", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition", "web-search", "computer-use"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 32000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 15 + }, + "output": { + "currency": "USD", + "perMillionTokens": 75 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 1.5 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 32000 + } + }, + "family": "claude-opus", + "ownedBy": "amazon-bedrock", + "openWeights": false + }, + { + "id": "meta.llama4-scout-instruct-v1:0", + "name": "Llama 4 Scout 17B Instruct", + "capabilities": ["function-call", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 3500000, + "maxOutputTokens": 16384, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.17 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.66 + } + }, + "family": "llama", + "ownedBy": "amazon-bedrock", + "openWeights": true + }, + { + "id": "ai21.jamba-1-5-mini-v1:0", + "name": "Jamba 1.5 Mini", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 256000, + "maxOutputTokens": 4096, + "pricing": { + "input": { + 
"currency": "USD", + "perMillionTokens": 0.2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.4 + } + }, + "family": "jamba", + "ownedBy": "amazon-bedrock", + "openWeights": true + }, + { + "id": "amazon.titan-text-express-v1:0:8k", + "name": "Titan Text G1 - Express", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.6 + } + }, + "family": "titan", + "ownedBy": "amazon-bedrock", + "openWeights": false + }, + { + "id": "anthropic.claude-3-sonnet-20240229-v1:0", + "name": "Claude Sonnet 3", + "capabilities": ["function-call", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 15 + } + }, + "family": "claude-sonnet", + "ownedBy": "amazon-bedrock", + "openWeights": false + }, + { + "id": "eu.anthropic.claude-sonnet-4-5-20250929-v1:0", + "name": "Claude Sonnet 4.5 (EU)", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition", "web-search", "computer-use"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 64000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 15 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.3 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 64000 + } + }, + "family": "claude-sonnet", + "ownedBy": "amazon-bedrock", + "openWeights": false + }, + { + "id": 
"amazon.titan-text-express-v1", + "name": "Titan Text G1 - Express", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.6 + } + }, + "family": "titan", + "ownedBy": "amazon-bedrock", + "openWeights": false + }, + { + "id": "meta.llama4-maverick-instruct-v1:0", + "name": "Llama 4 Maverick 17B Instruct", + "capabilities": ["function-call", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 1000000, + "maxOutputTokens": 16384, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.24 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.97 + } + }, + "family": "llama", + "ownedBy": "amazon-bedrock", + "openWeights": true + }, + { + "id": "us.anthropic.claude-sonnet-4-6", + "name": "Claude Sonnet 4.6 (US)", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition", "web-search", "computer-use"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 64000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 15 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.3 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 64000 + } + }, + "family": "claude-sonnet", + "ownedBy": "amazon-bedrock", + "openWeights": false + }, + { + "id": "qwen.qwen3-a22b-2507-v1:0", + "name": "Qwen3 235B A22B 2507", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 262144, + "maxOutputTokens": 131072, + "pricing": { + 
"input": { + "currency": "USD", + "perMillionTokens": 0.22 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.88 + } + }, + "family": "qwen", + "ownedBy": "alibaba", + "openWeights": true + }, + { + "id": "us.anthropic.claude-opus-4-20250514-v1:0", + "name": "Claude Opus 4 (US)", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition", "web-search", "computer-use"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 32000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 15 + }, + "output": { + "currency": "USD", + "perMillionTokens": 75 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 1.5 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 32000 + } + }, + "family": "claude-opus", + "ownedBy": "amazon-bedrock", + "openWeights": false + }, + { + "id": "amazon.nova-lite-v1:0", + "name": "Nova Lite", + "capabilities": ["function-call", "file-input", "image-recognition", "video-recognition"], + "inputModalities": ["text", "image", "video"], + "outputModalities": ["text"], + "contextWindow": 300000, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.06 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.24 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.015 + } + }, + "family": "nova-lite", + "ownedBy": "amazon-bedrock", + "openWeights": false + }, + { + "id": "anthropic.claude-3-5-haiku-20241022-v1:0", + "name": "Claude Haiku 3.5", + "capabilities": ["function-call", "file-input", "image-recognition", "web-search"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.8 + }, + "output": { + "currency": "USD", + 
"perMillionTokens": 4 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.08 + } + }, + "family": "claude-haiku", + "ownedBy": "amazon-bedrock", + "openWeights": false + }, + { + "id": "moonshot.kimi-k2", + "name": "Kimi K2 Thinking", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 256000, + "maxOutputTokens": 256000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.6 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2.5 + } + }, + "ownedBy": "moonshot", + "openWeights": true + }, + { + "id": "zai.glm-4-7-flash", + "name": "GLM-4.7-Flash", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 131072, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.07 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.4 + } + }, + "family": "glm-flash", + "ownedBy": "amazon-bedrock", + "openWeights": true + }, + { + "id": "eu.anthropic.claude-opus-4-5-20251101-v1:0", + "name": "Claude Opus 4.5 (EU)", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition", "web-search", "computer-use"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 64000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 25 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.5 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 64000 + } + }, + "family": "claude-opus", + "ownedBy": "amazon-bedrock", + "openWeights": false + }, + { + "id": "deepseek.v3-2-v1:0", + "name": "DeepSeek-V3.2", + "capabilities": ["reasoning", "function-call"], + "inputModalities": 
["text"], + "outputModalities": ["text"], + "contextWindow": 163840, + "maxOutputTokens": 81920, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.62 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.85 + } + }, + "family": "deepseek", + "ownedBy": "deepseek", + "openWeights": true + }, + { + "id": "eu.anthropic.claude-opus-4-6-v1", + "name": "Claude Opus 4.6 (EU)", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition", "web-search", "computer-use"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 1000000, + "maxOutputTokens": 128000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 25 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.5 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 32000 + } + }, + "family": "claude-opus", + "ownedBy": "amazon-bedrock", + "openWeights": false + }, + { + "id": "ideogram", + "name": "Ideogram", + "capabilities": ["function-call", "file-input", "image-recognition", "image-generation"], + "inputModalities": ["text", "image"], + "outputModalities": ["image"], + "contextWindow": 150, + "family": "ideogram", + "ownedBy": "ideogram", + "openWeights": false + }, + { + "id": "ideogram-v2a", + "name": "Ideogram-v2a", + "capabilities": ["function-call", "file-input", "image-generation"], + "inputModalities": ["text"], + "outputModalities": ["image"], + "contextWindow": 150, + "family": "ideogram", + "ownedBy": "ideogram", + "openWeights": false + }, + { + "id": "ideogram-v2a-turbo", + "name": "Ideogram-v2a-Turbo", + "capabilities": ["function-call", "file-input", "image-generation"], + "inputModalities": ["text"], + "outputModalities": ["image"], + "contextWindow": 150, + "family": "ideogram", + "ownedBy": "ideogram", + "openWeights": false + }, + { 
+ "id": "ideogram-v2", + "name": "Ideogram-v2", + "capabilities": ["function-call", "file-input", "image-recognition", "image-generation"], + "inputModalities": ["text", "image"], + "outputModalities": ["image"], + "contextWindow": 150, + "family": "ideogram", + "ownedBy": "ideogram", + "openWeights": false + }, + { + "id": "runway", + "name": "Runway", + "capabilities": ["function-call", "file-input", "image-recognition", "video-generation"], + "inputModalities": ["text", "image"], + "outputModalities": ["video"], + "contextWindow": 256, + "family": "runway", + "ownedBy": "runway", + "openWeights": false + }, + { + "id": "runway-gen-4-turbo", + "name": "Runway-Gen-4-Turbo", + "capabilities": ["function-call", "file-input", "image-recognition", "video-generation"], + "inputModalities": ["text", "image"], + "outputModalities": ["video"], + "contextWindow": 256, + "family": "runway", + "ownedBy": "runway", + "openWeights": false + }, + { + "id": "claude-code", + "name": "claude-code", + "capabilities": ["reasoning", "function-call", "file-input"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "ownedBy": "anthropic", + "openWeights": false + }, + { + "id": "elevenlabs-v3", + "name": "ElevenLabs-v3", + "capabilities": ["function-call", "file-input", "audio-generation"], + "inputModalities": ["text"], + "outputModalities": ["audio"], + "contextWindow": 128000, + "family": "elevenlabs", + "ownedBy": "elevenlabs", + "openWeights": false + }, + { + "id": "elevenlabs-music", + "name": "ElevenLabs-Music", + "capabilities": ["function-call", "file-input", "audio-generation"], + "inputModalities": ["text"], + "outputModalities": ["audio"], + "contextWindow": 2000, + "family": "elevenlabs", + "ownedBy": "elevenlabs", + "openWeights": false + }, + { + "id": "elevenlabs-v2-5-turbo", + "name": "ElevenLabs-v2.5-Turbo", + "capabilities": ["function-call", "file-input", "audio-generation"], + "inputModalities": ["text"], + "outputModalities": ["audio"], + 
"contextWindow": 128000, + "family": "elevenlabs", + "ownedBy": "elevenlabs", + "openWeights": false + }, + { + "id": "gemini-deep-research", + "name": "gemini-deep-research", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition", "video-recognition"], + "inputModalities": ["text", "image", "video"], + "outputModalities": ["text"], + "contextWindow": 1048576, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.6 + }, + "output": { + "currency": "USD", + "perMillionTokens": 9.6 + } + }, + "ownedBy": "google", + "openWeights": false + }, + { + "id": "nano-banana", + "name": "Nano-Banana", + "capabilities": ["function-call", "file-input", "image-recognition", "image-generation"], + "inputModalities": ["text", "image"], + "outputModalities": ["text", "image"], + "contextWindow": 65536, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.21 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.8 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.021 + } + }, + "family": "nano-banana", + "ownedBy": "google", + "openWeights": false + }, + { + "id": "imagen-4", + "name": "Imagen-4", + "capabilities": ["function-call", "file-input", "image-generation"], + "inputModalities": ["text"], + "outputModalities": ["image"], + "contextWindow": 480, + "family": "imagen", + "ownedBy": "google", + "openWeights": false + }, + { + "id": "imagen-3", + "name": "Imagen-3", + "capabilities": ["function-call", "file-input", "image-generation", "video-generation"], + "inputModalities": ["text"], + "outputModalities": ["image"], + "contextWindow": 480, + "family": "imagen", + "ownedBy": "google", + "openWeights": false + }, + { + "id": "imagen-4-ultra", + "name": "Imagen-4-Ultra", + "capabilities": ["function-call", "file-input", "image-generation"], + "inputModalities": ["text"], + "outputModalities": ["image"], + "contextWindow": 480, + "family": "imagen", + "ownedBy": "google", + "openWeights": 
false + }, + { + "id": "veo-3-1", + "name": "Veo-3.1", + "capabilities": ["function-call", "file-input", "video-generation"], + "inputModalities": ["text"], + "outputModalities": ["video"], + "contextWindow": 480, + "family": "veo", + "ownedBy": "google", + "openWeights": false + }, + { + "id": "imagen-3-fast", + "name": "Imagen-3-Fast", + "capabilities": ["function-call", "file-input", "image-generation", "video-generation"], + "inputModalities": ["text"], + "outputModalities": ["image"], + "contextWindow": 480, + "family": "imagen", + "ownedBy": "google", + "openWeights": false + }, + { + "id": "lyria", + "name": "Lyria", + "capabilities": ["function-call", "file-input", "audio-generation"], + "inputModalities": ["text"], + "outputModalities": ["audio"], + "family": "lyria", + "ownedBy": "poe", + "openWeights": false + }, + { + "id": "veo-3", + "name": "Veo-3", + "description": "veo3 reverse access with a total cost of just $0.41 per video generation., OpenAI chat port compatible format.", + "capabilities": ["function-call", "file-input", "video-generation"], + "inputModalities": ["text"], + "outputModalities": ["video"], + "contextWindow": 480, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2 + } + }, + "family": "veo", + "ownedBy": "google", + "openWeights": false + }, + { + "id": "veo-3-fast", + "name": "Veo-3-Fast", + "capabilities": ["function-call", "file-input", "video-generation"], + "inputModalities": ["text"], + "outputModalities": ["video"], + "contextWindow": 480, + "family": "veo", + "ownedBy": "google", + "openWeights": false + }, + { + "id": "imagen-4-fast", + "name": "Imagen-4-Fast", + "capabilities": ["function-call", "file-input", "image-generation"], + "inputModalities": ["text"], + "outputModalities": ["image"], + "contextWindow": 480, + "family": "imagen", + "ownedBy": "google", + "openWeights": false + }, + { + "id": "veo-2", + "name": "Veo-2", + 
"capabilities": ["function-call", "file-input", "video-generation"], + "inputModalities": ["text"], + "outputModalities": ["video"], + "contextWindow": 480, + "family": "veo", + "ownedBy": "google", + "openWeights": false + }, + { + "id": "nano-banana-pro", + "name": "Nano-Banana-Pro", + "capabilities": ["function-call", "file-input", "image-recognition", "image-generation"], + "inputModalities": ["text", "image"], + "outputModalities": ["image"], + "contextWindow": 65536, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 12 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.2 + } + }, + "family": "nano-banana", + "ownedBy": "google", + "openWeights": false + }, + { + "id": "veo-3-1-fast", + "name": "Veo-3.1-Fast", + "capabilities": ["function-call", "file-input", "image-recognition", "video-generation"], + "inputModalities": ["text", "image"], + "outputModalities": ["video"], + "contextWindow": 480, + "family": "veo", + "ownedBy": "google", + "openWeights": false + }, + { + "id": "gpt-5-2-instant", + "name": "GPT-5.2-Instant", + "capabilities": ["function-call", "file-input", "image-recognition", "web-search", "reasoning"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 16384, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.6 + }, + "output": { + "currency": "USD", + "perMillionTokens": 13 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.16 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high", "max"] + }, + "ownedBy": "openai", + "openWeights": false + }, + { + "id": "sora-2", + "name": "Sora-2", + "description": "Sora-2 is the next-generation text-to-video model evolved from Sora, optimized for higher visual realism, stronger physical consistency, and longer temporal coherence. 
It delivers more stable character consistency, complex motion rendering, camera control, and narrative continuity, while supporting higher resolutions and minute-level video generation for film production, advertising, virtual content creation, and creative multimedia workflows.", + "capabilities": ["function-call", "file-input", "image-recognition", "video-generation"], + "inputModalities": ["text", "image"], + "outputModalities": ["video"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2 + } + }, + "family": "sora", + "ownedBy": "openai", + "openWeights": false + }, + { + "id": "gpt-3-5-turbo-raw", + "name": "GPT-3.5-Turbo-Raw", + "capabilities": ["function-call", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 4524, + "maxOutputTokens": 2048, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.45 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.4 + } + }, + "family": "gpt", + "ownedBy": "openai", + "openWeights": false + }, + { + "id": "gpt-4o-search", + "name": "GPT-4o-Search", + "capabilities": ["function-call", "file-input", "image-recognition", "web-search"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2.2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 9 + } + }, + "family": "gpt", + "ownedBy": "openai", + "openWeights": false + }, + { + "id": "gpt-image-1-5", + "name": "gpt-image-1.5", + "description": "GPT Image 1.5 is a new image generation model powered by OpenAI’s flagship visual capabilities, comprehensively upgraded for high-quality creative and production workflows. 
It delivers significant improvements in instruction understanding, fine-grained image editing, and detail preservation, while achieving up to 4× faster generation compared to previous versions — reducing latency without compromising quality.\n\nGPT Image 1.5 is well suited for image generation, precise visual editing, and professional content creation, balancing performance with efficiency.", + "capabilities": ["function-call", "file-input", "image-recognition", "image-generation"], + "inputModalities": ["text", "image"], + "outputModalities": ["image"], + "contextWindow": 128000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 10 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 5 + } + }, + "ownedBy": "openai", + "openWeights": false + }, + { + "id": "gpt-image-1-mini", + "name": "GPT-Image-1-Mini", + "description": "OpenAI image generation model gpt-image-1-mini\nBefore use, please run pip install -U openai to upgrade to the latest openai package.", + "capabilities": ["function-call", "file-input", "image-recognition", "image-generation"], + "inputModalities": ["text", "image"], + "outputModalities": ["image"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 40 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 5 + } + }, + "family": "gpt", + "ownedBy": "openai", + "openWeights": false + }, + { + "id": "gpt-4o-aug", + "name": "GPT-4o-Aug", + "capabilities": ["function-call", "file-input", "image-recognition", "web-search"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2.2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 9 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 1.1 + } + }, + 
"family": "gpt", + "ownedBy": "openai", + "openWeights": false + }, + { + "id": "gpt-image-1", + "name": "GPT-Image-1", + "description": "Azure OpenAI’s gpt-image-1 image generation API offers both text-to-image generation and image-to-image editing with text guidance capabilities.\nBefore using this API, please ensure you have the latest OpenAI package installed by running pip install -U openai.", + "capabilities": ["function-call", "file-input", "image-recognition", "image-generation"], + "inputModalities": ["text", "image"], + "outputModalities": ["image"], + "contextWindow": 128000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 40 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 5 + } + }, + "family": "gpt", + "ownedBy": "openai", + "openWeights": false + }, + { + "id": "gpt-4-classic-0314", + "name": "GPT-4-Classic-0314", + "capabilities": ["function-call", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 8192, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 27 + }, + "output": { + "currency": "USD", + "perMillionTokens": 54 + } + }, + "family": "gpt", + "ownedBy": "openai", + "openWeights": false + }, + { + "id": "dall-e-3", + "name": "DALL-E-3", + "description": "dall-e-3 is an AI image generation model that converts natural language prompts into realistic visuals and artistic content. 
It delivers accurate semantic understanding, supports customizable output resolutions, and produces high-quality images across a wide range of styles, making it well-suited for concept design, creative prototyping, and professional content workflows.", + "capabilities": ["function-call", "file-input", "image-generation"], + "inputModalities": ["text"], + "outputModalities": ["image"], + "contextWindow": 800, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 40 + }, + "output": { + "currency": "USD", + "perMillionTokens": 40 + } + }, + "family": "dall-e", + "ownedBy": "openai", + "openWeights": false + }, + { + "id": "sora-2-pro", + "name": "Sora-2-Pro", + "description": "OpenAI video model Sora2-pro official API.", + "capabilities": ["function-call", "file-input", "image-recognition", "video-generation"], + "inputModalities": ["text", "image"], + "outputModalities": ["video"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2 + } + }, + "family": "sora", + "ownedBy": "openai", + "openWeights": false + }, + { + "id": "gpt-4o-mini-search", + "name": "GPT-4o-mini-Search", + "capabilities": ["function-call", "file-input", "image-recognition", "web-search"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.14 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.54 + } + }, + "family": "gpt-mini", + "ownedBy": "openai", + "openWeights": false + }, + { + "id": "stablediffusionxl", + "name": "StableDiffusionXL", + "capabilities": ["function-call", "file-input", "image-recognition", "image-generation"], + "inputModalities": ["text", "image"], + "outputModalities": ["image"], + "contextWindow": 200, + "family": "stable-diffusion", + "ownedBy": "poe", + "openWeights": false + }, + { + "id": "topazlabs", + "name": 
"TopazLabs", + "capabilities": ["function-call", "file-input", "image-generation"], + "inputModalities": ["text"], + "outputModalities": ["image"], + "contextWindow": 204, + "family": "topazlabs", + "ownedBy": "poe", + "openWeights": false + }, + { + "id": "ray2", + "name": "Ray2", + "capabilities": ["function-call", "file-input", "image-recognition", "video-generation"], + "inputModalities": ["text", "image"], + "outputModalities": ["video"], + "contextWindow": 5000, + "family": "ray", + "ownedBy": "poe", + "openWeights": false + }, + { + "id": "claude-sonnet-3-7", + "name": "Claude-Sonnet-3.7", + "capabilities": ["reasoning", "function-call", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 196608, + "maxOutputTokens": 128000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2.6 + }, + "output": { + "currency": "USD", + "perMillionTokens": 13 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.26 + } + }, + "family": "claude-sonnet", + "ownedBy": "anthropic", + "openWeights": false + }, + { + "id": "claude-haiku-3", + "name": "Claude-Haiku-3", + "capabilities": ["function-call", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 189096, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.21 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.1 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.021 + } + }, + "family": "claude-haiku", + "ownedBy": "anthropic", + "openWeights": false + }, + { + "id": "claude-sonnet-3-5", + "name": "Claude-Sonnet-3.5", + "capabilities": ["function-call", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 189096, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + 
"perMillionTokens": 2.6 + }, + "output": { + "currency": "USD", + "perMillionTokens": 13 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.26 + } + }, + "family": "claude-sonnet", + "ownedBy": "anthropic", + "openWeights": false + }, + { + "id": "claude-haiku-3-5", + "name": "Claude-Haiku-3.5", + "capabilities": ["function-call", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 189096, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.68 + }, + "output": { + "currency": "USD", + "perMillionTokens": 3.4 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.068 + } + }, + "family": "claude-haiku", + "ownedBy": "anthropic", + "openWeights": false + }, + { + "id": "claude-sonnet-3-5-june", + "name": "Claude-Sonnet-3.5-June", + "capabilities": ["function-call", "file-input", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 189096, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2.6 + }, + "output": { + "currency": "USD", + "perMillionTokens": 13 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.26 + } + }, + "family": "claude-sonnet", + "ownedBy": "anthropic", + "openWeights": false + }, + { + "id": "tako", + "name": "Tako", + "capabilities": ["function-call", "file-input"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 2048, + "family": "tako", + "ownedBy": "poe", + "openWeights": false + }, + { + "id": "glm-4-7-n", + "name": "glm-4.7-n", + "capabilities": ["reasoning", "function-call", "file-input"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 205000, + "maxOutputTokens": 131072, + "reasoning": { + "supportedEfforts": ["none"], + "interleaved": true + }, + "ownedBy": "zhipu", + "openWeights": false + }, + { + 
"id": "llama-3-1-cs", + "name": "llama-3.1-8b-cs", + "capabilities": ["function-call", "file-input"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "ownedBy": "meta", + "openWeights": false + }, + { + "id": "gpt-oss-cs", + "name": "gpt-oss-120b-cs", + "capabilities": ["reasoning", "function-call", "file-input"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "reasoning": { + "supportedEfforts": ["low", "medium", "high"] + }, + "ownedBy": "openai", + "openWeights": false + }, + { + "id": "qwen3-2507-cs", + "name": "qwen3-235b-2507-cs", + "capabilities": ["reasoning", "function-call", "file-input"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 38912 + } + }, + "ownedBy": "alibaba", + "openWeights": false + }, + { + "id": "qwen3-cs", + "name": "qwen3-32b-cs", + "capabilities": ["reasoning", "function-call", "file-input"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 38912 + } + }, + "ownedBy": "alibaba", + "openWeights": false + }, + { + "id": "llama-3-3-cs", + "name": "llama-3.3-70b-cs", + "capabilities": ["file-input"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "ownedBy": "meta", + "openWeights": false + }, + { + "id": "qwen-3-a22b-instruct-2507", + "name": "Qwen 3 235B Instruct", + "description": "cerebras", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 131000, + "maxOutputTokens": 32000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.6 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.2 + } + }, + "family": "qwen", + "ownedBy": "alibaba", + "openWeights": true + }, + { + "id": "llama3-1", + "name": "Llama 3.1 8B", 
+ "description": "cerebras", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 32000, + "maxOutputTokens": 8000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.1 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.1 + } + }, + "family": "llama", + "ownedBy": "meta", + "openWeights": true + }, + { + "id": "doubao-seed-2-0-code-preview", + "description": "The Doubao 2.0 series is a coding model optimized for real programming environments, capable of reliably invoking tools in common IDEs such as Claude Code. The model is specially optimized for frontend capabilities and performs well with common frontend frameworks. The model supports using Skills and can work with various custom skills.", + "capabilities": ["reasoning", "web-search", "function-call", "structured-output"], + "inputModalities": ["text", "image", "video"], + "outputModalities": ["text"], + "contextWindow": 256000, + "maxOutputTokens": 128000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.4822 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2.411 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.09644 + } + }, + "reasoning": { + "supportedEfforts": ["minimal", "low", "medium", "high"] + }, + "ownedBy": "bytedance" + }, + { + "id": "gpt-4-1-nano-free", + "description": "This free model API comes from the OpenAI model deployed on Azure. To prevent abuse, the external content filter provided by Azure has been enforced, which will result in additional delays. 
If you want to experience the full version of the model API without filters, please use the paid version and request the model name ID without \"-free\".", + "capabilities": ["function-call", "structured-output", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 1047576, + "maxOutputTokens": 32768, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "ownedBy": "openai" + }, + { + "id": "qwen3-vl-flash", + "description": "The Qwen3 series of compact visual-understanding models achieves an effective fusion of thinking mode and non-thinking mode, outperforming the open-source Qwen3-VL-30B-A3B with faster response speeds. It comprehensively upgrades image and video understanding, supporting ultra-long contexts such as long videos and long documents, spatial awareness, and universal object recognition; it also possesses visual 2D/3D localization capabilities and is capable of handling complex real-world tasks.", + "capabilities": ["function-call", "structured-output", "image-recognition", "reasoning"], + "inputModalities": ["text", "image", "video"], + "outputModalities": ["text"], + "contextWindow": 254000, + "maxOutputTokens": 32000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.0206 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.206 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.00412 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 38912 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "qwen3-vl-flash-2026-01-22", + "description": "The Qwen3 series of compact visual-understanding models achieves an effective fusion of thinking mode and non-thinking mode, outperforming the open-source Qwen3-VL-30B-A3B with faster response speeds. 
It comprehensively upgrades image and video understanding, supporting ultra-long contexts such as long videos and long documents, spatial awareness, and universal object recognition; it also possesses visual 2D/3D localization capabilities and is capable of handling complex real-world tasks.", + "capabilities": ["function-call", "structured-output", "image-recognition", "reasoning"], + "inputModalities": ["text", "image", "video"], + "outputModalities": ["text"], + "contextWindow": 254000, + "maxOutputTokens": 32000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.0206 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.206 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.0206 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 38912 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "doubao-seedream-4-5", + "description": "Seedream 4.5 is ByteDance's latest multimodal image model, integrating capabilities such as text-to-image, image-to-image, and multi-image output, along with incorporating common sense and reasoning abilities. Compared to the previous 4.0 model, it significantly improves generation quality, offering better editing consistency and multi-image fusion effects, with more precise control over image details. 
The generation of small text and small faces is more natural.", + "capabilities": ["image-generation", "function-call"], + "inputModalities": ["text", "image"], + "outputModalities": ["image"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "ownedBy": "bytedance" + }, + { + "id": "wan2-6-i2v", + "description": "Wan 2.6 - Text-to-Video generation features intelligent storyboard scheduling supporting multi-shot narration, higher quality sound generation, stable multi-person dialogue, more natural and realistic voice tones, and supports video generation up to 15 seconds in length.", + "capabilities": ["video-generation"], + "inputModalities": ["image", "text"], + "outputModalities": ["video"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "wan2-6-t2v", + "description": "Wan 2.6 - Text-to-Video generation features intelligent storyboard scheduling supporting multi-shot narration, higher quality sound generation, stable multi-person dialogue, more natural and realistic voice tones, and supports video generation up to 15 seconds in length.", + "capabilities": ["video-generation"], + "inputModalities": ["text"], + "outputModalities": ["video"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "gpt-4o-audio-preview", + "name": "OpenAI: GPT-4o Audio", + "description": "The gpt-4o-audio-preview model adds support for audio inputs as prompts. This enhancement allows the model to detect nuances within audio recordings and add depth to generated user experiences. Audio outputs are currently not supported. 
Audio tokens are priced at $40 per million input and $80 per million output audio tokens.", + "capabilities": [ + "function-call", + "structured-output", + "audio-recognition", + "audio-generation", + "image-recognition", + "web-search" + ], + "inputModalities": ["audio", "text"], + "outputModalities": ["text", "audio"], + "contextWindow": 128000, + "maxOutputTokens": 16384, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2.5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 10 + } + }, + "ownedBy": "openai" + }, + { + "id": "gpt-4o-mini-audio-preview", + "capabilities": ["function-call", "image-recognition", "web-search"], + "inputModalities": ["text", "audio"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.15 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.6 + } + }, + "ownedBy": "openai" + }, + { + "id": "flux-2-flex", + "description": "FLUX.2 is purpose-built for real-world creative production workflows. It delivers high-quality images while maintaining character and style consistency across multiple reference images, shows exceptional understanding and execution of structured prompts, and supports complex text reading and writing. It also adheres to brand guidelines, handles lighting, layout, and logo elements with stability, and enables image editing at resolutions up to 4MP — all while preserving fine details, striking a balance between creativity and professional-grade visual output.", + "capabilities": ["image-generation"], + "inputModalities": ["text", "image"], + "outputModalities": ["image"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "ownedBy": "bfl" + }, + { + "id": "flux-2-pro", + "description": "FLUX.2 is purpose-built for real-world creative production workflows. 
It delivers high-quality images while maintaining character and style consistency across multiple reference images, shows exceptional understanding and execution of structured prompts, and supports complex text reading and writing. It also adheres to brand guidelines, handles lighting, layout, and logo elements with stability, and enables image editing at resolutions up to 4MP — all while preserving fine details, striking a balance between creativity and professional-grade visual output.", + "capabilities": ["image-generation"], + "inputModalities": ["text", "image"], + "outputModalities": ["image"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "ownedBy": "bfl" + }, + { + "id": "jimeng-3-0-1080p", + "description": "DreamVideo 3.0 Pro is a professional-grade text-to-video and image-to-video model built on the Dream framework, delivering a major breakthrough in video generation quality. This version demonstrates strong performance across multiple dimensions, including narrative coherence, instruction following, dynamic fluidity, and visual detail. It supports multi-shot storytelling and generates 1080P high-definition videos with a professional cinematic texture. The model also enables diverse and expressive stylistic rendering, making it well suited for creative production and visual storytelling.", + "capabilities": ["video-generation"], + "inputModalities": ["text", "image"], + "outputModalities": ["video"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + } + }, + { + "id": "jimeng-3-0-720p", + "description": "DreamVideo 3.0 Pro is a professional-grade text-to-video and image-to-video model built on the Dream framework, delivering a major breakthrough in video generation quality. 
This version demonstrates strong performance across multiple dimensions, including narrative coherence, instruction following, dynamic fluidity, and visual detail. It supports multi-shot storytelling and generates 1080P high-definition videos with a professional cinematic texture. The model also enables diverse and expressive stylistic rendering, making it well suited for creative production and visual storytelling.", + "capabilities": ["video-generation"], + "inputModalities": ["text", "image"], + "outputModalities": ["video"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + } + }, + { + "id": "jimeng-3-0-pro", + "description": "DreamVideo 3.0 Pro is a professional-grade text-to-video and image-to-video model built on the Dream framework, delivering a major breakthrough in video generation quality. This version demonstrates strong performance across multiple dimensions, including narrative coherence, instruction following, dynamic fluidity, and visual detail. It supports multi-shot storytelling and generates 1080P high-definition videos with a professional cinematic texture. The model also enables diverse and expressive stylistic rendering, making it well suited for creative production and visual storytelling.", + "capabilities": ["video-generation"], + "inputModalities": ["text", "image"], + "outputModalities": ["video"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + } + }, + { + "id": "kimi-for-coding", + "description": "kimi-for-coding-free is a free and open version offered by AIHubMix specifically for Kimi users. 
To maintain stable service operations, the following usage limits apply: a maximum of 5 requests per minute 500 total requests per day, and a daily quota of 1 million tokens.", + "capabilities": ["reasoning", "function-call", "structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 256000, + "maxOutputTokens": 256000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.02 + } + }, + "ownedBy": "moonshot" + }, + { + "id": "qianfan-ocr", + "description": "Qianfan-OCR-Fast is a multimodal large model specialized for OCR, trained primarily on OCR-domain data while retaining appropriate general multimodal capabilities, and it outperforms Qianfan-OCR.", + "capabilities": ["file-input"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 32000, + "maxOutputTokens": 28000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.062 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.248 + } + } + }, + { + "id": "qianfan-ocr-fast", + "description": "Qianfan-OCR-Fast is a multimodal large model specialized for OCR, trained primarily on OCR-domain data while retaining appropriate general multimodal capabilities, and it outperforms Qianfan-OCR.", + "capabilities": ["file-input"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 32000, + "maxOutputTokens": 28000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.664 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2.738336 + } + } + }, + { + "id": "wan2-2-i2v-plus", + "description": "The newly upgraded Tongyi Wanxiang 2.2 text-to-video offers higher video quality. 
It optimizes video generation stability and success rate, features stronger instruction-following capabilities, consistently maintains image text, portrait, and product consistency, and provides precise camera motion control.", + "capabilities": ["video-generation"], + "inputModalities": ["text", "image"], + "outputModalities": ["video"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "wan2-2-t2v-plus", + "description": "The newly upgraded Tongyi Wanxiang 2.2 text-to-video offers higher video quality. It can stably generate large-scale complex motions, supports cinematic-level visual performance and control, and features enhanced instruction-following capabilities to achieve realistic physical world reproduction.", + "capabilities": ["video-generation"], + "inputModalities": ["text"], + "outputModalities": ["video"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "wan2-5-i2v-preview", + "description": "Tongyi Wanxiang 2.5 - Text-to-Video Preview features a newly upgraded technical architecture, supporting sound generation synchronized with visuals, 10-second long video generation, stronger instruction-following capabilities, and further improvements in motion ability and visual quality.", + "capabilities": ["video-generation"], + "inputModalities": ["text", "image"], + "outputModalities": ["video"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "wan2-5-t2v-preview", + "description": "Tongyi Wanxiang 2.5 - Text-to-Video Preview, newly upgraded model architecture, supports sound generation synchronized with visuals, supports 10-second long video generation, 
enhanced instruction compliance, improved motion capability, and further enhanced visual quality.", + "capabilities": ["video-generation"], + "inputModalities": ["text"], + "outputModalities": ["video"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "deepseek-v3-1-fast", + "description": "The model provider is the Sophon platform. DeepSeek V3.1 Fast is the high-TPS speed version of DeepSeek V3.1.\nHybrid thinking mode: By modifying the chat template, a single model can simultaneously support both thinking and non-thinking modes.\nSmarter tool usage: Through post-training optimization, the model’s performance in tool utilization and agent tasks has improved significantly.", + "capabilities": ["function-call", "structured-output", "reasoning"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 163000, + "maxOutputTokens": 163000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.096 + }, + "output": { + "currency": "USD", + "perMillionTokens": 3.288 + } + }, + "reasoning": { + "supportedEfforts": ["none"] + }, + "ownedBy": "deepseek" + }, + { + "id": "deepseek-v3-fast", + "description": "V3 Ultra-Fast Version,The current price is a limited-time 50% discount and will return to the original price on July 31st. The original price is: input: $0.55/M, output: $2.2/M. The model provider is the Sophnet platform. 
DeepSeek V3 Fast is a high-TPS, ultra-fast version of DeepSeek V3 0324, featuring full-precision (non-quantized) performance, enhanced code and math capabilities, and faster responses!\n\nDeepSeek V3 0324 is a powerful Mixture-of-Experts (MoE) model with a total parameter count of 671B, activating 37B parameters per token.\nIt adopts Multi-Head Latent Attention (MLA) and the DeepSeekMoE architecture to achieve efficient inference and economical training costs.\nIt innovatively implements a load balancing strategy without auxiliary loss and sets multi-token prediction training targets to enhance performance.\nThe model is pre-trained on 14.8 trillion diverse, high-quality tokens and further optimized through supervised fine-tuning and reinforcement learning stages to fully realize its capabilities.\nComprehensive evaluations show that DeepSeek V3 outperforms other open-source models and rivals leading closed-source models in performance.\nThe entire training process only requires 2.788M H800 GPU hours and remains highly stable, with no irrecoverable loss spikes or rollbacks.", + "capabilities": ["function-call", "structured-output", "reasoning"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 32000, + "maxOutputTokens": 32000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.56 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2.24 + } + }, + "reasoning": { + "supportedEfforts": ["none"] + }, + "ownedBy": "deepseek" + }, + { + "id": "veo-2-0-generate-001", + "description": "Veo 2.0 is an advanced video generation model capable of producing high-quality videos based on text or image prompts. It excels in understanding real-world physics and human motion, resulting in fluid character movements and lifelike scenes. Veo 2.0 supports various visual styles and camera control options, including lens types, angles, and motion effects. 
Users can generate 8-second video clips at 720p resolution.", + "capabilities": ["video-generation"], + "inputModalities": ["video"], + "outputModalities": ["video"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2 + } + }, + "ownedBy": "google" + }, + { + "id": "veo3-1", + "description": "veo3.1 reverse model, and other available model names that can be requested include: veo3.1-pro and veo3.1-components. The price is currently tentatively set to be calculated per token, approximately $0.05 per request.", + "capabilities": ["video-generation"], + "inputModalities": ["text"], + "outputModalities": ["video"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 200 + }, + "output": { + "currency": "USD", + "perMillionTokens": 200 + } + }, + "ownedBy": "google" + }, + { + "id": "imagen-4-0", + "description": "Imagen 4 is a high-quality text-to-image model developed by Google, designed for strong visual fidelity, diverse artistic styles, and precise controllability. It delivers near photographic realism with sharp details and natural lighting while significantly reducing common artifacts such as distorted hands. The model supports a wide range of styles including photorealistic, illustration, anime, oil painting, and pixel art, and offers flexible aspect ratios for use cases from content covers to mobile wallpapers. 
It also enables image editing and secondary creation on existing images, provides fast and stable generation, and offers strong commercial usability with high visual quality and reliable content safety.", + "capabilities": ["image-generation"], + "inputModalities": ["text", "image"], + "outputModalities": ["image"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2 + } + }, + "ownedBy": "google" + }, + { + "id": "imagen-4-0-fast-generate-preview-06-06", + "capabilities": ["image-generation"], + "inputModalities": ["text", "image"], + "outputModalities": ["image"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2 + } + }, + "ownedBy": "google" + }, + { + "id": "imagen-4-0-ultra", + "capabilities": ["image-generation"], + "inputModalities": ["text", "image"], + "outputModalities": ["image"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2 + } + }, + "ownedBy": "google" + }, + { + "id": "pp-structurev3", + "description": "PP-StructureV3 is an efficient and comprehensive document parsing solution that can effectively convert document images and PDF files into structured content (such as Markdown format). It features powerful capabilities including layout area detection, table recognition, formula recognition, chart understanding, and multi-column reading order recovery. 
This tool performs excellently across various document types and can handle complex document data.", + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + } + }, + { + "id": "veo-3-0-generate-preview", + "description": "Veo 3.0 Generate Preview is an advanced AI video generation model that supports text-to-video creation with synchronized audio, featuring excellent physical simulation and lip-sync capabilities. Users can generate vivid video clips from short story prompts. 🎟️ Limited-Time Deal: Save 10% Now.", + "capabilities": ["video-generation"], + "inputModalities": ["text", "image", "video"], + "outputModalities": ["video"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2 + } + }, + "ownedBy": "google" + }, + { + "id": "veo-3-1-fast-generate-preview", + "description": "Veo 3.1 is Google's state-of-the-art model for generating high-fidelity, 8-second 720p , 1080p or 4k videos featuring stunning realism and natively generated audio.", + "capabilities": ["video-generation"], + "inputModalities": ["text", "image", "video"], + "outputModalities": ["video"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "ownedBy": "google" + }, + { + "id": "veo-3-1-generate-preview", + "description": "Veo 3.1 is Google's state-of-the-art model for generating high-fidelity, 8-second 720p , 1080p or 4k videos featuring stunning realism and natively generated audio.", + "capabilities": ["video-generation"], + "inputModalities": ["text", "image", "video"], + "outputModalities": ["video"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2 + } + }, + "ownedBy": 
"google" + }, + { + "id": "ernie-5-0-thinking-exp", + "description": "ERNIE 5.0 is the next-generation natively multimodal foundation model in the ERNIE family. Built on a unified multimodal architecture, it jointly learns from text, images, audio, and video to deliver broad multimodal capabilities.\n\nERNIE 5.0 features significantly upgraded core capabilities and shows strong performance across benchmarks, with notable gains in multimodal understanding, instruction following, creative writing, factual accuracy, and agent planning with tool use.", + "capabilities": ["reasoning"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 119000, + "maxOutputTokens": 64000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.82192 + }, + "output": { + "currency": "USD", + "perMillionTokens": 3.28768 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.82192 + } + }, + "ownedBy": "baidu" + }, + { + "id": "router", + "name": "Switchpoint Router", + "description": "Switchpoint AI's router instantly analyzes your request and directs it to the optimal AI from an ever-evolving library. \n\nAs the world of LLMs advances, our router gets smarter, ensuring you always benefit from the industry's newest models without changing your workflow.\n\nThis model is configured for a simple, flat rate per response here on OpenRouter. It's powered by the full routing engine from [Switchpoint AI](https://www.switchpoint.dev).", + "capabilities": ["reasoning"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 131072, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.85 + }, + "output": { + "currency": "USD", + "perMillionTokens": 3.4 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.1 + } + } + }, + { + "id": "gemini-2-5-pro-preview-03-25", + "description": "Supports high concurrency. 
\nThe Gemini 2.5 Pro preview version is here, with higher limits for production testing. \nGoogle's latest and most powerful model;", + "capabilities": ["reasoning", "function-call", "structured-output", "image-recognition", "web-search"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.25 + }, + "output": { + "currency": "USD", + "perMillionTokens": 10 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.125 + } + }, + "reasoning": { + "supportedEfforts": ["low", "medium", "high"], + "thinkingTokenLimits": { + "min": 128, + "max": 32768 + } + }, + "ownedBy": "google" + }, + { + "id": "qwen3-coder-plus-2025-07-22", + "description": "The code generation model based on Qwen3 has powerful Coding Agent capabilities, excels in tool invocation and environment interaction, and can achieve autonomous programming with outstanding coding abilities while also possessing general capabilities.The model adopts tiered pricing.", + "capabilities": ["function-call", "structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 65536, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.54 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2.16 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.54 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "imagen-4-0-ultra-generate-exp-05-20", + "description": "Image 4.0 Beta version, for testing purposes only. 
For production environment, it is recommended to use imagen-4.0-generate-preview-05-20.", + "capabilities": ["image-generation"], + "inputModalities": ["text", "image"], + "outputModalities": ["image"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2 + } + }, + "ownedBy": "google" + }, + { + "id": "jina-embeddings-v5-text-nano", + "description": "A 3.8-billion-parameter general vector model (embedding model) for state-of-the-art multilingual embeddings for edge deployment.", + "capabilities": ["embedding"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.05 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.05 + } + }, + "ownedBy": "jina" + }, + { + "id": "jina-embeddings-v5-text-small", + "description": "A 3.8-billion-parameter general vector (embedding) model providing state-of-the-art multilingual embeddings with task-specific adapters.", + "capabilities": ["embedding"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.05 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.05 + } + }, + "ownedBy": "jina" + }, + { + "id": "doubao-seedream-4-0", + "description": "Seedream 4.0 is a SOTA-level multimodal image creation model based on leading architecture. It breaks the creative boundaries of traditional text-to-image models by natively supporting text, single image, and multiple image inputs. Users can freely combine text and images to achieve various creative styles within the same model, such as multi-image fusion creation based on subject consistency, image editing, and set image generation, making image creation more flexible and controllable.\nSeedream 4.0 supports composite editing with up to 10 images in a single input. 
Through deep reasoning of prompt words, it automatically adapts the optimal image aspect ratio and generation quantity, enabling continuous output of up to 15 content-related images at one time. Additionally, the model significantly improves the accuracy and content diversity of Chinese generation, supports 4K ultra-high-definition output, and provides a one-stop solution from generation to editing for professional image creation.", + "capabilities": ["image-generation", "function-call"], + "inputModalities": ["text", "image"], + "outputModalities": ["image"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "ownedBy": "bytedance" + }, + { + "id": "embedding-v1", + "description": "Embedding-V1 is a text representation model based on Baidu's Wenxin large model technology, capable of converting text into numerical vector forms for applications such as text retrieval, information recommendation, and knowledge mining. Embedding-V1 provides an Embeddings interface that generates corresponding vector representations based on the input content. By calling this interface, you can input text into the model and obtain the corresponding vector representations for subsequent text processing and analysis.", + "capabilities": ["embedding"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.068 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.068 + } + } + }, + { + "id": "ernie-4-5-turbo-latest", + "description": "Wenxin 4.5 Turbo also has significant improvements in hallucination reduction, logical reasoning, and coding capabilities. 
Compared to Wenxin 4.5, it is faster and more affordable.", + "capabilities": ["function-call", "structured-output"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 135000, + "maxOutputTokens": 12000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.11 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.44 + } + }, + "ownedBy": "baidu" + }, + { + "id": "ernie-irag-edit", + "description": "Baidu's self-developed ERNIE iRAG Edit image editing model supports operations based on images such as erase (object removal), repaint (object redrawing), and variation (variant generation).", + "capabilities": ["function-call", "structured-output", "image-generation"], + "inputModalities": ["text", "image"], + "outputModalities": ["image"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "ownedBy": "baidu" + }, + { + "id": "glm-4-5-x", + "description": "GLM-4.5-X is the high-speed version of GLM-4.5, offering powerful performance with a generation speed of up to 100 tokens per second.", + "capabilities": ["function-call", "reasoning"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2.2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 8.91 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.44 + } + }, + "reasoning": { + "supportedEfforts": ["none"], + "interleaved": true + }, + "ownedBy": "zhipu" + }, + { + "id": "gme-qwen2-vl-instruct", + "description": "The GME-Qwen2VL series is a unified multimodal Embedding model trained based on the Qwen2-VL multimodal large language model (MLLMs). The GME model supports three types of inputs: text, images, and image-text pairs. 
All these input types can generate universal vector representations and exhibit excellent retrieval performance.", + "capabilities": ["function-call", "image-recognition"], + "inputModalities": ["text", "image", "video"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.138 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.138 + } + } + }, + { + "id": "gte-rerank-v2", + "description": "gte-rerank-v2 is a multilingual unified text ranking model developed by Tongyi Lab, covering multiple major languages worldwide and providing high-quality text ranking services. It is typically used in scenarios such as semantic retrieval and RAG, and can simply and effectively improve text retrieval performance. Given a query and a set of candidate texts (documents), the model ranks the candidates from highest to lowest based on their semantic relevance to the query.", + "capabilities": ["embedding", "rerank"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.11 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.11 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "irag-1-0", + "description": "Baidu's self-developed ERNIE iRAG (ERNIE image-based RAG), a retrieval-augmented text-to-image technology, combines Baidu Search's hundreds of millions of image resources with powerful foundational model capabilities to generate various ultra-realistic images. The overall effect far surpasses native text-to-image systems, eliminating the typical AI feel while maintaining low costs. 
ERNIE iRAG features no hallucinations, ultra-realism, and instant usability.", + "capabilities": ["image-generation"], + "inputModalities": ["text"], + "outputModalities": ["image"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + } + }, + { + "id": "jina-deepsearch-v1", + "description": "DeepSearch combines search, reading, and reasoning capabilities to pursue the best possible answer. It's fully compatible with OpenAI's Chat API format—just replace api.openai.com with aihubmix.com to get started. \nThe stream will return the thinking process.", + "capabilities": ["reasoning", "web-search"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 1000000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.05 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.05 + } + }, + "ownedBy": "jina" + }, + { + "id": "jina-embeddings-v4", + "description": "A general-purpose vector model with 3.8 billion parameters, used for multimodal and multilingual retrieval, supporting both unidirectional and multi-vector embedding outputs.", + "capabilities": ["embedding"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.05 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.05 + } + }, + "ownedBy": "jina" + }, + { + "id": "jina-reranker-v3", + "description": "Multimodal multilingual document reranker, 131K context, 0.6B parameters, for visual document sorting.", + "capabilities": ["rerank"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 131000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.05 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.05 + } + }, + "ownedBy": "jina" + }, + { + "id": "qwen-image", + "description": "Qwen-Image 
is a foundational image generation model in the Qwen series, achieving significant progress in complex text rendering and precise image editing. Experiments show that the model has strong general capabilities in image generation and editing, especially excelling in Chinese text rendering.", + "capabilities": ["image-generation", "function-call"], + "inputModalities": ["text", "image"], + "outputModalities": ["image"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "qwen-image-edit", + "description": "Qwen-Image-Edit is the image editing version of Qwen-Image. Based on the 20B Qwen-Image model, Qwen-Image-Edit successfully extends Qwen-Image's unique text rendering capabilities to image editing tasks, achieving precise text editing. Additionally, Qwen-Image-Edit can input the same image into Qwen2.5-VL (for visual semantic control) and the VAE encoder (for visual appearance control), enabling both semantic and appearance editing functionalities.", + "capabilities": ["image-generation", "function-call"], + "inputModalities": ["text", "image"], + "outputModalities": ["image"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "qwen-image-plus", + "description": "Qwen-Image is a foundational image generation model in the Qwen series, achieving significant progress in complex text rendering and precise image editing. 
Experiments show that the model has strong general capabilities in image generation and editing, especially excelling in Chinese text rendering.", + "capabilities": ["image-generation", "function-call"], + "inputModalities": ["text", "image"], + "outputModalities": ["image"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "qwen3-reranker", + "description": "Based on the dense foundational model of the Qwen3 series, it is specifically designed for ranking tasks. It inherits the base model’s outstanding multilingual capabilities, long-text understanding, and reasoning skills, achieving significant advancements in ranking tasks.", + "capabilities": ["function-call", "rerank", "reasoning"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 16000, + "maxOutputTokens": 8000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.11 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.11 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 38912 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "tao-8k", + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.068 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.068 + } + } + }, + { + "id": "bce-reranker-base", + "description": "Based on the dense foundational model of the Qwen3 series, it is specifically designed for ranking tasks. 
It inherits the base model’s outstanding multilingual capabilities, long-text understanding, and reasoning skills, achieving significant advancements in ranking tasks.", + "capabilities": ["rerank"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.068 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.068 + } + } + }, + { + "id": "jina-clip-v2", + "description": "Multi-modal Embeddings Model, multilingual, 1024-dimensional, 865M parameters.", + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.05 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.05 + } + }, + "ownedBy": "jina" + }, + { + "id": "jina-reranker-m0", + "description": "Multimodal multilingual document reranker, 10K context, 2.4B parameters, for visual document sorting.", + "capabilities": ["rerank"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.05 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.05 + } + }, + "ownedBy": "jina" + }, + { + "id": "jina-colbert-v2", + "description": "Multi-language ColBERT embeddings model, 560M parameters, used for embedding and reranking.", + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.05 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.05 + } + }, + "ownedBy": "jina" + }, + { + "id": "gpt-4o-search-preview", + "name": "OpenAI: GPT-4o Search Preview", + "description": "GPT-4o Search Preview is a specialized model for web search in Chat Completions. 
It is trained to understand and execute web search queries.", + "capabilities": ["structured-output", "web-search", "function-call", "image-recognition"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 16384, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2.5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 10 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 1.25 + } + }, + "ownedBy": "openai" + }, + { + "id": "jina-embeddings-v3", + "description": "Text Embeddings Model, multilingual, 1024-dimensional, 570M parameters.", + "capabilities": ["embedding"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.05 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.05 + } + }, + "ownedBy": "jina" + }, + { + "id": "ernie-4-5", + "description": "Wenxin Large Model 4.5 is a next-generation native multimodal foundational model independently developed by Baidu. 
It achieves collaborative optimization through joint modeling of multiple modalities, demonstrating excellent multimodal understanding capabilities; it possesses more advanced language abilities, with comprehensive improvements in comprehension, generation, logic, and memory, as well as significant enhancements in hallucination reduction, logical reasoning, and coding capabilities. ERNIE-4.5-21B-A3B is an aligned open-source model with a MoE structure, having a total of 21 billion parameters and 3 billion activated parameters.", + "capabilities": ["function-call", "structured-output"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 160000, + "maxOutputTokens": 64000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.068 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.272 + } + }, + "ownedBy": "baidu" + }, + { + "id": "ernie-4-5-turbo-vl", + "description": "The new version of the Wenxin Yiyan large model significantly improves capabilities in image understanding, creation, translation, and coding. It supports a context length of up to 32K tokens for the first time, with a notable reduction in the latency of the first token.", + "capabilities": ["function-call", "structured-output", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 139000, + "maxOutputTokens": 16000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.4 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.2 + } + }, + "ownedBy": "baidu" + }, + { + "id": "gemini-2-0-flash-preview-image-generation", + "description": "Gemini 2.0 Flash EXP is the official preview version of the drawing model. 
Compared to Imagen 3.0, Gemini’s image generation is better suited for scenarios that require contextual understanding and reasoning, rather than the pursuit of ultimate artistic performance and visual quality.", + "capabilities": ["image-generation", "function-call", "image-recognition", "web-search"], + "inputModalities": ["text", "image"], + "outputModalities": ["image"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.1 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.4 + } + }, + "ownedBy": "google" + }, + { + "id": "flux.1-kontext-pro", + "description": "Generate and edit images through both text and image prompts. Flux.1 Kontext is a multimodal flow matching model that enables both text-to-image generation and in-context image editing. Modify images while maintaining character consistency and performing local edits up to 8x faster than other leading models.", + "capabilities": ["image-generation"], + "inputModalities": ["text", "image"], + "outputModalities": ["image"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 40 + }, + "output": { + "currency": "USD", + "perMillionTokens": 40 + } + }, + "ownedBy": "bfl" + }, + { + "id": "flux-1-1-pro", + "description": "FLUX-1.1-pro is an AI image generation tool for professional creators and content workflows. It understands complex semantic and structural instructions to deliver high consistency, multi-image coherence, and style customization from text prompts.", + "capabilities": ["image-generation"], + "inputModalities": ["text", "image"], + "outputModalities": ["image"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 40 + }, + "output": { + "currency": "USD", + "perMillionTokens": 40 + } + }, + "ownedBy": "bfl" + }, + { + "id": "doubao-seed-1-6-lite", + "description": "Doubao-Seed-1.6-lite is a brand new multimodal deep reasoning model that supports adjustable reasoning effort, with four modes: Minimal, Low, Medium, and High. 
It offers better cost performance, making it the best choice for common tasks, with a context window of up to 256k.", + "capabilities": ["function-call", "structured-output", "reasoning", "image-recognition"], + "inputModalities": ["text", "image", "video"], + "outputModalities": ["text"], + "contextWindow": 256000, + "maxOutputTokens": 32000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.082 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.656 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.0164 + } + }, + "ownedBy": "bytedance" + }, + { + "id": "qwen-3-a22b-thinking-2507", + "description": "cerebras", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.28 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2.8 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "qwen2-vl-instruct", + "description": "The model provider is the Sophnet platform. Qwen2-VL-72B-Instruct is the latest iteration in the Qwen2-VL series launched by Alibaba Cloud, representing nearly a year of innovative achievements. This model has 72 billion parameters and can understand images of various resolutions and aspect ratios. 
Additionally, it supports video understanding of over 20 minutes, enabling high-quality video question answering, dialogue, and content creation, along with complex reasoning and decision-making capabilities.\n\n- State-of-the-art image understanding: capable of processing images of various resolutions and aspect ratios, performing excellently across multiple visual understanding benchmarks.\n- Long video understanding: supports video comprehension exceeding 20 minutes, enabling high-quality video Q&A, dialogues, and content creation.\n- Agent operation capability: equipped with complex reasoning and decision-making abilities, it can integrate with devices such as phones and robots to perform automated operations based on visual environments and textual instructions.\n- Multilingual support: in addition to English and Chinese, it supports understanding text in images in multiple languages, including most European languages, Japanese, Korean, Arabic, Vietnamese, and more.\n- Supports a maximum context length of 128K tokens, offering powerful processing capabilities.", + "capabilities": ["function-call", "image-recognition"], + "inputModalities": ["text", "image", "video"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2.18 + }, + "output": { + "currency": "USD", + "perMillionTokens": 6.54 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "embedding-2", + "description": "A text vector model that converts input text information into vector representations so that, in conjunction with a vector database, it provides an external knowledge base for the large model, thereby improving the accuracy of the model’s reasoning.", + "capabilities": ["embedding"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 8000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.0686 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.0686 + } + } + }, + { + "id": 
"embedding-3", + "description": "A text vector model that converts input text into vector representations to work with a vector database and provide an external knowledge base for a large model. The model supports custom vector dimensions; it is recommended to choose 256, 512, 1024, or 2048 dimensions.", + "capabilities": ["embedding"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 8000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.0686 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.0686 + } + } + }, + { + "id": "doubao-seed-1-6-250615", + "description": "Doubao-Seed-1.6 is a brand new multimodal deep reasoning model that supports four types of reasoning effort: minimal, low, medium, and high. It offers stronger model performance, serving complex tasks and challenging scenarios. It supports a 256k context window, with output length up to a maximum of 32k tokens.", + "capabilities": ["reasoning", "function-call", "image-recognition"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.18 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2.52 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.036 + } + }, + "ownedBy": "bytedance" + }, + { + "id": "doubao-seed-1-6-flash-250615", + "description": "Doubao-Seed-1.6-flash is an extremely fast multimodal deep thinking model, with TPOT requiring only 10ms. It supports both text and visual understanding, with its text comprehension skills surpassing the previous generation lite model and its visual understanding on par with competitor's pro series models. 
It supports a 256k context window and an output length of up to 16k tokens.", + "capabilities": ["reasoning", "function-call", "image-recognition"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.044 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.44 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.0088 + } + }, + "ownedBy": "bytedance" + }, + { + "id": "doubao-seed-1-6-thinking-250615", + "description": "The Doubao-Seed-1.6-thinking model has significantly enhanced reasoning capabilities. Compared with Doubao-1.5-thinking-pro, it has further improvements in fundamental abilities such as coding, mathematics, and logical reasoning, and now also supports visual understanding. It supports a 256k context window, with output length supporting up to 16k tokens.", + "capabilities": ["reasoning", "function-call", "image-recognition"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.18 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2.52 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.036 + } + }, + "ownedBy": "bytedance" + }, + { + "id": "gpt-4o-image-vip", + "description": "First Taste of GPT-4o's Image Generation API: Perfectly mirrors the web version's raw image creation capabilities, supporting both text-to-image and image+text-to-image generation. 
Each creation costs as little as $0.009.", + "capabilities": ["image-generation", "function-call"], + "inputModalities": ["text", "image"], + "outputModalities": ["image"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 7 + }, + "output": { + "currency": "USD", + "perMillionTokens": 7 + } + }, + "ownedBy": "openai" + }, + { + "id": "gpt-4o-image", + "description": "First Taste of GPT-4o's Image Generation API: Perfectly mirrors the web version's raw image creation capabilities, supporting both text-to-image and image+text-to-image generation. Each creation costs as little as $0.005.", + "capabilities": ["image-generation", "function-call"], + "inputModalities": ["text", "image"], + "outputModalities": ["image"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 3 + } + }, + "ownedBy": "openai" + }, + { + "id": "gpt-4o-mini-tts", + "description": "OpenAI’s latest TTS model, gpt-4o-mini-tts, uses the same API endpoint (/v1/audio/speech) as tts-1. However, OpenAI introduced a new pricing method without providing billing details via API, causing discrepancies between official pricing and aihubmix’s charges—some requests may cost more, others less. Avoid using this model if precise billing accuracy is essential.", + "capabilities": ["function-call", "image-recognition", "web-search"], + "inputModalities": ["audio"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.6 + }, + "output": { + "currency": "USD", + "perMillionTokens": 12 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.6 + } + }, + "ownedBy": "openai" + }, + { + "id": "gemini-2-0-flash-thinking-exp-01-21", + "description": "The latest version, Gemini 2.0 Flash Thinking mode, is an experimental model designed to generate the \"thought process\" that the model goes through during its responses. 
Therefore, Gemini 2.0 Flash Thinking mode has stronger reasoning capabilities in its responses compared to the base Gemini 2.0 Flash model.", + "capabilities": ["reasoning", "function-call", "image-recognition", "web-search"], + "inputModalities": ["text", "image", "audio", "video"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.076 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.304 + } + }, + "ownedBy": "google" + }, + { + "id": "ernie-x1-1-preview", + "description": "The Wenxin large model X1.1 has made significant improvements in question answering, tool invocation, intelligent agents, instruction following, logical reasoning, mathematics, and coding tasks, with notable enhancements in factual accuracy. The context length has been extended to 64K tokens, supporting longer inputs and dialogue history, which improves the coherence of long-chain reasoning while maintaining response speed.", + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 119000, + "maxOutputTokens": 64000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.136 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.544 + } + }, + "ownedBy": "baidu" + }, + { + "id": "bge-large-en", + "description": "bge-large-en, open-sourced by the Beijing Academy of Artificial Intelligence (BAAI), is currently the most powerful vector representation model for Chinese tasks, with its semantic representation capabilities comprehensively surpassing those of similar open-source models.", + "capabilities": ["function-call", "structured-output", "embedding"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.068 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.068 + } + }, + "ownedBy": "baai" + }, + { + "id": "bge-large-zh", + "description": "bge-large-zh, open-sourced by 
the Beijing Academy of Artificial Intelligence (BAAI), is currently the most powerful vector representation model for Chinese tasks, with its semantic representation capabilities comprehensively surpassing those of similar open-source models.", + "capabilities": ["function-call", "structured-output", "embedding"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.068 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.068 + } + }, + "ownedBy": "baai" + }, + { + "id": "ernie-4-5-turbo-128k-preview", + "description": "Wenxin 4.5 Turbo also shows significant enhancements in reducing hallucinations, logical reasoning, and coding capabilities. Compared to Wenxin 4.5, it is faster and more cost-effective.", + "capabilities": ["function-call", "structured-output"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.108 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.432 + } + }, + "ownedBy": "baidu" + }, + { + "id": "ernie-x1-turbo", + "description": "Wenxin Large Model X1 possesses enhanced abilities in understanding, planning, reflection, and evolution. 
As a more comprehensive deep-thinking model, Wenxin X1 combines accuracy, creativity, and literary elegance, excelling particularly in Chinese knowledge Q&A, literary creation, document writing, daily conversations, logical reasoning, complex calculations, and tool invocation.", + "capabilities": ["reasoning", "function-call", "structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 50500, + "maxOutputTokens": 28000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.136 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.544 + } + }, + "ownedBy": "baidu" + }, + { + "id": "moonlight-a3b-instruct", + "description": "Provided by chutes.ai.", + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.2 + } + } + }, + { + "id": "o1-global", + "description": "OpenAI new model", + "capabilities": ["reasoning", "function-call", "image-recognition"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 15 + }, + "output": { + "currency": "USD", + "perMillionTokens": 60 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 7.5 + } + }, + "reasoning": { + "supportedEfforts": ["low", "medium", "high"] + }, + "ownedBy": "openai" + }, + { + "id": "qianfan-qi-vl", + "description": "The Qianfan-QI-VL model is a proprietary image quality inspection and visual understanding large model (Quality Inspection Large Vision Language Model, Qianfan-QI-VL) developed by Baidu Cloud’s Qianfan platform. 
It is designed for quality inspection of product images uploaded in e-commerce scenarios, with detection capabilities including AIGC human defect detection, mosaic recognition, watermark recognition, and trademark detection.", + "capabilities": ["image-recognition"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.6 + } + } + }, + { + "id": "gemini-exp-1206", + "description": "Google's latest experimental model, currently Google's most powerful model.", + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.25 + }, + "output": { + "currency": "USD", + "perMillionTokens": 5 + } + }, + "ownedBy": "google" + }, + { + "id": "gpt-4o-zh", + "capabilities": ["function-call", "image-recognition", "web-search"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2.5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 10 + } + }, + "ownedBy": "openai" + }, + { + "id": "qwen-max-0125", + "description": "Qwen 2.5-Max latest model", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.38 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.52 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "bge-large-zh-v1-5", + "description": "BAAI/bge-large-zh-v1.5 is a large Chinese text embedding model and part of the BGE (BAAI General Embedding) series. It performs excellently on the C-MTEB benchmark, achieving an average score of 64.53 across 31 datasets, with outstanding results in tasks such as retrieval, semantic similarity, and text pair classification. 
The model supports a maximum input length of 512 tokens and is suitable for various Chinese natural language processing tasks, such as text retrieval and semantic similarity computation.", + "capabilities": ["function-call", "structured-output", "embedding"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.034 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.034 + } + }, + "ownedBy": "baai" + }, + { + "id": "gemini-2-0-flash-lite-preview-02-05", + "description": "Gemini 2.0 Flash lightweight version", + "capabilities": ["function-call", "image-recognition", "web-search"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.075 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.3 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.075 + } + }, + "ownedBy": "google" + }, + { + "id": "v3", + "description": "Fast and high-quality — top image quality in just 11 seconds per piece, with almost no extra time for batch generation.\nFlexible ratios — supports ultra-wide and tall formats like 3:1, 2:1, offering diverse perspectives.\nUnique strengths — outstanding design capabilities in the V3 and V2 series, with powerful text rendering (Chinese support coming soon).\nPrecise local editing — fine-tuned mask control for area redrawing (edit) and easy background replacement (replace-background).", + "capabilities": ["image-generation"], + "inputModalities": ["text", "image"], + "outputModalities": ["image"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2 + } + } + }, + { + "id": "v_2", + "description": "The Ideogram AI drawing interface is now live. 
This model boasts powerful text-to-image capabilities, supporting endpoints are: /generate, /remix, /edit.\nThis model is the stable V_2 version, highly recommended for editing.\nUS $0.08/ 1 IMG.\nFor usage examples and pricing details, refer to the documentation at https://docs.aihubmix.com/cn/api/IdeogramAI.", + "capabilities": ["image-generation"], + "inputModalities": ["text", "image"], + "outputModalities": ["image"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2 + } + } + }, + { + "id": "v_2_turbo", + "description": "The Ideogram AI drawing interface is now live. This model boasts powerful text-to-image capabilities, supporting endpoints are: /generate, /remix, /edit.\nThis model is the fast version of V_2, offering increased speed at the slight expense of quality.\nUS $0.05/ IMG.\nFor usage examples and pricing details, refer to the documentation at https://docs.aihubmix.com/cn/api/IdeogramAI.", + "capabilities": ["image-generation"], + "inputModalities": ["text", "image"], + "outputModalities": ["image"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2 + } + } + }, + { + "id": "v_2a", + "description": "The Ideogram AI drawing interface is now live. 
This model boasts powerful text-to-image capabilities, supporting endpoints are: /generate, /remix.\nThis model is the fast version of V_2, faster and cheaper.\nUS $0.04/ IMG.\nFor usage examples and pricing details, refer to the documentation at https://docs.aihubmix.com/cn/api/IdeogramAI.", + "capabilities": ["image-generation"], + "inputModalities": ["text", "image"], + "outputModalities": ["image"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2 + } + } + }, + { + "id": "v_2a_turbo", + "description": "The Ideogram AI drawing interface is now live. This model boasts powerful text-to-image capabilities, supporting endpoints are: /generate, /remix.\nThis model is the ultra-fast version of V_2, delivering the highest speed while slightly reducing quality.\nUS $0.025/ IMG.\nFor usage examples and pricing details, refer to the documentation at https://docs.aihubmix.com/cn/api/IdeogramAI.", + "capabilities": ["image-generation"], + "inputModalities": ["text", "image"], + "outputModalities": ["image"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2 + } + } + }, + { + "id": "v_1", + "description": "V_1 is a text-to-image model in the Ideogram series. It delivers strong text rendering capabilities, high photorealistic image quality, and precise prompt adherence. The model also introduces Magic Prompt, a new feature that automatically refines input prompts to generate more detailed and creative visuals.", + "capabilities": ["image-generation"], + "inputModalities": ["text", "image"], + "outputModalities": ["image"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2 + } + } + }, + { + "id": "v_1_turbo", + "description": "The Ideogram AI drawing interface is now live. 
This model boasts powerful text-to-image capabilities, supporting endpoints are: /generate, /remix.\nThis model is the ultra-fast version of the original V_1, offering increased speed at the slight expense of quality.\nUS $0.02/ IMG.\nFor usage examples and pricing details, refer to the documentation at https://docs.aihubmix.com/cn/api/IdeogramAI.", + "capabilities": ["image-generation"], + "inputModalities": ["text", "image"], + "outputModalities": ["image"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2 + } + } + }, + { + "id": "doubao-embedding-large-text-240915", + "description": "doubao-embedding-large-text-240915\nDoubao Embedding is a semantic vectorization model developed by ByteDance, primarily designed for vector search scenarios. It supports both Chinese and English languages and has a maximum context length of approximately 4K tokens.", + "capabilities": ["embedding"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.1 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.1 + } + }, + "ownedBy": "bytedance" + }, + { + "id": "kimi-thinking-preview", + "description": "The latest kimi model.", + "capabilities": ["reasoning", "image-recognition"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 30 + }, + "output": { + "currency": "USD", + "perMillionTokens": 30 + } + }, + "ownedBy": "moonshot" + }, + { + "id": "qwen-plus-2025-07-28", + "name": "Qwen: Qwen Plus 0728", + "description": "Qwen Plus 0728, based on the Qwen3 foundation model, is a 1 million context hybrid reasoning model with a balanced performance, speed, and cost combination.", + "capabilities": ["function-call", "structured-output", "reasoning"], + "inputModalities": ["text"], + "outputModalities": ["text"], + 
"contextWindow": 1000000, + "maxOutputTokens": 32768, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.39999999999999997 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.2 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.11 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 0, + "max": 81920 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "qwen-plus-latest", + "description": "The Qwen series models with balanced capabilities have inference performance and speed between Qwen-Max and Qwen-Turbo, making them suitable for moderately complex tasks. This model is a dynamically updated version, and updates will not be announced in advance. The current version is qwen-plus-2025-04-28.The model adopts tiered pricing.", + "capabilities": ["function-call", "reasoning"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.11 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.275 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.11 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 0, + "max": 81920 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "step3", + "description": "Step3 is a multimodal reasoning model released by StepFun. It uses a Mixture‑of‑Experts (MoE) architecture with 321 billion total parameters and 38 billion activation parameters. The model follows an end‑to‑end design that reduces decoding cost while delivering top‑tier performance on vision‑language reasoning tasks. Thanks to the combined use of Multi‑Head Factorized Attention (MFA) and Attention‑FFN Decoupling (AFD), Step3 remains highly efficient on both flagship and low‑end accelerators. 
During pre‑training, it processed over 20 trillion text tokens and 4 trillion image‑text mixed tokens, covering more than ten languages. On benchmarks for mathematics, code, and multimodal tasks, Step3 consistently outperforms other open‑source models.", + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.1 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2.75 + } + }, + "ownedBy": "stepfun" + }, + { + "id": "text-embedding-v4", + "description": "This is the Tongyi Laboratory's multilingual unified text vector model trained based on Qwen3, which significantly improves performance in text retrieval, clustering, and classification compared to version V3; it achieves a 15% to 40% improvement on evaluation tasks such as MTEB multilingual, Chinese-English, and code retrieval; supports user-defined vector dimensions ranging from 64 to 2048.", + "capabilities": ["embedding"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.08 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.08 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "qwen-turbo-latest", + "description": "The Qwen series model with the fastest speed and lowest cost, suitable for simple tasks. This model is a dynamically updated version, and updates will not be announced in advance. The model's overall Chinese and English abilities have been significantly improved, human preference alignment has been greatly enhanced, inference capability and complex instruction understanding have been substantially strengthened, performance on difficult tasks is better, and mathematics and coding skills have been significantly improved. 
The current version is qwen-turbo-2025-04-28.", + "capabilities": ["function-call", "reasoning"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.046 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.92 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 0, + "max": 38912 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "doubao-embedding-text-240715", + "description": "doubao-embedding-text-240715\nDoubao Embedding is a semantic vectorization model developed by ByteDance, primarily designed for vector search scenarios. It supports both Chinese and English languages and has a maximum context length of approximately 4K tokens.", + "capabilities": ["embedding"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.7 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.7 + } + }, + "ownedBy": "bytedance" + }, + { + "id": "deepseek-r1-zero", + "description": "Openly deployed by chutes.ai; inference with FP8; zero is the initial preliminary version of R1 without optimizations and is not recommended for use unless for research purposes.", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2.2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2.2 + } + }, + "ownedBy": "deepseek" + }, + { + "id": "grok-3-fast-beta", + "capabilities": ["function-call", "web-search"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 5.5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 27.5 + } + }, + "ownedBy": "xai" + }, + { + "id": "qwen-turbo-2025-04-28", + "description": "The Qwen3 series 
Turbo model effectively integrates thinking and non-thinking modes, allowing seamless switching between modes during conversations. With a smaller parameter size, its reasoning ability rivals that of QwQ-32B, and its general capabilities significantly surpass those of Qwen2.5-Turbo, reaching state-of-the-art (SOTA) levels among models of the same scale. This version is a snapshot model as of April 28, 2025.", + "capabilities": ["function-call", "reasoning"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.046 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.92 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 0, + "max": 38912 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "grok-3-mini-fast-beta", + "capabilities": ["reasoning", "function-call", "web-search"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.33 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2.20011 + } + }, + "reasoning": { + "supportedEfforts": ["low", "high"] + }, + "ownedBy": "xai" + }, + { + "id": "qwen-plus-2025-04-28", + "description": "The Qwen3 series Plus model effectively integrates thinking and non-thinking modes, allowing for mode switching during conversations. Its reasoning abilities significantly surpass those of QwQ, and its general capabilities are markedly superior to Qwen2.5-Plus, reaching state-of-the-art (SOTA) levels among models of the same scale. 
This version is a snapshot model as of April 28, 2025.", + "capabilities": ["function-call", "reasoning"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.13 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2.6 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 0, + "max": 38912 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "glm-4-1v", + "description": "GLM-4.1V-9B-Thinking is an open-source Vision Language Model (VLM) jointly released by Zhipu AI and the KEG Laboratory at Tsinghua University, designed specifically for handling complex multimodal cognitive tasks. Based on the GLM-4-9B-0414 foundation model, it significantly enhances cross-modal reasoning ability and stability by introducing the “Chain-of-Thought” reasoning mechanism and using reinforcement learning strategies. As a lightweight model with 9 billion parameters, it strikes a balance between deployment efficiency and performance. In 28 authoritative benchmark evaluations, it matched or even outperformed the 72-billion-parameter Qwen-2.5-VL-72B model in 18 tasks. 
The model excels not only in image-text understanding, mathematical and scientific reasoning, and video understanding, but also supports images up to 4K resolution and inputs of arbitrary aspect ratios.", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.04 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.16 + } + }, + "ownedBy": "zhipu" + }, + { + "id": "text-embedding-004", + "capabilities": ["embedding"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.02 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.02 + } + }, + "ownedBy": "google" + }, + { + "id": "doubao-seed-code-preview-latest", + "description": "claude code ", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.2 + } + }, + "reasoning": { + "supportedEfforts": ["none", "high"] + }, + "ownedBy": "bytedance" + }, + { + "id": "glm-zero-preview", + "description": "Simply put, it is the intelligent enhanced version of O1.", + "capabilities": ["reasoning"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2 + } + }, + "ownedBy": "zhipu" + }, + { + "id": "janus-pro", + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2 + } + } + }, + { + "id": "gemini-2-0-flash-thinking-exp-1219", + "description": "The Gemini 2.0 Flash Thinking mode is an experimental model designed to generate the 
\"thinking process\" that the model undergoes during its response. Therefore, the Gemini 2.0 Flash Thinking mode possesses stronger reasoning capabilities in its responses compared to the base Gemini 2.0 Flash model.", + "capabilities": ["reasoning", "function-call", "image-recognition", "web-search"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.076 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.304 + } + }, + "ownedBy": "google" + }, + { + "id": "o1-preview-2024-09-12", + "capabilities": ["reasoning"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 15 + }, + "output": { + "currency": "USD", + "perMillionTokens": 60 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 7.5 + } + }, + "reasoning": { + "supportedEfforts": ["low", "medium", "high"] + }, + "ownedBy": "openai" + }, + { + "id": "qvq-preview", + "capabilities": ["reasoning"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.2 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "qwq-preview", + "capabilities": ["reasoning"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.16 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.16 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "llama-3-1-sonar-huge-128k", + "description": "On February 22, 2025, this model will be officially discontinued. 
The Perplexity AI official fine-tuned LLMA internet-connected interface is currently only supported at the api.aihubmix.com address.", + "capabilities": ["web-search"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 5.6 + }, + "output": { + "currency": "USD", + "perMillionTokens": 5.6 + } + }, + "ownedBy": "meta" + }, + { + "id": "llama-3-1-sonar-large-128k", + "description": "On February 22, 2025, this model will be officially discontinued; Perplexity AI's official fine-tuned LLMA internet-connected interface is currently only supported at the api.aihubmix.com address.", + "capabilities": ["web-search"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.2 + } + }, + "ownedBy": "meta" + }, + { + "id": "mistral-large-2407", + "name": "Mistral Large 2407", + "description": "This is Mistral AI's flagship model, Mistral Large 2 (version mistral-large-2407). It's a proprietary weights-available model and excels at reasoning, code, JSON, chat, and more. Read the launch announcement [here](https://mistral.ai/news/mistral-large-2407/).\n\nIt supports dozens of languages including French, German, Spanish, Italian, Portuguese, Arabic, Hindi, Russian, Chinese, Japanese, and Korean, along with 80+ coding languages including Python, Java, C, C++, JavaScript, and Bash. 
Its long context window allows precise information recall from large documents.\n", + "capabilities": ["function-call", "structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 131072, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 6 + } + }, + "ownedBy": "mistral" + }, + { + "id": "gemini-2-0-flash-thinking-exp", + "capabilities": ["reasoning", "function-call", "image-recognition", "web-search"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.076 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.304 + } + }, + "ownedBy": "google" + }, + { + "id": "gpt-image-test", + "capabilities": ["image-generation"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 40 + } + }, + "ownedBy": "openai" + }, + { + "id": "imagen-3-0-generate-002", + "description": "Imagen 3.0 is Google's latest text-to-image generation model, capable of creating high-quality images from natural language prompts. Compared to its predecessors, Imagen 3.0 offers significant improvements in detail, lighting, and reduced visual artifacts. 
It supports rendering in various artistic styles, from photorealism to impressionism, as well as abstract and anime styles.", + "capabilities": ["image-generation", "video-generation"], + "inputModalities": ["text", "image"], + "outputModalities": ["image"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2 + } + }, + "ownedBy": "google" + }, + { + "id": "describe", + "description": "This endpoint is used to describe an image.\nSupported image formats include JPEG, PNG, and WebP.\nUS $0.01/ IMG.\nFor usage examples and pricing details, refer to the documentation at https://docs.aihubmix.com/cn/api/IdeogramAI.", + "capabilities": ["image-generation"], + "inputModalities": ["text", "image"], + "outputModalities": ["image"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2 + } + } + }, + { + "id": "upscale", + "description": "The super-resolution upscale interface of the Ideogram AI drawing model is designed to enlarge low-resolution images into high-resolution ones, redrawing details (with controllable similarity and detail proportions).\nUS $0.06/ IMG.\nFor usage examples and pricing details, refer to the documentation at https://docs.aihubmix.com/cn/api/IdeogramAI.", + "capabilities": ["image-generation"], + "inputModalities": ["text", "image"], + "outputModalities": ["image"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2 + } + } + }, + { + "id": "computer-use-preview", + "capabilities": ["computer-use"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 12 + } + } + }, + { + "id": "crush-glm-4-6", + "description": "just for crush", + "capabilities": 
["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + } + }, + { + "id": "o1-2024-12-17", + "capabilities": ["reasoning", "function-call", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 15 + }, + "output": { + "currency": "USD", + "perMillionTokens": 60 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 7.5 + } + }, + "reasoning": { + "supportedEfforts": ["low", "medium", "high"] + }, + "ownedBy": "openai" + }, + { + "id": "llama2-4096", + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.5 + } + }, + "ownedBy": "meta" + }, + { + "id": "llama2-40960", + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.5 + } + }, + "ownedBy": "meta" + }, + { + "id": "llama2-2048", + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.1 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.1 + } + }, + "ownedBy": "meta" + }, + { + "id": "llama3-8192(33)", + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2.65 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2.65 + } + }, + "ownedBy": "meta" + }, + { + "id": "llama3-groq-8192-tool-use-preview", + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.00089 + }, + "output": { + 
"currency": "USD", + "perMillionTokens": 0.00089 + } + }, + "ownedBy": "meta" + }, + { + "id": "llama3-chat", + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.3 + } + }, + "ownedBy": "meta" + }, + { + "id": "moonshot-kimi-k2-5", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.6 + }, + "output": { + "currency": "USD", + "perMillionTokens": 3 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.105 + } + }, + "ownedBy": "moonshot" + }, + { + "id": "moonshot-v1-128k", + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 10 + }, + "output": { + "currency": "USD", + "perMillionTokens": 10 + } + }, + "ownedBy": "moonshot" + }, + { + "id": "moonshot-v1-128k-vision-preview", + "capabilities": ["image-recognition"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 10 + }, + "output": { + "currency": "USD", + "perMillionTokens": 10 + } + }, + "ownedBy": "moonshot" + }, + { + "id": "moonshot-v1-32k", + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 4 + }, + "output": { + "currency": "USD", + "perMillionTokens": 4 + } + }, + "ownedBy": "moonshot" + }, + { + "id": "moonshot-v1-32k-vision-preview", + "capabilities": ["image-recognition"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 4 + }, + "output": { + "currency": "USD", + "perMillionTokens": 4 + } + }, + "ownedBy": "moonshot" + }, + { + "id": "moonshot-v1-8k", + "inputModalities": ["text"], + 
"outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2 + } + }, + "ownedBy": "moonshot" + }, + { + "id": "moonshot-v1-8k-vision-preview", + "capabilities": ["image-recognition"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2 + } + }, + "ownedBy": "moonshot" + }, + { + "id": "o1-mini-2024-09-12", + "capabilities": ["reasoning"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 12 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 1.5 + } + }, + "reasoning": { + "supportedEfforts": ["low", "medium", "high"] + }, + "ownedBy": "openai" + }, + { + "id": "omni-moderation-latest", + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.02 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.02 + } + } + }, + { + "id": "qwen-flash-2025-07-28", + "description": "The model adopts tiered pricing.", + "capabilities": ["function-call", "reasoning"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.02 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.2 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.02 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 0, + "max": 81920 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "qwen-max-longcontext", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": 
"USD", + "perMillionTokens": 7 + }, + "output": { + "currency": "USD", + "perMillionTokens": 21 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "qwen-turbo-2024-11-01", + "capabilities": ["function-call", "reasoning"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.36 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.08 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 0, + "max": 38912 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "text-ada-001", + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.4 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.4 + } + } + }, + { + "id": "text-babbage-001", + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.5 + } + } + }, + { + "id": "text-curie-001", + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2 + } + } + }, + { + "id": "text-davinci-002", + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 20 + }, + "output": { + "currency": "USD", + "perMillionTokens": 20 + } + } + }, + { + "id": "text-davinci-003", + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 20 + }, + "output": { + "currency": "USD", + "perMillionTokens": 20 + } + } + }, + { + "id": "text-davinci-edit-001", + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + 
"perMillionTokens": 20 + }, + "output": { + "currency": "USD", + "perMillionTokens": 20 + } + } + }, + { + "id": "text-embedding-v1", + "capabilities": ["embedding"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.1 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.1 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "text-moderation-007", + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.2 + } + } + }, + { + "id": "text-moderation-latest", + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.2 + } + } + }, + { + "id": "text-moderation-stable", + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.2 + } + } + }, + { + "id": "text-search-ada-doc-001", + "capabilities": ["web-search"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 20 + }, + "output": { + "currency": "USD", + "perMillionTokens": 20 + } + } + }, + { + "id": "tts-1", + "capabilities": ["audio-generation"], + "inputModalities": ["audio"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 15 + }, + "output": { + "currency": "USD", + "perMillionTokens": 15 + } + }, + "ownedBy": "openai" + }, + { + "id": "tts-1-1106", + "capabilities": ["audio-generation"], + "inputModalities": ["audio"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 15 + }, + "output": { + "currency": 
"USD", + "perMillionTokens": 15 + } + }, + "ownedBy": "openai" + }, + { + "id": "tts-1-hd", + "capabilities": ["audio-generation"], + "inputModalities": ["audio"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 30 + }, + "output": { + "currency": "USD", + "perMillionTokens": 30 + } + }, + "ownedBy": "openai" + }, + { + "id": "tts-1-hd-1106", + "capabilities": ["audio-generation"], + "inputModalities": ["audio"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 30 + }, + "output": { + "currency": "USD", + "perMillionTokens": 30 + } + }, + "ownedBy": "openai" + }, + { + "id": "veo3", + "description": "veo3 reverse access with a total cost of just $0.41 per video generation., OpenAI chat port compatible format.\nNote that this is a reverse interface, and charges are based on the number of requests. As long as a request is initiated, even if it returns a failure, you will be charged. 
If you cannot accept this, please do not use it.", + "capabilities": ["video-generation"], + "inputModalities": ["text", "image", "audio", "video"], + "outputModalities": ["video"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2 + } + }, + "ownedBy": "google" + }, + { + "id": "whisper-1", + "description": "Ignore the displayed price on the page; the actual charge for this model request is consistent with the official, so you can use it with confidence.", + "capabilities": ["audio-transcript"], + "inputModalities": ["audio"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 100 + }, + "output": { + "currency": "USD", + "perMillionTokens": 100 + } + }, + "ownedBy": "openai" + }, + { + "id": "whisper-large-v3-turbo", + "capabilities": ["audio-transcript"], + "inputModalities": ["audio"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 5.556 + }, + "output": { + "currency": "USD", + "perMillionTokens": 5.556 + } + }, + "ownedBy": "openai" + }, + { + "id": "yi-large", + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 3 + } + }, + "ownedBy": "01ai" + }, + { + "id": "yi-large-rag", + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 4 + }, + "output": { + "currency": "USD", + "perMillionTokens": 4 + } + }, + "ownedBy": "01ai" + }, + { + "id": "yi-large-turbo", + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.8 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.8 + } + }, + "ownedBy": "01ai" + }, + { + "id": "yi-lightning", + "inputModalities": ["text"], 
+ "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.2 + } + }, + "ownedBy": "01ai" + }, + { + "id": "yi", + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.4 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.4 + } + } + }, + { + "id": "yi-vl-plus", + "capabilities": ["image-recognition"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.000852 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.000852 + } + }, + "ownedBy": "01ai" + }, + { + "id": "fx-flux-2-pro", + "capabilities": ["image-generation"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + } + }, + { + "id": "gemini-2-0-flash-exp-image-generation", + "capabilities": ["function-call", "image-recognition", "image-generation", "web-search"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.1 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.4 + } + }, + "ownedBy": "google" + }, + { + "id": "gemini-2-5-pro-exp-03-25", + "description": "Google’s latest experimental model, highly unstable, for experience only.\nIt boasts strong reasoning and coding capabilities, able to \"think\" before responding, enhancing performance and accuracy in complex tasks. 
It supports multimodal inputs (text, audio, images, video) and a 1 million token context window, suitable for advanced programming, math, and science tasks.\n\nThis means Gemini 2.5 can handle more complex problems in coding, science and math, and support more context-aware agents.", + "capabilities": ["structured-output", "function-call", "reasoning", "image-recognition", "web-search"], + "inputModalities": ["text", "image", "audio", "video"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.25 + }, + "output": { + "currency": "USD", + "perMillionTokens": 5 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.125 + } + }, + "reasoning": { + "supportedEfforts": ["low", "medium", "high"], + "thinkingTokenLimits": { + "min": 128, + "max": 32768 + } + }, + "ownedBy": "google" + }, + { + "id": "gemini-embedding-exp-03-07", + "capabilities": ["embedding"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.02 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.02 + } + }, + "ownedBy": "google" + }, + { + "id": "gemini-exp-1114", + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.25 + }, + "output": { + "currency": "USD", + "perMillionTokens": 5 + } + }, + "ownedBy": "google" + }, + { + "id": "gemini-exp-1121", + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.25 + }, + "output": { + "currency": "USD", + "perMillionTokens": 5 + } + }, + "ownedBy": "google" + }, + { + "id": "gemini-pro", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.6 + } + }, + 
"ownedBy": "google" + }, + { + "id": "gemini-pro-vision", + "capabilities": ["function-call", "image-recognition"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1 + } + }, + "ownedBy": "google" + }, + { + "id": "gemma-it", + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.1 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.1 + } + }, + "ownedBy": "google" + }, + { + "id": "glm-3-turbo", + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.71 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.71 + } + }, + "ownedBy": "zhipu" + }, + { + "id": "glm-4", + "name": "Z.ai: GLM 4 32B ", + "description": "GLM 4 32B is a cost-effective foundation language model.\n\nIt can efficiently perform complex tasks and has significantly enhanced capabilities in tool use, online search, and code-related intelligent tasks.\n\nIt is made by the same lab behind the thudm models.", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.09999999999999999 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.09999999999999999 + } + }, + "ownedBy": "zhipu" + }, + { + "id": "glm-4-flash", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.1 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.1 + } + }, + "ownedBy": "zhipu" + }, + { + "id": "glm-4-plus", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + 
"input": { + "currency": "USD", + "perMillionTokens": 8 + }, + "output": { + "currency": "USD", + "perMillionTokens": 8 + } + }, + "ownedBy": "zhipu" + }, + { + "id": "glm-4-5-airx", + "description": "GLM-4.5-AirX is the high-speed version of GLM-4.5-Air, with faster response times, specifically designed for large-scale high-speed demands.", + "capabilities": ["function-call", "reasoning"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.1 + }, + "output": { + "currency": "USD", + "perMillionTokens": 4.51 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.22 + } + }, + "reasoning": { + "supportedEfforts": ["none"], + "interleaved": true + }, + "ownedBy": "zhipu" + }, + { + "id": "glm-4v", + "capabilities": ["function-call", "image-recognition"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 14.2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 14.2 + } + }, + "ownedBy": "zhipu" + }, + { + "id": "glm-4v-plus", + "capabilities": ["function-call", "image-recognition"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2 + } + }, + "ownedBy": "zhipu" + }, + { + "id": "gpt-3-5-turbo-16k", + "name": "OpenAI: GPT-3.5 Turbo 16k", + "description": "This model offers four times the context length of gpt-3.5-turbo, allowing it to support approximately 20 pages of text in a single request at a higher cost. 
Training data: up to Sep 2021.", + "capabilities": ["function-call", "structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 16385, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 4 + } + }, + "ownedBy": "openai" + }, + { + "id": "gpt-3-5-turbo-16k-0613", + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 4 + } + }, + "ownedBy": "openai" + }, + { + "id": "gpt-4-0125-preview", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 10 + }, + "output": { + "currency": "USD", + "perMillionTokens": 30 + } + }, + "ownedBy": "openai" + }, + { + "id": "gpt-4-0314", + "name": "OpenAI: GPT-4 (older v0314)", + "description": "GPT-4-0314 is the first version of GPT-4 released, with a context length of 8,192 tokens, and was supported until June 14. 
Training data: up to Sep 2021.", + "capabilities": ["function-call", "structured-output", "image-recognition"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 8191, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 30 + }, + "output": { + "currency": "USD", + "perMillionTokens": 60 + } + }, + "ownedBy": "openai" + }, + { + "id": "gpt-4-0613", + "capabilities": ["function-call", "image-recognition"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 30 + }, + "output": { + "currency": "USD", + "perMillionTokens": 60 + } + }, + "ownedBy": "openai" + }, + { + "id": "gpt-4-1106-preview", + "name": "OpenAI: GPT-4 Turbo (older v1106)", + "description": "The latest GPT-4 Turbo model with vision capabilities. Vision requests can now use JSON mode and function calling.\n\nTraining data: up to April 2023.", + "capabilities": ["function-call", "structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 10 + }, + "output": { + "currency": "USD", + "perMillionTokens": 30 + } + }, + "ownedBy": "openai" + }, + { + "id": "gpt-4-32k-0314", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 60 + }, + "output": { + "currency": "USD", + "perMillionTokens": 120 + } + }, + "ownedBy": "openai" + }, + { + "id": "gpt-4-32k-0613", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 60 + }, + "output": { + "currency": "USD", + "perMillionTokens": 120 + } + }, + "ownedBy": "openai" + }, + { + "id": "gpt-4-turbo-2024-04-09", + "capabilities": 
["function-call", "image-recognition"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 10 + }, + "output": { + "currency": "USD", + "perMillionTokens": 30 + } + }, + "ownedBy": "openai" + }, + { + "id": "gpt-4-turbo-preview", + "name": "OpenAI: GPT-4 Turbo Preview", + "description": "The preview GPT-4 model with improved instruction following, JSON mode, reproducible outputs, parallel function calling, and more. Training data: up to Dec 2023.\n\n**Note:** heavily rate limited by OpenAI while in preview.", + "capabilities": ["function-call", "structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 10 + }, + "output": { + "currency": "USD", + "perMillionTokens": 30 + } + }, + "ownedBy": "openai" + }, + { + "id": "gpt-4-vision-preview", + "capabilities": ["function-call", "image-recognition"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 10 + }, + "output": { + "currency": "USD", + "perMillionTokens": 30 + } + }, + "ownedBy": "openai" + }, + { + "id": "gpt-4o-mini-2024-07-18", + "name": "OpenAI: GPT-4o-mini (2024-07-18)", + "description": "GPT-4o mini is OpenAI's newest model after [GPT-4 Omni](/models/openai/gpt-4o), supporting both text and image inputs with text outputs.\n\nAs their most advanced small model, it is many multiples more affordable than other recent frontier models, and more than 60% cheaper than [GPT-3.5 Turbo](/models/openai/gpt-3.5-turbo). 
It maintains SOTA intelligence, while being significantly more cost-effective.\n\nGPT-4o mini achieves an 82% score on MMLU and presently ranks higher than GPT-4 on chat preferences [common leaderboards](https://arena.lmsys.org/).\n\nCheck out the [launch announcement](https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/) to learn more.\n\n#multimodal", + "capabilities": ["function-call", "structured-output", "image-recognition", "web-search"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 16384, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.15 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.6 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.075 + } + }, + "ownedBy": "openai" + }, + { + "id": "imagen-4-0-generate-preview-05-20", + "description": "Google's latest raw image model", + "capabilities": ["image-generation"], + "inputModalities": ["text", "image"], + "outputModalities": ["image"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2 + } + }, + "ownedBy": "google" + }, + { + "id": "jina-embeddings-v2-base-code", + "description": "Model optimized for code and document search, 768-dimensional, 137M parameters.", + "capabilities": ["embedding"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.05 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.05 + } + }, + "ownedBy": "jina" + }, + { + "id": "learnlm-1-5-pro-experimental", + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.25 + }, + "output": { + "currency": "USD", + "perMillionTokens": 5 + } + }, + "ownedBy": "google" + }, + { + "id": "llama-3-1-versatile", + "inputModalities": 
["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.6 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.6 + } + }, + "ownedBy": "meta" + }, + { + "id": "llama-3-1-sonar-small-128k", + "description": "On February 22, 2025, this model will be officially discontinued. The Perplexity AI official fine-tuned LLMA online interface is currently supported only at the api.aihubmix.com address.", + "capabilities": ["web-search"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.3 + } + }, + "ownedBy": "meta" + }, + { + "id": "llama-3-2-vision-preview", + "capabilities": ["image-recognition"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.2 + } + }, + "ownedBy": "meta" + }, + { + "id": "llama-3-2-preview", + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.2 + } + }, + "ownedBy": "meta" + }, + { + "id": "baichuan3-turbo", + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.9 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.9 + } + }, + "ownedBy": "baichuan" + }, + { + "id": "baichuan3-turbo-128k", + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 3.8 + }, + "output": { + "currency": "USD", + "perMillionTokens": 3.8 + } + }, + "ownedBy": "baichuan" + }, + { + "id": "baichuan4", + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + 
"currency": "USD", + "perMillionTokens": 16 + }, + "output": { + "currency": "USD", + "perMillionTokens": 16 + } + }, + "ownedBy": "baichuan" + }, + { + "id": "baichuan4-air", + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.16 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.16 + } + }, + "ownedBy": "baichuan" + }, + { + "id": "baichuan4-turbo", + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2.4 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2.4 + } + }, + "ownedBy": "baichuan" + }, + { + "id": "doubao-1-5-lite-32k", + "description": "Doubao-1.5-lite, a brand-new generation of lightweight model, offers exceptional response speed with both performance and latency reaching world-class levels. It supports a 32k context window and an output length of up to 12k tokens.", + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.05 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.1 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.01 + } + }, + "ownedBy": "bytedance" + }, + { + "id": "doubao-1-5-pro-256k", + "description": "Doubao-1.5-pro-256k, a fully upgraded version based on Doubao-1.5-Pro, delivers an overall performance improvement of 10%. It supports inference with a 256k context window and an output length of up to 12k tokens. 
With higher performance, larger window size, and exceptional cost-effectiveness, it is suitable for a wider range of application scenarios.", + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.8 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.44 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.8 + } + }, + "ownedBy": "bytedance" + }, + { + "id": "doubao-1-5-vision-pro-32k", + "description": "Doubao-1.5-vision-pro is a newly upgraded multimodal large model that supports image recognition at any resolution and extreme aspect ratios. It enhances visual reasoning, document recognition, detailed information understanding, and instruction-following capabilities. It supports a 32k context window and an output length of up to 12k tokens.", + "capabilities": ["image-recognition"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.46 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.38 + } + }, + "ownedBy": "bytedance" + }, + { + "id": "doubao-lite-128k", + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.14 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.28 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.14 + } + }, + "ownedBy": "bytedance" + }, + { + "id": "doubao-lite-32k", + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.06 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.12 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.012 + } + }, + "ownedBy": "bytedance" + }, + { + "id": "doubao-lite-4k", + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + 
"perMillionTokens": 0.06 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.12 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.06 + } + }, + "ownedBy": "bytedance" + }, + { + "id": "doubao-pro-128k", + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.8 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.44 + } + }, + "ownedBy": "bytedance" + }, + { + "id": "doubao-pro-256k", + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.8 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.44 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.8 + } + }, + "ownedBy": "bytedance" + }, + { + "id": "doubao-pro-32k", + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.14 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.35 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.028 + } + }, + "ownedBy": "bytedance" + }, + { + "id": "doubao-pro-4k", + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.14 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.35 + } + }, + "ownedBy": "bytedance" + }, + { + "id": "minimax-text-01", + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.14 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.12 + } + }, + "ownedBy": "minimax" + }, + { + "id": "qwen2-instruct", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.2 + } + }, + 
"ownedBy": "alibaba" + }, + { + "id": "qwen2-a14b-instruct", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.24 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.24 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "stable-diffusion-3-5-large", + "description": "Stable Diffusion 3.5 Large, developed by Stability AI, is a text-to-image generation model that supports high-quality image creation with excellent prompt responsiveness and customization, suitable for professional applications.", + "capabilities": ["image-generation"], + "inputModalities": ["text", "image"], + "outputModalities": ["image"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 4 + }, + "output": { + "currency": "USD", + "perMillionTokens": 4 + } + }, + "ownedBy": "stability" + }, + { + "id": "wizardcoder-python-v1-0", + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.9 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.9 + } + } + }, + { + "id": "phi-3-medium-128k", + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 6 + }, + "output": { + "currency": "USD", + "perMillionTokens": 18 + } + }, + "ownedBy": "microsoft" + }, + { + "id": "phi-3-medium-4k", + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1 + }, + "output": { + "currency": "USD", + "perMillionTokens": 3 + } + }, + "ownedBy": "microsoft" + }, + { + "id": "phi-3-small-128k", + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1 + }, + "output": { + "currency": "USD", + "perMillionTokens": 3 + } + }, + "ownedBy": "microsoft" + }, + { + 
"id": "cohere-command-r", + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.64 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.92 + } + } + }, + { + "id": "llama-3-2-vision", + "capabilities": ["image-recognition"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.4 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.4 + } + }, + "ownedBy": "meta" + }, + { + "id": "cerebras-llama-3-3", + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.6 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.6 + } + } + }, + { + "id": "chatglm_lite", + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.2858 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.2858 + } + } + }, + { + "id": "chatglm_pro", + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.4286 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.4286 + } + } + }, + { + "id": "chatglm_std", + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.7144 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.7144 + } + } + }, + { + "id": "chatglm_turbo", + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.7144 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.7144 + } + } + }, + { + "id": "claude-2", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + 
"perMillionTokens": 8.8 + }, + "output": { + "currency": "USD", + "perMillionTokens": 8.8 + } + }, + "ownedBy": "anthropic" + }, + { + "id": "claude-2-0", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 8.8 + }, + "output": { + "currency": "USD", + "perMillionTokens": 39.6 + } + }, + "ownedBy": "anthropic" + }, + { + "id": "claude-2-1", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 8.8 + }, + "output": { + "currency": "USD", + "perMillionTokens": 39.6 + } + }, + "ownedBy": "anthropic" + }, + { + "id": "claude-3-5-sonnet@20240620", + "capabilities": ["function-call", "image-recognition", "web-search", "computer-use"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 3.3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 16.5 + } + }, + "ownedBy": "anthropic" + }, + { + "id": "claude-3-haiku-20240229", + "capabilities": ["function-call", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.275 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.275 + } + }, + "ownedBy": "anthropic" + }, + { + "id": "claude-3-haiku@20240307", + "capabilities": ["function-call", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.275 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.375 + } + }, + "ownedBy": "anthropic" + }, + { + "id": "claude-3-opus@20240229", + "capabilities": ["function-call", "image-recognition"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + 
"input": { + "currency": "USD", + "perMillionTokens": 16.5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 82.5 + } + }, + "ownedBy": "anthropic" + }, + { + "id": "claude-instant-1", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.793 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.793 + } + }, + "ownedBy": "anthropic" + }, + { + "id": "claude-instant-1-2", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.88 + }, + "output": { + "currency": "USD", + "perMillionTokens": 3.96 + } + }, + "ownedBy": "anthropic" + }, + { + "id": "code-davinci-edit-001", + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 20 + }, + "output": { + "currency": "USD", + "perMillionTokens": 20 + } + } + }, + { + "id": "cogview-3", + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 35.5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 35.5 + } + }, + "ownedBy": "zhipu" + }, + { + "id": "cogview-3-plus", + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 10 + }, + "output": { + "currency": "USD", + "perMillionTokens": 10 + } + }, + "ownedBy": "zhipu" + }, + { + "id": "command", + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2 + } + }, + "ownedBy": "cohere" + }, + { + "id": "command-light", + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + 
"perMillionTokens": 1 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2 + } + }, + "ownedBy": "cohere" + }, + { + "id": "command-light-nightly", + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2 + } + }, + "ownedBy": "cohere" + }, + { + "id": "command-nightly", + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2 + } + }, + "ownedBy": "cohere" + }, + { + "id": "dall-e-2", + "capabilities": ["image-generation"], + "inputModalities": ["text", "image"], + "outputModalities": ["image"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 16 + }, + "output": { + "currency": "USD", + "perMillionTokens": 16 + } + }, + "ownedBy": "openai" + }, + { + "id": "daocloud-deepseek-v3-2", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.22 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.33 + } + } + }, + { + "id": "davinci", + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 20 + }, + "output": { + "currency": "USD", + "perMillionTokens": 20 + } + }, + "ownedBy": "openai" + }, + { + "id": "davinci-002", + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2 + } + }, + "ownedBy": "openai" + }, + { + "id": "llama-3-3-instant-turbo", + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.11 + }, + "output": 
{ + "currency": "USD", + "perMillionTokens": 0.352 + } + }, + "ownedBy": "meta" + }, + { + "id": "deepseek-coder-v2-instruct", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.16 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.32 + } + }, + "ownedBy": "deepseek" + }, + { + "id": "deepseek-v2-chat", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.16 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.32 + } + }, + "ownedBy": "deepseek" + }, + { + "id": "deepseek-v2-5", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.16 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.32 + } + }, + "ownedBy": "deepseek" + }, + { + "id": "deepseek-llm-chat", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.16 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.16 + } + }, + "ownedBy": "deepseek" + }, + { + "id": "distil-whisper-large-v3-en", + "capabilities": ["audio-transcript"], + "inputModalities": ["audio"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 5.556 + }, + "output": { + "currency": "USD", + "perMillionTokens": 5.556 + } + } + }, + { + "id": "doubao-1-5-thinking-vision-pro-250428", + "description": "Deep Thinking \nImage Understanding \nVisual Localization \nVideo Understanding \nTool Invocation \nStructured Output", + "capabilities": ["reasoning", "image-recognition"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": 
"USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 2 + } + }, + "reasoning": { + "supportedEfforts": ["none", "high"] + }, + "ownedBy": "bytedance" + }, + { + "id": "deepseek-r1-distill-qianfan-llama", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.137 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.548 + } + }, + "ownedBy": "deepseek" + }, + { + "id": "doubao-1-5-pro-256k-250115", + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.684 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.2312 + } + }, + "ownedBy": "bytedance" + }, + { + "id": "doubao-1-5-pro-32k-250115", + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.108 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.27 + } + }, + "ownedBy": "bytedance" + }, + { + "id": "gpt-4o-2024-08-06-global", + "capabilities": ["function-call", "image-recognition", "web-search"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2.5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 10 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 1.25 + } + }, + "ownedBy": "openai" + }, + { + "id": "gpt-4o-mini-global", + "capabilities": ["function-call", "image-recognition", "web-search"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.15 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.6 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.075 + } + }, + 
"ownedBy": "openai" + }, + { + "id": "llama-3", + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 4.795 + }, + "output": { + "currency": "USD", + "perMillionTokens": 4.795 + } + }, + "ownedBy": "meta" + }, + { + "id": "o3-global", + "capabilities": ["reasoning", "function-call", "image-recognition", "web-search"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 8 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.5 + } + }, + "ownedBy": "openai" + }, + { + "id": "o3-mini-global", + "capabilities": ["reasoning", "function-call", "web-search"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.1 + }, + "output": { + "currency": "USD", + "perMillionTokens": 4.4 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.55 + } + }, + "reasoning": { + "supportedEfforts": ["low", "medium", "high"] + }, + "ownedBy": "openai" + }, + { + "id": "o3-pro-global", + "capabilities": ["reasoning", "function-call", "image-recognition", "web-search"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 20 + }, + "output": { + "currency": "USD", + "perMillionTokens": 80 + } + }, + "reasoning": { + "supportedEfforts": ["low", "medium", "high"] + }, + "ownedBy": "openai" + }, + { + "id": "qianfan-chinese-llama-2", + "inputModalities": ["text"], + "outputModalities": ["text"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.822 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.822 + } + } + }, + { + "id": "qianfan-llama-vl", + "capabilities": ["image-recognition"], + "inputModalities": ["text"], + "outputModalities": ["text"], + 
"pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.274 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.685 + } + } + }, + { + "id": "qwen3-max-thinking", + "name": "Qwen: Qwen3 Max Thinking", + "description": "Qwen3-Max-Thinking is the flagship reasoning model in the Qwen3 series, designed for high-stakes cognitive tasks that require deep, multi-step reasoning. By significantly scaling model capacity and reinforcement learning compute, it delivers major gains in factual accuracy, complex reasoning, instruction following, alignment with human preferences, and agentic behavior.", + "capabilities": ["function-call", "structured-output", "reasoning"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 262144, + "maxOutputTokens": 65536, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 6 + } + }, + "reasoning": { + "supportedEfforts": ["low", "medium", "high"] + }, + "ownedBy": "alibaba" + }, + { + "id": "free", + "name": "Free Models Router", + "description": "The simplest way to get free inference. openrouter/free is a router that selects free models at random from the models available on OpenRouter. The router smartly filters for models that support features needed for your request such as image understanding, tool calling, structured outputs and more. ", + "capabilities": ["function-call", "structured-output", "reasoning", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000 + }, + { + "id": "solar-pro-3", + "name": "Upstage: Solar Pro 3 (free)", + "description": "Solar Pro 3 is Upstage's powerful Mixture-of-Experts (MoE) language model. With 102B total parameters and 12B active parameters per forward pass, it delivers exceptional performance while maintaining computational efficiency. 
Optimized for Korean with English and Japanese support.", + "capabilities": ["function-call", "structured-output", "reasoning"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "ownedBy": "upstageai" + }, + { + "id": "minimax-m2-her", + "name": "MiniMax: MiniMax M2-her", + "description": "MiniMax M2-her is a dialogue-first large language model built for immersive roleplay, character-driven chat, and expressive multi-turn conversations. Designed to stay consistent in tone and personality, it supports rich message roles (user_system, group, sample_message_user, sample_message_ai) and can learn from example dialogue to better match the style and pacing of your scenario, making it a strong choice for storytelling, companions, and conversational experiences where natural flow and vivid interaction matter most.", + "capabilities": ["reasoning", "function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 65536, + "maxOutputTokens": 2048, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.2 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.03 + } + }, + "reasoning": { + "supportedEfforts": [], + "interleaved": true + }, + "ownedBy": "minimax" + }, + { + "id": "gpt-audio", + "name": "OpenAI: GPT Audio", + "description": "The gpt-audio model is OpenAI's first generally available audio model. The new snapshot features an upgraded decoder for more natural sounding voices and maintains better voice consistency. 
Audio is priced at $32 per million input tokens and $64 per million output tokens.", + "capabilities": ["structured-output", "audio-recognition", "audio-generation"], + "inputModalities": ["text", "audio"], + "outputModalities": ["text", "audio"], + "contextWindow": 128000, + "maxOutputTokens": 16384, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2.5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 10 + } + }, + "ownedBy": "openai" + }, + { + "id": "gpt-audio-mini", + "name": "OpenAI: GPT Audio Mini", + "description": "A cost-efficient version of GPT Audio. The new snapshot features an upgraded decoder for more natural sounding voices and maintains better voice consistency. Input is priced at $0.60 per million tokens and output is priced at $2.40 per million tokens.", + "capabilities": ["structured-output", "audio-recognition", "audio-generation"], + "inputModalities": ["text", "audio"], + "outputModalities": ["text", "audio"], + "contextWindow": 128000, + "maxOutputTokens": 16384, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.6 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2.4 + } + }, + "ownedBy": "openai" + }, + { + "id": "olmo-3-1-instruct", + "name": "AllenAI: Olmo 3.1 32B Instruct", + "description": "Olmo 3.1 32B Instruct is a large-scale, 32-billion-parameter instruction-tuned language model engineered for high-performance conversational AI, multi-turn dialogue, and practical instruction following. As part of the Olmo 3.1 family, this variant emphasizes responsiveness to complex user directions and robust chat interactions while retaining strong capabilities on reasoning and coding benchmarks. 
Developed by Ai2 under the Apache 2.0 license, Olmo 3.1 32B Instruct reflects the Olmo initiative’s commitment to openness and transparency.", + "capabilities": ["function-call", "structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 65536, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.19999999999999998 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.6 + } + }, + "ownedBy": "ai2" + }, + { + "id": "seed-1-6-flash", + "name": "ByteDance Seed: Seed 1.6 Flash", + "description": "Seed 1.6 Flash is an ultra-fast multimodal deep thinking model by ByteDance Seed, supporting both text and visual understanding. It features a 256k context window and can generate outputs of up to 16k tokens.", + "capabilities": ["function-call", "structured-output", "reasoning", "image-recognition", "video-recognition"], + "inputModalities": ["image", "text", "video"], + "outputModalities": ["text"], + "contextWindow": 262144, + "maxOutputTokens": 32768, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.075 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.3 + } + }, + "ownedBy": "bytedance" + }, + { + "id": "mistral-small-creative", + "name": "Mistral: Mistral Small Creative", + "description": "Mistral Small Creative is an experimental small model designed for creative writing, narrative generation, roleplay and character-driven dialogue, general-purpose instruction following, and conversational agents.", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 32768, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.09999999999999999 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.3 + } + }, + "ownedBy": "mistral" + }, + { + "id": "olmo-3-1-think", + "name": "AllenAI: Olmo 3.1 32B Think", + "description": "Olmo 3.1 32B Think is a large-scale, 32-billion-parameter 
model designed for deep reasoning, complex multi-step logic, and advanced instruction following. Building on the Olmo 3 series, version 3.1 delivers refined reasoning behavior and stronger performance across demanding evaluations and nuanced conversational tasks. Developed by Ai2 under the Apache 2.0 license, Olmo 3.1 32B Think continues the Olmo initiative’s commitment to openness, providing full transparency across model weights, code, and training methodology.", + "capabilities": ["structured-output", "reasoning"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 65536, + "maxOutputTokens": 65536, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.15 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.5 + } + }, + "ownedBy": "ai2" + }, + { + "id": "relace-search", + "name": "Relace: Relace Search", + "description": "The relace-search model uses 4-12 `view_file` and `grep` tools in parallel to explore a codebase and return relevant files to the user request. \n\nIn contrast to RAG, relace-search performs agentic multi-step reasoning to produce highly precise results 4x faster than any frontier model. It's designed to serve as a subagent that passes its findings to an \"oracle\" coding agent, who orchestrates/performs the rest of the coding task.\n\nTo use relace-search you need to build an appropriate agent harness, and parse the response for relevant information to hand off to the oracle. 
Read more about it in the [Relace documentation](https://docs.relace.ai/docs/fast-agentic-search/agent).", + "capabilities": ["function-call", "web-search"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 256000, + "maxOutputTokens": 128000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1 + }, + "output": { + "currency": "USD", + "perMillionTokens": 3 + } + }, + "ownedBy": "relace" + }, + { + "id": "bodybuilder", + "name": "Body Builder (beta)", + "description": "Transform your natural language requests into structured OpenRouter API request objects. Describe what you want to accomplish with AI models, and Body Builder will construct the appropriate API calls. Example: \"count to 10 using gemini and opus.\"\n\nThis is useful for creating multi-model requests, custom model routers, or programmatic generation of API calls from human descriptions.\n\n**BETA NOTICE**: Body Builder is in beta, and currently free. Pricing and functionality may change in the future.", + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000 + }, + { + "id": "olmo-3-think", + "name": "AllenAI: Olmo 3 32B Think", + "description": "Olmo 3 32B Think is a large-scale, 32-billion-parameter model purpose-built for deep reasoning, complex logic chains and advanced instruction-following scenarios. Its capacity enables strong performance on demanding evaluation tasks and highly nuanced conversational reasoning. 
Developed by Ai2 under the Apache 2.0 license, Olmo 3 32B Think embodies the Olmo initiative’s commitment to openness, offering full transparency across weights, code and training methodology.", + "capabilities": ["structured-output", "reasoning"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 65536, + "maxOutputTokens": 65536, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.15 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.5 + } + }, + "ownedBy": "ai2" + }, + { + "id": "olmo-3-instruct", + "name": "AllenAI: Olmo 3 7B Instruct", + "description": "Olmo 3 7B Instruct is a supervised instruction-fine-tuned variant of the Olmo 3 7B base model, optimized for instruction-following, question-answering, and natural conversational dialogue. By leveraging high-quality instruction data and an open training pipeline, it delivers strong performance across everyday NLP tasks while remaining accessible and easy to integrate. Developed by Ai2 under the Apache 2.0 license, the model offers a transparent, community-friendly option for instruction-driven applications.", + "capabilities": ["structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 65536, + "maxOutputTokens": 65536, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.09999999999999999 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.19999999999999998 + } + }, + "ownedBy": "ai2" + }, + { + "id": "cogito-v2-1", + "name": "Deep Cogito: Cogito v2.1 671B", + "description": "Cogito v2.1 671B MoE represents one of the strongest open models globally, matching performance of frontier closed and open models. This model is trained using self play with reinforcement learning to reach state-of-the-art performance on multiple categories (instruction following, coding, longer queries and creative writing). 
This advanced system demonstrates significant progress toward scalable superintelligence through policy improvement.", + "capabilities": ["structured-output", "reasoning"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.25 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.25 + } + }, + "ownedBy": "cogito" + }, + { + "id": "nova-premier-v1", + "name": "Amazon: Nova Premier 1.0", + "description": "Amazon Nova Premier is the most capable of Amazon’s multimodal models for complex reasoning tasks and for use as the best teacher for distilling custom models.", + "capabilities": ["function-call", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 1000000, + "maxOutputTokens": 32000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2.5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 12.5 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.625 + } + }, + "ownedBy": "amazon" + }, + { + "id": "sonar-pro-search", + "name": "Perplexity: Sonar Pro Search", + "description": "Exclusively available on the OpenRouter API, Sonar Pro's new Pro Search mode is Perplexity's most advanced agentic search system. It is designed for deeper reasoning and analysis. Pricing is based on tokens plus $18 per thousand requests. This model powers the Pro Search mode on the Perplexity platform.\n\nSonar Pro Search adds autonomous, multi-step reasoning to Sonar Pro. 
So, instead of just one query + synthesis, it plans and executes entire research workflows using tools.", + "capabilities": ["structured-output", "reasoning", "web-search", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 8000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 15 + } + }, + "ownedBy": "perplexity" + }, + { + "id": "lfm2-a1b", + "name": "LiquidAI: LFM2-8B-A1B", + "description": "LFM2-8B-A1B is an efficient on-device Mixture-of-Experts (MoE) model from Liquid AI’s LFM2 family, built for fast, high-quality inference on edge hardware. It uses 8.3B total parameters with only ~1.5B active per token, delivering strong performance while keeping compute and memory usage low—making it ideal for phones, tablets, and laptops.", + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 32768, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.01 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.02 + } + }, + "ownedBy": "liquidai" + }, + { + "id": "lfm-2-2", + "name": "LiquidAI: LFM2-2.6B", + "description": "LFM2 is a new generation of hybrid models developed by Liquid AI, specifically designed for edge AI and on-device deployment. 
It sets a new standard in terms of quality, speed, and memory efficiency.", + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 32768, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.01 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.02 + } + }, + "ownedBy": "liquidai" + }, + { + "id": "gpt-5-image-mini", + "name": "OpenAI: GPT-5 Image Mini", + "description": "GPT-5 Image Mini combines OpenAI's advanced language capabilities, powered by [GPT-5 Mini](https://openrouter.ai/openai/gpt-5-mini), with GPT Image 1 Mini for efficient image generation. This natively multimodal model features superior instruction following, text rendering, and detailed image editing with reduced latency and cost. It excels at high-quality visual creation while maintaining strong text understanding, making it ideal for applications that require both efficient image generation and text processing at scale.", + "capabilities": [ + "function-call", + "structured-output", + "reasoning", + "web-search", + "image-recognition", + "image-generation" + ], + "inputModalities": ["image", "text"], + "outputModalities": ["image", "text"], + "contextWindow": 400000, + "maxOutputTokens": 128000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2.5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.25 + } + }, + "reasoning": { + "supportedEfforts": ["minimal", "low", "medium", "high"] + }, + "ownedBy": "openai" + }, + { + "id": "qwen3-vl-thinking", + "name": "Qwen: Qwen3 VL 8B Thinking", + "description": "Qwen3-VL-8B-Thinking is the reasoning-optimized variant of the Qwen3-VL-8B multimodal model, designed for advanced visual and textual reasoning across complex scenes, documents, and temporal sequences. 
It integrates enhanced multimodal alignment and long-context processing (native 256K, expandable to 1M tokens) for tasks such as scientific visual analysis, causal inference, and mathematical reasoning over image or video inputs.\n\nCompared to the Instruct edition, the Thinking version introduces deeper visual-language fusion and deliberate reasoning pathways that improve performance on long-chain logic tasks, STEM problem-solving, and multi-step video understanding. It achieves stronger temporal grounding via Interleaved-MRoPE and timestamp-aware embeddings, while maintaining robust OCR, multilingual comprehension, and text generation on par with large text-only LLMs.", + "capabilities": ["function-call", "structured-output", "reasoning", "image-recognition"], + "inputModalities": ["image", "text"], + "outputModalities": ["text"], + "contextWindow": 131072, + "maxOutputTokens": 32768, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.117 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.365 + } + }, + "reasoning": { + "supportedEfforts": ["low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 38912 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "ernie-4-5-a3b-thinking", + "name": "Baidu: ERNIE 4.5 21B A3B Thinking", + "description": "ERNIE-4.5-21B-A3B-Thinking is Baidu's upgraded lightweight MoE model, refined to boost reasoning depth and quality for top-tier performance in logical puzzles, math, science, coding, text generation, and expert-level academic benchmarks.", + "capabilities": ["reasoning"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 131072, + "maxOutputTokens": 65536, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.07 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.28 + } + }, + "ownedBy": "baidu" + }, + { + "id": "qwen3-vl-a3b-thinking", + "name": "Qwen: Qwen3 VL 30B A3B Thinking", + "description": 
"Qwen3-VL-30B-A3B-Thinking is a multimodal model that unifies strong text generation with visual understanding for images and videos. Its Thinking variant enhances reasoning in STEM, math, and complex tasks. It excels in perception of real-world/synthetic categories, 2D/3D spatial grounding, and long-form visual comprehension, achieving competitive multimodal benchmark results. For agentic use, it handles multi-image multi-turn instructions, video timeline alignments, GUI automation, and visual coding from sketches to debugged UI. Text performance matches flagship Qwen3 models, suiting document AI, OCR, UI assistance, spatial tasks, and agent research.", + "capabilities": ["function-call", "structured-output", "reasoning", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 131072, + "maxOutputTokens": 32768, + "reasoning": { + "supportedEfforts": ["low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 38912 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "cydonia-v4-1", + "name": "TheDrummer: Cydonia 24B V4.1", + "description": "Uncensored and creative writing model based on Mistral Small 3.2 24B with good recall, prompt adherence, and intelligence.", + "capabilities": ["structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 131072, + "maxOutputTokens": 131072, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.5 + } + } + }, + { + "id": "relace-apply-3", + "name": "Relace: Relace Apply 3", + "description": "Relace Apply 3 is a specialized code-patching LLM that merges AI-suggested edits straight into your source files. 
It can apply updates from GPT-4o, Claude, and others into your files at 10,000 tokens/sec on average.\n\nThe model requires the prompt to be in the following format: \n{instruction}\n{initial_code}\n{edit_snippet}\n\nZero Data Retention is enabled for Relace. Learn more about this model in their [documentation](https://docs.relace.ai/api-reference/instant-apply/apply)", + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 256000, + "maxOutputTokens": 128000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.85 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.25 + } + }, + "ownedBy": "relace" + }, + { + "id": "qwen3-vl-a22b-thinking", + "name": "Qwen: Qwen3 VL 235B A22B Thinking", + "description": "Qwen3-VL-235B-A22B Thinking is a multimodal model that unifies strong text generation with visual understanding across images and video. The Thinking model is optimized for multimodal reasoning in STEM and math. The series emphasizes robust perception (recognition of diverse real-world and synthetic categories), spatial understanding (2D/3D grounding), and long-form visual comprehension, with competitive results on public multimodal benchmarks for both perception and reasoning.\n\nBeyond analysis, Qwen3-VL supports agentic interaction and tool use: it can follow complex instructions over multi-image, multi-turn dialogues; align text to video timelines for precise temporal queries; and operate GUI elements for automation tasks. The models also enable visual coding workflows, turning sketches or mockups into code and assisting with UI debugging, while maintaining strong text-only performance comparable to the flagship Qwen3 language models. 
This makes Qwen3-VL suitable for production scenarios spanning document AI, multilingual OCR, software/UI assistance, spatial/embodied tasks, and research on vision-language agents.", + "capabilities": ["function-call", "structured-output", "reasoning", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 131072, + "maxOutputTokens": 32768, + "reasoning": { + "supportedEfforts": ["low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 38912 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "tongyi-deepresearch-a3b", + "name": "Tongyi DeepResearch 30B A3B", + "description": "Tongyi DeepResearch is an agentic large language model developed by Tongyi Lab, with 30 billion total parameters activating only 3 billion per token. It's optimized for long-horizon, deep information-seeking tasks and delivers state-of-the-art performance on benchmarks like Humanity's Last Exam, BrowserComp, BrowserComp-ZH, WebWalkerQA, GAIA, xbench-DeepSearch, and FRAMES. This makes it superior for complex agentic search, reasoning, and multi-step problem-solving compared to prior models.\n\nThe model includes a fully automated synthetic data pipeline for scalable pre-training, fine-tuning, and reinforcement learning. It uses large-scale continual pre-training on diverse agentic data to boost reasoning and stay fresh. It also features end-to-end on-policy RL with a customized Group Relative Policy Optimization, including token-level gradients and negative sample filtering for stable training. The model supports ReAct for core ability checks and an IterResearch-based 'Heavy' mode for max performance through test-time scaling. 
It's ideal for advanced research agents, tool use, and heavy inference workflows.", + "capabilities": ["function-call", "structured-output", "reasoning"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 131072, + "maxOutputTokens": 131072, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.09 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.44999999999999996 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.09 + } + } + }, + { + "id": "qwen3-next-a3b-thinking", + "name": "Qwen: Qwen3 Next 80B A3B Thinking", + "description": "Qwen3-Next-80B-A3B-Thinking is a reasoning-first chat model in the Qwen3-Next line that outputs structured “thinking” traces by default. It’s designed for hard multi-step problems; math proofs, code synthesis/debugging, logic, and agentic planning, and reports strong results across knowledge, reasoning, coding, alignment, and multilingual evaluations. Compared with prior Qwen3 variants, it emphasizes stability under long chains of thought and efficient scaling during inference, and it is tuned to follow complex instructions while reducing repetitive or off-task behavior.\n\nThe model is suitable for agent frameworks and tool use (function calling), retrieval-heavy workflows, and standardized benchmarking where step-by-step solutions are required. It supports long, detailed completions and leverages throughput-oriented techniques (e.g., multi-token prediction) for faster generation. 
Note that it operates in thinking-only mode.", + "capabilities": ["function-call", "structured-output", "reasoning"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.15 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.2 + } + }, + "reasoning": { + "supportedEfforts": ["low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 38912 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "jamba-large-1-7", + "name": "AI21: Jamba Large 1.7", + "description": "Jamba Large 1.7 is the latest model in the Jamba open family, offering improvements in grounding, instruction-following, and overall efficiency. Built on a hybrid SSM-Transformer architecture with a 256K context window, it delivers more accurate, contextually grounded responses and better steerability than previous versions.", + "capabilities": ["function-call", "structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 256000, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 8 + } + }, + "ownedBy": "ai21" + }, + { + "id": "ui-tars-1-5", + "name": "ByteDance: UI-TARS 7B ", + "description": "UI-TARS-1.5 is a multimodal vision-language agent optimized for GUI-based environments, including desktop interfaces, web browsers, mobile systems, and games. Built by ByteDance, it builds upon the UI-TARS framework with reinforcement learning-based reasoning, enabling robust action planning and execution across virtual interfaces.\n\nThis model achieves state-of-the-art results on a range of interactive and grounding benchmarks, including OSworld, WebVoyager, AndroidWorld, and ScreenSpot. It also demonstrates perfect task completion across diverse Poki games and outperforms prior models in Minecraft agent tasks. 
UI-TARS-1.5 supports thought decomposition during inference and shows strong scaling across variants, with the 1.5 version notably exceeding the performance of earlier 72B and 7B checkpoints.", + "capabilities": ["image-recognition"], + "inputModalities": ["image", "text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 2048, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.09999999999999999 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.19999999999999998 + } + }, + "ownedBy": "bytedance" + }, + { + "id": "devstral-medium", + "name": "Mistral: Devstral Medium", + "description": "Devstral Medium is a high-performance code generation and agentic reasoning model developed jointly by Mistral AI and All Hands AI. Positioned as a step up from Devstral Small, it achieves 61.6% on SWE-Bench Verified, placing it ahead of Gemini 2.5 Pro and GPT-4.1 in code-related tasks, at a fraction of the cost. It is designed for generalization across prompt styles and tool use in code agents and frameworks.\n\nDevstral Medium is available via API only (not open-weight), and supports enterprise deployment on private infrastructure, with optional fine-tuning capabilities.", + "capabilities": ["function-call", "structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 131072, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.39999999999999997 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2 + } + }, + "ownedBy": "mistral" + }, + { + "id": "spotlight", + "name": "Arcee AI: Spotlight", + "description": "Spotlight is a 7‑billion‑parameter vision‑language model derived from Qwen 2.5‑VL and fine‑tuned by Arcee AI for tight image‑text grounding tasks. It offers a 32 k‑token context window, enabling rich multimodal conversations that combine lengthy documents with one or more images. 
Training emphasized fast inference on consumer GPUs while retaining strong captioning, visual‐question‑answering, and diagram‑analysis accuracy. As a result, Spotlight slots neatly into agent workflows where screenshots, charts or UI mock‑ups need to be interpreted on the fly. Early benchmarks show it matching or out‑scoring larger VLMs such as LLaVA‑1.6 13 B on popular VQA and POPE alignment tests. ", + "capabilities": ["image-recognition"], + "inputModalities": ["image", "text"], + "outputModalities": ["text"], + "contextWindow": 131072, + "maxOutputTokens": 65537, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.18 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.18 + } + }, + "ownedBy": "arceeai" + }, + { + "id": "maestro-reasoning", + "name": "Arcee AI: Maestro Reasoning", + "description": "Maestro Reasoning is Arcee's flagship analysis model: a 32 B‑parameter derivative of Qwen 2.5‑32 B tuned with DPO and chain‑of‑thought RL for step‑by‑step logic. Compared to the earlier 7 B preview, the production 32 B release widens the context window to 128 k tokens and doubles pass‑rate on MATH and GSM‑8K, while also lifting code completion accuracy. Its instruction style encourages structured \"thought → answer\" traces that can be parsed or hidden according to user preference. That transparency pairs well with audit‑focused industries like finance or healthcare where seeing the reasoning path matters. In Arcee Conductor, Maestro is automatically selected for complex, multi‑constraint queries that smaller SLMs bounce. 
", + "capabilities": ["reasoning"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 131072, + "maxOutputTokens": 32000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.8999999999999999 + }, + "output": { + "currency": "USD", + "perMillionTokens": 3.3000000000000003 + } + } + }, + { + "id": "virtuoso-large", + "name": "Arcee AI: Virtuoso Large", + "description": "Virtuoso‑Large is Arcee's top‑tier general‑purpose LLM at 72 B parameters, tuned to tackle cross‑domain reasoning, creative writing and enterprise QA. Unlike many 70 B peers, it retains the 128 k context inherited from Qwen 2.5, letting it ingest books, codebases or financial filings wholesale. Training blended DeepSeek R1 distillation, multi‑epoch supervised fine‑tuning and a final DPO/RLHF alignment stage, yielding strong performance on BIG‑Bench‑Hard, GSM‑8K and long‑context Needle‑In‑Haystack tests. Enterprises use Virtuoso‑Large as the \"fallback\" brain in Conductor pipelines when other SLMs flag low confidence. Despite its size, aggressive KV‑cache optimizations keep first‑token latency in the low‑second range on 8× H100 nodes, making it a practical production‑grade powerhouse.", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 131072, + "maxOutputTokens": 64000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.75 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.2 + } + }, + "ownedBy": "arceeai" + }, + { + "id": "coder-large", + "name": "Arcee AI: Coder Large", + "description": "Coder‑Large is a 32 B‑parameter offspring of Qwen 2.5‑Instruct that has been further trained on permissively‑licensed GitHub, CodeSearchNet and synthetic bug‑fix corpora. 
It supports a 32k context window, enabling multi‑file refactoring or long diff review in a single call, and understands 30‑plus programming languages with special attention to TypeScript, Go and Terraform. Internal benchmarks show 5–8 pt gains over CodeLlama‑34 B‑Python on HumanEval and competitive BugFix scores thanks to a reinforcement pass that rewards compilable output. The model emits structured explanations alongside code blocks by default, making it suitable for educational tooling as well as production copilot scenarios. Cost‑wise, Together AI prices it well below proprietary incumbents, so teams can scale interactive coding without runaway spend. ", + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 32768, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.7999999999999999 + } + }, + "ownedBy": "arceeai" + }, + { + "id": "o4-mini-high", + "name": "OpenAI: o4 Mini High", + "description": "OpenAI o4-mini-high is the same model as [o4-mini](/openai/o4-mini) with reasoning_effort set to high. \n\nOpenAI o4-mini is a compact reasoning model in the o-series, optimized for fast, cost-efficient performance while retaining strong multimodal and agentic capabilities. It supports tool use and demonstrates competitive reasoning and coding performance across benchmarks like AIME (99.5% with Python) and SWE-bench, outperforming its predecessor o3-mini and even approaching o3 in some domains.\n\nDespite its smaller size, o4-mini exhibits high accuracy in STEM tasks, visual problem solving (e.g., MathVista, MMMU), and code editing. It is especially well-suited for high-throughput scenarios where latency or cost is critical. 
Thanks to its efficient architecture and refined reinforcement learning training, o4-mini can chain tools, generate structured outputs, and solve multi-step tasks with minimal delay—often in under a minute.", + "capabilities": ["function-call", "structured-output", "reasoning", "web-search", "image-recognition"], + "inputModalities": ["image", "text"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 100000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.1 + }, + "output": { + "currency": "USD", + "perMillionTokens": 4.4 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.275 + } + }, + "reasoning": { + "supportedEfforts": ["low", "medium", "high"] + }, + "ownedBy": "openai" + }, + { + "id": "llemma_7b", + "name": "EleutherAI: Llemma 7b", + "description": "Llemma 7B is a language model for mathematics. It was initialized with Code Llama 7B weights, and trained on the Proof-Pile-2 for 200B tokens. Llemma models are particularly strong at chain-of-thought mathematical reasoning and using computational tools for mathematics, such as Python and formal theorem provers.", + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 4096, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.7999999999999999 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.2 + } + } + }, + { + "id": "codellama-instruct-solidity", + "name": "AlfredPros: CodeLLaMa 7B Instruct Solidity", + "description": "A finetuned 7 billion parameters Code LLaMA - Instruct model to generate Solidity smart contract using 4-bit QLoRA finetuning provided by PEFT library.", + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 4096, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.7999999999999999 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.2 + } + } + }, + { + 
"id": "olmo-2-0325-instruct", + "name": "AllenAI: Olmo 2 32B Instruct", + "description": "OLMo-2 32B Instruct is a supervised instruction-finetuned variant of the OLMo-2 32B March 2025 base model. It excels in complex reasoning and instruction-following tasks across diverse benchmarks such as GSM8K, MATH, IFEval, and general NLP evaluation. Developed by AI2, OLMo-2 32B is part of an open, research-oriented initiative, trained primarily on English-language datasets to advance the understanding and development of open-source language models.", + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.049999999999999996 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.19999999999999998 + } + }, + "ownedBy": "ai2" + }, + { + "id": "skyfall-v2", + "name": "TheDrummer: Skyfall 36B V2", + "description": "Skyfall 36B v2 is an enhanced iteration of Mistral Small 2501, specifically fine-tuned for improved creativity, nuanced writing, role-playing, and coherent storytelling.", + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 32768, + "maxOutputTokens": 32768, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.55 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.7999999999999999 + } + } + }, + { + "id": "o3-mini-high", + "name": "OpenAI: o3 Mini High", + "description": "OpenAI o3-mini-high is the same model as [o3-mini](/openai/o3-mini) with reasoning_effort set to high. \n\no3-mini is a cost-efficient language model optimized for STEM reasoning tasks, particularly excelling in science, mathematics, and coding. 
The model features three adjustable reasoning effort levels and supports key developer capabilities including function calling, structured outputs, and streaming, though it does not include vision processing capabilities.\n\nThe model demonstrates significant improvements over its predecessor, with expert testers preferring its responses 56% of the time and noting a 39% reduction in major errors on complex questions. With medium reasoning effort settings, o3-mini matches the performance of the larger o1 model on challenging reasoning evaluations like AIME and GPQA, while maintaining lower latency and cost.", + "capabilities": ["function-call", "structured-output", "reasoning", "web-search"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 200000, + "maxOutputTokens": 100000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.1 + }, + "output": { + "currency": "USD", + "perMillionTokens": 4.4 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.55 + } + }, + "reasoning": { + "supportedEfforts": ["low", "medium", "high"] + }, + "ownedBy": "openai" + }, + { + "id": "aion-1-0", + "name": "AionLabs: Aion-1.0", + "description": "Aion-1.0 is a multi-model system designed for high performance across various tasks, including reasoning and coding. It is built on DeepSeek-R1, augmented with additional models and techniques such as Tree of Thoughts (ToT) and Mixture of Experts (MoE). 
It is Aion Lab's most powerful reasoning model.", + "capabilities": ["reasoning"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 131072, + "maxOutputTokens": 32768, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 4 + }, + "output": { + "currency": "USD", + "perMillionTokens": 8 + } + }, + "ownedBy": "aion" + }, + { + "id": "aion-1-0-mini", + "name": "AionLabs: Aion-1.0-Mini", + "description": "Aion-1.0-Mini 32B parameter model is a distilled version of the DeepSeek-R1 model, designed for strong performance in reasoning domains such as mathematics, coding, and logic. It is a modified variant of a FuseAI model that outperforms R1-Distill-Qwen-32B and R1-Distill-Llama-70B, with benchmark results available on its [Hugging Face page](https://huggingface.co/FuseAI/FuseO1-DeepSeekR1-QwQ-SkyT1-32B-Preview), independently replicated for verification.", + "capabilities": ["reasoning"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 131072, + "maxOutputTokens": 32768, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.7 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.4 + } + }, + "ownedBy": "aion" + }, + { + "id": "aion-rp-llama-3-1", + "name": "AionLabs: Aion-RP 1.0 (8B)", + "description": "Aion-RP-Llama-3.1-8B ranks the highest in the character evaluation portion of the RPBench-Auto benchmark, a roleplaying-specific variant of Arena-Hard-Auto, where LLMs evaluate each other’s responses. 
It is a fine-tuned base model rather than an instruct model, designed to produce more natural and varied writing.", + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 32768, + "maxOutputTokens": 32768, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.7999999999999999 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.5999999999999999 + } + }, + "ownedBy": "aion" + }, + { + "id": "l3-1-hanami-x1", + "name": "Sao10K: Llama 3.1 70B Hanami x1", + "description": "This is [Sao10K](/sao10k)'s experiment over [Euryale v2.2](/sao10k/l3.1-euryale-70b).", + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 16000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 3 + } + } + }, + { + "id": "l3-3-euryale", + "name": "Sao10K: Llama 3.3 Euryale 70B", + "description": "Euryale L3.3 70B is a model focused on creative roleplay from [Sao10k](https://ko-fi.com/sao10k). It is the successor of [Euryale L3 70B v2.2](/models/sao10k/l3-euryale-70b).", + "capabilities": ["structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 131072, + "maxOutputTokens": 16384, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.65 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.75 + } + } + }, + { + "id": "nova-lite-v1", + "name": "Amazon: Nova Lite 1.0", + "description": "Amazon Nova Lite 1.0 is a very low-cost multimodal model from Amazon that focused on fast processing of image, video, and text inputs to generate text output. 
Amazon Nova Lite can handle real-time customer interactions, document analysis, and visual question-answering tasks with high accuracy.\n\nWith an input context of 300K tokens, it can analyze multiple images or up to 30 minutes of video in a single input.", + "capabilities": ["function-call", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 300000, + "maxOutputTokens": 5120, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.06 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.24 + } + }, + "ownedBy": "amazon" + }, + { + "id": "nova-micro-v1", + "name": "Amazon: Nova Micro 1.0", + "description": "Amazon Nova Micro 1.0 is a text-only model that delivers the lowest latency responses in the Amazon Nova family of models at a very low cost. With a context length of 128K tokens and optimized for speed and cost, Amazon Nova Micro excels at tasks such as text summarization, translation, content classification, interactive chat, and brainstorming. It has simple mathematical reasoning and coding abilities.", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 128000, + "maxOutputTokens": 5120, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.035 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.14 + } + }, + "ownedBy": "amazon" + }, + { + "id": "pixtral-large-2411", + "name": "Mistral: Pixtral Large 2411", + "description": "Pixtral Large is a 124B parameter, open-weight, multimodal model built on top of [Mistral Large 2](/mistralai/mistral-large-2411). 
The model is able to understand documents, charts and natural images.\n\nThe model is available under the Mistral Research License (MRL) for research and educational use, and the Mistral Commercial License for experimentation, testing, and production for commercial purposes.\n\n", + "capabilities": ["function-call", "structured-output", "image-recognition"], + "inputModalities": ["text", "image"], + "outputModalities": ["text"], + "contextWindow": 131072, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 6 + } + }, + "ownedBy": "mistral" + }, + { + "id": "sorcererlm-8x22b", + "name": "SorcererLM 8x22B", + "description": "SorcererLM is an advanced RP and storytelling model, built as a Low-rank 16-bit LoRA fine-tuned on [WizardLM-2 8x22B](/microsoft/wizardlm-2-8x22b).\n\n- Advanced reasoning and emotional intelligence for engaging and immersive interactions\n- Vivid writing capabilities enriched with spatial and contextual awareness\n- Enhanced narrative depth, promoting creative and dynamic storytelling", + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 16000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 4.5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 4.5 + } + } + }, + { + "id": "unslopnemo", + "name": "TheDrummer: UnslopNemo 12B", + "description": "UnslopNemo v4.1 is the latest addition from the creator of Rocinante, designed for adventure writing and role-play scenarios.", + "capabilities": ["function-call", "structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 32768, + "maxOutputTokens": 32768, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.39999999999999997 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.39999999999999997 + } + } + }, + { + "id": "magnum-v4", + "name": "Magnum v4 72B", + "description": "This is 
a series of models designed to replicate the prose quality of the Claude 3 models, specifically Sonnet(https://openrouter.ai/anthropic/claude-3.5-sonnet) and Opus(https://openrouter.ai/anthropic/claude-3-opus).\n\nThe model is fine-tuned on top of [Qwen2.5 72B](https://openrouter.ai/qwen/qwen-2.5-72b-instruct).", + "capabilities": ["structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 16384, + "maxOutputTokens": 2048, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 5 + } + } + }, + { + "id": "inflection-3-pi", + "name": "Inflection: Inflection 3 Pi", + "description": "Inflection 3 Pi powers Inflection's [Pi](https://pi.ai) chatbot, including backstory, emotional intelligence, productivity, and safety. It has access to recent news, and excels in scenarios like customer support and roleplay.\n\nPi has been trained to mirror your tone and style, if you use more emojis, so will Pi! Try experimenting with various prompts and conversation styles.", + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 8000, + "maxOutputTokens": 1024, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2.5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 10 + } + }, + "ownedBy": "inflection" + }, + { + "id": "inflection-3-productivity", + "name": "Inflection: Inflection 3 Productivity", + "description": "Inflection 3 Productivity is optimized for following instructions. It is better for tasks requiring JSON output or precise adherence to provided guidelines. 
It has access to recent news.\n\nFor emotional intelligence similar to Pi, see [Inflect 3 Pi](/inflection/inflection-3-pi)\n\nSee [Inflection's announcement](https://inflection.ai/blog/enterprise) for more details.", + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 8000, + "maxOutputTokens": 1024, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2.5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 10 + } + }, + "ownedBy": "inflection" + }, + { + "id": "rocinante", + "name": "TheDrummer: Rocinante 12B", + "description": "Rocinante 12B is designed for engaging storytelling and rich prose.\n\nEarly testers have reported:\n- Expanded vocabulary with unique and expressive word choices\n- Enhanced creativity for vivid narratives\n- Adventure-filled and captivating stories", + "capabilities": ["function-call", "structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 32768, + "maxOutputTokens": 32768, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.16999999999999998 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.43 + } + } + }, + { + "id": "llama-3-1-lumimaid", + "name": "NeverSleep: Lumimaid v0.2 8B", + "description": "Lumimaid v0.2 8B is a finetune of [Llama 3.1 8B](/models/meta-llama/llama-3.1-8b-instruct) with a \"HUGE step up dataset wise\" compared to Lumimaid v0.1. 
Sloppy chats output were purged.\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", + "capabilities": ["structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 32768, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.09 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.6 + } + }, + "ownedBy": "meta" + }, + { + "id": "l3-1-euryale", + "name": "Sao10K: Llama 3.1 Euryale 70B v2.2", + "description": "Euryale L3.1 70B v2.2 is a model focused on creative roleplay from [Sao10k](https://ko-fi.com/sao10k). It is the successor of [Euryale L3 70B v2.1](/models/sao10k/l3-euryale-70b).", + "capabilities": ["function-call", "structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 32768, + "maxOutputTokens": 32768, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.65 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.75 + } + } + }, + { + "id": "l3-euryale", + "name": "Sao10k: Llama 3 Euryale 70B v2.1", + "description": "Euryale 70B v2.1 is a model focused on creative roleplay from [Sao10k](https://ko-fi.com/sao10k).\n\n- Better prompt adherence.\n- Better anatomy / spatial awareness.\n- Adapts much better to unique and custom formatting / reply formats.\n- Very creative, lots of unique swipes.\n- Is not restrictive during roleplays.", + "capabilities": ["function-call"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 8192, + "maxOutputTokens": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.48 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.48 + } + } + }, + { + "id": "llama-guard-2", + "name": "Meta: LlamaGuard 2 8B", + "description": "This safeguard model has 8B parameters and is based on the Llama 3 family. 
Just like is predecessor, [LlamaGuard 1](https://huggingface.co/meta-llama/LlamaGuard-7b), it can do both prompt and response classification.\n\nLlamaGuard 2 acts as a normal LLM would, generating text that indicates whether the given input/output is safe/unsafe. If deemed unsafe, it will also share the content categories violated.\n\nFor best results, please use raw prompt input or the `/completions` endpoint, instead of the chat API.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.19999999999999998 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.19999999999999998 + } + }, + "ownedBy": "meta" + }, + { + "id": "mistral-instruct-v0-2", + "name": "Mistral: Mistral 7B Instruct v0.2", + "description": "A high-performing, industry-standard 7.3B parameter model, with optimizations for speed and context length.\n\nAn improved version of [Mistral 7B Instruct](/modelsmistralai/mistral-7b-instruct-v0.1), with the following changes:\n\n- 32k context window (vs 8k context in v0.1)\n- Rope-theta = 1e6\n- No Sliding-Window Attention", + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 32768, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.19999999999999998 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.19999999999999998 + } + }, + "ownedBy": "mistral" + }, + { + "id": "mixtral-8x7b-instruct", + "name": "Mistral: Mixtral 8x7B Instruct", + "description": "Mixtral 8x7B Instruct is a pretrained generative Sparse Mixture of Experts, by Mistral AI, for chat and instruction use. 
Incorporates 8 experts (feed-forward networks) for a total of 47 billion parameters.\n\nInstruct model fine-tuned by Mistral. #moe", + "capabilities": ["function-call", "structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 32768, + "maxOutputTokens": 16384, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.54 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.54 + } + }, + "ownedBy": "mistral" + }, + { + "id": "noromaid", + "name": "Noromaid 20B", + "description": "A collab between IkariDev and Undi. This merge is suitable for RP, ERP, and general knowledge.\n\n#merge #uncensored", + "capabilities": ["structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 4096, + "maxOutputTokens": 2048, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.75 + } + } + }, + { + "id": "goliath", + "name": "Goliath 120B", + "description": "A large LLM created by combining two fine-tuned Llama 70B models into one 120B model. Combines Xwin and Euryale.\n\nCredits to\n- [@chargoddard](https://huggingface.co/chargoddard) for developing the framework used to merge the model - [mergekit](https://github.com/cg123/mergekit).\n- [@Undi95](https://huggingface.co/Undi95) for helping with the merge ratios.\n\n#merge", + "capabilities": ["structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 6144, + "maxOutputTokens": 1024, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 3.75 + }, + "output": { + "currency": "USD", + "perMillionTokens": 7.5 + } + } + }, + { + "id": "weaver", + "name": "Mancer: Weaver (alpha)", + "description": "An attempt to recreate Claude-style verbosity, but don't expect the same level of coherence or memory. 
Meant for use in roleplay/narrative situations.", + "capabilities": ["structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 8000, + "maxOutputTokens": 2000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.75 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1 + } + } + }, + { + "id": "remm-slerp-l2", + "name": "ReMM SLERP 13B", + "description": "A recreation trial of the original MythoMax-L2-B13 but with updated models. #merge", + "capabilities": ["structured-output"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 6144, + "maxOutputTokens": 4096, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.44999999999999996 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.65 + } + } + }, + { + "id": "gte-base", + "name": "Thenlper: GTE-Base", + "description": "The gte-base embedding model encodes English sentences and paragraphs into a 768-dimensional dense vector space, delivering efficient and effective semantic embeddings optimized for textual similarity, semantic search, and clustering applications.", + "capabilities": ["embedding"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 512, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.005 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "gte-large", + "name": "Thenlper: GTE-Large", + "description": "The gte-large embedding model converts English sentences, paragraphs and moderate-length documents into a 1024-dimensional dense vector space, delivering high-quality semantic embeddings optimized for information retrieval, semantic textual similarity, reranking and clustering tasks. 
Trained via multi-stage contrastive learning on a large domain-diverse relevance corpus, it offers excellent performance across general-purpose embedding use-cases.", + "capabilities": ["embedding"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 512, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.01 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "e5-large-v2", + "name": "Intfloat: E5-Large-v2", + "description": "The e5-large-v2 embedding model maps English sentences, paragraphs, and documents into a 1024-dimensional dense vector space, delivering high-accuracy semantic embeddings optimized for retrieval, semantic search, reranking, and similarity-scoring tasks.", + "capabilities": ["embedding"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 512, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.01 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + } + }, + { + "id": "e5-base-v2", + "name": "Intfloat: E5-Base-v2", + "description": "The e5-base-v2 embedding model encodes English sentences and paragraphs into a 768-dimensional dense vector space, producing efficient and high-quality semantic embeddings optimized for tasks such as semantic search, similarity scoring, retrieval and clustering.", + "capabilities": ["embedding"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 512, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.005 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + } + }, + { + "id": "paraphrase-minilm-l6-v2", + "name": "Sentence Transformers: paraphrase-MiniLM-L6-v2", + "description": "The paraphrase-MiniLM-L6-v2 embedding model converts sentences and short paragraphs into a 384-dimensional dense vector space, producing high-quality semantic embeddings optimized for 
paraphrase detection, semantic similarity scoring, clustering, and lightweight retrieval tasks.", + "capabilities": ["embedding"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 512, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.005 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + } + }, + { + "id": "all-minilm-l12-v2", + "name": "Sentence Transformers: all-MiniLM-L12-v2", + "description": "The all-MiniLM-L12-v2 embedding model maps sentences and short paragraphs into a 384-dimensional dense vector space, producing efficient and high-quality semantic embeddings optimized for tasks such as semantic search, clustering, and similarity-scoring.", + "capabilities": ["embedding"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 512, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.005 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + } + }, + { + "id": "multi-qa-mpnet-base-dot-v1", + "name": "Sentence Transformers: multi-qa-mpnet-base-dot-v1", + "description": "The multi-qa-mpnet-base-dot-v1 embedding model transforms sentences and short paragraphs into a 768-dimensional dense vector space, generating high-quality semantic embeddings optimized for question-and-answer retrieval, semantic search, and similarity-scoring across diverse content.", + "capabilities": ["embedding"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 512, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.005 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + } + }, + { + "id": "all-mpnet-base-v2", + "name": "Sentence Transformers: all-mpnet-base-v2", + "description": "The all-mpnet-base-v2 embedding model encodes sentences and short paragraphs into a 768-dimensional dense vector space, providing high-fidelity semantic embeddings well suited for tasks like information 
retrieval, clustering, similarity scoring, and text ranking.", + "capabilities": ["embedding"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 512, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.005 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + } + }, + { + "id": "all-minilm-l6-v2", + "name": "Sentence Transformers: all-MiniLM-L6-v2", + "description": "The all-MiniLM-L6-v2 embedding model maps sentences and short paragraphs into a 384-dimensional dense vector space, enabling high-quality semantic representations that are ideal for downstream tasks such as information retrieval, clustering, similarity scoring, and text ranking.", + "capabilities": ["embedding"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 512, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.005 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + } + }, + { + "id": "mistral-embed-2312", + "name": "Mistral: Mistral Embed 2312", + "description": "Mistral Embed is a specialized embedding model for text data, optimized for semantic search and RAG applications. 
Developed by Mistral AI in late 2023, it produces 1024-dimensional vectors that effectively capture semantic relationships in text.", + "capabilities": ["embedding"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.09999999999999999 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "ownedBy": "mistral" + }, + { + "id": "codestral-embed-2505", + "name": "Mistral: Codestral Embed 2505", + "description": "Mistral Codestral Embed is specially designed for code, perfect for embedding code databases, repositories, and powering coding assistants with state-of-the-art retrieval.", + "capabilities": ["embedding"], + "inputModalities": ["text"], + "outputModalities": ["text"], + "contextWindow": 8192, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.15 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "ownedBy": "mistral" + }, + { + "id": "kolors", + "name": "kolors", + "ownedBy": "kolors" + }, + { + "id": "kat-coder-air-v1", + "name": "kat-coder-air-v1", + "ownedBy": "streamlake" + }, + { + "id": "kat-coder-exp-1010", + "name": "kat-coder-exp-1010", + "ownedBy": "streamlake" + }, + { + "id": "minimax-m2-1--lightning", + "name": "minimax-m2-1--lightning", + "capabilities": ["reasoning", "function-call"], + "reasoning": { + "supportedEfforts": [], + "interleaved": true + }, + "ownedBy": "minimax" + }, + { + "id": "qwen-image-edit-2509", + "name": "qwen-image-edit-2509", + "capabilities": ["function-call", "image-generation"], + "ownedBy": "alibaba" + }, + { + "id": "wan2-2-i2v-a14b", + "name": "wan2-2-i2v-a14b", + "capabilities": ["video-generation"], + "ownedBy": "alibaba" + }, + { + "id": "wan2-2-t2v-a14b", + "name": "wan2-2-t2v-a14b", + "capabilities": ["video-generation"], + "ownedBy": "alibaba" + }, + { + "id": "telespeechasr", + "name": "telespeechasr", + "ownedBy": "silicon" + }, + { + 
"id": "moss-ttsd-v0-5", + "name": "moss-ttsd-v0-5", + "ownedBy": "silicon" + }, + { + "id": "cosyvoice2", + "name": "cosyvoice2", + "ownedBy": "silicon" + }, + { + "id": "sensevoicesmall", + "name": "sensevoicesmall", + "ownedBy": "silicon" + }, + { + "id": "indextts-2", + "name": "indextts-2", + "capabilities": ["audio-generation"], + "ownedBy": "silicon" + }, + { + "id": "bce-embedding-base_v1", + "name": "bce-embedding-base_v1", + "capabilities": ["embedding"], + "ownedBy": "silicon" + }, + { + "id": "bce-reranker-base_v1", + "name": "bce-reranker-base_v1", + "capabilities": ["rerank"], + "ownedBy": "silicon" + }, + { + "id": "internlm2_5-chat", + "name": "internlm2_5-chat", + "ownedBy": "intern" + }, + { + "id": "glm-4-chat", + "name": "glm-4-chat", + "capabilities": ["function-call"], + "ownedBy": "zhipu" + }, + { + "id": "flux.1-schnell", + "name": "flux.1-schnell", + "capabilities": ["image-generation"], + "ownedBy": "bfl" + }, + { + "id": "fish-speech-1-4", + "name": "fish-speech-1-4", + "ownedBy": "silicon" + }, + { + "id": "gpt-sovits", + "name": "gpt-sovits", + "ownedBy": "openai" + }, + { + "id": "fish-speech-1-5", + "name": "fish-speech-1-5", + "ownedBy": "silicon" + }, + { + "id": "flux.1-pro", + "name": "flux.1-pro", + "capabilities": ["image-generation"], + "ownedBy": "bfl" + }, + { + "id": "seed-rice", + "name": "seed-rice", + "ownedBy": "bytedance" + }, + { + "id": "abab5-5-chat", + "name": "abab5-5-chat", + "ownedBy": "minimax" + }, + { + "id": "abab5-5s-chat", + "name": "abab5-5s-chat", + "ownedBy": "minimax" + }, + { + "id": "abab6-5g-chat", + "name": "abab6-5g-chat", + "ownedBy": "minimax" + }, + { + "id": "abab6-5s-chat", + "name": "abab6-5s-chat", + "ownedBy": "minimax" + }, + { + "id": "abab6-5t-chat", + "name": "abab6-5t-chat", + "ownedBy": "minimax" + }, + { + "id": "charglm-3", + "name": "charglm-3", + "ownedBy": "ocoolai" + }, + { + "id": "charglm-4", + "name": "charglm-4", + "ownedBy": "ocoolai" + }, + { + "id": "chatglm-pro", + 
"name": "chatglm-pro", + "ownedBy": "ocoolai" + }, + { + "id": "chirp-v3-0", + "name": "chirp-v3-0", + "ownedBy": "ocoolai" + }, + { + "id": "chirp-v3-5", + "name": "chirp-v3-5", + "ownedBy": "ocoolai" + }, + { + "id": "claude-3-5-haiku-20241022-cursor", + "name": "claude-3-5-haiku-20241022-cursor", + "capabilities": ["function-call", "image-recognition", "web-search"], + "ownedBy": "anthropic" + }, + { + "id": "claude-3-5-sonnet-all", + "name": "claude-3-5-sonnet-all", + "capabilities": ["function-call", "image-recognition", "web-search", "computer-use"], + "ownedBy": "anthropic" + }, + { + "id": "claude-3-7-sonnet-20250219-all", + "name": "claude-3-7-sonnet-20250219-all", + "capabilities": ["reasoning", "function-call", "image-recognition", "web-search", "computer-use"], + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 64000 + } + }, + "ownedBy": "anthropic" + }, + { + "id": "claude-3-7-sonnet-thinking-all", + "name": "claude-3-7-sonnet-thinking-all", + "capabilities": ["reasoning", "function-call", "image-recognition", "web-search", "computer-use"], + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 64000 + } + }, + "ownedBy": "anthropic" + }, + { + "id": "claude-3-haiku-20240307-cursor", + "name": "claude-3-haiku-20240307-cursor", + "capabilities": ["function-call", "image-recognition"], + "ownedBy": "anthropic" + }, + { + "id": "claude-opus-4-6-20260205", + "name": "claude-opus-4-6-20260205", + "capabilities": ["reasoning", "function-call", "image-recognition", "web-search", "computer-use"], + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 32000 + } + }, + "ownedBy": "anthropic" + }, + { + "id": "codegeex-4", + "name": "codegeex-4", + "ownedBy": "ocoolai" + }, + { + "id": "concise", + "name": "concise", + "ownedBy": "ocoolai" + }, + { + 
"id": "concise-scholar", + "name": "concise-scholar", + "ownedBy": "ocoolai" + }, + { + "id": "deepseek-chat-0324", + "name": "deepseek-chat-0324", + "capabilities": ["reasoning", "function-call"], + "reasoning": { + "supportedEfforts": ["none"] + }, + "ownedBy": "deepseek" + }, + { + "id": "deepseek-r1-250528", + "name": "deepseek-r1-250528", + "capabilities": ["reasoning", "function-call"], + "ownedBy": "deepseek" + }, + { + "id": "deepseek-reasoner-0528", + "name": "deepseek-reasoner-0528", + "capabilities": ["reasoning", "function-call"], + "ownedBy": "deepseek" + }, + { + "id": "deepseek-reasoner-v3-2", + "name": "deepseek-reasoner-v3-2", + "capabilities": ["reasoning", "function-call"], + "ownedBy": "deepseek" + }, + { + "id": "deepseek", + "name": "deepseek", + "capabilities": ["function-call"], + "ownedBy": "deepseek" + }, + { + "id": "deepseek-v3-1-250821", + "name": "deepseek-v3-1-250821", + "capabilities": ["reasoning", "function-call"], + "reasoning": { + "supportedEfforts": ["none"] + }, + "ownedBy": "deepseek" + }, + { + "id": "deepseek-v3-250324", + "name": "deepseek-v3-250324", + "capabilities": ["reasoning", "function-call"], + "reasoning": { + "supportedEfforts": ["none"] + }, + "ownedBy": "deepseek" + }, + { + "id": "detail", + "name": "detail", + "ownedBy": "ocoolai" + }, + { + "id": "detail-scholar", + "name": "detail-scholar", + "ownedBy": "ocoolai" + }, + { + "id": "doubao-1-5-lite-32k-250115", + "name": "doubao-1-5-lite-32k-250115", + "ownedBy": "bytedance" + }, + { + "id": "doubao-1-5-pro-32k-character-250715", + "name": "doubao-1-5-pro-32k-character-250715", + "ownedBy": "bytedance" + }, + { + "id": "doubao-1-5-thinking-pro-250415", + "name": "doubao-1-5-thinking-pro-250415", + "capabilities": ["reasoning"], + "reasoning": { + "supportedEfforts": ["none", "high"] + }, + "ownedBy": "bytedance" + }, + { + "id": "doubao-1-5-vision-pro-32k-250115", + "name": "doubao-1-5-vision-pro-32k-250115", + "capabilities": ["image-recognition"], + 
"ownedBy": "bytedance" + }, + { + "id": "doubao-1-5-vision-lite-250315", + "name": "doubao-1-5-vision-lite-250315", + "capabilities": ["image-recognition"], + "ownedBy": "bytedance" + }, + { + "id": "doubao-1-5-vision-pro-250328", + "name": "doubao-1-5-vision-pro-250328", + "capabilities": ["image-recognition"], + "ownedBy": "bytedance" + }, + { + "id": "doubao-embedding", + "name": "doubao-embedding", + "capabilities": ["embedding"], + "ownedBy": "bytedance" + }, + { + "id": "doubao-embedding-large-text-250515", + "name": "doubao-embedding-large-text-250515", + "capabilities": ["embedding"], + "ownedBy": "bytedance" + }, + { + "id": "doubao-embedding-vision-241215", + "name": "doubao-embedding-vision-241215", + "capabilities": ["embedding", "image-recognition"], + "ownedBy": "bytedance" + }, + { + "id": "doubao-embedding-vision-250328", + "name": "doubao-embedding-vision-250328", + "capabilities": ["embedding", "image-recognition"], + "ownedBy": "bytedance" + }, + { + "id": "doubao-embedding-vision-250615", + "name": "doubao-embedding-vision-250615", + "capabilities": ["embedding", "image-recognition"], + "ownedBy": "bytedance" + }, + { + "id": "doubao-seed-1-6-251015", + "name": "doubao-seed-1-6-251015", + "capabilities": ["reasoning", "function-call", "image-recognition"], + "reasoning": { + "supportedEfforts": ["minimal", "low", "medium", "high"] + }, + "ownedBy": "bytedance" + }, + { + "id": "doubao-seed-1-6-flash-250828", + "name": "doubao-seed-1-6-flash-250828", + "capabilities": ["reasoning", "function-call", "image-recognition"], + "ownedBy": "bytedance" + }, + { + "id": "doubao-seed-1-6-lite-251015", + "name": "doubao-seed-1-6-lite-251015", + "capabilities": ["reasoning", "function-call", "image-recognition"], + "reasoning": { + "supportedEfforts": ["minimal", "low", "medium", "high"] + }, + "ownedBy": "bytedance" + }, + { + "id": "doubao-seed-1-8-251228", + "name": "doubao-seed-1-8-251228", + "capabilities": ["reasoning", "function-call", 
"image-recognition"], + "reasoning": { + "supportedEfforts": ["minimal", "low", "medium", "high"] + }, + "ownedBy": "bytedance" + }, + { + "id": "doubao-seedream-4-0-250828", + "name": "doubao-seedream-4-0-250828", + "capabilities": ["function-call"], + "ownedBy": "bytedance" + }, + { + "id": "doubao-seedream-4-5-251128", + "name": "doubao-seedream-4-5-251128", + "capabilities": ["function-call"], + "ownedBy": "bytedance" + }, + { + "id": "doubao-tts", + "name": "doubao-tts", + "ownedBy": "bytedance" + }, + { + "id": "doubao-vision-pro-32k", + "name": "doubao-vision-pro-32k", + "capabilities": ["image-recognition"], + "ownedBy": "bytedance" + }, + { + "id": "embedding-001", + "name": "embedding-001", + "capabilities": ["embedding"], + "ownedBy": "ocoolai" + }, + { + "id": "embedding-gecko-001", + "name": "embedding-gecko-001", + "capabilities": ["embedding"], + "ownedBy": "ocoolai" + }, + { + "id": "emohaa", + "name": "emohaa", + "ownedBy": "ocoolai" + }, + { + "id": "ernie-3-5-128k", + "name": "ernie-3-5-128k", + "ownedBy": "baidu" + }, + { + "id": "ernie-3-5-8k", + "name": "ernie-3-5-8k", + "ownedBy": "baidu" + }, + { + "id": "ernie-3-5-8k-preview", + "name": "ernie-3-5-8k-preview", + "ownedBy": "baidu" + }, + { + "id": "ernie-4-0-8k", + "name": "ernie-4-0-8k", + "ownedBy": "baidu" + }, + { + "id": "ernie-4-0-8k-latest", + "name": "ernie-4-0-8k-latest", + "ownedBy": "baidu" + }, + { + "id": "ernie-4-0-8k-preview", + "name": "ernie-4-0-8k-preview", + "ownedBy": "baidu" + }, + { + "id": "ernie-4-0-turbo-128k", + "name": "ernie-4-0-turbo-128k", + "ownedBy": "baidu" + }, + { + "id": "ernie-4-0-turbo-8k", + "name": "ernie-4-0-turbo-8k", + "ownedBy": "baidu" + }, + { + "id": "ernie-4-0-turbo-8k-latest", + "name": "ernie-4-0-turbo-8k-latest", + "ownedBy": "baidu" + }, + { + "id": "ernie-4-0-turbo-8k-preview", + "name": "ernie-4-0-turbo-8k-preview", + "ownedBy": "baidu" + }, + { + "id": "ernie-4-5-turbo-128k", + "name": "ernie-4-5-turbo-128k", + "ownedBy": "baidu" + }, + 
{ + "id": "ernie-4-5-turbo-32k", + "name": "ernie-4-5-turbo-32k", + "ownedBy": "baidu" + }, + { + "id": "ernie-character-8k", + "name": "ernie-character-8k", + "ownedBy": "baidu" + }, + { + "id": "ernie-character-fiction-8k", + "name": "ernie-character-fiction-8k", + "ownedBy": "baidu" + }, + { + "id": "ernie-lite-8k", + "name": "ernie-lite-8k", + "ownedBy": "baidu" + }, + { + "id": "ernie-lite-pro-128k", + "name": "ernie-lite-pro-128k", + "ownedBy": "baidu" + }, + { + "id": "ernie-novel-8k", + "name": "ernie-novel-8k", + "ownedBy": "baidu" + }, + { + "id": "ernie-speed-128k", + "name": "ernie-speed-128k", + "ownedBy": "baidu" + }, + { + "id": "ernie-speed-8k", + "name": "ernie-speed-8k", + "ownedBy": "baidu" + }, + { + "id": "ernie-speed-pro-128k", + "name": "ernie-speed-pro-128k", + "ownedBy": "baidu" + }, + { + "id": "ernie-tiny-8k", + "name": "ernie-tiny-8k", + "ownedBy": "baidu" + }, + { + "id": "flux", + "name": "flux", + "capabilities": ["image-generation"], + "ownedBy": "bfl" + }, + { + "id": "gemini-1-5-flash-exp-0827", + "name": "gemini-1-5-flash-exp-0827", + "capabilities": ["image-recognition"], + "ownedBy": "google" + }, + { + "id": "gemini-1-5-flash-latest", + "name": "gemini-1-5-flash-latest", + "capabilities": ["image-recognition"], + "ownedBy": "google" + }, + { + "id": "gemini-1-5-pro-001", + "name": "gemini-1-5-pro-001", + "capabilities": ["image-recognition"], + "ownedBy": "google" + }, + { + "id": "gemini-1-5-pro-002", + "name": "gemini-1-5-pro-002", + "capabilities": ["image-recognition"], + "ownedBy": "google" + }, + { + "id": "gemini-1-5-pro-exp-0827", + "name": "gemini-1-5-pro-exp-0827", + "capabilities": ["image-recognition"], + "ownedBy": "google" + }, + { + "id": "gemini-1-5-pro-latest", + "name": "gemini-1-5-pro-latest", + "capabilities": ["image-recognition"], + "ownedBy": "google" + }, + { + "id": "gemini-2-0-flash-lite-preview", + "name": "gemini-2-0-flash-lite-preview", + "capabilities": ["function-call", "image-recognition", 
"web-search"], + "ownedBy": "google" + }, + { + "id": "gemini-2-0-pro-exp", + "name": "gemini-2-0-pro-exp", + "capabilities": ["function-call", "image-recognition", "web-search"], + "ownedBy": "google" + }, + { + "id": "gemini-2-5-computer-use-preview-10-2025", + "name": "gemini-2-5-computer-use-preview-10-2025", + "capabilities": ["reasoning", "function-call", "image-recognition", "web-search", "computer-use"], + "ownedBy": "google" + }, + { + "id": "gemini-2-5-flash-ci", + "name": "gemini-2-5-flash-ci", + "capabilities": ["reasoning", "function-call", "image-recognition", "web-search"], + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 0, + "max": 24576 + } + }, + "ownedBy": "google" + }, + { + "id": "gemini-2-5-flash-preview-native-audio-dialog", + "name": "gemini-2-5-flash-preview-native-audio-dialog", + "capabilities": ["reasoning", "function-call", "image-recognition", "web-search"], + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 0, + "max": 24576 + } + }, + "ownedBy": "google" + }, + { + "id": "gemini-2-5-pro-ci", + "name": "gemini-2-5-pro-ci", + "capabilities": ["reasoning", "function-call", "image-recognition", "web-search"], + "reasoning": { + "supportedEfforts": ["low", "medium", "high"], + "thinkingTokenLimits": { + "min": 128, + "max": 32768 + } + }, + "ownedBy": "google" + }, + { + "id": "gemini-2-5-pro-preview-06-05-thinking-512", + "name": "gemini-2-5-pro-preview-06-05-thinking-512", + "capabilities": ["reasoning", "function-call", "image-recognition", "web-search"], + "reasoning": { + "supportedEfforts": ["low", "medium", "high"], + "thinkingTokenLimits": { + "min": 128, + "max": 32768 + } + }, + "ownedBy": "google" + }, + { + "id": "gemini-2-5-pro-thinking-128", + "name": "gemini-2-5-pro-thinking-128", + "capabilities": ["reasoning", "function-call", "image-recognition", "web-search"], + "reasoning": { + "supportedEfforts": 
["low", "medium", "high"], + "thinkingTokenLimits": { + "min": 128, + "max": 32768 + } + }, + "ownedBy": "google" + }, + { + "id": "gemini-2-5-pro-thinking-512", + "name": "gemini-2-5-pro-thinking-512", + "capabilities": ["reasoning", "function-call", "image-recognition", "web-search"], + "reasoning": { + "supportedEfforts": ["low", "medium", "high"], + "thinkingTokenLimits": { + "min": 128, + "max": 32768 + } + }, + "ownedBy": "google" + }, + { + "id": "gemini-3-pro-image-preview-2k", + "name": "gemini-3-pro-image-preview-2k", + "capabilities": ["function-call", "image-recognition", "image-generation", "web-search"], + "ownedBy": "google" + }, + { + "id": "gemini-3-pro-image-preview-4k", + "name": "gemini-3-pro-image-preview-4k", + "capabilities": ["function-call", "image-recognition", "image-generation", "web-search"], + "ownedBy": "google" + }, + { + "id": "gemini-3-1-pro-preview-all", + "name": "gemini-3-1-pro-preview-all", + "capabilities": ["reasoning", "function-call"], + "reasoning": { + "supportedEfforts": ["low", "medium", "high"], + "thinkingTokenLimits": { + "min": 128, + "max": 32768 + } + }, + "ownedBy": "google" + }, + { + "id": "gemini-embedding-exp", + "name": "gemini-embedding-exp", + "capabilities": ["embedding"], + "ownedBy": "google" + }, + { + "id": "glm-4-air", + "name": "glm-4-air", + "capabilities": ["function-call"], + "ownedBy": "zhipu" + }, + { + "id": "glm-4-airx", + "name": "glm-4-airx", + "capabilities": ["function-call"], + "ownedBy": "zhipu" + }, + { + "id": "glm-4-flashx", + "name": "glm-4-flashx", + "capabilities": ["function-call"], + "ownedBy": "zhipu" + }, + { + "id": "glm-4-long", + "name": "glm-4-long", + "capabilities": ["function-call"], + "ownedBy": "zhipu" + }, + { + "id": "glm-4v-flash", + "name": "glm-4v-flash", + "capabilities": ["function-call", "image-recognition"], + "ownedBy": "zhipu" + }, + { + "id": "glm-z1-air", + "name": "glm-z1-air", + "ownedBy": "zhipu" + }, + { + "id": "glm-z1-airx", + "name": "glm-z1-airx", 
+ "ownedBy": "zhipu" + }, + { + "id": "glm-z1-flash", + "name": "glm-z1-flash", + "ownedBy": "zhipu" + }, + { + "id": "gpt-3-5o", + "name": "gpt-3-5o", + "ownedBy": "openai" + }, + { + "id": "gpt-4-all", + "name": "gpt-4-all", + "capabilities": ["function-call", "image-recognition"], + "ownedBy": "openai" + }, + { + "id": "gpt-4-claude3-haiku-20240307", + "name": "gpt-4-claude3-haiku-20240307", + "capabilities": ["function-call", "image-recognition"], + "ownedBy": "openai" + }, + { + "id": "gpt-4-claude3-opus-20240229", + "name": "gpt-4-claude3-opus-20240229", + "capabilities": ["function-call", "image-recognition"], + "ownedBy": "openai" + }, + { + "id": "gpt-4-claude3-sonnet-20240229", + "name": "gpt-4-claude3-sonnet-20240229", + "capabilities": ["function-call", "image-recognition"], + "ownedBy": "openai" + }, + { + "id": "gpt-4-claude3-5-haiku-20241022", + "name": "gpt-4-claude3-5-haiku-20241022", + "capabilities": ["function-call", "image-recognition"], + "ownedBy": "openai" + }, + { + "id": "gpt-4-claude3-5-sonnet-20240620", + "name": "gpt-4-claude3-5-sonnet-20240620", + "capabilities": ["function-call", "image-recognition"], + "ownedBy": "openai" + }, + { + "id": "gpt-4-claude3-5-sonnet-20241022", + "name": "gpt-4-claude3-5-sonnet-20241022", + "capabilities": ["function-call", "image-recognition"], + "ownedBy": "openai" + }, + { + "id": "gpt-4-claude3-5-sonnet-all", + "name": "gpt-4-claude3-5-sonnet-all", + "capabilities": ["function-call", "image-recognition"], + "ownedBy": "openai" + }, + { + "id": "gpt-4-claude3-7-sonnet-20250219", + "name": "gpt-4-claude3-7-sonnet-20250219", + "capabilities": ["function-call", "image-recognition"], + "ownedBy": "openai" + }, + { + "id": "gpt-4-gizmo-*", + "name": "gpt-4-gizmo-*", + "capabilities": ["function-call", "image-recognition"], + "ownedBy": "openai" + }, + { + "id": "gpt-4-grok-3-all", + "name": "gpt-4-grok-3-all", + "capabilities": ["function-call", "image-recognition", "web-search"], + "ownedBy": "openai" + }, 
+ { + "id": "gpt-4-1-2025-04-14", + "name": "gpt-4-1-2025-04-14", + "capabilities": ["function-call", "image-recognition", "web-search"], + "ownedBy": "openai" + }, + { + "id": "gpt-4-1-nano-2025-04-14", + "name": "gpt-4-1-nano-2025-04-14", + "capabilities": ["function-call", "image-recognition"], + "ownedBy": "openai" + }, + { + "id": "gpt-4-5-preview", + "name": "gpt-4-5-preview", + "capabilities": ["function-call", "web-search"], + "ownedBy": "openai" + }, + { + "id": "gpt-4-5-preview-2025-02-27", + "name": "gpt-4-5-preview-2025-02-27", + "capabilities": ["function-call", "web-search"], + "ownedBy": "openai" + }, + { + "id": "gpt-4o-all", + "name": "gpt-4o-all", + "capabilities": ["function-call", "image-recognition", "web-search"], + "ownedBy": "openai" + }, + { + "id": "gpt-4o-audio-preview-2024-10-01", + "name": "gpt-4o-audio-preview-2024-10-01", + "capabilities": ["function-call", "image-recognition", "web-search"], + "ownedBy": "openai" + }, + { + "id": "gpt-4o-audio-preview-2024-12-17", + "name": "gpt-4o-audio-preview-2024-12-17", + "capabilities": ["function-call", "image-recognition", "web-search"], + "ownedBy": "openai" + }, + { + "id": "gpt-4o-lite", + "name": "gpt-4o-lite", + "capabilities": ["function-call", "image-recognition", "web-search"], + "ownedBy": "openai" + }, + { + "id": "gpt-4o-mini-audio-preview-2024-12-17", + "name": "gpt-4o-mini-audio-preview-2024-12-17", + "capabilities": ["function-call", "image-recognition", "web-search"], + "ownedBy": "openai" + }, + { + "id": "gpt-4o-mini-realtime-preview", + "name": "gpt-4o-mini-realtime-preview", + "capabilities": ["function-call", "image-recognition", "web-search"], + "ownedBy": "openai" + }, + { + "id": "gpt-4o-mini-realtime-preview-2024-12-17", + "name": "gpt-4o-mini-realtime-preview-2024-12-17", + "capabilities": ["function-call", "image-recognition", "web-search"], + "ownedBy": "openai" + }, + { + "id": "gpt-4o-mini-transcribe", + "name": "gpt-4o-mini-transcribe", + "capabilities": 
["function-call", "image-recognition", "web-search"], + "ownedBy": "openai" + }, + { + "id": "gpt-4o-mini-transcribe-2025-03-20", + "name": "gpt-4o-mini-transcribe-2025-03-20", + "capabilities": ["function-call", "image-recognition", "web-search"], + "ownedBy": "openai" + }, + { + "id": "gpt-4o-mini-tts-1", + "name": "gpt-4o-mini-tts-1", + "capabilities": ["audio-generation"], + "ownedBy": "openai" + }, + { + "id": "gpt-4o-mini-tts-2025-03-20", + "name": "gpt-4o-mini-tts-2025-03-20", + "capabilities": ["audio-generation"], + "ownedBy": "openai" + }, + { + "id": "gpt-4o-realtime-preview", + "name": "gpt-4o-realtime-preview", + "capabilities": ["function-call", "image-recognition", "web-search"], + "ownedBy": "openai" + }, + { + "id": "gpt-4o-realtime-preview-2024-10-01", + "name": "gpt-4o-realtime-preview-2024-10-01", + "capabilities": ["function-call", "image-recognition", "web-search"], + "ownedBy": "openai" + }, + { + "id": "gpt-4o-realtime-preview-2024-12-17", + "name": "gpt-4o-realtime-preview-2024-12-17", + "capabilities": ["function-call", "image-recognition", "web-search"], + "ownedBy": "openai" + }, + { + "id": "gpt-4o-realtime-preview-2025-06-03", + "name": "gpt-4o-realtime-preview-2025-06-03", + "capabilities": ["function-call", "image-recognition", "web-search"], + "ownedBy": "openai" + }, + { + "id": "gpt-4o-transcribe", + "name": "gpt-4o-transcribe", + "capabilities": ["function-call", "image-recognition", "web-search"], + "ownedBy": "openai" + }, + { + "id": "gpt-4o-transcribe-2025-03-20", + "name": "gpt-4o-transcribe-2025-03-20", + "capabilities": ["function-call", "image-recognition", "web-search"], + "ownedBy": "openai" + }, + { + "id": "gpt-5-2025-08-07", + "name": "gpt-5-2025-08-07", + "capabilities": ["function-call", "image-recognition", "web-search", "reasoning"], + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high", "max"] + }, + "ownedBy": "openai" + }, + { + "id": "gpt-5-all", + "name": "gpt-5-all", + "capabilities": 
["function-call", "image-recognition", "web-search", "reasoning"], + "reasoning": { + "supportedEfforts": ["minimal", "low", "medium", "high"] + }, + "ownedBy": "openai" + }, + { + "id": "gpt-5-chat-2025-08-07", + "name": "gpt-5-chat-2025-08-07", + "capabilities": ["image-recognition", "web-search", "reasoning"], + "reasoning": { + "supportedEfforts": ["minimal", "low", "medium", "high"] + }, + "ownedBy": "openai" + }, + { + "id": "gpt-5-mini-2025-08-07", + "name": "gpt-5-mini-2025-08-07", + "capabilities": ["function-call", "image-recognition", "web-search", "reasoning"], + "reasoning": { + "supportedEfforts": ["minimal", "low", "medium", "high"] + }, + "ownedBy": "openai" + }, + { + "id": "gpt-5-nano-2025-08-07", + "name": "gpt-5-nano-2025-08-07", + "capabilities": ["function-call", "image-recognition", "web-search", "reasoning"], + "reasoning": { + "supportedEfforts": ["minimal", "low", "medium", "high"] + }, + "ownedBy": "openai" + }, + { + "id": "gpt-5-pro-2025-10-06", + "name": "gpt-5-pro-2025-10-06", + "capabilities": ["function-call", "image-recognition", "web-search", "reasoning"], + "reasoning": { + "supportedEfforts": ["high"] + }, + "ownedBy": "openai" + }, + { + "id": "gpt-5-thinking-all", + "name": "gpt-5-thinking-all", + "capabilities": ["reasoning", "function-call", "image-recognition", "web-search"], + "reasoning": { + "supportedEfforts": ["minimal", "low", "medium", "high"] + }, + "ownedBy": "openai" + }, + { + "id": "gpt-5-1-2025-11-13", + "name": "gpt-5-1-2025-11-13", + "capabilities": ["function-call", "image-recognition", "web-search", "reasoning"], + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"] + }, + "ownedBy": "openai" + }, + { + "id": "gpt-5-2-2025-12-11", + "name": "gpt-5-2-2025-12-11", + "capabilities": ["function-call", "image-recognition", "web-search", "reasoning"], + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high", "max"] + }, + "ownedBy": "openai" + }, + { + "id": "gpt-oss-1", + 
"name": "gpt-oss-1", + "capabilities": ["reasoning"], + "reasoning": { + "supportedEfforts": ["low", "medium", "high"] + }, + "ownedBy": "openai" + }, + { + "id": "grok-3-all", + "name": "grok-3-all", + "capabilities": ["function-call", "web-search"], + "ownedBy": "xai" + }, + { + "id": "grok-3-deepersearch", + "name": "grok-3-deepersearch", + "capabilities": ["function-call", "web-search"], + "ownedBy": "xai" + }, + { + "id": "grok-3-deepsearch", + "name": "grok-3-deepsearch", + "capabilities": ["function-call", "web-search"], + "ownedBy": "xai" + }, + { + "id": "grok-3-deepsearch-all", + "name": "grok-3-deepsearch-all", + "capabilities": ["function-call", "web-search"], + "ownedBy": "xai" + }, + { + "id": "grok-3-image", + "name": "grok-3-image", + "capabilities": ["function-call", "image-generation", "web-search"], + "ownedBy": "xai" + }, + { + "id": "grok-3-reasoner", + "name": "grok-3-reasoner", + "capabilities": ["reasoning", "function-call", "web-search"], + "ownedBy": "xai" + }, + { + "id": "grok-3-reasoner-all", + "name": "grok-3-reasoner-all", + "capabilities": ["reasoning", "function-call", "web-search"], + "ownedBy": "xai" + }, + { + "id": "gte-rerank", + "name": "gte-rerank", + "capabilities": ["embedding", "rerank"], + "ownedBy": "alibaba" + }, + { + "id": "hailuo", + "name": "hailuo", + "ownedBy": "ocoolai" + }, + { + "id": "hunyuan-code", + "name": "hunyuan-code", + "capabilities": ["function-call"], + "ownedBy": "tencent" + }, + { + "id": "hunyuan-embedding", + "name": "hunyuan-embedding", + "capabilities": ["function-call", "embedding"], + "ownedBy": "tencent" + }, + { + "id": "hunyuan-functioncall", + "name": "hunyuan-functioncall", + "capabilities": ["function-call"], + "ownedBy": "tencent" + }, + { + "id": "hunyuan-large", + "name": "hunyuan-large", + "capabilities": ["function-call"], + "ownedBy": "tencent" + }, + { + "id": "hunyuan-large-longcontext", + "name": "hunyuan-large-longcontext", + "capabilities": ["function-call"], + "ownedBy": 
"tencent" + }, + { + "id": "hunyuan-lite", + "name": "hunyuan-lite", + "capabilities": ["function-call"], + "ownedBy": "tencent" + }, + { + "id": "hunyuan-pro", + "name": "hunyuan-pro", + "capabilities": ["function-call"], + "ownedBy": "tencent" + }, + { + "id": "hunyuan-role", + "name": "hunyuan-role", + "capabilities": ["function-call"], + "ownedBy": "tencent" + }, + { + "id": "hunyuan-standard", + "name": "hunyuan-standard", + "capabilities": ["function-call"], + "ownedBy": "tencent" + }, + { + "id": "hunyuan-t1-20250321", + "name": "hunyuan-t1-20250321", + "capabilities": ["reasoning", "function-call"], + "ownedBy": "tencent" + }, + { + "id": "hunyuan-t1-latest", + "name": "hunyuan-t1-latest", + "capabilities": ["reasoning", "function-call"], + "ownedBy": "tencent" + }, + { + "id": "hunyuan-turbo", + "name": "hunyuan-turbo", + "capabilities": ["function-call"], + "ownedBy": "tencent" + }, + { + "id": "hunyuan-turbo-vision", + "name": "hunyuan-turbo-vision", + "capabilities": ["function-call", "image-recognition"], + "ownedBy": "tencent" + }, + { + "id": "hunyuan-turbos-20250226", + "name": "hunyuan-turbos-20250226", + "capabilities": ["function-call"], + "ownedBy": "tencent" + }, + { + "id": "hunyuan-turbos-20250313", + "name": "hunyuan-turbos-20250313", + "capabilities": ["function-call"], + "ownedBy": "tencent" + }, + { + "id": "hunyuan-turbos-latest", + "name": "hunyuan-turbos-latest", + "capabilities": ["function-call"], + "ownedBy": "tencent" + }, + { + "id": "hunyuan-vision", + "name": "hunyuan-vision", + "capabilities": ["function-call", "image-recognition"], + "ownedBy": "tencent" + }, + { + "id": "jimeng-3-0", + "name": "jimeng-3-0", + "ownedBy": "ocoolai" + }, + { + "id": "jina-clip-v1", + "name": "jina-clip-v1", + "ownedBy": "jina" + }, + { + "id": "jina-colbert-v1-en", + "name": "jina-colbert-v1-en", + "ownedBy": "jina" + }, + { + "id": "jina-embeddings-v2-base-de", + "name": "jina-embeddings-v2-base-de", + "capabilities": ["embedding"], + 
"ownedBy": "jina" + }, + { + "id": "jina-embeddings-v2-base-en", + "name": "jina-embeddings-v2-base-en", + "capabilities": ["embedding"], + "ownedBy": "jina" + }, + { + "id": "jina-embeddings-v2-base-es", + "name": "jina-embeddings-v2-base-es", + "capabilities": ["embedding"], + "ownedBy": "jina" + }, + { + "id": "jina-embeddings-v2-base-zh", + "name": "jina-embeddings-v2-base-zh", + "capabilities": ["embedding"], + "ownedBy": "jina" + }, + { + "id": "jina-reranker-v1-base-en", + "name": "jina-reranker-v1-base-en", + "capabilities": ["rerank"], + "ownedBy": "jina" + }, + { + "id": "jina-reranker-v1-tiny-en", + "name": "jina-reranker-v1-tiny-en", + "capabilities": ["rerank"], + "ownedBy": "jina" + }, + { + "id": "jina-reranker-v1-turbo-en", + "name": "jina-reranker-v1-turbo-en", + "capabilities": ["rerank"], + "ownedBy": "jina" + }, + { + "id": "jina-reranker-v2-base-multilingual", + "name": "jina-reranker-v2-base-multilingual", + "capabilities": ["rerank"], + "ownedBy": "jina" + }, + { + "id": "kat-dev-exp", + "name": "kat-dev-exp", + "ownedBy": "streamlake" + }, + { + "id": "kimi-k2-250905", + "name": "kimi-k2-250905", + "capabilities": ["function-call"], + "ownedBy": "moonshot" + }, + { + "id": "kimi-latest", + "name": "kimi-latest", + "capabilities": ["image-recognition"], + "ownedBy": "moonshot" + }, + { + "id": "luma-video", + "name": "luma-video", + "capabilities": ["video-generation"], + "ownedBy": "ocoolai" + }, + { + "id": "llama-4", + "name": "llama-4", + "capabilities": ["function-call", "image-recognition"], + "ownedBy": "meta" + }, + { + "id": "midjourney", + "name": "midjourney", + "capabilities": ["image-generation"], + "ownedBy": "ocoolai" + }, + { + "id": "mj-chat", + "name": "mj-chat", + "ownedBy": "ocoolai" + }, + { + "id": "nano-banana-2", + "name": "nano-banana-2", + "ownedBy": "google" + }, + { + "id": "net-glm-3-turbo", + "name": "net-glm-3-turbo", + "ownedBy": "ocoolai" + }, + { + "id": "net-gpt-3-5-turbo", + "name": "net-gpt-3-5-turbo", + 
"ownedBy": "ocoolai" + }, + { + "id": "net-gpt-3-5-turbo-16k", + "name": "net-gpt-3-5-turbo-16k", + "ownedBy": "ocoolai" + }, + { + "id": "net-gpt-4", + "name": "net-gpt-4", + "ownedBy": "ocoolai" + }, + { + "id": "net-gpt-4-0125-preview", + "name": "net-gpt-4-0125-preview", + "capabilities": ["function-call"], + "ownedBy": "ocoolai" + }, + { + "id": "net-gpt-4-1106-preview", + "name": "net-gpt-4-1106-preview", + "capabilities": ["function-call"], + "ownedBy": "ocoolai" + }, + { + "id": "net-gpt-4-turbo-preview", + "name": "net-gpt-4-turbo-preview", + "capabilities": ["function-call"], + "ownedBy": "ocoolai" + }, + { + "id": "o1-all", + "name": "o1-all", + "capabilities": ["reasoning", "function-call", "image-recognition"], + "reasoning": { + "supportedEfforts": ["low", "medium", "high"] + }, + "ownedBy": "openai" + }, + { + "id": "o1-pro-all", + "name": "o1-pro-all", + "capabilities": ["reasoning", "function-call", "image-recognition"], + "reasoning": { + "supportedEfforts": ["low", "medium", "high"] + }, + "ownedBy": "openai" + }, + { + "id": "o3-2025-04-16", + "name": "o3-2025-04-16", + "capabilities": ["reasoning", "function-call", "image-recognition", "web-search"], + "ownedBy": "openai" + }, + { + "id": "o3-mini-2025-01-31", + "name": "o3-mini-2025-01-31", + "capabilities": ["reasoning", "function-call", "web-search"], + "reasoning": { + "supportedEfforts": ["low", "medium", "high"] + }, + "ownedBy": "openai" + }, + { + "id": "o3-mini-all", + "name": "o3-mini-all", + "capabilities": ["reasoning", "function-call", "web-search"], + "reasoning": { + "supportedEfforts": ["low", "medium", "high"] + }, + "ownedBy": "openai" + }, + { + "id": "o3-mini-high-all", + "name": "o3-mini-high-all", + "capabilities": ["reasoning", "function-call", "web-search"], + "reasoning": { + "supportedEfforts": ["low", "medium", "high"] + }, + "ownedBy": "openai" + }, + { + "id": "o3-pro-2025-06-10", + "name": "o3-pro-2025-06-10", + "capabilities": ["reasoning", "function-call", 
"image-recognition", "web-search"], + "reasoning": { + "supportedEfforts": ["low", "medium", "high"] + }, + "ownedBy": "openai" + }, + { + "id": "o4-mini-2025-04-16", + "name": "o4-mini-2025-04-16", + "capabilities": ["reasoning", "function-call", "image-recognition", "web-search"], + "reasoning": { + "supportedEfforts": ["low", "medium", "high"] + }, + "ownedBy": "openai" + }, + { + "id": "ocoolai-helper", + "name": "ocoolai-helper", + "ownedBy": "ocoolai" + }, + { + "id": "omni-moderation-2024-09-26", + "name": "omni-moderation-2024-09-26", + "ownedBy": "ocoolai" + }, + { + "id": "parse-pdf", + "name": "parse-pdf", + "ownedBy": "ocoolai" + }, + { + "id": "pika-text-to-video", + "name": "pika-text-to-video", + "capabilities": ["video-generation"], + "ownedBy": "ocoolai" + }, + { + "id": "qwen-chat", + "name": "qwen-chat", + "capabilities": ["function-call"], + "ownedBy": "alibaba" + }, + { + "id": "qwen-coder-plus", + "name": "qwen-coder-plus", + "capabilities": ["function-call"], + "ownedBy": "alibaba" + }, + { + "id": "qwen-coder-plus-latest", + "name": "qwen-coder-plus-latest", + "capabilities": ["function-call"], + "ownedBy": "alibaba" + }, + { + "id": "qwen-coder-turbo", + "name": "qwen-coder-turbo", + "capabilities": ["function-call"], + "ownedBy": "alibaba" + }, + { + "id": "qwen-coder-turbo-latest", + "name": "qwen-coder-turbo-latest", + "capabilities": ["function-call"], + "ownedBy": "alibaba" + }, + { + "id": "qwen-math-plus-latest", + "name": "qwen-math-plus-latest", + "capabilities": ["function-call"], + "ownedBy": "alibaba" + }, + { + "id": "qwen-math-turbo-latest", + "name": "qwen-math-turbo-latest", + "capabilities": ["function-call"], + "ownedBy": "alibaba" + }, + { + "id": "qwen-max-fast", + "name": "qwen-max-fast", + "capabilities": ["function-call"], + "ownedBy": "alibaba" + }, + { + "id": "qwen-vl-max-latest", + "name": "qwen-vl-max-latest", + "capabilities": ["function-call", "image-recognition"], + "ownedBy": "alibaba" + }, + { + "id": 
"qwen-vl-ocr-latest", + "name": "qwen-vl-ocr-latest", + "capabilities": ["function-call", "image-recognition", "file-input"], + "ownedBy": "alibaba" + }, + { + "id": "qwen-vl-plus-latest", + "name": "qwen-vl-plus-latest", + "capabilities": ["function-call", "image-recognition"], + "ownedBy": "alibaba" + }, + { + "id": "qwen1-5-chat", + "name": "qwen1-5-chat", + "capabilities": ["function-call"], + "ownedBy": "alibaba" + }, + { + "id": "qwen2-math-instruct", + "name": "qwen2-math-instruct", + "capabilities": ["function-call"], + "ownedBy": "alibaba" + }, + { + "id": "qwen3-coder-flash-2025-07-28", + "name": "qwen3-coder-flash-2025-07-28", + "capabilities": ["function-call"], + "ownedBy": "alibaba" + }, + { + "id": "research", + "name": "research", + "ownedBy": "ocoolai" + }, + { + "id": "research-scholar", + "name": "research-scholar", + "ownedBy": "ocoolai" + }, + { + "id": "sambert-v1", + "name": "sambert-v1", + "ownedBy": "ocoolai" + }, + { + "id": "search-gpts-chat", + "name": "search-gpts-chat", + "ownedBy": "ocoolai" + }, + { + "id": "seedream-4-0-250828", + "name": "seedream-4-0-250828", + "ownedBy": "bytedance" + }, + { + "id": "sora", + "name": "sora", + "capabilities": ["video-generation"], + "ownedBy": "openai" + }, + { + "id": "sparkdesk-4-0-ultra", + "name": "sparkdesk-4-0-ultra", + "ownedBy": "ocoolai" + }, + { + "id": "sparkdesk-lite", + "name": "sparkdesk-lite", + "ownedBy": "ocoolai" + }, + { + "id": "sparkdesk-max", + "name": "sparkdesk-max", + "ownedBy": "ocoolai" + }, + { + "id": "sparkdesk-max-32k", + "name": "sparkdesk-max-32k", + "ownedBy": "ocoolai" + }, + { + "id": "sparkdesk-pro", + "name": "sparkdesk-pro", + "ownedBy": "ocoolai" + }, + { + "id": "sparkdesk-pro-128k", + "name": "sparkdesk-pro-128k", + "ownedBy": "ocoolai" + }, + { + "id": "speech-01-hd", + "name": "speech-01-hd", + "ownedBy": "ocoolai" + }, + { + "id": "speech-01-turbo", + "name": "speech-01-turbo", + "ownedBy": "ocoolai" + }, + { + "id": "speech-02-hd", + "name": 
"speech-02-hd", + "ownedBy": "ocoolai" + }, + { + "id": "speech-02-turbo", + "name": "speech-02-turbo", + "ownedBy": "ocoolai" + }, + { + "id": "stable-diffusion", + "name": "stable-diffusion", + "capabilities": ["image-generation"], + "ownedBy": "stability" + }, + { + "id": "step-1-128k", + "name": "step-1-128k", + "ownedBy": "stepfun" + }, + { + "id": "step-1-256k", + "name": "step-1-256k", + "ownedBy": "stepfun" + }, + { + "id": "step-1-8k", + "name": "step-1-8k", + "ownedBy": "stepfun" + }, + { + "id": "step-1-flash", + "name": "step-1-flash", + "ownedBy": "stepfun" + }, + { + "id": "step-1-5v-mini", + "name": "step-1-5v-mini", + "ownedBy": "stepfun" + }, + { + "id": "step-1o-turbo-vision", + "name": "step-1o-turbo-vision", + "capabilities": ["image-recognition"], + "ownedBy": "stepfun" + }, + { + "id": "step-1o-vision-32k", + "name": "step-1o-vision-32k", + "capabilities": ["image-recognition"], + "ownedBy": "stepfun" + }, + { + "id": "step-1v-32k", + "name": "step-1v-32k", + "ownedBy": "stepfun" + }, + { + "id": "step-1v-8k", + "name": "step-1v-8k", + "ownedBy": "stepfun" + }, + { + "id": "step-2-16k-exp", + "name": "step-2-16k-exp", + "ownedBy": "stepfun" + }, + { + "id": "step-2-mini", + "name": "step-2-mini", + "ownedBy": "stepfun" + }, + { + "id": "step-r1-v-mini", + "name": "step-r1-v-mini", + "capabilities": ["reasoning"], + "ownedBy": "stepfun" + }, + { + "id": "step-tts-mini", + "name": "step-tts-mini", + "capabilities": ["audio-generation"], + "ownedBy": "stepfun" + }, + { + "id": "suno", + "name": "suno", + "ownedBy": "suno" + }, + { + "id": "suno-v3", + "name": "suno-v3", + "ownedBy": "suno" + }, + { + "id": "text-embedding-v2", + "name": "text-embedding-v2", + "capabilities": ["embedding"], + "ownedBy": "alibaba" + }, + { + "id": "text-embedding-v3", + "name": "text-embedding-v3", + "capabilities": ["embedding"], + "ownedBy": "alibaba" + }, + { + "id": "tts-hd-1", + "name": "tts-hd-1", + "capabilities": ["audio-generation"], + "ownedBy": "openai" 
+ }, + { + "id": "url-analysis", + "name": "url-analysis", + "ownedBy": "ocoolai" + }, + { + "id": "yi-vision-v2", + "name": "yi-vision-v2", + "capabilities": ["image-recognition"], + "ownedBy": "01ai" + }, + { + "id": "seedance-get", + "name": "seedance-get", + "ownedBy": "bytedance" + }, + { + "id": "qwen-turbo-2024-02-06", + "name": "qwen-turbo-2024-02-06", + "capabilities": ["function-call", "reasoning"], + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 0, + "max": 38912 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "deepseek-v3-1-sn", + "name": "deepseek-v3-1-sn", + "capabilities": ["reasoning", "function-call"], + "reasoning": { + "supportedEfforts": ["none"] + }, + "ownedBy": "deepseek" + }, + { + "id": "mj_fast_blend", + "name": "mj_fast_blend", + "ownedBy": "dmxapi" + }, + { + "id": "mj_fast_upscale", + "name": "mj_fast_upscale", + "ownedBy": "dmxapi" + }, + { + "id": "qwen-mt-flash", + "name": "qwen-mt-flash", + "ownedBy": "alibaba" + }, + { + "id": "speech-2-5-turbo-preview", + "name": "speech-2-5-turbo-preview", + "ownedBy": "dmxapi" + }, + { + "id": "wan2-6-image", + "name": "wan2-6-image", + "capabilities": ["image-generation", "video-generation"], + "ownedBy": "alibaba" + }, + { + "id": "clh45think-20251001", + "name": "clh45think-20251001", + "ownedBy": "dmxapi" + }, + { + "id": "mj_turbo_upscale_creative", + "name": "mj_turbo_upscale_creative", + "ownedBy": "dmxapi" + }, + { + "id": "qwen-plus-2024-11-25", + "name": "qwen-plus-2024-11-25", + "capabilities": ["function-call", "reasoning"], + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 0, + "max": 81920 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "qwen-vl-plus-2024-08-09", + "name": "qwen-vl-plus-2024-08-09", + "capabilities": ["function-call", "image-recognition"], + "ownedBy": "alibaba" + }, + { + "id": "yi-spark", + "name": "yi-spark", + "ownedBy": "01ai" + }, + { + 
"id": "kling_multi_elements_add", + "name": "kling_multi_elements_add", + "capabilities": ["video-generation"], + "ownedBy": "kling" + }, + { + "id": "mj_fast_modal", + "name": "mj_fast_modal", + "ownedBy": "dmxapi" + }, + { + "id": "deepgeminiflash-liu", + "name": "deepgeminiflash-liu", + "ownedBy": "dmxapi" + }, + { + "id": "mj_relax_pic_reader", + "name": "mj_relax_pic_reader", + "ownedBy": "dmxapi" + }, + { + "id": "qwen3-5-plus-2026-02-15", + "name": "qwen3-5-plus-2026-02-15", + "capabilities": ["function-call", "reasoning"], + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 38912 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "mj_fast_upscale_2x", + "name": "mj_fast_upscale_2x", + "ownedBy": "dmxapi" + }, + { + "id": "mj_relax_zoom", + "name": "mj_relax_zoom", + "ownedBy": "dmxapi" + }, + { + "id": "mj_fast_inpaint", + "name": "mj_fast_inpaint", + "ownedBy": "dmxapi" + }, + { + "id": "vidu-get", + "name": "vidu-get", + "capabilities": ["video-generation"], + "ownedBy": "vidu" + }, + { + "id": "wan2-6-get", + "name": "wan2-6-get", + "capabilities": ["video-generation"], + "ownedBy": "alibaba" + }, + { + "id": "g-p-t-5-2", + "name": "g-p-t-5-2", + "ownedBy": "dmxapi" + }, + { + "id": "qwen-turbo-2025-02-11", + "name": "qwen-turbo-2025-02-11", + "capabilities": ["function-call", "reasoning"], + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 0, + "max": 38912 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "gpt-5-web", + "name": "gpt-5-web", + "capabilities": ["function-call", "image-recognition", "web-search", "reasoning"], + "reasoning": { + "supportedEfforts": ["minimal", "low", "medium", "high"] + }, + "ownedBy": "openai" + }, + { + "id": "minimax-hailuo-02", + "name": "minimax-hailuo-02", + "ownedBy": "minimax" + }, + { + "id": "yaya-duck-cute-180", + "name": "yaya-duck-cute-180", + "ownedBy": "dmxapi" + }, + { + "id": 
"mj_relax_pan", + "name": "mj_relax_pan", + "ownedBy": "dmxapi" + }, + { + "id": "clo20251101", + "name": "clo20251101", + "ownedBy": "dmxapi" + }, + { + "id": "gpt-5-2-pro-responses", + "name": "gpt-5-2-pro-responses", + "capabilities": ["function-call", "image-recognition", "web-search", "reasoning"], + "reasoning": { + "supportedEfforts": ["medium", "high", "max"] + }, + "ownedBy": "openai" + }, + { + "id": "minimax-hailuo-2-3-fast", + "name": "minimax-hailuo-2-3-fast", + "ownedBy": "minimax" + }, + { + "id": "mj_relax_modal", + "name": "mj_relax_modal", + "ownedBy": "dmxapi" + }, + { + "id": "mj_turbo_shorten", + "name": "mj_turbo_shorten", + "ownedBy": "dmxapi" + }, + { + "id": "somark", + "name": "somark", + "ownedBy": "dmxapi" + }, + { + "id": "mj_fast_high_variation", + "name": "mj_fast_high_variation", + "ownedBy": "dmxapi" + }, + { + "id": "deepgeminipro-liu", + "name": "deepgeminipro-liu", + "ownedBy": "dmxapi" + }, + { + "id": "mj_relax_edits", + "name": "mj_relax_edits", + "ownedBy": "dmxapi" + }, + { + "id": "qwen-vl-chat-v1", + "name": "qwen-vl-chat-v1", + "capabilities": ["function-call", "image-recognition"], + "ownedBy": "alibaba" + }, + { + "id": "abab7-chat-preview", + "name": "abab7-chat-preview", + "ownedBy": "minimax" + }, + { + "id": "kling_audio_video_to_audio", + "name": "kling_audio_video_to_audio", + "capabilities": ["video-generation"], + "ownedBy": "kling" + }, + { + "id": "g-p-t-5-1-codex", + "name": "g-p-t-5-1-codex", + "ownedBy": "dmxapi" + }, + { + "id": "kling_multi_elements_submit", + "name": "kling_multi_elements_submit", + "capabilities": ["video-generation"], + "ownedBy": "kling" + }, + { + "id": "m2-her", + "name": "m2-her", + "ownedBy": "dmxapi" + }, + { + "id": "deepseek-r1-plus", + "name": "deepseek-r1-plus", + "capabilities": ["reasoning", "function-call"], + "ownedBy": "deepseek" + }, + { + "id": "doubao-embedding-vision-251215", + "name": "doubao-embedding-vision-251215", + "capabilities": ["embedding", 
"image-recognition"], + "ownedBy": "bytedance" + }, + { + "id": "qwen-vl-max-2024-11-19", + "name": "qwen-vl-max-2024-11-19", + "capabilities": ["function-call", "image-recognition"], + "ownedBy": "alibaba" + }, + { + "id": "g3-flash-preview", + "name": "g3-flash-preview", + "ownedBy": "dmxapi" + }, + { + "id": "gui-plus", + "name": "gui-plus", + "ownedBy": "dmxapi" + }, + { + "id": "mj_fast_upscale_subtle", + "name": "mj_fast_upscale_subtle", + "ownedBy": "dmxapi" + }, + { + "id": "baichuan-m2-plus", + "name": "baichuan-m2-plus", + "capabilities": ["reasoning"], + "ownedBy": "baichuan" + }, + { + "id": "mj_turbo_prompt_analyzer", + "name": "mj_turbo_prompt_analyzer", + "ownedBy": "dmxapi" + }, + { + "id": "qwen-vl-max-2024-08-09", + "name": "qwen-vl-max-2024-08-09", + "capabilities": ["function-call", "image-recognition"], + "ownedBy": "alibaba" + }, + { + "id": "g-p-t-5-1", + "name": "g-p-t-5-1", + "ownedBy": "dmxapi" + }, + { + "id": "qwen-coder-plus-2024-11-06", + "name": "qwen-coder-plus-2024-11-06", + "capabilities": ["function-call"], + "ownedBy": "alibaba" + }, + { + "id": "qwen-math-plus-2024-09-19", + "name": "qwen-math-plus-2024-09-19", + "capabilities": ["function-call"], + "ownedBy": "alibaba" + }, + { + "id": "speech-2-6-hd", + "name": "speech-2-6-hd", + "ownedBy": "dmxapi" + }, + { + "id": "mj_turbo_upload", + "name": "mj_turbo_upload", + "ownedBy": "dmxapi" + }, + { + "id": "mj_turbo_video", + "name": "mj_turbo_video", + "capabilities": ["video-generation"], + "ownedBy": "dmxapi" + }, + { + "id": "deep-research", + "name": "deep-research", + "ownedBy": "dmxapi" + }, + { + "id": "ernie-lite-8k-0308", + "name": "ernie-lite-8k-0308", + "ownedBy": "baidu" + }, + { + "id": "qwen-audio-turbo-2024-12-04", + "name": "qwen-audio-turbo-2024-12-04", + "capabilities": ["function-call"], + "ownedBy": "alibaba" + }, + { + "id": "mj_turbo_custom_zoom", + "name": "mj_turbo_custom_zoom", + "ownedBy": "dmxapi" + }, + { + "id": "mj_turbo_edits", + "name": 
"mj_turbo_edits", + "ownedBy": "dmxapi" + }, + { + "id": "mj_turbo_prompt_analyzer_extended", + "name": "mj_turbo_prompt_analyzer_extended", + "ownedBy": "dmxapi" + }, + { + "id": "qwen-max-2024-09-19", + "name": "qwen-max-2024-09-19", + "capabilities": ["function-call"], + "ownedBy": "alibaba" + }, + { + "id": "doubao-seed-2-0-lite-260215", + "name": "doubao-seed-2-0-lite-260215", + "capabilities": ["function-call", "reasoning"], + "reasoning": { + "supportedEfforts": ["minimal", "low", "medium", "high"] + }, + "ownedBy": "bytedance" + }, + { + "id": "kling_audio_text_to_audio", + "name": "kling_audio_text_to_audio", + "capabilities": ["video-generation"], + "ownedBy": "kling" + }, + { + "id": "mj_relax_upscale_subtle", + "name": "mj_relax_upscale_subtle", + "ownedBy": "dmxapi" + }, + { + "id": "hunyuan-a13b", + "name": "hunyuan-a13b", + "capabilities": ["function-call", "reasoning"], + "reasoning": { + "supportedEfforts": ["none"] + }, + "ownedBy": "tencent" + }, + { + "id": "kling-text2video-get", + "name": "kling-text2video-get", + "capabilities": ["video-generation"], + "ownedBy": "kling" + }, + { + "id": "kling-v2-6-image2video", + "name": "kling-v2-6-image2video", + "capabilities": ["image-generation", "video-generation"], + "ownedBy": "kling" + }, + { + "id": "grok-4-1-non-thinking", + "name": "grok-4-1-non-thinking", + "capabilities": ["reasoning", "function-call", "image-recognition", "web-search"], + "ownedBy": "xai" + }, + { + "id": "mj_relax_imagine", + "name": "mj_relax_imagine", + "ownedBy": "dmxapi" + }, + { + "id": "mj_fast_variation", + "name": "mj_fast_variation", + "ownedBy": "dmxapi" + }, + { + "id": "mj_relax_prompt_analyzer", + "name": "mj_relax_prompt_analyzer", + "ownedBy": "dmxapi" + }, + { + "id": "mj_turbo_reroll", + "name": "mj_turbo_reroll", + "ownedBy": "dmxapi" + }, + { + "id": "kling_image_expand", + "name": "kling_image_expand", + "capabilities": ["image-generation", "video-generation"], + "ownedBy": "kling" + }, + { + "id": 
"qwen-audio-turbo-latest", + "name": "qwen-audio-turbo-latest", + "capabilities": ["function-call"], + "ownedBy": "alibaba" + }, + { + "id": "seedgeminipro-liu", + "name": "seedgeminipro-liu", + "ownedBy": "bytedance" + }, + { + "id": "hehe-tywd", + "name": "hehe-tywd", + "ownedBy": "dmxapi" + }, + { + "id": "multimodal-embedding-v1", + "name": "multimodal-embedding-v1", + "capabilities": ["embedding"], + "ownedBy": "dmxapi" + }, + { + "id": "minimax-clone-lastversion", + "name": "minimax-clone-lastversion", + "ownedBy": "minimax" + }, + { + "id": "qwen-turbo-2024-09-19", + "name": "qwen-turbo-2024-09-19", + "capabilities": ["function-call", "reasoning"], + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 0, + "max": 38912 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "qwen-vl-max-2024-10-30", + "name": "qwen-vl-max-2024-10-30", + "capabilities": ["function-call", "image-recognition"], + "ownedBy": "alibaba" + }, + { + "id": "clo-4-6", + "name": "clo-4-6", + "ownedBy": "dmxapi" + }, + { + "id": "kling_lip_sync", + "name": "kling_lip_sync", + "capabilities": ["video-generation"], + "ownedBy": "kling" + }, + { + "id": "mj_relax_custom_zoom", + "name": "mj_relax_custom_zoom", + "ownedBy": "dmxapi" + }, + { + "id": "mj_fast_pan", + "name": "mj_fast_pan", + "ownedBy": "dmxapi" + }, + { + "id": "mj_relax_upscale", + "name": "mj_relax_upscale", + "ownedBy": "dmxapi" + }, + { + "id": "qwen-plus-2024-11-27", + "name": "qwen-plus-2024-11-27", + "capabilities": ["function-call", "reasoning"], + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 0, + "max": 81920 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "gemini-3-pro-image-preview-dfsx-0-3", + "name": "gemini-3-pro-image-preview-dfsx-0-3", + "capabilities": ["function-call", "image-recognition", "image-generation", "web-search"], + "ownedBy": "google" + }, + { + "id": "gpt-image-1-dmx01", + "name": 
"gpt-image-1-dmx01", + "capabilities": ["image-generation"], + "ownedBy": "openai" + }, + { + "id": "mj_relax_video", + "name": "mj_relax_video", + "capabilities": ["video-generation"], + "ownedBy": "dmxapi" + }, + { + "id": "mj_turbo_variation", + "name": "mj_turbo_variation", + "ownedBy": "dmxapi" + }, + { + "id": "mj_fast_imagine", + "name": "mj_fast_imagine", + "ownedBy": "dmxapi" + }, + { + "id": "kling_extend", + "name": "kling_extend", + "capabilities": ["video-generation"], + "ownedBy": "kling" + }, + { + "id": "paiwo-picture", + "name": "paiwo-picture", + "ownedBy": "dmxapi" + }, + { + "id": "mj_relax_blend", + "name": "mj_relax_blend", + "ownedBy": "dmxapi" + }, + { + "id": "g-p-t-5-codex", + "name": "g-p-t-5-codex", + "ownedBy": "dmxapi" + }, + { + "id": "glm-4-0520", + "name": "glm-4-0520", + "capabilities": ["function-call"], + "ownedBy": "zhipu" + }, + { + "id": "clo-45", + "name": "clo-45", + "ownedBy": "dmxapi" + }, + { + "id": "cls45-0929", + "name": "cls45-0929", + "ownedBy": "dmxapi" + }, + { + "id": "mj_relax_background_eraser", + "name": "mj_relax_background_eraser", + "ownedBy": "dmxapi" + }, + { + "id": "mj_relax_high_variation", + "name": "mj_relax_high_variation", + "ownedBy": "dmxapi" + }, + { + "id": "doubao-seededit-3-0-i2i-250628", + "name": "doubao-seededit-3-0-i2i-250628", + "capabilities": ["function-call"], + "ownedBy": "bytedance" + }, + { + "id": "mj_fast_prompt_analyzer_extended", + "name": "mj_fast_prompt_analyzer_extended", + "ownedBy": "dmxapi" + }, + { + "id": "qwen-coder-turbo-2024-09-19", + "name": "qwen-coder-turbo-2024-09-19", + "capabilities": ["function-call"], + "ownedBy": "alibaba" + }, + { + "id": "viduq2-pro", + "name": "viduq2-pro", + "capabilities": ["video-generation"], + "ownedBy": "vidu" + }, + { + "id": "paiwo-v5-6-ttv", + "name": "paiwo-v5-6-ttv", + "ownedBy": "dmxapi" + }, + { + "id": "kling_multi_elements_init", + "name": "kling_multi_elements_init", + "capabilities": ["video-generation"], + "ownedBy": 
"kling" + }, + { + "id": "clh45-20251001", + "name": "clh45-20251001", + "ownedBy": "dmxapi" + }, + { + "id": "do-sd-wan", + "name": "do-sd-wan", + "capabilities": ["video-generation"], + "ownedBy": "dmxapi" + }, + { + "id": "qwen-plus-2024-12-20", + "name": "qwen-plus-2024-12-20", + "capabilities": ["function-call", "reasoning"], + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 0, + "max": 81920 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "speech-2-5-hd-preview", + "name": "speech-2-5-hd-preview", + "ownedBy": "dmxapi" + }, + { + "id": "yi-vision", + "name": "yi-vision", + "capabilities": ["image-recognition"], + "ownedBy": "01ai" + }, + { + "id": "mj_fast_upload", + "name": "mj_fast_upload", + "ownedBy": "dmxapi" + }, + { + "id": "grok-imagine-0-9", + "name": "grok-imagine-0-9", + "capabilities": ["web-search"], + "ownedBy": "xai" + }, + { + "id": "mj_fast_background_eraser", + "name": "mj_fast_background_eraser", + "ownedBy": "dmxapi" + }, + { + "id": "qwen-vl-max-2024-02-01", + "name": "qwen-vl-max-2024-02-01", + "capabilities": ["function-call", "image-recognition"], + "ownedBy": "alibaba" + }, + { + "id": "imagen4", + "name": "imagen4", + "capabilities": ["image-generation"], + "ownedBy": "google" + }, + { + "id": "mj_relax_variation", + "name": "mj_relax_variation", + "ownedBy": "dmxapi" + }, + { + "id": "tts-pro", + "name": "tts-pro", + "capabilities": ["audio-generation"], + "ownedBy": "openai" + }, + { + "id": "qwen-plus-2024-08-06", + "name": "qwen-plus-2024-08-06", + "capabilities": ["function-call", "reasoning"], + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 0, + "max": 81920 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "mj_fast_shorten", + "name": "mj_fast_shorten", + "ownedBy": "dmxapi" + }, + { + "id": "qwen-vl-v1", + "name": "qwen-vl-v1", + "capabilities": ["function-call", "image-recognition"], + "ownedBy": 
"alibaba" + }, + { + "id": "ernie-lite-8k-0922", + "name": "ernie-lite-8k-0922", + "ownedBy": "baidu" + }, + { + "id": "mj_turbo_zoom", + "name": "mj_turbo_zoom", + "ownedBy": "dmxapi" + }, + { + "id": "qh-4k", + "name": "qh-4k", + "ownedBy": "dmxapi" + }, + { + "id": "qwen-plus-2024-09-19", + "name": "qwen-plus-2024-09-19", + "capabilities": ["function-call", "reasoning"], + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 0, + "max": 81920 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "mj_relax_upscale_4x", + "name": "mj_relax_upscale_4x", + "ownedBy": "dmxapi" + }, + { + "id": "mj_turbo_imagine", + "name": "mj_turbo_imagine", + "ownedBy": "dmxapi" + }, + { + "id": "qwen-math-turbo-2024-09-19", + "name": "qwen-math-turbo-2024-09-19", + "capabilities": ["function-call"], + "ownedBy": "alibaba" + }, + { + "id": "minimax-clone-upload", + "name": "minimax-clone-upload", + "ownedBy": "minimax" + }, + { + "id": "mj_relax_upscale_2x", + "name": "mj_relax_upscale_2x", + "ownedBy": "dmxapi" + }, + { + "id": "mj_fast_custom_zoom", + "name": "mj_fast_custom_zoom", + "ownedBy": "dmxapi" + }, + { + "id": "mj_fast_zoom", + "name": "mj_fast_zoom", + "ownedBy": "dmxapi" + }, + { + "id": "mj_turbo_background_eraser", + "name": "mj_turbo_background_eraser", + "ownedBy": "dmxapi" + }, + { + "id": "chat-seedream-3-0", + "name": "chat-seedream-3-0", + "ownedBy": "dmxapi" + }, + { + "id": "gpt-5-1-cfi", + "name": "gpt-5-1-cfi", + "capabilities": ["function-call", "image-recognition", "web-search", "reasoning"], + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"] + }, + "ownedBy": "openai" + }, + { + "id": "mj_turbo_inpaint", + "name": "mj_turbo_inpaint", + "ownedBy": "dmxapi" + }, + { + "id": "qwen-plus-2024-06-24", + "name": "qwen-plus-2024-06-24", + "capabilities": ["function-call", "reasoning"], + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + 
"min": 0, + "max": 81920 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "viduq2-ctv", + "name": "viduq2-ctv", + "capabilities": ["video-generation"], + "ownedBy": "vidu" + }, + { + "id": "mj_fast_upscale_creative", + "name": "mj_fast_upscale_creative", + "ownedBy": "dmxapi" + }, + { + "id": "baichuan-m3-plus", + "name": "baichuan-m3-plus", + "capabilities": ["reasoning"], + "ownedBy": "baichuan" + }, + { + "id": "dmxapl-cls45-0929", + "name": "dmxapl-cls45-0929", + "ownedBy": "dmxapi" + }, + { + "id": "kling_virtual_try_on", + "name": "kling_virtual_try_on", + "capabilities": ["video-generation"], + "ownedBy": "kling" + }, + { + "id": "speech-2-6-turbo", + "name": "speech-2-6-turbo", + "ownedBy": "dmxapi" + }, + { + "id": "speech-2-8-hd", + "name": "speech-2-8-hd", + "ownedBy": "dmxapi" + }, + { + "id": "minimax_minimax-hailuo-02", + "name": "minimax_minimax-hailuo-02", + "ownedBy": "minimax" + }, + { + "id": "music-2-0", + "name": "music-2-0", + "ownedBy": "dmxapi" + }, + { + "id": "qwen-vl-ocr-2024-10-28", + "name": "qwen-vl-ocr-2024-10-28", + "capabilities": ["function-call", "image-recognition", "file-input"], + "ownedBy": "alibaba" + }, + { + "id": "glm-4-alltools", + "name": "glm-4-alltools", + "capabilities": ["function-call"], + "ownedBy": "zhipu" + }, + { + "id": "musesteamer-air-image", + "name": "musesteamer-air-image", + "capabilities": ["image-generation"], + "ownedBy": "dmxapi" + }, + { + "id": "qvq-max-2025-03-25", + "name": "qvq-max-2025-03-25", + "capabilities": ["reasoning"], + "ownedBy": "alibaba" + }, + { + "id": "paiwo-get", + "name": "paiwo-get", + "ownedBy": "dmxapi" + }, + { + "id": "wan2-6-r2v", + "name": "wan2-6-r2v", + "capabilities": ["reasoning", "video-generation"], + "ownedBy": "alibaba" + }, + { + "id": "kling_multi_image2image", + "name": "kling_multi_image2image", + "capabilities": ["image-generation", "video-generation"], + "ownedBy": "kling" + }, + { + "id": "mj_turbo_pic_reader", + "name": "mj_turbo_pic_reader", + "ownedBy": 
"dmxapi" + }, + { + "id": "mj_turbo_upscale_subtle", + "name": "mj_turbo_upscale_subtle", + "ownedBy": "dmxapi" + }, + { + "id": "deepseek-r1-long", + "name": "deepseek-r1-long", + "capabilities": ["reasoning", "function-call"], + "ownedBy": "deepseek" + }, + { + "id": "g-l-m-4-6", + "name": "g-l-m-4-6", + "ownedBy": "dmxapi" + }, + { + "id": "doubao-seed-2-0-mini-260215", + "name": "doubao-seed-2-0-mini-260215", + "capabilities": ["function-call", "reasoning"], + "reasoning": { + "supportedEfforts": ["minimal", "low", "medium", "high"] + }, + "ownedBy": "bytedance" + }, + { + "id": "mj_fast_describe", + "name": "mj_fast_describe", + "ownedBy": "dmxapi" + }, + { + "id": "music-2-5", + "name": "music-2-5", + "ownedBy": "dmxapi" + }, + { + "id": "doubao-seed-2-0-code-preview-260215", + "name": "doubao-seed-2-0-code-preview-260215", + "capabilities": ["function-call", "reasoning"], + "reasoning": { + "supportedEfforts": ["minimal", "low", "medium", "high"] + }, + "ownedBy": "bytedance" + }, + { + "id": "doubao-seed-2-0-pro-260215", + "name": "doubao-seed-2-0-pro-260215", + "capabilities": ["function-call", "reasoning"], + "reasoning": { + "supportedEfforts": ["minimal", "low", "medium", "high"] + }, + "ownedBy": "bytedance" + }, + { + "id": "minimax-m2-5-guan", + "name": "minimax-m2-5-guan", + "capabilities": ["reasoning", "function-call"], + "reasoning": { + "supportedEfforts": [], + "interleaved": true + }, + "ownedBy": "minimax" + }, + { + "id": "mj_relax_inpaint", + "name": "mj_relax_inpaint", + "ownedBy": "dmxapi" + }, + { + "id": "qwen-turbo-2024-06-24", + "name": "qwen-turbo-2024-06-24", + "capabilities": ["function-call", "reasoning"], + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 0, + "max": 38912 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "gpt-image-1-dmx00", + "name": "gpt-image-1-dmx00", + "capabilities": ["image-generation"], + "ownedBy": "openai" + }, + { + "id": 
"dmxapl-cls45-0929-sk", + "name": "dmxapl-cls45-0929-sk", + "ownedBy": "dmxapi" + }, + { + "id": "gpt-5-2-cdx", + "name": "gpt-5-2-cdx", + "capabilities": ["function-call", "image-recognition", "web-search", "reasoning"], + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high", "max"] + }, + "ownedBy": "openai" + }, + { + "id": "kling-image2video-get", + "name": "kling-image2video-get", + "capabilities": ["image-generation", "video-generation"], + "ownedBy": "kling" + }, + { + "id": "mj_relax_describe", + "name": "mj_relax_describe", + "ownedBy": "dmxapi" + }, + { + "id": "kling_multi_elements_clear", + "name": "kling_multi_elements_clear", + "capabilities": ["video-generation"], + "ownedBy": "kling" + }, + { + "id": "mj_relax_low_variation", + "name": "mj_relax_low_variation", + "ownedBy": "dmxapi" + }, + { + "id": "qwen-audio-turbo", + "name": "qwen-audio-turbo", + "capabilities": ["function-call"], + "ownedBy": "alibaba" + }, + { + "id": "g-p-t-5-2-codex", + "name": "g-p-t-5-2-codex", + "ownedBy": "dmxapi" + }, + { + "id": "kling_multi_elements_preview", + "name": "kling_multi_elements_preview", + "capabilities": ["video-generation"], + "ownedBy": "kling" + }, + { + "id": "hunyuan-2-0-instruct-20251111", + "name": "hunyuan-2-0-instruct-20251111", + "capabilities": ["function-call"], + "ownedBy": "tencent" + }, + { + "id": "minimax_t2v-01-director", + "name": "minimax_t2v-01-director", + "ownedBy": "minimax" + }, + { + "id": "mj_fast_video", + "name": "mj_fast_video", + "capabilities": ["video-generation"], + "ownedBy": "dmxapi" + }, + { + "id": "mj_fast_pic_reader", + "name": "mj_fast_pic_reader", + "ownedBy": "dmxapi" + }, + { + "id": "qwen-audio-turbo-2024-08-07", + "name": "qwen-audio-turbo-2024-08-07", + "capabilities": ["function-call"], + "ownedBy": "alibaba" + }, + { + "id": "hunyuan-standard-256k", + "name": "hunyuan-standard-256k", + "capabilities": ["function-call"], + "ownedBy": "tencent" + }, + { + "id": "qwen-math-plus-2024-08-16", + 
"name": "qwen-math-plus-2024-08-16", + "capabilities": ["function-call"], + "ownedBy": "alibaba" + }, + { + "id": "mj_fast_reroll", + "name": "mj_fast_reroll", + "ownedBy": "dmxapi" + }, + { + "id": "mj_turbo_blend", + "name": "mj_turbo_blend", + "ownedBy": "dmxapi" + }, + { + "id": "qwen-longcontext-chat", + "name": "qwen-longcontext-chat", + "capabilities": ["function-call"], + "ownedBy": "alibaba" + }, + { + "id": "qwen3-omni-flash-all", + "name": "qwen3-omni-flash-all", + "capabilities": ["function-call", "image-recognition", "reasoning"], + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 38912 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "kling_video", + "name": "kling_video", + "capabilities": ["video-generation"], + "ownedBy": "kling" + }, + { + "id": "minimax-hailuo-2-3", + "name": "minimax-hailuo-2-3", + "ownedBy": "minimax" + }, + { + "id": "mj_fast_upscale_4x", + "name": "mj_fast_upscale_4x", + "ownedBy": "dmxapi" + }, + { + "id": "paiwo-v5-6-itv2", + "name": "paiwo-v5-6-itv2", + "ownedBy": "dmxapi" + }, + { + "id": "baichuan-m3", + "name": "baichuan-m3", + "capabilities": ["reasoning"], + "ownedBy": "baichuan" + }, + { + "id": "deepseek-r1-250120", + "name": "deepseek-r1-250120", + "capabilities": ["reasoning", "function-call"], + "ownedBy": "deepseek" + }, + { + "id": "mj_fast_prompt_analyzer", + "name": "mj_fast_prompt_analyzer", + "ownedBy": "dmxapi" + }, + { + "id": "mj_fast_edits", + "name": "mj_fast_edits", + "ownedBy": "dmxapi" + }, + { + "id": "paiwo-v5-6-itv", + "name": "paiwo-v5-6-itv", + "ownedBy": "dmxapi" + }, + { + "id": "qwen2-audio-instruct", + "name": "qwen2-audio-instruct", + "capabilities": ["function-call"], + "ownedBy": "alibaba" + }, + { + "id": "gpt-5-nano-ssvip", + "name": "gpt-5-nano-ssvip", + "capabilities": ["function-call", "image-recognition", "web-search", "reasoning"], + "reasoning": { + "supportedEfforts": ["minimal", "low", "medium", "high"] + 
}, + "ownedBy": "openai" + }, + { + "id": "kling-v2-6-text2video", + "name": "kling-v2-6-text2video", + "capabilities": ["video-generation"], + "ownedBy": "kling" + }, + { + "id": "kling_image", + "name": "kling_image", + "capabilities": ["image-generation", "video-generation"], + "ownedBy": "kling" + }, + { + "id": "minimax_files_retrieve", + "name": "minimax_files_retrieve", + "ownedBy": "minimax" + }, + { + "id": "mj_relax_prompt_analyzer_extended", + "name": "mj_relax_prompt_analyzer_extended", + "ownedBy": "dmxapi" + }, + { + "id": "mj_relax_shorten", + "name": "mj_relax_shorten", + "ownedBy": "dmxapi" + }, + { + "id": "doubao-seedream-3-0-t2i-250415", + "name": "doubao-seedream-3-0-t2i-250415", + "capabilities": ["function-call"], + "ownedBy": "bytedance" + }, + { + "id": "gpt-5-2-pro-chat", + "name": "gpt-5-2-pro-chat", + "capabilities": ["function-call", "image-recognition", "web-search", "reasoning"], + "reasoning": { + "supportedEfforts": ["medium", "high", "max"] + }, + "ownedBy": "openai" + }, + { + "id": "g2-5-flash", + "name": "g2-5-flash", + "ownedBy": "dmxapi" + }, + { + "id": "hunyuan-2-0-thinking-20251109", + "name": "hunyuan-2-0-thinking-20251109", + "capabilities": ["reasoning", "function-call"], + "ownedBy": "tencent" + }, + { + "id": "mj_turbo_pan", + "name": "mj_turbo_pan", + "ownedBy": "dmxapi" + }, + { + "id": "wan2-6-t2i", + "name": "wan2-6-t2i", + "capabilities": ["video-generation"], + "ownedBy": "alibaba" + }, + { + "id": "clo45thinking-20251101", + "name": "clo45thinking-20251101", + "ownedBy": "dmxapi" + }, + { + "id": "huoshan-deepseek-r1-64k", + "name": "huoshan-deepseek-r1-64k", + "capabilities": ["reasoning", "function-call"], + "ownedBy": "dmxapi" + }, + { + "id": "doubao-seedance-1-5-pro-responses", + "name": "doubao-seedance-1-5-pro-responses", + "capabilities": ["function-call"], + "ownedBy": "bytedance" + }, + { + "id": "g-l-m-4-7", + "name": "g-l-m-4-7", + "ownedBy": "dmxapi" + }, + { + "id": "huoshan-deepseek-v3", + "name": 
"huoshan-deepseek-v3", + "capabilities": ["reasoning", "function-call"], + "ownedBy": "dmxapi" + }, + { + "id": "mj_turbo_upscale", + "name": "mj_turbo_upscale", + "ownedBy": "dmxapi" + }, + { + "id": "qwen-image-edit-plus", + "name": "qwen-image-edit-plus", + "capabilities": ["function-call", "image-generation"], + "ownedBy": "alibaba" + }, + { + "id": "qwen-plus-2024-07-23", + "name": "qwen-plus-2024-07-23", + "capabilities": ["function-call", "reasoning"], + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 0, + "max": 81920 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "qwen3-rerank", + "name": "qwen3-rerank", + "capabilities": ["rerank", "reasoning"], + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 38912 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "mj_relax_upscale_creative", + "name": "mj_relax_upscale_creative", + "ownedBy": "dmxapi" + }, + { + "id": "mj_turbo_describe", + "name": "mj_turbo_describe", + "ownedBy": "dmxapi" + }, + { + "id": "kling_multi_elements_delete", + "name": "kling_multi_elements_delete", + "capabilities": ["video-generation"], + "ownedBy": "kling" + }, + { + "id": "deepseek-v3-1-uc", + "name": "deepseek-v3-1-uc", + "capabilities": ["reasoning", "function-call"], + "reasoning": { + "supportedEfforts": ["none"] + }, + "ownedBy": "deepseek" + }, + { + "id": "qwen-audio-chat", + "name": "qwen-audio-chat", + "capabilities": ["function-call"], + "ownedBy": "alibaba" + }, + { + "id": "mj_turbo_modal", + "name": "mj_turbo_modal", + "ownedBy": "dmxapi" + }, + { + "id": "qwen-vl-plus-2023-12-01", + "name": "qwen-vl-plus-2023-12-01", + "capabilities": ["function-call", "image-recognition"], + "ownedBy": "alibaba" + }, + { + "id": "yi-medium-200k", + "name": "yi-medium-200k", + "ownedBy": "01ai" + }, + { + "id": "cl35s1022", + "name": "cl35s1022", + "ownedBy": "dmxapi" + }, + { + "id": "g3-pro-preview", + 
"name": "g3-pro-preview", + "ownedBy": "dmxapi" + }, + { + "id": "gpt-image-1-dmx03", + "name": "gpt-image-1-dmx03", + "capabilities": ["image-generation"], + "ownedBy": "openai" + }, + { + "id": "mj_turbo_upscale_2x", + "name": "mj_turbo_upscale_2x", + "ownedBy": "dmxapi" + }, + { + "id": "suno_music", + "name": "suno_music", + "ownedBy": "suno" + }, + { + "id": "minimax_s2v-01", + "name": "minimax_s2v-01", + "ownedBy": "minimax" + }, + { + "id": "suno_lyrics", + "name": "suno_lyrics", + "ownedBy": "suno" + }, + { + "id": "claude-code-1", + "name": "claude-code-1", + "capabilities": ["function-call"], + "ownedBy": "anthropic" + }, + { + "id": "gpt-image-1-dmx02", + "name": "gpt-image-1-dmx02", + "capabilities": ["image-generation"], + "ownedBy": "openai" + }, + { + "id": "mj_relax_upload", + "name": "mj_relax_upload", + "ownedBy": "dmxapi" + }, + { + "id": "baichuan2", + "name": "baichuan2", + "ownedBy": "baichuan" + }, + { + "id": "deepclaude-liu", + "name": "deepclaude-liu", + "ownedBy": "dmxapi" + }, + { + "id": "g2-5-pro", + "name": "g2-5-pro", + "ownedBy": "dmxapi" + }, + { + "id": "qwen-plus-2024-02-06", + "name": "qwen-plus-2024-02-06", + "capabilities": ["function-call", "reasoning"], + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 0, + "max": 81920 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "mj_relax_reroll", + "name": "mj_relax_reroll", + "ownedBy": "dmxapi" + }, + { + "id": "mj_turbo_low_variation", + "name": "mj_turbo_low_variation", + "ownedBy": "dmxapi" + }, + { + "id": "mj_turbo_upscale_4x", + "name": "mj_turbo_upscale_4x", + "ownedBy": "dmxapi" + }, + { + "id": "gemini-3-pro-image-preview-0-3", + "name": "gemini-3-pro-image-preview-0-3", + "capabilities": ["function-call", "image-recognition", "image-generation", "web-search"], + "ownedBy": "google" + }, + { + "id": "glm-4-1v-thinking-flash", + "name": "glm-4-1v-thinking-flash", + "capabilities": ["reasoning", "function-call"], + 
"ownedBy": "zhipu" + }, + { + "id": "mj_fast_low_variation", + "name": "mj_fast_low_variation", + "ownedBy": "dmxapi" + }, + { + "id": "mj_turbo_high_variation", + "name": "mj_turbo_high_variation", + "ownedBy": "dmxapi" + }, + { + "id": "suno_uploads", + "name": "suno_uploads", + "ownedBy": "suno" + }, + { + "id": "gpt-5-2-responses", + "name": "gpt-5-2-responses", + "capabilities": ["function-call", "image-recognition", "web-search", "reasoning"], + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high", "max"] + }, + "ownedBy": "openai" + }, + { + "id": "g-p-t-5", + "name": "g-p-t-5", + "ownedBy": "dmxapi" + }, + { + "id": "kling_effects", + "name": "kling_effects", + "capabilities": ["video-generation"], + "ownedBy": "kling" + }, + { + "id": "veo2", + "name": "veo2", + "capabilities": ["video-generation"], + "ownedBy": "google" + }, + { + "id": "t2v-01-director", + "name": "t2v-01-director", + "ownedBy": "aionly" + }, + { + "id": "doubao-seedance-1-0-pro", + "name": "doubao-seedance-1-0-pro", + "capabilities": ["function-call"], + "ownedBy": "bytedance" + }, + { + "id": "doubao-tts01", + "name": "doubao-tts01", + "ownedBy": "bytedance" + }, + { + "id": "gpt-o1", + "name": "gpt-o1", + "capabilities": ["function-call", "image-recognition"], + "ownedBy": "openai" + }, + { + "id": "minimax-s2v-01", + "name": "minimax-s2v-01", + "ownedBy": "minimax" + }, + { + "id": "claude-sonnet4", + "name": "claude-sonnet4", + "capabilities": ["function-call"], + "ownedBy": "anthropic" + }, + { + "id": "veo-3-0-generate-001", + "name": "veo-3-0-generate-001", + "capabilities": ["video-generation"], + "ownedBy": "google" + }, + { + "id": "ah-claude-sonnet4", + "name": "ah-claude-sonnet4", + "capabilities": ["function-call"], + "ownedBy": "aionly" + }, + { + "id": "seedream4-0", + "name": "seedream4-0", + "ownedBy": "bytedance" + }, + { + "id": "gemini-2-5-flash-image-text", + "name": "gemini-2-5-flash-image-text", + "capabilities": ["function-call", 
"image-recognition", "image-generation", "web-search", "reasoning"], + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 0, + "max": 24576 + } + }, + "ownedBy": "google" + }, + { + "id": "veo-3-0-fast-generate-001", + "name": "veo-3-0-fast-generate-001", + "capabilities": ["video-generation"], + "ownedBy": "google" + }, + { + "id": "gpt-5-codex-azure", + "name": "gpt-5-codex-azure", + "capabilities": ["function-call", "image-recognition", "web-search", "reasoning"], + "reasoning": { + "supportedEfforts": ["low", "medium", "high"] + }, + "ownedBy": "openai" + }, + { + "id": "gpt-5-pro-openai", + "name": "gpt-5-pro-openai", + "capabilities": ["function-call", "image-recognition", "web-search", "reasoning"], + "reasoning": { + "supportedEfforts": ["high"] + }, + "ownedBy": "openai" + }, + { + "id": "claude-sonnet-4-5-20250929-02", + "name": "claude-sonnet-4-5-20250929-02", + "capabilities": ["reasoning", "function-call", "image-recognition", "web-search", "computer-use"], + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 64000 + } + }, + "ownedBy": "anthropic" + }, + { + "id": "claude-haiku-4-5-20251001-02", + "name": "claude-haiku-4-5-20251001-02", + "capabilities": ["reasoning", "function-call", "image-recognition", "web-search", "computer-use"], + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 64000 + } + }, + "ownedBy": "anthropic" + }, + { + "id": "doubao-seedance-1-0-pro-fast", + "name": "doubao-seedance-1-0-pro-fast", + "capabilities": ["function-call"], + "ownedBy": "bytedance" + }, + { + "id": "gemini-3-pro-image-preview-e", + "name": "gemini-3-pro-image-preview-e", + "capabilities": ["function-call", "image-recognition", "image-generation", "web-search"], + "ownedBy": "google" + }, + { + "id": "gpt-image-1-2025-04", + "name": "gpt-image-1-2025-04", + 
"capabilities": ["image-generation"], + "ownedBy": "openai" + }, + { + "id": "gemini-3-pro-image-preview-text", + "name": "gemini-3-pro-image-preview-text", + "capabilities": ["function-call", "image-recognition", "image-generation", "web-search"], + "ownedBy": "google" + }, + { + "id": "claude-opus-4-5-20251101-02", + "name": "claude-opus-4-5-20251101-02", + "capabilities": ["reasoning", "function-call", "image-recognition", "web-search", "computer-use"], + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 64000 + } + }, + "ownedBy": "anthropic" + }, + { + "id": "doubao-seedance-1-5-pro", + "name": "doubao-seedance-1-5-pro", + "capabilities": ["function-call"], + "ownedBy": "bytedance" + }, + { + "id": "seed-tts-2-0", + "name": "seed-tts-2-0", + "capabilities": ["audio-generation"], + "ownedBy": "bytedance" + }, + { + "id": "gemini-3-pro-image-vip", + "name": "gemini-3-pro-image-vip", + "capabilities": ["function-call", "image-recognition", "image-generation", "web-search"], + "ownedBy": "google" + }, + { + "id": "wan2-6-t2i-vip", + "name": "wan2-6-t2i-vip", + "capabilities": ["video-generation"], + "ownedBy": "alibaba" + }, + { + "id": "qwen3-max-vip", + "name": "qwen3-max-vip", + "capabilities": ["function-call", "reasoning"], + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 0, + "max": 81920 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "vq2", + "name": "vq2", + "ownedBy": "aionly" + }, + { + "id": "vq2-turbo", + "name": "vq2-turbo", + "ownedBy": "aionly" + }, + { + "id": "vq2-pro", + "name": "vq2-pro", + "ownedBy": "aionly" + }, + { + "id": "qwen-image-max", + "name": "qwen-image-max", + "capabilities": ["function-call", "image-generation"], + "ownedBy": "alibaba" + }, + { + "id": "qwen-image-plus-vip", + "name": "qwen-image-plus-vip", + "capabilities": ["function-call", "image-generation"], + "ownedBy": "alibaba" + }, + { + 
"id": "qwen-image-edit-plus-vip", + "name": "qwen-image-edit-plus-vip", + "capabilities": ["function-call", "image-generation"], + "ownedBy": "alibaba" + }, + { + "id": "nemotron3-nano", + "name": "nemotron3-nano", + "ownedBy": "nvidia" + }, + { + "id": "claude-sonnet4-5", + "name": "claude-sonnet4-5", + "capabilities": ["function-call"], + "ownedBy": "anthropic" + }, + { + "id": "claude-opus4-5", + "name": "claude-opus4-5", + "capabilities": ["function-call"], + "ownedBy": "anthropic" + }, + { + "id": "claude-haiku4-5", + "name": "claude-haiku4-5", + "capabilities": ["function-call"], + "ownedBy": "anthropic" + }, + { + "id": "claude-opus4", + "name": "claude-opus4", + "capabilities": ["function-call"], + "ownedBy": "anthropic" + }, + { + "id": "claude-opus4-1", + "name": "claude-opus4-1", + "capabilities": ["function-call"], + "ownedBy": "anthropic" + }, + { + "id": "claude-haiku3-5", + "name": "claude-haiku3-5", + "capabilities": ["function-call"], + "ownedBy": "anthropic" + }, + { + "id": "gpt-o3", + "name": "gpt-o3", + "capabilities": ["function-call", "image-recognition", "web-search"], + "ownedBy": "openai" + }, + { + "id": "wan2-6-i2v-flash", + "name": "wan2-6-i2v-flash", + "capabilities": ["video-generation"], + "ownedBy": "alibaba" + }, + { + "id": "wan2-6-i2v-flash-vip", + "name": "wan2-6-i2v-flash-vip", + "capabilities": ["video-generation"], + "ownedBy": "alibaba" + }, + { + "id": "flux2-dev", + "name": "flux2-dev", + "capabilities": ["image-generation"], + "ownedBy": "bfl" + }, + { + "id": "claude-opus4-6", + "name": "claude-opus4-6", + "capabilities": ["function-call"], + "ownedBy": "anthropic" + }, + { + "id": "kld-o-4-6", + "name": "kld-o-4-6", + "ownedBy": "aionly" + }, + { + "id": "vq3-pro", + "name": "vq3-pro", + "ownedBy": "aionly" + }, + { + "id": "wan2-6-t2v-vip", + "name": "wan2-6-t2v-vip", + "capabilities": ["video-generation"], + "ownedBy": "alibaba" + }, + { + "id": "speech-2-8-turbo", + "name": "speech-2-8-turbo", + "ownedBy": "aionly" + 
}, + { + "id": "gemini-2-5-flash-preview-09-2025-thinking-*", + "name": "gemini-2-5-flash-preview-09-2025-thinking-*", + "capabilities": ["reasoning", "function-call", "image-recognition", "web-search"], + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 0, + "max": 24576 + } + }, + "ownedBy": "google" + }, + { + "id": "gemini-2-5-flash-thinking-*", + "name": "gemini-2-5-flash-thinking-*", + "capabilities": ["reasoning", "function-call", "image-recognition", "web-search"], + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 0, + "max": 24576 + } + }, + "ownedBy": "google" + }, + { + "id": "gemini-2-5-flash-thinking-24576", + "name": "gemini-2-5-flash-thinking-24576", + "capabilities": ["reasoning", "function-call", "image-recognition", "web-search"], + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 0, + "max": 24576 + } + }, + "ownedBy": "google" + }, + { + "id": "gemini-2-5-flash-thinking-512", + "name": "gemini-2-5-flash-thinking-512", + "capabilities": ["reasoning", "function-call", "image-recognition", "web-search"], + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 0, + "max": 24576 + } + }, + "ownedBy": "google" + }, + { + "id": "gemini-2-5-pro-thinking-*", + "name": "gemini-2-5-pro-thinking-*", + "capabilities": ["reasoning", "function-call", "image-recognition", "web-search"], + "reasoning": { + "supportedEfforts": ["low", "medium", "high"], + "thinkingTokenLimits": { + "min": 128, + "max": 32768 + } + }, + "ownedBy": "google" + }, + { + "id": "gemini-3-pro-image-preview-flatfee", + "name": "gemini-3-pro-image-preview-flatfee", + "capabilities": ["function-call", "image-recognition", "image-generation", "web-search"], + "ownedBy": "google" + }, + { + "id": "gpt-5-1-chat-2025-11-13", + "name": "gpt-5-1-chat-2025-11-13", + 
"capabilities": ["function-call", "image-recognition", "web-search", "reasoning"], + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"] + }, + "ownedBy": "openai" + }, + { + "id": "gpt-5-1-codex-mini-2025-11-13", + "name": "gpt-5-1-codex-mini-2025-11-13", + "capabilities": ["function-call", "image-recognition", "web-search", "reasoning"], + "reasoning": { + "supportedEfforts": ["none", "medium", "high"] + }, + "ownedBy": "openai" + }, + { + "id": "gpt-5-2-2025-12-11-none", + "name": "gpt-5-2-2025-12-11-none", + "capabilities": ["function-call", "image-recognition", "web-search", "reasoning"], + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high", "max"] + }, + "ownedBy": "openai" + }, + { + "id": "gpt-5-2-2025-12-11-xhigh", + "name": "gpt-5-2-2025-12-11-xhigh", + "capabilities": ["function-call", "image-recognition", "web-search", "reasoning"], + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high", "max"] + }, + "ownedBy": "openai" + }, + { + "id": "gpt-5-2-chat-2025-12-11", + "name": "gpt-5-2-chat-2025-12-11", + "capabilities": ["function-call", "image-recognition", "web-search", "reasoning"], + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high", "max"] + }, + "ownedBy": "openai" + }, + { + "id": "gpt-5-2-none", + "name": "gpt-5-2-none", + "capabilities": ["function-call", "image-recognition", "web-search", "reasoning"], + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high", "max"] + }, + "ownedBy": "openai" + }, + { + "id": "gpt-5-2-xhigh", + "name": "gpt-5-2-xhigh", + "capabilities": ["function-call", "image-recognition", "web-search", "reasoning"], + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high", "max"] + }, + "ownedBy": "openai" + }, + { + "id": "gpt-realtime-2025-08-28", + "name": "gpt-realtime-2025-08-28", + "ownedBy": "openai" + }, + { + "id": "babbage-002", + "name": "babbage-002", + "ownedBy": "openai" + }, + { + "id": 
"gemini-2-5-pro-flatfee", + "name": "gemini-2-5-pro-flatfee", + "capabilities": ["reasoning", "function-call", "image-recognition", "web-search"], + "reasoning": { + "supportedEfforts": ["low", "medium", "high"], + "thinkingTokenLimits": { + "min": 128, + "max": 32768 + } + }, + "ownedBy": "google" + }, + { + "id": "gpt-3-5-turbo-instruct-0914", + "name": "gpt-3-5-turbo-instruct-0914", + "ownedBy": "openai" + }, + { + "id": "gpt-35-turbo", + "name": "gpt-35-turbo", + "ownedBy": "openai" + }, + { + "id": "gpt-35-turbo-instruct", + "name": "gpt-35-turbo-instruct", + "ownedBy": "openai" + }, + { + "id": "gpt-4-dalle", + "name": "gpt-4-dalle", + "capabilities": ["function-call", "image-recognition"], + "ownedBy": "openai" + }, + { + "id": "gpt-4-1-mini-2024-05-14", + "name": "gpt-4-1-mini-2024-05-14", + "capabilities": ["function-call", "image-recognition", "web-search"], + "ownedBy": "openai" + }, + { + "id": "gpt-4o-audio-preview-2025-06-03", + "name": "gpt-4o-audio-preview-2025-06-03", + "capabilities": ["function-call", "image-recognition", "web-search"], + "ownedBy": "openai" + }, + { + "id": "gpt-4o-mini-search-preview-2025-03-11", + "name": "gpt-4o-mini-search-preview-2025-03-11", + "capabilities": ["function-call", "image-recognition", "web-search"], + "ownedBy": "openai" + }, + { + "id": "gpt-4o-search-preview-2025-03-11", + "name": "gpt-4o-search-preview-2025-03-11", + "capabilities": ["function-call", "image-recognition", "web-search"], + "ownedBy": "openai" + }, + { + "id": "gpt-5-search-api", + "name": "gpt-5-search-api", + "capabilities": ["function-call", "image-recognition", "web-search", "reasoning"], + "reasoning": { + "supportedEfforts": ["minimal", "low", "medium", "high"] + }, + "ownedBy": "openai" + }, + { + "id": "gpt-5-search-api-2025-10-14", + "name": "gpt-5-search-api-2025-10-14", + "capabilities": ["function-call", "image-recognition", "web-search", "reasoning"], + "reasoning": { + "supportedEfforts": ["minimal", "low", "medium", "high"] + }, 
+ "ownedBy": "openai" + }, + { + "id": "gpt-audio-2025-08-28", + "name": "gpt-audio-2025-08-28", + "ownedBy": "openai" + }, + { + "id": "gpt-audio-mini-2025-10-06", + "name": "gpt-audio-mini-2025-10-06", + "ownedBy": "openai" + }, + { + "id": "gpt-image-1-5-all", + "name": "gpt-image-1-5-all", + "capabilities": ["image-generation"], + "ownedBy": "openai" + }, + { + "id": "o1-mini-all", + "name": "o1-mini-all", + "capabilities": ["reasoning"], + "reasoning": { + "supportedEfforts": ["low", "medium", "high"] + }, + "ownedBy": "openai" + }, + { + "id": "o1-preview-all", + "name": "o1-preview-all", + "capabilities": ["reasoning"], + "reasoning": { + "supportedEfforts": ["low", "medium", "high"] + }, + "ownedBy": "openai" + }, + { + "id": "o3-all", + "name": "o3-all", + "capabilities": ["reasoning", "function-call", "image-recognition", "web-search"], + "ownedBy": "openai" + }, + { + "id": "o3-deep-research-2025-06-26", + "name": "o3-deep-research-2025-06-26", + "capabilities": ["reasoning", "function-call", "image-recognition", "web-search"], + "reasoning": { + "supportedEfforts": ["medium"] + }, + "ownedBy": "openai" + }, + { + "id": "o4-mini-all", + "name": "o4-mini-all", + "capabilities": ["reasoning", "function-call", "image-recognition", "web-search"], + "reasoning": { + "supportedEfforts": ["low", "medium", "high"] + }, + "ownedBy": "openai" + }, + { + "id": "o4-mini-deep-research-2025-06-26", + "name": "o4-mini-deep-research-2025-06-26", + "capabilities": ["reasoning", "function-call", "image-recognition", "web-search"], + "reasoning": { + "supportedEfforts": ["medium"] + }, + "ownedBy": "openai" + }, + { + "id": "o4-mini-high-all", + "name": "o4-mini-high-all", + "capabilities": ["reasoning", "function-call", "image-recognition", "web-search"], + "reasoning": { + "supportedEfforts": ["low", "medium", "high"] + }, + "ownedBy": "openai" + }, + { + "id": "claude-3-5-sonnet-latest", + "name": "claude-3-5-sonnet-latest", + "capabilities": ["function-call", 
"image-recognition", "web-search", "computer-use"], + "ownedBy": "anthropic" + }, + { + "id": "gpt-3-5-turbo-instruct-09-14", + "name": "gpt-3-5-turbo-instruct-09-14", + "ownedBy": "openai" + }, + { + "id": "gpt-4o-transcribe-diarize", + "name": "gpt-4o-transcribe-diarize", + "capabilities": ["function-call", "image-recognition", "web-search"], + "ownedBy": "openai" + }, + { + "id": "gpt-realtime", + "name": "gpt-realtime", + "ownedBy": "openai" + }, + { + "id": "gpt-realtime-mini", + "name": "gpt-realtime-mini", + "ownedBy": "openai" + }, + { + "id": "gpt-realtime-mini-2025-10-06", + "name": "gpt-realtime-mini-2025-10-06", + "ownedBy": "openai" + }, + { + "id": "grok-3-nx", + "name": "grok-3-nx", + "capabilities": ["function-call", "web-search"], + "ownedBy": "xai" + }, + { + "id": "kling-v1", + "name": "kling-v1", + "capabilities": ["video-generation"], + "ownedBy": "kling" + }, + { + "id": "kling-v1-6", + "name": "kling-v1-6", + "capabilities": ["video-generation"], + "ownedBy": "kling" + }, + { + "id": "kling-v2-1-master", + "name": "kling-v2-1-master", + "capabilities": ["video-generation"], + "ownedBy": "kling" + }, + { + "id": "kling-v2-5-turbo", + "name": "kling-v2-5-turbo", + "capabilities": ["video-generation"], + "ownedBy": "kling" + }, + { + "id": "kling-v2-master", + "name": "kling-v2-master", + "capabilities": ["video-generation"], + "ownedBy": "kling" + }, + { + "id": "o1-pro-2025-03-19", + "name": "o1-pro-2025-03-19", + "capabilities": ["reasoning", "function-call", "image-recognition"], + "reasoning": { + "supportedEfforts": ["low", "medium", "high"] + }, + "ownedBy": "openai" + }, + { + "id": "o3-pro-all", + "name": "o3-pro-all", + "capabilities": ["reasoning", "function-call", "image-recognition", "web-search"], + "reasoning": { + "supportedEfforts": ["low", "medium", "high"] + }, + "ownedBy": "openai" + }, + { + "id": "computer-use-preview-2025-03-11", + "name": "computer-use-preview-2025-03-11", + "capabilities": ["computer-use"], + "ownedBy": 
"burncloud" + }, + { + "id": "doubao-1-5-ui-tars-250428", + "name": "doubao-1-5-ui-tars-250428", + "ownedBy": "bytedance" + }, + { + "id": "doubao-lite-128k-240828", + "name": "doubao-lite-128k-240828", + "ownedBy": "bytedance" + }, + { + "id": "doubao-lite-32k-240828", + "name": "doubao-lite-32k-240828", + "ownedBy": "bytedance" + }, + { + "id": "doubao-pro-32k-241215", + "name": "doubao-pro-32k-241215", + "ownedBy": "bytedance" + }, + { + "id": "doubao-seedance-1-0-lite-i2v-250428", + "name": "doubao-seedance-1-0-lite-i2v-250428", + "capabilities": ["function-call"], + "ownedBy": "bytedance" + }, + { + "id": "doubao-seedance-1-0-lite-t2v-250428", + "name": "doubao-seedance-1-0-lite-t2v-250428", + "capabilities": ["function-call"], + "ownedBy": "bytedance" + }, + { + "id": "doubao-seedance-1-0-pro-250528", + "name": "doubao-seedance-1-0-pro-250528", + "capabilities": ["function-call"], + "ownedBy": "bytedance" + }, + { + "id": "gemini-2-5-flash-lite-thinking-*", + "name": "gemini-2-5-flash-lite-thinking-*", + "capabilities": ["reasoning", "function-call", "image-recognition", "web-search"], + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 512, + "max": 24576 + } + }, + "ownedBy": "google" + }, + { + "id": "gpt-5-flatfee", + "name": "gpt-5-flatfee", + "capabilities": ["function-call", "image-recognition", "web-search", "reasoning"], + "reasoning": { + "supportedEfforts": ["minimal", "low", "medium", "high"] + }, + "ownedBy": "openai" + }, + { + "id": "gpt-5-nano-high", + "name": "gpt-5-nano-high", + "capabilities": ["function-call", "image-recognition", "web-search", "reasoning"], + "reasoning": { + "supportedEfforts": ["minimal", "low", "medium", "high"] + }, + "ownedBy": "openai" + }, + { + "id": "gpt-5-nano-low", + "name": "gpt-5-nano-low", + "capabilities": ["function-call", "image-recognition", "web-search", "reasoning"], + "reasoning": { + "supportedEfforts": ["minimal", "low", "medium", "high"] + 
}, + "ownedBy": "openai" + }, + { + "id": "gpt-5-nano-medium", + "name": "gpt-5-nano-medium", + "capabilities": ["function-call", "image-recognition", "web-search", "reasoning"], + "reasoning": { + "supportedEfforts": ["minimal", "low", "medium", "high"] + }, + "ownedBy": "openai" + }, + { + "id": "gpt-5-nano-minimal", + "name": "gpt-5-nano-minimal", + "capabilities": ["function-call", "image-recognition", "web-search", "reasoning"], + "reasoning": { + "supportedEfforts": ["minimal", "low", "medium", "high"] + }, + "ownedBy": "openai" + }, + { + "id": "qwen-coder-plus-1106", + "name": "qwen-coder-plus-1106", + "capabilities": ["function-call"], + "ownedBy": "alibaba" + }, + { + "id": "qwen-coder-turbo-0919", + "name": "qwen-coder-turbo-0919", + "capabilities": ["function-call"], + "ownedBy": "alibaba" + }, + { + "id": "qwen-math-plus-0919", + "name": "qwen-math-plus-0919", + "capabilities": ["function-call"], + "ownedBy": "alibaba" + }, + { + "id": "qwen-math-turbo-0919", + "name": "qwen-math-turbo-0919", + "capabilities": ["function-call"], + "ownedBy": "alibaba" + }, + { + "id": "qwen-max-0403", + "name": "qwen-max-0403", + "capabilities": ["function-call"], + "ownedBy": "alibaba" + }, + { + "id": "qwen-max-0428", + "name": "qwen-max-0428", + "capabilities": ["function-call"], + "ownedBy": "alibaba" + }, + { + "id": "qwen-max-0919", + "name": "qwen-max-0919", + "capabilities": ["function-call"], + "ownedBy": "alibaba" + }, + { + "id": "qwen-plus-0919", + "name": "qwen-plus-0919", + "capabilities": ["function-call", "reasoning"], + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 0, + "max": 81920 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "qwen-plus-2025-07-14", + "name": "qwen-plus-2025-07-14", + "capabilities": ["function-call", "reasoning"], + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 0, + "max": 38912 + } + }, + "ownedBy": 
"alibaba" + }, + { + "id": "sora-2-15s", + "name": "sora-2-15s", + "capabilities": ["video-generation"], + "ownedBy": "openai" + }, + { + "id": "sora-2-characters", + "name": "sora-2-characters", + "capabilities": ["video-generation"], + "ownedBy": "openai" + }, + { + "id": "sora-2-landscape", + "name": "sora-2-landscape", + "capabilities": ["video-generation"], + "ownedBy": "openai" + }, + { + "id": "sora-2-landscape-pro-25s", + "name": "sora-2-landscape-pro-25s", + "capabilities": ["video-generation"], + "ownedBy": "openai" + }, + { + "id": "sora-2-portrait-pro-25s", + "name": "sora-2-portrait-pro-25s", + "capabilities": ["video-generation"], + "ownedBy": "openai" + }, + { + "id": "baichuan-text-embedding", + "name": "Baichuan-Text-Embedding", + "capabilities": ["embedding"], + "ownedBy": "baichuan" + }, + { + "id": "baichuan2-turbo", + "name": "Baichuan2-Turbo", + "contextWindow": 32000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.32 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.32 + } + }, + "ownedBy": "baichuan" + }, + { + "id": "baichuan2-turbo-192k", + "name": "Baichuan2-Turbo-192k", + "contextWindow": 192000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2.64 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2.64 + } + }, + "ownedBy": "baichuan" + }, + { + "id": "tavily", + "name": "tavily", + "ownedBy": "302ai" + }, + { + "id": "searchapi", + "name": "searchapi", + "ownedBy": "302ai" + }, + { + "id": "clipdrop", + "name": "clipdrop", + "ownedBy": "302ai" + }, + { + "id": "vectorizer", + "name": "vectorizer", + "ownedBy": "302ai" + }, + { + "id": "302", + "name": "302 API", + "ownedBy": "302ai" + }, + { + "id": "zhipu-embedding-2", + "name": "zhipu-embedding-2", + "capabilities": ["embedding"], + "ownedBy": "302ai" + }, + { + "id": "luma", + "name": "luma", + "capabilities": ["video-generation"], + "ownedBy": "302ai" + }, + { + "id": "generalv3-5", + "name": "Spark Max", + 
"contextWindow": 8000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 4.73 + }, + "output": { + "currency": "USD", + "perMillionTokens": 4.73 + } + }, + "ownedBy": "302ai" + }, + { + "id": "doc2x", + "name": "doc2x", + "ownedBy": "302ai" + }, + { + "id": "glif", + "name": "glif", + "ownedBy": "302ai" + }, + { + "id": "sensechat-5", + "name": "SenseChat-5", + "contextWindow": 128000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 6.6 + }, + "output": { + "currency": "USD", + "perMillionTokens": 15.4 + } + }, + "ownedBy": "302ai" + }, + { + "id": "sensechat-turbo", + "name": "SenseChat-Turbo", + "contextWindow": 32000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.33 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.77 + } + }, + "ownedBy": "302ai" + }, + { + "id": "azure_tts", + "name": "azure_tts", + "ownedBy": "302ai" + }, + { + "id": "mistral-large-2", + "name": "Mistral-Large-2", + "capabilities": ["function-call"], + "contextWindow": 128000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 10 + } + }, + "ownedBy": "mistral" + }, + { + "id": "deepl-en", + "name": "DeepL-EN", + "contextWindow": 32000, + "ownedBy": "302ai" + }, + { + "id": "deepl-zh", + "name": "DeepL-ZH", + "contextWindow": 32000, + "ownedBy": "302ai" + }, + { + "id": "deepl-ja", + "name": "DeepL-JA", + "contextWindow": 32000, + "ownedBy": "302ai" + }, + { + "id": "klingai", + "name": "klingai", + "capabilities": ["video-generation"], + "ownedBy": "kling" + }, + { + "id": "gpt-4-plus", + "name": "gpt-4-plus", + "capabilities": ["function-call", "image-recognition"], + "contextWindow": 128000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 30 + }, + "output": { + "currency": "USD", + "perMillionTokens": 60 + } + }, + "ownedBy": "openai" + }, + { + "id": "exaai", + "name": "exaai", + "ownedBy": "302ai" 
+ }, + { + "id": "cogvideox", + "name": "cogvideox", + "capabilities": ["video-generation"], + "ownedBy": "zhipu" + }, + { + "id": "farui-plus", + "name": "farui-plus", + "contextWindow": 12000, + "ownedBy": "302ai" + }, + { + "id": "doubao_tts_hd", + "name": "doubao_tts_hd", + "ownedBy": "bytedance" + }, + { + "id": "bochaai", + "name": "bochaai", + "ownedBy": "302ai" + }, + { + "id": "gpt-3-5-sonnet-cursor", + "name": "claude-3.5-sonnet", + "contextWindow": 200000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 15 + } + }, + "ownedBy": "openai" + }, + { + "id": "minimaxi_video-01", + "name": "minimaxi_video-01", + "capabilities": ["video-generation"], + "ownedBy": "minimax" + }, + { + "id": "deepl", + "name": "DeepL", + "ownedBy": "302ai" + }, + { + "id": "suno-api", + "name": "suno-api", + "ownedBy": "suno" + }, + { + "id": "fish-audio", + "name": "fish-audio", + "ownedBy": "302ai" + }, + { + "id": "llama3-2", + "name": "Llama3.2-90B", + "contextWindow": 131072, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2 + } + }, + "ownedBy": "meta" + }, + { + "id": "minimaxi_text2voice", + "name": "minimaxi_text2voice", + "ownedBy": "minimax" + }, + { + "id": "dubbingx", + "name": "dubbingx", + "contextWindow": 32768, + "ownedBy": "302ai" + }, + { + "id": "pika", + "name": "pika", + "capabilities": ["video-generation"], + "contextWindow": 32768, + "ownedBy": "302ai" + }, + { + "id": "hedra", + "name": "hedra", + "ownedBy": "302ai" + }, + { + "id": "pix", + "name": "pix", + "contextWindow": 32768, + "ownedBy": "302ai" + }, + { + "id": "gpt-4o-plus", + "name": "gpt-4o-plus", + "capabilities": ["function-call", "image-recognition", "web-search"], + "contextWindow": 128000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 5 + }, + "output": { + "currency": "USD", + 
"perMillionTokens": 15 + } + }, + "ownedBy": "openai" + }, + { + "id": "luma-photon", + "name": "Luma-Photon", + "capabilities": ["video-generation"], + "ownedBy": "302ai" + }, + { + "id": "coder-claude-3-5-sonnet-20240620", + "name": "coder-claude-3-5-sonnet-20240620", + "capabilities": ["function-call", "image-recognition", "web-search", "computer-use"], + "contextWindow": 200000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 15 + } + }, + "ownedBy": "302ai" + }, + { + "id": "coder-claude-3-5-sonnet-20241022", + "name": "coder-claude-3-5-sonnet-20241022", + "capabilities": ["function-call", "image-recognition", "web-search", "computer-use"], + "contextWindow": 200000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 15 + } + }, + "ownedBy": "302ai" + }, + { + "id": "minimaxi_music", + "name": "minimaxi_music", + "ownedBy": "minimax" + }, + { + "id": "llama3-3", + "name": "Llama3.3-70B", + "contextWindow": 128000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.9 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.9 + } + }, + "ownedBy": "meta" + }, + { + "id": "runway_exapnd", + "name": "runway_exapnd", + "capabilities": ["video-generation"], + "ownedBy": "runway" + }, + { + "id": "o1-plus", + "name": "o1-plus", + "capabilities": ["reasoning", "function-call", "image-recognition"], + "contextWindow": 128000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 100 + }, + "output": { + "currency": "USD", + "perMillionTokens": 200 + } + }, + "reasoning": { + "supportedEfforts": ["low", "medium", "high"] + }, + "ownedBy": "openai" + }, + { + "id": "tripo3d", + "name": "tripo3d", + "ownedBy": "302ai" + }, + { + "id": "vidu", + "name": "vidu", + "capabilities": ["video-generation"], + "ownedBy": "vidu" + }, + { + "id": 
"doubao-vision-lite-32k", + "name": "Doubao-Vision-Lite-32k", + "capabilities": ["image-recognition"], + "contextWindow": 32000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.5 + } + }, + "ownedBy": "bytedance" + }, + { + "id": "general_v2-1_l", + "name": "general_v2.1_L", + "ownedBy": "302ai" + }, + { + "id": "general_v2-0_l", + "name": "general_v2.0_L", + "ownedBy": "302ai" + }, + { + "id": "general_v2-0", + "name": "general_v2.0", + "ownedBy": "302ai" + }, + { + "id": "general_v2-0_l_seededit", + "name": "general_v2.0_L_seededit", + "ownedBy": "302ai" + }, + { + "id": "general_v2-0_l_character", + "name": "general_v2.0_L_character", + "ownedBy": "302ai" + }, + { + "id": "doubao", + "name": "Doubao", + "ownedBy": "bytedance" + }, + { + "id": "kolors-virtual-try-on-v1", + "name": "kolors-virtual-try-on-v1", + "ownedBy": "kolors" + }, + { + "id": "kolors-virtual-try-on-v1-5", + "name": "kolors-virtual-try-on-v1-5", + "ownedBy": "kolors" + }, + { + "id": "memobase", + "name": "memobase", + "ownedBy": "302ai" + }, + { + "id": "gpt-3-5-sonnet-20241022-cursor", + "name": "claude-3.5-sonnet", + "contextWindow": 200000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 15 + } + }, + "ownedBy": "openai" + }, + { + "id": "gpt-3-5-sonnet-20240620-cursor", + "name": "claude-3.5-sonnet", + "contextWindow": 200000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 15 + } + }, + "ownedBy": "openai" + }, + { + "id": "gpt-4o-sonnet-cursor", + "name": "claude-3.5-sonnet", + "capabilities": ["function-call", "image-recognition", "web-search"], + "contextWindow": 200000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 15 + } + }, + 
"ownedBy": "openai" + }, + { + "id": "deepseek-r1-baidu", + "name": "Deepseek-R1-Baidu", + "capabilities": ["reasoning", "function-call"], + "contextWindow": 64000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.6 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2.3 + } + }, + "ownedBy": "deepseek" + }, + { + "id": "deepseek-v3-baidu", + "name": "Deepseek-V3-Baidu", + "capabilities": ["reasoning", "function-call"], + "contextWindow": 64000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.2 + } + }, + "reasoning": { + "supportedEfforts": ["none"] + }, + "ownedBy": "deepseek" + }, + { + "id": "minimaxi_video-01-live2d", + "name": "minimaxi_video-01-live2d", + "capabilities": ["video-generation"], + "ownedBy": "minimax" + }, + { + "id": "minimaxi_s2v-01", + "name": "minimaxi_S2V-01", + "ownedBy": "minimax" + }, + { + "id": "zzkj", + "name": "WiseDiag-Z1", + "contextWindow": 32000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 18.7 + }, + "output": { + "currency": "USD", + "perMillionTokens": 75.9 + } + }, + "ownedBy": "302ai" + }, + { + "id": "zzkj-lite", + "name": "WiseDiag-Z1 Lite", + "contextWindow": 32000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 3.85 + }, + "output": { + "currency": "USD", + "perMillionTokens": 15.4 + } + }, + "ownedBy": "302ai" + }, + { + "id": "zzkj-genetics", + "name": "WiseDiag-Genetics", + "contextWindow": 32000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 24.2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 94.6 + } + }, + "ownedBy": "302ai" + }, + { + "id": "gpt-3-7-sonnet-20250219-cursor", + "name": "claude-3.7-sonnet", + "contextWindow": 200000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 15 + } + }, + "ownedBy": "openai" + }, 
+ { + "id": "minimaxi-image-01", + "name": "minimaxi-image-01", + "capabilities": ["image-generation"], + "ownedBy": "minimax" + }, + { + "id": "cogview-4", + "name": "cogview-4", + "ownedBy": "zhipu" + }, + { + "id": "cogview-4-250304", + "name": "cogview-4-250304", + "ownedBy": "zhipu" + }, + { + "id": "doubao-seededit", + "name": "Doubao-Seededit", + "capabilities": ["function-call"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0 + }, + "output": { + "currency": "USD", + "perMillionTokens": 50 + } + }, + "ownedBy": "bytedance" + }, + { + "id": "ernie-4-5-8k-preview", + "name": "ernie-4.5-8k-preview", + "contextWindow": 8000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.66 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2.53 + } + }, + "ownedBy": "baidu" + }, + { + "id": "baidubce-irag-1-0", + "name": "baidubce-irag-1.0", + "ownedBy": "302ai" + }, + { + "id": "gpt-4o-image-generation", + "name": "gpt-4o-image-generation", + "capabilities": ["function-call", "image-generation"], + "contextWindow": 128000, + "ownedBy": "openai" + }, + { + "id": "high_aes_general_v30l_zt2i", + "name": "high_aes_general_v30l_zt2i", + "ownedBy": "302ai" + }, + { + "id": "general_v3-0", + "name": "general_v3.0", + "ownedBy": "302ai" + }, + { + "id": "glm-4-air-250414", + "name": "GLM-4-Air-250414", + "capabilities": ["function-call"], + "contextWindow": 32000, + "maxOutputTokens": 4000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.07 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.07 + } + }, + "ownedBy": "zhipu" + }, + { + "id": "glm-4-flash-250414", + "name": "GLM-4-Flash-250414", + "capabilities": ["function-call"], + "contextWindow": 128000, + "maxOutputTokens": 4000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "ownedBy": "zhipu" + }, + { + "id": 
"glm-z1-rumination-0414", + "name": "THUDM/GLM-Z1-Rumination-32B-0414", + "contextWindow": 32000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.07 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.07 + } + }, + "ownedBy": "zhipu" + }, + { + "id": "doubao-1-5-thinking-pro-vision-250415", + "name": "Doubao-1.5-Thinking-Pro-Vision", + "capabilities": ["reasoning", "image-recognition"], + "contextWindow": 96000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.6 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2.3 + } + }, + "reasoning": { + "supportedEfforts": ["none", "high"] + }, + "ownedBy": "bytedance" + }, + { + "id": "firecrawl", + "name": "firecrawl", + "ownedBy": "302ai" + }, + { + "id": "doubao-1-5-ui-tars-250328", + "name": "Doubao-1.5-UI-Tars-250328", + "contextWindow": 32000, + "maxOutputTokens": 4000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.7 + } + }, + "ownedBy": "bytedance" + }, + { + "id": "sensenova-v6-pro", + "name": "SenseNova-V6-Pro", + "contextWindow": 32000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.55 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.43 + } + }, + "ownedBy": "sensenova" + }, + { + "id": "sensenova-v6-turbo", + "name": "SenseNova-V6-Turbo", + "contextWindow": 32000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.275 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.715 + } + }, + "ownedBy": "sensenova" + }, + { + "id": "sensenova-v6-reasoner", + "name": "SenseNova-V6-Reasoner", + "capabilities": ["reasoning"], + "contextWindow": 32000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.66 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2.53 + } + }, + "ownedBy": "sensenova" + }, + { + "id": "ernie-x1-turbo-32k", + "name": 
"ERNIE-X1-Turbo-32k", + "contextWindow": 32000, + "maxOutputTokens": 16384, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.165 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.66 + } + }, + "ownedBy": "baidu" + }, + { + "id": "ernie-4-5-turbo-vl-32k", + "name": "ERNIE-4.5-Turbo-VL-32k", + "capabilities": ["image-recognition"], + "contextWindow": 8000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.495 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.43 + } + }, + "ownedBy": "baidu" + }, + { + "id": "higgsfield", + "name": "higgsfield", + "ownedBy": "302ai" + }, + { + "id": "higgsfield-shortads", + "name": "higgsfield-shortads", + "ownedBy": "302ai" + }, + { + "id": "deepseek-r1-huoshan-0528", + "name": "DeepSeek-R1-Huoshan-0528", + "capabilities": ["reasoning", "function-call"], + "contextWindow": 128000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.6 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2.3 + } + }, + "ownedBy": "deepseek" + }, + { + "id": "hedra-app", + "name": "hedra-app", + "ownedBy": "302ai" + }, + { + "id": "kling-v1-5", + "name": "kling-v1-5", + "capabilities": ["video-generation"], + "ownedBy": "kling" + }, + { + "id": "gemini-2-5-flash-deepsearch", + "name": "gemini-2.5-flash-deepsearch", + "capabilities": ["reasoning", "function-call", "image-recognition", "web-search"], + "contextWindow": 1000000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 25 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 0, + "max": 24576 + } + }, + "ownedBy": "google" + }, + { + "id": "gemini-2-5-pro-deepsearch", + "name": "gemini-2.5-pro-deepsearch", + "capabilities": ["reasoning", "function-call", "image-recognition", "web-search"], + "contextWindow": 1000000, + "pricing": { + "input": { + 
"currency": "USD", + "perMillionTokens": 5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 35 + } + }, + "reasoning": { + "supportedEfforts": ["low", "medium", "high"], + "thinkingTokenLimits": { + "min": 128, + "max": 32768 + } + }, + "ownedBy": "google" + }, + { + "id": "community", + "name": "deepseek/deepseek-v3/community", + "contextWindow": 64000, + "maxOutputTokens": 4000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1 + } + }, + "ownedBy": "302ai" + }, + { + "id": "v1beta1-text-synthesize", + "name": "v1beta1-text-synthesize", + "ownedBy": "302ai" + }, + { + "id": "qwenlong-l1", + "name": "Tongyi-Zhiwen/QwenLong-L1-32B", + "capabilities": ["function-call"], + "contextWindow": 128000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.14 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.6 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "t2v-01", + "name": "T2V-01", + "ownedBy": "302ai" + }, + { + "id": "i2v-01", + "name": "I2V-01", + "ownedBy": "302ai" + }, + { + "id": "i2v-01-live", + "name": "I2V-01-live", + "ownedBy": "302ai" + }, + { + "id": "i2v-01-director", + "name": "I2V-01-Director", + "ownedBy": "302ai" + }, + { + "id": "chanjing-video", + "name": "chanjing-video", + "capabilities": ["video-generation"], + "ownedBy": "302ai" + }, + { + "id": "chanjing-cicada1-0", + "name": "chanjing-cicada1.0", + "ownedBy": "302ai" + }, + { + "id": "chanjing-cicada3-0", + "name": "chanjing-cicada3.0", + "ownedBy": "302ai" + }, + { + "id": "kling-v2-1", + "name": "kling-v2-1", + "capabilities": ["video-generation"], + "ownedBy": "kling" + }, + { + "id": "gpt-4-sonnet-20250514-cursor", + "name": "gpt-4-sonnet-20250514-cursor", + "capabilities": ["function-call", "image-recognition"], + "contextWindow": 200000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 3 + }, + "output": { + "currency": "USD", + 
"perMillionTokens": 15 + } + }, + "ownedBy": "openai" + }, + { + "id": "gpt-4-opus-4-20250514-cursor", + "name": "gpt-4-opus-4-20250514-cursor", + "capabilities": ["function-call", "image-recognition"], + "contextWindow": 200000, + "ownedBy": "openai" + }, + { + "id": "gpt-4-opus-20250514-cursor", + "name": "gpt-4-opus-20250514-cursor", + "capabilities": ["function-call", "image-recognition"], + "contextWindow": 200000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 15 + }, + "output": { + "currency": "USD", + "perMillionTokens": 75 + } + }, + "ownedBy": "openai" + }, + { + "id": "higgsfield-soul", + "name": "higgsfield-soul", + "ownedBy": "302ai" + }, + { + "id": "seededit_v3-0", + "name": "seededit_v3.0", + "ownedBy": "bytedance" + }, + { + "id": "glm-4-1v-thinking-flashx", + "name": "glm-4.1v-thinking-flashx", + "capabilities": ["reasoning", "function-call"], + "contextWindow": 64000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.3 + } + }, + "ownedBy": "zhipu" + }, + { + "id": "higgsfield-character", + "name": "higgsfield-character", + "ownedBy": "302ai" + }, + { + "id": "playai-tts", + "name": "playai-tts", + "ownedBy": "302ai" + }, + { + "id": "runway-aleph", + "name": "runway-aleph", + "capabilities": ["video-generation"], + "ownedBy": "runway" + }, + { + "id": "sonnet-4-20250514", + "name": "cc-sonnet-4-20250514", + "contextWindow": 200000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.9 + }, + "output": { + "currency": "USD", + "perMillionTokens": 4.5 + } + }, + "ownedBy": "302ai" + }, + { + "id": "speech-01-turbo-240228", + "name": "speech-01-turbo-240228", + "ownedBy": "302ai" + }, + { + "id": "sensenova-v6-5-turbo", + "name": "SenseNova-V6-5-Turbo", + "contextWindow": 32000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.473 + }, + "output": { + "currency": "USD", + "perMillionTokens": 
1.419 + } + }, + "ownedBy": "sensenova" + }, + { + "id": "sensenova-v6-5-pro", + "name": "SenseNova-V6-5-Pro", + "contextWindow": 32000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.946 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2.838 + } + }, + "ownedBy": "sensenova" + }, + { + "id": "doubao-seed-1-6-flash-250715", + "name": "doubao-seed-1-6-flash-250715", + "capabilities": ["reasoning", "function-call", "image-recognition"], + "contextWindow": 256000, + "maxOutputTokens": 16000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.023 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.231 + } + }, + "ownedBy": "bytedance" + }, + { + "id": "kimi-k2-250711", + "name": "kimi-k2-250711", + "capabilities": ["function-call"], + "contextWindow": 128000, + "maxOutputTokens": 32000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.632 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2.53 + } + }, + "ownedBy": "moonshot" + }, + { + "id": "hunyuan-turbos-20250716", + "name": "hunyuan-turbos-20250716", + "capabilities": ["function-call"], + "contextWindow": 32000, + "maxOutputTokens": 16000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.132 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.33 + } + }, + "ownedBy": "tencent" + }, + { + "id": "hunyuan-t1-20250711", + "name": "hunyuan-t1-20250711", + "capabilities": ["reasoning", "function-call"], + "contextWindow": 28000, + "maxOutputTokens": 64000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.132 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.33 + } + }, + "ownedBy": "tencent" + }, + { + "id": "slides_glm_agent", + "name": "slides_glm_agent", + "ownedBy": "302ai" + }, + { + "id": "cogito-v2-preview-deepseek", + "name": "deepcogito/cogito-v2-preview-deepseek-671b", + "capabilities": ["function-call"], + "contextWindow": 64000, 
+ "ownedBy": "cogito" + }, + { + "id": "cogito-v2-preview-llama", + "name": "deepcogito/cogito-v2-preview-llama-405B", + "contextWindow": 64000, + "ownedBy": "cogito" + }, + { + "id": "u1-pro", + "name": "U1-Pro", + "contextWindow": 32000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.2 + } + }, + "ownedBy": "302ai" + }, + { + "id": "u1", + "name": "U1", + "contextWindow": 32000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.6 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.6 + } + }, + "ownedBy": "302ai" + }, + { + "id": "opus-4-1-20250805", + "name": "cc-opus-4-1-20250805", + "contextWindow": 200000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 4.5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 22.5 + } + }, + "ownedBy": "302ai" + }, + { + "id": "3-5-haiku-20241022", + "name": "cc-3-5-haiku-20241022", + "contextWindow": 200000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.24 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.2 + } + }, + "ownedBy": "302ai" + }, + { + "id": "dop-lite", + "name": "dop-lite", + "ownedBy": "302ai" + }, + { + "id": "dop-preview", + "name": "dop-preview", + "ownedBy": "302ai" + }, + { + "id": "dop-turbo", + "name": "dop-turbo", + "ownedBy": "302ai" + }, + { + "id": "higgsfield-api-soul", + "name": "higgsfield-api-soul", + "ownedBy": "302ai" + }, + { + "id": "higgsfield-api-speak", + "name": "higgsfield-api-speak", + "ownedBy": "302ai" + }, + { + "id": "higgsfield-apps", + "name": "higgsfield-apps", + "ownedBy": "302ai" + }, + { + "id": "img", + "name": "sophnet2", + "ownedBy": "302ai" + }, + { + "id": "sonnet-4-5-20250929", + "name": "cc-sonnet-4-5-20250929", + "contextWindow": 200000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.9 + }, + "output": { + "currency": "USD", + "perMillionTokens": 
4.5 + } + }, + "ownedBy": "302ai" + }, + { + "id": "haiku-4-5-20251001", + "name": "cc-haiku-4-5-20251001", + "contextWindow": 200000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.5 + } + }, + "ownedBy": "302ai" + }, + { + "id": "s1", + "name": "S1", + "contextWindow": 32000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.3 + } + }, + "ownedBy": "302ai" + }, + { + "id": "viduq2", + "name": "viduq2", + "capabilities": ["video-generation"], + "ownedBy": "vidu" + }, + { + "id": "doubao-seedance-1-0-pro-fast-251015", + "name": "doubao-seedance-1-0-pro-fast-251015", + "capabilities": ["function-call"], + "ownedBy": "bytedance" + }, + { + "id": "minimax-image-01", + "name": "minimax-image-01", + "capabilities": ["image-generation"], + "ownedBy": "minimax" + }, + { + "id": "minimaxi-image-01-live", + "name": "minimax-image-01", + "capabilities": ["image-generation"], + "ownedBy": "minimax" + }, + { + "id": "minimax-image-01-live", + "name": "minimax-image-01", + "capabilities": ["image-generation"], + "ownedBy": "minimax" + }, + { + "id": "voyage-context-3", + "name": "voyage-context-3", + "ownedBy": "voyage" + }, + { + "id": "rerank-2-5", + "name": "rerank-2.5", + "capabilities": ["rerank"], + "ownedBy": "cohere" + }, + { + "id": "rerank-2-5-lite", + "name": "rerank-2.5-lite", + "capabilities": ["rerank"], + "ownedBy": "cohere" + }, + { + "id": "music-1-5", + "name": "music-1.5", + "ownedBy": "302ai" + }, + { + "id": "gpt-5-1-plus", + "name": "gpt-5.1-plus", + "capabilities": ["function-call", "image-recognition", "web-search", "reasoning"], + "contextWindow": 400000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.25 + }, + "output": { + "currency": "USD", + "perMillionTokens": 10 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"] + 
}, + "ownedBy": "openai" + }, + { + "id": "gpt-5-1-thinking-plus", + "name": "gpt-5.1-thinking-plus", + "capabilities": ["reasoning", "function-call", "image-recognition", "web-search"], + "contextWindow": 400000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.25 + }, + "output": { + "currency": "USD", + "perMillionTokens": 10 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"] + }, + "ownedBy": "openai" + }, + { + "id": "ernie-5-0-thinking-latest", + "name": "ernie-5.0-thinking-latest", + "capabilities": ["reasoning"], + "contextWindow": 128000, + "maxOutputTokens": 64000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.946 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.573 + } + }, + "ownedBy": "baidu" + }, + { + "id": "opus-4-5-20251101", + "name": "cc-opus-4-5-20251101", + "contextWindow": 200000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 7.5 + } + }, + "ownedBy": "302ai" + }, + { + "id": "glm-4-6-flash", + "name": "GLM-4.6V-Flash", + "capabilities": ["function-call", "reasoning"], + "contextWindow": 64000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "reasoning": { + "supportedEfforts": ["none"], + "interleaved": true + }, + "ownedBy": "zhipu" + }, + { + "id": "glm-asr-2512", + "name": "glm-asr-2512", + "ownedBy": "zhipu" + }, + { + "id": "glm-tts-clone", + "name": "glm-tts-clone", + "capabilities": ["audio-generation"], + "ownedBy": "zhipu" + }, + { + "id": "glm-tts", + "name": "glm-tts", + "ownedBy": "zhipu" + }, + { + "id": "gpt-4o-mini-tts-2025-12-15", + "name": "gpt-4o-mini-tts-2025-12-15", + "capabilities": ["audio-generation"], + "ownedBy": "openai" + }, + { + "id": "glm-for-coding", + "name": "glm-for-coding", + "capabilities": ["reasoning"], + "contextWindow": 
200000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.18 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.78 + } + }, + "ownedBy": "zhipu" + }, + { + "id": "higgsfield-soul-standard", + "name": "higgsfield-soul-standard", + "ownedBy": "302ai" + }, + { + "id": "higgsfield-dop-standard", + "name": "higgsfield-dop-standard", + "ownedBy": "302ai" + }, + { + "id": "glm-4-7-preview", + "name": "glm-4.7-preview", + "capabilities": ["function-call", "reasoning"], + "contextWindow": 200000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "reasoning": { + "supportedEfforts": ["none"], + "interleaved": true + }, + "ownedBy": "zhipu" + }, + { + "id": "glm-4-7-coding-preview", + "name": "glm-4.7-coding-preview", + "capabilities": ["function-call", "reasoning"], + "contextWindow": 200000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "reasoning": { + "supportedEfforts": ["none"], + "interleaved": true + }, + "ownedBy": "zhipu" + }, + { + "id": "minimax-m2-1-highspeed", + "name": "MiniMax-M2.1-highspeed", + "capabilities": ["reasoning", "function-call"], + "contextWindow": 1000000, + "maxOutputTokens": 80000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.6 + }, + "output": { + "currency": "USD", + "perMillionTokens": 4.8 + } + }, + "reasoning": { + "supportedEfforts": [], + "interleaved": true + }, + "ownedBy": "minimax" + }, + { + "id": "doubao-seedance-1-5-pro-251215", + "name": "doubao-seedance-1-5-pro-251215", + "capabilities": ["function-call"], + "ownedBy": "bytedance" + }, + { + "id": "qwq-plus-latest", + "name": "QwQ-Plus-Latest", + "capabilities": ["reasoning"], + "contextWindow": 128000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.23 + }, + "output": { + "currency": "USD", + 
"perMillionTokens": 0.58 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "qwq-plus-2025-03-05", + "name": "QwQ-Plus-2025-03-05", + "capabilities": ["reasoning"], + "contextWindow": 128000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.23 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.58 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "qvq-max-latest", + "name": "QvQ-Max-Latest", + "capabilities": ["reasoning"], + "contextWindow": 128000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.15 + }, + "output": { + "currency": "USD", + "perMillionTokens": 4.58 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "qvq-max-2025-05-15", + "name": "QvQ-Max-2025-05-15", + "capabilities": ["reasoning"], + "contextWindow": 128000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.15 + }, + "output": { + "currency": "USD", + "perMillionTokens": 4.58 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "qvq-plus", + "name": "QvQ-Plus", + "capabilities": ["reasoning"], + "contextWindow": 128000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.29 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.72 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "qvq-plus-latest", + "name": "QvQ-Plus-Latest", + "capabilities": ["reasoning"], + "contextWindow": 128000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.29 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.72 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "qvq-plus-2025-05-15", + "name": "QvQ-Plus-2025-05-15", + "capabilities": ["reasoning"], + "contextWindow": 128000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.29 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.72 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "qwen2-5-instruct-1m", + "name": "Qwen2.5-14B", + "capabilities": ["function-call"], + "contextWindow": 128000, + 
"pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.143 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.43 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "qwen3-coder-plus-2025-09-23", + "name": "qwen3-coder-plus-2025-09-23", + "capabilities": ["function-call"], + "contextWindow": 1048576, + "maxOutputTokens": 65536, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.572 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2.29 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "qwen-mt-lite", + "name": "qwen-mt-lite", + "contextWindow": 4096, + "maxOutputTokens": 2048, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.086 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.23 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "qwen-plus-2025-12-01", + "name": "Qwen-Plus-2025-12-01", + "capabilities": ["function-call", "reasoning"], + "contextWindow": 128000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.12 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.2 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 0, + "max": 81920 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "qwen-plus-2025-09-11", + "name": "Qwen-Plus-2025-09-11", + "capabilities": ["function-call", "reasoning"], + "contextWindow": 128000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.12 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.2 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 0, + "max": 81920 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "qwen-plus-2025-01-25", + "name": "Qwen-Plus-2025-01-25", + "capabilities": ["function-call", "reasoning"], + "contextWindow": 128000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.12 + }, + "output": { + 
"currency": "USD", + "perMillionTokens": 0.286 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 0, + "max": 81920 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "qwen-plus-2025-01-12", + "name": "Qwen-Plus-2025-01-12", + "capabilities": ["function-call", "reasoning"], + "contextWindow": 128000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.12 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.286 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 0, + "max": 81920 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "qwen-max-2024-04-28", + "name": "Qwen-Max-2024-04-28", + "capabilities": ["function-call"], + "contextWindow": 131072, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 5.72 + }, + "output": { + "currency": "USD", + "perMillionTokens": 17.143 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "qwen-max-2024-04-03", + "name": "Qwen-Max-2024-04-03", + "capabilities": ["function-call"], + "contextWindow": 131072, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 5.72 + }, + "output": { + "currency": "USD", + "perMillionTokens": 17.143 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "qwen-long-latest", + "name": "Qwen-Long-Latest", + "capabilities": ["function-call", "file-input"], + "contextWindow": 1000000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.072 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.286 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "qwen-long-2025-01-25", + "name": "Qwen-Long-2025-01-25", + "capabilities": ["function-call", "file-input"], + "contextWindow": 1000000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.072 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.286 + } + }, + "ownedBy": "alibaba" + }, + { + "id": 
"qwen-vl-plus-2025-08-15", + "name": "Qwen-VL-Plus-2025-08-15", + "capabilities": ["function-call", "image-recognition"], + "contextWindow": 32000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.12 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.3 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "qwen-vl-plus-2025-07-10", + "name": "Qwen-VL-Plus-2025-07-10", + "capabilities": ["function-call", "image-recognition"], + "contextWindow": 32000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.022 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.22 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "qwen-vl-plus-2025-05-07", + "name": "Qwen-VL-Plus-2025-05-07", + "capabilities": ["function-call", "image-recognition"], + "contextWindow": 32000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.22 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.65 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "qwen-vl-plus-2025-01-25", + "name": "Qwen-VL-Plus-2025-01-25", + "capabilities": ["function-call", "image-recognition"], + "contextWindow": 32000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.22 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.65 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "qwen-vl-plus-2025-01-02", + "name": "Qwen-VL-Plus-2025-01-02", + "capabilities": ["function-call", "image-recognition"], + "contextWindow": 32000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.22 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.65 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "qwen-turbo-2025-07-15", + "name": "Qwen-Turbo-2025-07-15", + "capabilities": ["function-call", "reasoning"], + "contextWindow": 1000000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.05 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.43 + } + }, + 
"reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 0, + "max": 38912 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "qwen-vl-max-2025-08-13", + "name": "Qwen-VL-Max-2025-08-13", + "capabilities": ["function-call", "image-recognition"], + "contextWindow": 32000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.23 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.58 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "qwen-vl-max-2025-04-08", + "name": "Qwen-VL-Max-2025-04-08", + "capabilities": ["function-call", "image-recognition"], + "contextWindow": 32000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.43 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.29 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "qwen-vl-max-2025-04-02", + "name": "Qwen-VL-Max-2025-04-02", + "capabilities": ["function-call", "image-recognition"], + "contextWindow": 32000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.43 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.29 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "qwen-vl-max-2024-12-30", + "name": "Qwen-VL-Max-2024-12-30", + "capabilities": ["function-call", "image-recognition"], + "contextWindow": 32000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.43 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.29 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "qwen-vl-ocr-2025-11-20", + "name": "Qwen-VL-OCR-2025-11-20", + "capabilities": ["function-call", "image-recognition", "file-input"], + "contextWindow": 38000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.043 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.072 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "qwen-vl-ocr-2025-08-28", + "name": "Qwen-VL-OCR-2025-08-28", + "capabilities": ["function-call", "image-recognition", 
"file-input"], + "contextWindow": 34000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.72 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.72 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "qwen-vl-ocr-2025-04-13", + "name": "Qwen-VL-OCR-2025-04-13", + "capabilities": ["function-call", "image-recognition", "file-input"], + "contextWindow": 34000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.72 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.72 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "glm-image", + "name": "glm-image", + "capabilities": ["image-generation"], + "ownedBy": "zhipu" + }, + { + "id": "s2", + "name": "S2", + "contextWindow": 32000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.3 + } + }, + "ownedBy": "302ai" + }, + { + "id": "u2", + "name": "U2", + "contextWindow": 32000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.6 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.6 + } + }, + "ownedBy": "302ai" + }, + { + "id": "minimax-for-coding", + "name": "minimax-for-coding", + "capabilities": ["reasoning"], + "contextWindow": 200000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.09 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.36 + } + }, + "ownedBy": "minimax" + }, + { + "id": "glm-ocr", + "name": "glm-ocr", + "capabilities": ["file-input"], + "contextWindow": 32000, + "ownedBy": "zhipu" + }, + { + "id": "opus-4-6", + "name": "cc-opus-4-6", + "contextWindow": 200000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 7.5 + } + }, + "ownedBy": "302ai" + }, + { + "id": "sonnet-4-6", + "name": "cc-sonnet-4-6", + "contextWindow": 1000000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 
0.9 + }, + "output": { + "currency": "USD", + "perMillionTokens": 4.5 + } + }, + "ownedBy": "302ai" + }, + { + "id": "gemma3", + "name": "gemma3", + "capabilities": ["image-recognition"], + "ownedBy": "google" + }, + { + "id": "speech_paraformer-large", + "name": "speech_paraformer-large", + "ownedBy": "lanyun" + }, + { + "id": "kimi-k2-thinking-251104", + "name": "kimi-k2-thinking-251104", + "capabilities": ["reasoning", "function-call"], + "reasoning": { + "supportedEfforts": [] + }, + "ownedBy": "moonshot" + }, + { + "id": "ph-data-image-1-0", + "name": "ph-data-image-1-0", + "capabilities": ["image-generation"], + "ownedBy": "ph8" + }, + { + "id": "ph-ppt-image-1-0", + "name": "ph-ppt-image-1-0", + "capabilities": ["image-generation"], + "ownedBy": "ph8" + }, + { + "id": "longcat-flash-lite", + "name": "longcat-flash-lite", + "ownedBy": "meituan" + }, + { + "id": "gpt-realtime-mini-2025-12-15", + "name": "gpt-realtime-mini-2025-12-15", + "ownedBy": "openai" + }, + { + "id": "gpt-audio-mini-2025-12-15", + "name": "gpt-audio-mini-2025-12-15", + "ownedBy": "openai" + }, + { + "id": "chatgpt-image-latest", + "name": "chatgpt-image-latest", + "capabilities": ["image-generation"], + "ownedBy": "openai" + }, + { + "id": "gpt-5-2-pro-2025-12-11", + "name": "gpt-5-2-pro-2025-12-11", + "capabilities": ["function-call", "image-recognition", "web-search", "reasoning"], + "reasoning": { + "supportedEfforts": ["medium", "high", "max"] + }, + "ownedBy": "openai" + }, + { + "id": "gpt-4o-mini-transcribe-2025-12-15", + "name": "gpt-4o-mini-transcribe-2025-12-15", + "capabilities": ["function-call", "image-recognition", "web-search"], + "ownedBy": "openai" + }, + { + "id": "gemini-pro-latest", + "name": "Gemini Pro Latest", + "capabilities": ["function-call", "reasoning"], + "contextWindow": 1048576, + "maxOutputTokens": 65536, + "reasoning": { + "supportedEfforts": ["low", "medium", "high"], + "thinkingTokenLimits": { + "min": 128, + "max": 32768 + } + }, + "ownedBy": "google" 
+ }, + { + "id": "nano-banana-pro-preview", + "name": "Nano Banana Pro", + "capabilities": ["reasoning"], + "contextWindow": 131072, + "maxOutputTokens": 32768, + "ownedBy": "google" + }, + { + "id": "gemini-robotics-er-1-5-preview", + "name": "Gemini Robotics-ER 1.5 Preview", + "capabilities": ["reasoning"], + "contextWindow": 1048576, + "maxOutputTokens": 65536, + "ownedBy": "google" + }, + { + "id": "deep-research-pro-preview-12-2025", + "name": "Deep Research Pro Preview (Dec-12-2025)", + "capabilities": ["reasoning"], + "contextWindow": 131072, + "maxOutputTokens": 65536, + "ownedBy": "gemini" + }, + { + "id": "aqa", + "name": "Model that performs Attributed Question Answering.", + "contextWindow": 7168, + "maxOutputTokens": 1024, + "ownedBy": "gemini" + }, + { + "id": "gemini-2-5-flash-native-audio-latest", + "name": "Gemini 2.5 Flash Native Audio Latest", + "capabilities": ["reasoning", "function-call", "image-recognition", "web-search"], + "contextWindow": 131072, + "maxOutputTokens": 8192, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 0, + "max": 24576 + } + }, + "ownedBy": "google" + }, + { + "id": "gemini-2-5-flash-native-audio-preview-09-2025", + "name": "Gemini 2.5 Flash Native Audio Preview 09-2025", + "capabilities": ["reasoning", "function-call", "image-recognition", "web-search"], + "contextWindow": 131072, + "maxOutputTokens": 8192, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 0, + "max": 24576 + } + }, + "ownedBy": "google" + }, + { + "id": "gemini-2-5-flash-native-audio-preview-12-2025", + "name": "Gemini 2.5 Flash Native Audio Preview 12-2025", + "capabilities": ["reasoning", "function-call", "image-recognition", "web-search"], + "contextWindow": 131072, + "maxOutputTokens": 8192, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 0, + "max": 24576 + } + }, + 
"ownedBy": "google" + }, + { + "id": "moonshot-v1-auto", + "name": "moonshot-v1-auto", + "ownedBy": "moonshot" + }, + { + "id": "qwen3-asr-flash-realtime-2026-02-10", + "name": "qwen3-asr-flash-realtime-2026-02-10", + "capabilities": ["function-call", "reasoning"], + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 38912 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "qwen3-tts-vd-2026-01-26", + "name": "qwen3-tts-vd-2026-01-26", + "capabilities": ["audio-generation", "reasoning"], + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 38912 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "qwen3-tts-instruct-flash-2026-01-26", + "name": "qwen3-tts-instruct-flash-2026-01-26", + "capabilities": ["audio-generation"], + "ownedBy": "alibaba" + }, + { + "id": "qwen3-tts-instruct-flash", + "name": "qwen3-tts-instruct-flash", + "capabilities": ["audio-generation"], + "ownedBy": "alibaba" + }, + { + "id": "qwen3-tts-vc-2026-01-22", + "name": "qwen3-tts-vc-2026-01-22", + "capabilities": ["audio-generation", "reasoning"], + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 38912 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "qwen3-tts-instruct-flash-realtime-2026-01-22", + "name": "qwen3-tts-instruct-flash-realtime-2026-01-22", + "capabilities": ["audio-generation"], + "ownedBy": "alibaba" + }, + { + "id": "qwen3-tts-instruct-flash-realtime", + "name": "qwen3-tts-instruct-flash-realtime", + "capabilities": ["audio-generation"], + "ownedBy": "alibaba" + }, + { + "id": "qwen3-tts-vd-realtime-2026-01-15", + "name": "qwen3-tts-vd-realtime-2026-01-15", + "capabilities": ["audio-generation", "reasoning"], + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 38912 + } + }, + "ownedBy": 
"alibaba" + }, + { + "id": "tongyi-xiaomi-analysis-flash", + "name": "tongyi-xiaomi-analysis-flash", + "ownedBy": "dashscope" + }, + { + "id": "tongyi-xiaomi-analysis-pro", + "name": "tongyi-xiaomi-analysis-pro", + "ownedBy": "dashscope" + }, + { + "id": "qwen3-tts-vc-realtime-2026-01-15", + "name": "qwen3-tts-vc-realtime-2026-01-15", + "capabilities": ["audio-generation", "reasoning"], + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 38912 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "qwen-image-edit-max-2026-01-16", + "name": "qwen-image-edit-max-2026-01-16", + "capabilities": ["function-call", "image-generation"], + "ownedBy": "alibaba" + }, + { + "id": "qwen-image-edit-max", + "name": "qwen-image-edit-max", + "capabilities": ["function-call", "image-generation"], + "ownedBy": "alibaba" + }, + { + "id": "qwen-image-plus-2026-01-09", + "name": "qwen-image-plus-2026-01-09", + "capabilities": ["function-call", "image-generation"], + "ownedBy": "alibaba" + }, + { + "id": "qwen-flash-character", + "name": "qwen-flash-character", + "capabilities": ["function-call", "reasoning"], + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 0, + "max": 81920 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "qwen-image-max-2025-12-30", + "name": "qwen-image-max-2025-12-30", + "capabilities": ["function-call", "image-generation"], + "ownedBy": "alibaba" + }, + { + "id": "z-image-turbo", + "name": "z-image-turbo", + "capabilities": ["image-generation"], + "ownedBy": "dashscope" + }, + { + "id": "qwen3-vl-plus-2025-12-19", + "name": "qwen3-vl-plus-2025-12-19", + "capabilities": ["function-call", "image-recognition", "reasoning"], + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 38912 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "qwen3-tts-vd-realtime-2025-12-16", + 
"name": "qwen3-tts-vd-realtime-2025-12-16", + "capabilities": ["audio-generation", "reasoning"], + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 38912 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "qwen-image-edit-plus-2025-12-15", + "name": "qwen-image-edit-plus-2025-12-15", + "capabilities": ["function-call", "image-generation"], + "ownedBy": "alibaba" + }, + { + "id": "qwen3-omni-flash-2025-12-01", + "name": "qwen3-omni-flash-2025-12-01", + "capabilities": ["function-call", "image-recognition", "reasoning"], + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 38912 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "qwen3-omni-flash-realtime-2025-12-01", + "name": "qwen3-omni-flash-realtime-2025-12-01", + "capabilities": ["function-call", "image-recognition", "reasoning"], + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 38912 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "qwen3-livetranslate-flash-2025-12-01", + "name": "qwen3-livetranslate-flash-2025-12-01", + "capabilities": ["function-call", "reasoning"], + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 38912 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "qwen3-livetranslate-flash", + "name": "qwen3-livetranslate-flash", + "capabilities": ["function-call", "reasoning"], + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 38912 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "qwen3-tts-vc-realtime-2025-11-27", + "name": "qwen3-tts-vc-realtime-2025-11-27", + "capabilities": ["audio-generation", "reasoning"], + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 
38912 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "qwen3-tts-flash-2025-11-27", + "name": "qwen3-tts-flash-2025-11-27", + "capabilities": ["audio-generation", "reasoning"], + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 38912 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "qwen3-tts-flash-realtime-2025-11-27", + "name": "qwen3-tts-flash-realtime-2025-11-27", + "capabilities": ["audio-generation", "reasoning"], + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 38912 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "qwen-plus-2025-11-05", + "name": "qwen-plus-2025-11-05", + "capabilities": ["function-call", "reasoning"], + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 0, + "max": 81920 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "qwen-image-edit-plus-2025-10-30", + "name": "qwen-image-edit-plus-2025-10-30", + "capabilities": ["function-call", "image-generation"], + "ownedBy": "alibaba" + }, + { + "id": "qwen-deep-search-planning", + "name": "qwen-deep-search-planning", + "capabilities": ["function-call", "web-search"], + "ownedBy": "alibaba" + }, + { + "id": "qwen3-asr-flash-realtime-2025-10-27", + "name": "qwen3-asr-flash-realtime-2025-10-27", + "capabilities": ["function-call", "reasoning"], + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 38912 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "qwen3-asr-flash-realtime", + "name": "qwen3-asr-flash-realtime", + "capabilities": ["function-call", "reasoning"], + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 38912 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "qwen3-vl-flash-2025-10-15", + "name": "qwen3-vl-flash-2025-10-15", + "capabilities": 
["function-call", "image-recognition", "reasoning"], + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 38912 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "qwen3-tts-flash", + "name": "qwen3-tts-flash", + "capabilities": ["audio-generation", "reasoning"], + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 38912 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "qwen3-tts-flash-2025-09-18", + "name": "qwen3-tts-flash-2025-09-18", + "capabilities": ["audio-generation", "reasoning"], + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 38912 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "qwen3-tts-flash-realtime-2025-09-18", + "name": "qwen3-tts-flash-realtime-2025-09-18", + "capabilities": ["audio-generation", "reasoning"], + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 38912 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "qwen3-tts-flash-realtime", + "name": "qwen3-tts-flash-realtime", + "capabilities": ["audio-generation", "reasoning"], + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 38912 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "qwen3-omni-flash-2025-09-15", + "name": "qwen3-omni-flash-2025-09-15", + "capabilities": ["function-call", "image-recognition", "reasoning"], + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 38912 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "qwen3-omni-flash-realtime-2025-09-15", + "name": "qwen3-omni-flash-realtime-2025-09-15", + "capabilities": ["function-call", "image-recognition", "reasoning"], + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + 
"thinkingTokenLimits": { + "min": 1024, + "max": 38912 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "qwen3-s2s-flash-realtime-2025-09-22", + "name": "qwen3-s2s-flash-realtime-2025-09-22", + "capabilities": ["function-call", "reasoning"], + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 38912 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "qwen3-livetranslate-flash-realtime-2025-09-22", + "name": "qwen3-livetranslate-flash-realtime-2025-09-22", + "capabilities": ["function-call", "reasoning"], + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 38912 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "qwen3-vl-plus-2025-09-23", + "name": "qwen3-vl-plus-2025-09-23", + "capabilities": ["function-call", "image-recognition", "reasoning"], + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 38912 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "qwen-tts-2025-05-22", + "name": "qwen-tts-2025-05-22", + "capabilities": ["audio-generation"], + "ownedBy": "alibaba" + }, + { + "id": "qwen-turbo-0919", + "name": "qwen-turbo-0919", + "capabilities": ["function-call", "reasoning"], + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 0, + "max": 38912 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "codeqwen1-5-chat", + "name": "codeqwen1-5-chat", + "ownedBy": "dashscope" + }, + { + "id": "qwen-max-1201", + "name": "qwen-max-1201", + "capabilities": ["function-call"], + "ownedBy": "alibaba" + }, + { + "id": "qwen-max-0107", + "name": "qwen-max-0107", + "capabilities": ["function-call"], + "ownedBy": "alibaba" + }, + { + "id": "step-1x", + "name": "step-1x", + "ownedBy": "stepfun" + }, + { + "id": "step-2-16k-202411", + "name": "step-2-16k-202411", + "ownedBy": "stepfun" + }, + { + "id": "step-asr", + 
"name": "step-asr", + "ownedBy": "stepfun" + }, + { + "id": "step-1o-audio", + "name": "step-1o-audio", + "ownedBy": "stepfun" + }, + { + "id": "step-tts-vivid", + "name": "step-tts-vivid", + "capabilities": ["audio-generation"], + "ownedBy": "stepfun" + }, + { + "id": "step-1x-edit", + "name": "step-1x-edit", + "ownedBy": "stepfun" + }, + { + "id": "step-1f-audio", + "name": "step-1f-audio", + "ownedBy": "stepfun" + }, + { + "id": "step-2x-large", + "name": "step-2x-large", + "ownedBy": "stepfun" + }, + { + "id": "step-audio-2", + "name": "step-audio-2", + "ownedBy": "stepfun" + }, + { + "id": "step-audio-2-mini", + "name": "step-audio-2-mini", + "ownedBy": "stepfun" + }, + { + "id": "step-tts-2", + "name": "step-tts-2", + "capabilities": ["audio-generation"], + "ownedBy": "stepfun" + }, + { + "id": "dr-search-api", + "name": "dr-search-api", + "capabilities": ["web-search"], + "ownedBy": "stepfun" + }, + { + "id": "step-gui", + "name": "step-gui", + "ownedBy": "stepfun" + }, + { + "id": "step-3-agent-lite", + "name": "step-3-agent-lite", + "capabilities": ["reasoning"], + "ownedBy": "stepfun" + }, + { + "id": "megrez-instruct", + "name": "megrez-instruct", + "ownedBy": "infini" + }, + { + "id": "seedance-1-0", + "name": "seedance-1-0", + "ownedBy": "bytedance" + }, + { + "id": "dracarys-llama-3-1-instruct", + "name": "dracarys-llama-3-1-instruct", + "ownedBy": "nvidia" + }, + { + "id": "fuyu", + "name": "fuyu", + "ownedBy": "nvidia" + }, + { + "id": "jamba-1-5-large-instruct", + "name": "jamba-1-5-large-instruct", + "ownedBy": "ai21" + }, + { + "id": "jamba-1-5-mini-instruct", + "name": "jamba-1-5-mini-instruct", + "ownedBy": "ai21" + }, + { + "id": "sea-lion-instruct", + "name": "sea-lion-instruct", + "ownedBy": "nvidia" + }, + { + "id": "baichuan2-chat", + "name": "baichuan2-chat", + "ownedBy": "baichuan" + }, + { + "id": "starcoder2", + "name": "starcoder2", + "ownedBy": "nvidia" + }, + { + "id": "dbrx-instruct", + "name": "dbrx-instruct", + "ownedBy": 
"nvidia" + }, + { + "id": "deplot", + "name": "deplot", + "ownedBy": "nvidia" + }, + { + "id": "gemma", + "name": "gemma", + "ownedBy": "google" + }, + { + "id": "paligemma", + "name": "paligemma", + "ownedBy": "nvidia" + }, + { + "id": "recurrentgemma", + "name": "recurrentgemma", + "ownedBy": "nvidia" + }, + { + "id": "shieldgemma", + "name": "shieldgemma", + "ownedBy": "nvidia" + }, + { + "id": "gemma-2-cpt-sahabatai-instruct", + "name": "gemma-2-cpt-sahabatai-instruct", + "ownedBy": "google" + }, + { + "id": "granite-3-0-a800m-instruct", + "name": "granite-3-0-a800m-instruct", + "ownedBy": "nvidia" + }, + { + "id": "granite-3-0-instruct", + "name": "granite-3-0-instruct", + "ownedBy": "nvidia" + }, + { + "id": "granite-3-3-instruct", + "name": "granite-3-3-instruct", + "ownedBy": "nvidia" + }, + { + "id": "granite-code-instruct", + "name": "granite-code-instruct", + "ownedBy": "nvidia" + }, + { + "id": "granite-guardian-3-0", + "name": "granite-guardian-3-0", + "ownedBy": "nvidia" + }, + { + "id": "colosseum_355b_instruct_16k", + "name": "colosseum_355b_instruct_16k", + "ownedBy": "nvidia" + }, + { + "id": "italia_10b_instruct_16k", + "name": "italia_10b_instruct_16k", + "ownedBy": "nvidia" + }, + { + "id": "llama-3-1-swallow-instruct-v0-1", + "name": "llama-3-1-swallow-instruct-v0-1", + "ownedBy": "meta" + }, + { + "id": "marin-instruct", + "name": "marin-instruct", + "ownedBy": "nvidia" + }, + { + "id": "breeze-instruct", + "name": "breeze-instruct", + "ownedBy": "nvidia" + }, + { + "id": "llama2", + "name": "llama2", + "ownedBy": "meta" + }, + { + "id": "kosmos-2", + "name": "kosmos-2", + "ownedBy": "nvidia" + }, + { + "id": "phi-4-mini-flash", + "name": "phi-4-mini-flash", + "ownedBy": "microsoft" + }, + { + "id": "mathstral-v0-1", + "name": "mathstral-v0-1", + "ownedBy": "nvidia" + }, + { + "id": "mistral-medium-3-instruct", + "name": "mistral-medium-3-instruct", + "ownedBy": "mistral" + }, + { + "id": "mistral-nemotron", + "name": "mistral-nemotron", + 
"ownedBy": "mistral" + }, + { + "id": "mistral-small-instruct", + "name": "mistral-small-instruct", + "ownedBy": "mistral" + }, + { + "id": "mixtral-8x22b-instruct-v0-1", + "name": "mixtral-8x22b-instruct-v0-1", + "ownedBy": "mistral" + }, + { + "id": "mixtral-8x22b-v0-1", + "name": "mixtral-8x22b-v0-1", + "ownedBy": "mistral" + }, + { + "id": "cosmos-reason2", + "name": "cosmos-reason2", + "ownedBy": "nvidia" + }, + { + "id": "embed-qa-4", + "name": "embed-qa-4", + "capabilities": ["embedding"], + "ownedBy": "cohere" + }, + { + "id": "llama-3-1-nemoguard-content-safety", + "name": "llama-3-1-nemoguard-content-safety", + "ownedBy": "meta" + }, + { + "id": "llama-3-1-nemoguard-topic-control", + "name": "llama-3-1-nemoguard-topic-control", + "ownedBy": "meta" + }, + { + "id": "llama-3-1-nemotron-reward", + "name": "llama-3-1-nemotron-reward", + "ownedBy": "meta" + }, + { + "id": "llama-3-1-nemotron-nano-v1-1", + "name": "llama-3-1-nemotron-nano-v1-1", + "ownedBy": "meta" + }, + { + "id": "llama-3-1-nemotron-nano-v1", + "name": "llama-3-1-nemotron-nano-v1", + "ownedBy": "meta" + }, + { + "id": "llama-3-1-nemotron-nano-vl-v1", + "name": "llama-3-1-nemotron-nano-vl-v1", + "capabilities": ["image-recognition"], + "ownedBy": "meta" + }, + { + "id": "llama-3-1-nemotron-safety-guard-v3", + "name": "llama-3-1-nemotron-safety-guard-v3", + "ownedBy": "meta" + }, + { + "id": "llama-3-2-nemoretriever-vlm-embed-v1", + "name": "llama-3-2-nemoretriever-vlm-embed-v1", + "capabilities": ["embedding"], + "ownedBy": "meta" + }, + { + "id": "llama-3-2-nemoretriever-300m-embed-v1", + "name": "llama-3-2-nemoretriever-300m-embed-v1", + "capabilities": ["embedding"], + "ownedBy": "meta" + }, + { + "id": "llama-3-2-nemoretriever-300m-embed-v2", + "name": "llama-3-2-nemoretriever-300m-embed-v2", + "capabilities": ["embedding"], + "ownedBy": "meta" + }, + { + "id": "llama-3-2-nv-embedqa-v1", + "name": "llama-3-2-nv-embedqa-v1", + "capabilities": ["embedding"], + "ownedBy": "meta" + }, + { + 
"id": "llama-3-2-nv-embedqa-v2", + "name": "llama-3-2-nv-embedqa-v2", + "capabilities": ["embedding"], + "ownedBy": "meta" + }, + { + "id": "llama-nemotron-embed-vl-v2", + "name": "llama-nemotron-embed-vl-v2", + "capabilities": ["embedding"], + "ownedBy": "meta" + }, + { + "id": "mistral-nemo-minitron-8k-instruct", + "name": "mistral-nemo-minitron-8k-instruct", + "ownedBy": "mistral" + }, + { + "id": "mistral-nemo-minitron-base", + "name": "mistral-nemo-minitron-base", + "ownedBy": "mistral" + }, + { + "id": "nemoretriever-parse", + "name": "nemoretriever-parse", + "ownedBy": "nvidia" + }, + { + "id": "nemotron-4-reward", + "name": "nemotron-4-reward", + "ownedBy": "nvidia" + }, + { + "id": "nemotron-4-mini-hindi-instruct", + "name": "nemotron-4-mini-hindi-instruct", + "ownedBy": "nvidia" + }, + { + "id": "nemotron-content-safety-reasoning", + "name": "nemotron-content-safety-reasoning", + "capabilities": ["reasoning"], + "ownedBy": "nvidia" + }, + { + "id": "nemotron-mini-instruct", + "name": "nemotron-mini-instruct", + "ownedBy": "nvidia" + }, + { + "id": "nemotron-nano-3-a3b", + "name": "nemotron-nano-3-a3b", + "ownedBy": "nvidia" + }, + { + "id": "nemotron-parse", + "name": "nemotron-parse", + "ownedBy": "nvidia" + }, + { + "id": "neva", + "name": "neva", + "ownedBy": "nvidia" + }, + { + "id": "nv-embed-v1", + "name": "nv-embed-v1", + "capabilities": ["embedding"], + "ownedBy": "nvidia" + }, + { + "id": "nv-embedcode-v1", + "name": "nv-embedcode-v1", + "capabilities": ["embedding"], + "ownedBy": "nvidia" + }, + { + "id": "nv-embedqa-e5-v5", + "name": "nv-embedqa-e5-v5", + "capabilities": ["embedding"], + "ownedBy": "nvidia" + }, + { + "id": "nv-embedqa-mistral-v2", + "name": "nv-embedqa-mistral-v2", + "capabilities": ["embedding"], + "ownedBy": "nvidia" + }, + { + "id": "nvclip", + "name": "nvclip", + "ownedBy": "nvidia" + }, + { + "id": "riva-translate-instruct", + "name": "riva-translate-instruct", + "ownedBy": "nvidia" + }, + { + "id": 
"riva-translate-instruct-v1-1", + "name": "riva-translate-instruct-v1-1", + "ownedBy": "nvidia" + }, + { + "id": "streampetr", + "name": "streampetr", + "ownedBy": "nvidia" + }, + { + "id": "usdcode-llama-3-1-instruct", + "name": "usdcode-llama-3-1-instruct", + "ownedBy": "nvidia" + }, + { + "id": "vila", + "name": "vila", + "ownedBy": "nvidia" + }, + { + "id": "teuken-instruct-commercial-v0-4", + "name": "teuken-instruct-commercial-v0-4", + "ownedBy": "nvidia" + }, + { + "id": "rakutenai-chat", + "name": "rakutenai-chat", + "ownedBy": "nvidia" + }, + { + "id": "rakutenai-instruct", + "name": "rakutenai-instruct", + "ownedBy": "nvidia" + }, + { + "id": "arctic-embed-l", + "name": "arctic-embed-l", + "capabilities": ["embedding"], + "ownedBy": "nvidia" + }, + { + "id": "bielik-v2-3-instruct", + "name": "bielik-v2-3-instruct", + "ownedBy": "nvidia" + }, + { + "id": "stockmark-2-instruct", + "name": "stockmark-2-instruct", + "ownedBy": "nvidia" + }, + { + "id": "chatglm3", + "name": "chatglm3", + "ownedBy": "nvidia" + }, + { + "id": "falcon3-instruct", + "name": "falcon3-instruct", + "ownedBy": "nvidia" + }, + { + "id": "llama-3-swallow-instruct-v0-1", + "name": "llama-3-swallow-instruct-v0-1", + "ownedBy": "meta" + }, + { + "id": "solar-instruct", + "name": "solar-instruct", + "ownedBy": "upstageai" + }, + { + "id": "eurollm-instruct", + "name": "eurollm-instruct", + "ownedBy": "nvidia" + }, + { + "id": "palmyra-creative", + "name": "palmyra-creative", + "ownedBy": "google" + }, + { + "id": "palmyra-fin-32k", + "name": "palmyra-fin-32k", + "ownedBy": "google" + }, + { + "id": "palmyra-med", + "name": "palmyra-med", + "ownedBy": "google" + }, + { + "id": "palmyra-med-32k", + "name": "palmyra-med-32k", + "ownedBy": "google" + }, + { + "id": "llama-3-taiwan-instruct", + "name": "llama-3-taiwan-instruct", + "capabilities": ["video-generation"], + "ownedBy": "meta" + }, + { + "id": "zamba2-instruct", + "name": "zamba2-instruct", + "ownedBy": "nvidia" + }, + { + "id": 
"grok-2-image-1212", + "name": "grok-2-image-1212", + "capabilities": ["image-generation", "web-search"], + "ownedBy": "xai" + }, + { + "id": "grok-imagine-video", + "name": "grok-imagine-video", + "capabilities": ["video-generation", "web-search"], + "ownedBy": "xai" + }, + { + "id": "gpt-oss-turbo", + "name": "gpt-oss-turbo", + "capabilities": ["reasoning"], + "contextWindow": 131072, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.3 + } + }, + "reasoning": { + "supportedEfforts": ["low", "medium", "high"] + }, + "ownedBy": "openai" + }, + { + "id": "stablediffusion", + "name": "stablediffusion", + "ownedBy": "hyperbolic" + }, + { + "id": "tts", + "name": "tts", + "ownedBy": "hyperbolic" + }, + { + "id": "mistral-vibe-cli-with-tools", + "name": "mistral-medium-2508", + "contextWindow": 131072, + "ownedBy": "mistral" + }, + { + "id": "open-mistral-nemo", + "name": "open-mistral-nemo", + "contextWindow": 131072, + "ownedBy": "mistral" + }, + { + "id": "open-mistral-nemo-2407", + "name": "open-mistral-nemo", + "contextWindow": 131072, + "ownedBy": "mistral" + }, + { + "id": "mistral-tiny-2407", + "name": "open-mistral-nemo", + "contextWindow": 131072, + "ownedBy": "mistral" + }, + { + "id": "mistral-tiny-latest", + "name": "open-mistral-nemo", + "contextWindow": 131072, + "ownedBy": "mistral" + }, + { + "id": "mistral-large-pixtral-2411", + "name": "pixtral-large-2411", + "capabilities": ["function-call", "image-recognition"], + "contextWindow": 131072, + "ownedBy": "mistral" + }, + { + "id": "mistral-vibe-cli-latest", + "name": "devstral-2512", + "contextWindow": 262144, + "ownedBy": "mistral" + }, + { + "id": "devstral-latest", + "name": "devstral-2512", + "contextWindow": 262144, + "ownedBy": "mistral" + }, + { + "id": "devstral-small-latest", + "name": "labs-devstral-small-2512", + "contextWindow": 262144, + "ownedBy": "mistral" + }, + { + "id": 
"labs-mistral-small-creative", + "name": "labs-mistral-small-creative", + "contextWindow": 32768, + "ownedBy": "mistral" + }, + { + "id": "magistral-medium-2509", + "name": "magistral-medium-2509", + "capabilities": ["reasoning"], + "contextWindow": 131072, + "ownedBy": "mistral" + }, + { + "id": "magistral-small-latest", + "name": "magistral-small-2509", + "capabilities": ["reasoning"], + "contextWindow": 131072, + "ownedBy": "mistral" + }, + { + "id": "voxtral-mini-2507", + "name": "voxtral-mini-2507", + "contextWindow": 32768, + "ownedBy": "mistral" + }, + { + "id": "voxtral-mini-latest", + "name": "voxtral-mini-2507", + "contextWindow": 32768, + "ownedBy": "mistral" + }, + { + "id": "voxtral-small-latest", + "name": "voxtral-small-2507", + "contextWindow": 32768, + "ownedBy": "mistral" + }, + { + "id": "mistral-moderation-2411", + "name": "mistral-moderation-2411", + "contextWindow": 8192, + "ownedBy": "mistral" + }, + { + "id": "mistral-moderation-latest", + "name": "mistral-moderation-2411", + "contextWindow": 8192, + "ownedBy": "mistral" + }, + { + "id": "mistral-ocr-2512", + "name": "mistral-ocr-2512", + "capabilities": ["file-input"], + "contextWindow": 16384, + "ownedBy": "mistral" + }, + { + "id": "mistral-ocr-latest", + "name": "mistral-ocr-2512", + "capabilities": ["file-input"], + "contextWindow": 16384, + "ownedBy": "mistral" + }, + { + "id": "mistral-ocr-2505", + "name": "mistral-ocr-2505", + "capabilities": ["file-input"], + "contextWindow": 16384, + "ownedBy": "mistral" + }, + { + "id": "voxtral-mini-2602", + "name": "voxtral-mini-2602", + "contextWindow": 16384, + "ownedBy": "mistral" + }, + { + "id": "voxtral-mini-transcribe-2507", + "name": "voxtral-mini-transcribe-2507", + "contextWindow": 16384, + "ownedBy": "mistral" + }, + { + "id": "jina-vlm", + "name": "Jina AI: Jina VLM", + "contextWindow": 32768, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.049999999999999996 + }, + "output": { + "currency": "USD", + 
"perMillionTokens": 0 + } + }, + "ownedBy": "jina" + }, + { + "id": "jina-code-embeddings", + "name": "Jina AI: Jina Code Embeddings 0.5b", + "capabilities": ["embedding"], + "contextWindow": 32768, + "maxOutputTokens": 896, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.049999999999999996 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "ownedBy": "jina" + }, + { + "id": "readerlm-v2", + "name": "Jina AI: ReaderLM v2", + "contextWindow": 524288, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.049999999999999996 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "ownedBy": "jina" + }, + { + "id": "reader-lm", + "name": "Jina AI: Reader LM 0.5b", + "contextWindow": 262144, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.049999999999999996 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "ownedBy": "jina" + }, + { + "id": "jina-embedding-b-en-v1", + "name": "Jina AI: Jina Embedding B EN v1", + "capabilities": ["embedding"], + "contextWindow": 512, + "maxOutputTokens": 768, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.049999999999999996 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "ownedBy": "jina" + }, + { + "id": "c4ai-command-r-plus-08-2024", + "name": "c4ai-command-r-plus-08-2024", + "ownedBy": "modelscope" + }, + { + "id": "ministral-instruct-2410", + "name": "ministral-instruct-2410", + "ownedBy": "mistral" + }, + { + "id": "mistral-large-instruct-2407", + "name": "mistral-large-instruct-2407", + "capabilities": ["function-call"], + "ownedBy": "mistral" + }, + { + "id": "mistral-small-instruct-2409", + "name": "mistral-small-instruct-2409", + "ownedBy": "mistral" + }, + { + "id": "compassjudger-1-instruct", + "name": "compassjudger-1-instruct", + "ownedBy": "modelscope" + }, + { + "id": "internvl3_5-a28b", + "name": "internvl3_5-a28b", + "capabilities": 
["image-recognition"], + "ownedBy": "intern" + }, + { + "id": "ernie-4-5-pt", + "name": "ernie-4-5-pt", + "ownedBy": "baidu" + }, + { + "id": "ernie-4-5-a3b-pt", + "name": "ernie-4-5-a3b-pt", + "ownedBy": "baidu" + }, + { + "id": "ernie-4-5-a47b-pt", + "name": "ernie-4-5-a47b-pt", + "ownedBy": "baidu" + }, + { + "id": "ernie-4-5-vl-a3b-pt", + "name": "ernie-4-5-vl-a3b-pt", + "capabilities": ["image-recognition"], + "ownedBy": "baidu" + }, + { + "id": "intern-s1", + "name": "intern-s1", + "ownedBy": "intern" + }, + { + "id": "intern-s1-mini", + "name": "intern-s1-mini", + "ownedBy": "intern" + }, + { + "id": "xiyansql-qwencoder-2412", + "name": "xiyansql-qwencoder-2412", + "capabilities": ["function-call"], + "ownedBy": "modelscope" + }, + { + "id": "xiyansql-qwencoder-2504", + "name": "xiyansql-qwencoder-2504", + "capabilities": ["function-call"], + "ownedBy": "modelscope" + }, + { + "id": "hunyuan-standard-32k", + "name": "hunyuan-standard-32k", + "capabilities": ["function-call"], + "ownedBy": "tencent" + }, + { + "id": "hunyuan-turbo-latest", + "name": "hunyuan-turbo-latest", + "capabilities": ["function-call"], + "ownedBy": "tencent" + }, + { + "id": "hunyuan-turbos-20250515", + "name": "hunyuan-turbos-20250515", + "capabilities": ["function-call"], + "ownedBy": "tencent" + }, + { + "id": "hunyuan-t1-vision", + "name": "hunyuan-t1-vision", + "capabilities": ["reasoning", "function-call", "image-recognition"], + "ownedBy": "tencent" + }, + { + "id": "hunyuan-large-vision", + "name": "hunyuan-large-vision", + "capabilities": ["function-call", "image-recognition"], + "ownedBy": "tencent" + }, + { + "id": "hunyuan-turbos-20250604", + "name": "hunyuan-turbos-20250604", + "capabilities": ["function-call"], + "ownedBy": "tencent" + }, + { + "id": "hunyuan-t1-20250529", + "name": "hunyuan-t1-20250529", + "capabilities": ["reasoning", "function-call"], + "ownedBy": "tencent" + }, + { + "id": "hunyuan-turbos-vision-20250619", + "name": "hunyuan-turbos-vision-20250619", + 
"capabilities": ["function-call", "image-recognition"], + "ownedBy": "tencent" + }, + { + "id": "hunyuan-t1-vision-20250619", + "name": "hunyuan-t1-vision-20250619", + "capabilities": ["reasoning", "function-call", "image-recognition"], + "ownedBy": "tencent" + }, + { + "id": "hunyuan-vision-20250720", + "name": "hunyuan-vision-20250720", + "capabilities": ["function-call", "image-recognition"], + "ownedBy": "tencent" + }, + { + "id": "hunyuan-large-role-20250822", + "name": "hunyuan-large-role-20250822", + "capabilities": ["function-call"], + "ownedBy": "tencent" + }, + { + "id": "hunyuan-t1-20250822", + "name": "hunyuan-t1-20250822", + "capabilities": ["reasoning", "function-call"], + "ownedBy": "tencent" + }, + { + "id": "hunyuan-t1-vision-20250916", + "name": "hunyuan-t1-vision-20250916", + "capabilities": ["reasoning", "function-call", "image-recognition"], + "ownedBy": "tencent" + }, + { + "id": "hunyuan-turbos-20250926", + "name": "hunyuan-turbos-20250926", + "capabilities": ["function-call"], + "ownedBy": "tencent" + }, + { + "id": "hunyuan-large-role-latest", + "name": "hunyuan-large-role-latest", + "capabilities": ["function-call"], + "ownedBy": "tencent" + }, + { + "id": "hunyuan-turbos-role-20251114", + "name": "hunyuan-turbos-role-20251114", + "capabilities": ["function-call"], + "ownedBy": "tencent" + }, + { + "id": "hunyuan-role-latest", + "name": "hunyuan-role-latest", + "capabilities": ["function-call"], + "ownedBy": "tencent" + }, + { + "id": "hunyuan-vision-1-5-instruct", + "name": "hunyuan-vision-1-5-instruct", + "capabilities": ["function-call", "image-recognition"], + "ownedBy": "tencent" + }, + { + "id": "qwen3-instruct-2507", + "name": "qwen3-instruct-2507", + "capabilities": ["function-call"], + "ownedBy": "alibaba" + }, + { + "id": "apriel-1-6-thinker", + "name": "apriel-1-6-thinker", + "ownedBy": "huggingface" + }, + { + "id": "qwen3-thinking-2507", + "name": "qwen3-thinking-2507", + "capabilities": ["reasoning", "function-call"], + 
"reasoning": { + "supportedEfforts": ["low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 38912 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "smollm3", + "name": "smollm3", + "ownedBy": "huggingface" + }, + { + "id": "molmo2", + "name": "molmo2", + "ownedBy": "ai2" + }, + { + "id": "aya-vision", + "name": "aya-vision", + "capabilities": ["image-recognition"], + "ownedBy": "huggingface" + }, + { + "id": "c4ai-command-r7b-12-2024", + "name": "c4ai-command-r7b-12-2024", + "capabilities": ["reasoning"], + "ownedBy": "huggingface" + }, + { + "id": "c4ai-command-a-03-2025", + "name": "c4ai-command-a-03-2025", + "ownedBy": "huggingface" + }, + { + "id": "apertus-instruct-2509", + "name": "apertus-instruct-2509", + "ownedBy": "huggingface" + }, + { + "id": "glm-4-5-air-fp8", + "name": "glm-4-5-air-fp8", + "capabilities": ["function-call", "reasoning"], + "reasoning": { + "supportedEfforts": ["none"], + "interleaved": true + }, + "ownedBy": "zhipu" + }, + { + "id": "eurollm-instruct-2512", + "name": "eurollm-instruct-2512", + "ownedBy": "huggingface" + }, + { + "id": "arch-router", + "name": "arch-router", + "ownedBy": "huggingface" + }, + { + "id": "llama-3-3-swallow-instruct-v0-4", + "name": "llama-3-3-swallow-instruct-v0-4", + "ownedBy": "meta" + }, + { + "id": "glm-4-5v-fp8", + "name": "glm-4-5v-fp8", + "capabilities": ["reasoning"], + "reasoning": { + "supportedEfforts": ["none"], + "interleaved": true + }, + "ownedBy": "zhipu" + }, + { + "id": "glm-4-6v-fp8", + "name": "glm-4-6v-fp8", + "capabilities": ["function-call", "reasoning"], + "reasoning": { + "supportedEfforts": ["none"], + "interleaved": true + }, + "ownedBy": "zhipu" + }, + { + "id": "l3-lunaris-v1", + "name": "l3-lunaris-v1", + "ownedBy": "huggingface" + }, + { + "id": "qwen-sea-lion-v4-it", + "name": "qwen-sea-lion-v4-it", + "capabilities": ["function-call"], + "ownedBy": "alibaba" + }, + { + "id": "ernie-4-5-a47b-base-pt", + "name": "ernie-4-5-a47b-base-pt", + "ownedBy": 
"baidu" + }, + { + "id": "ernie-4-5-vl-a47b-base-pt", + "name": "ernie-4-5-vl-a47b-base-pt", + "capabilities": ["image-recognition"], + "ownedBy": "baidu" + }, + { + "id": "cogito-v2-1-fp8", + "name": "cogito-v2-1-fp8", + "ownedBy": "cogito" + }, + { + "id": "dictalm-3-0", + "name": "dictalm-3-0", + "ownedBy": "huggingface" + }, + { + "id": "c4ai-command-r-08-2024", + "name": "c4ai-command-r-08-2024", + "ownedBy": "huggingface" + }, + { + "id": "aya-expanse", + "name": "aya-expanse", + "ownedBy": "huggingface" + }, + { + "id": "c4ai-command-r7b-arabic-02-2025", + "name": "c4ai-command-r7b-arabic-02-2025", + "capabilities": ["reasoning"], + "ownedBy": "huggingface" + }, + { + "id": "wan-v2-5-t2v-preview", + "name": "Wan v2.5 Text-to-Video Preview", + "capabilities": ["video-generation"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "wan-v2-6-i2v", + "name": "Wan v2.6 Image-to-Video", + "capabilities": ["video-generation"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "wan-v2-6-i2v-flash", + "name": "Wan v2.6 Image-to-Video Flash", + "capabilities": ["video-generation"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "wan-v2-6-r2v", + "name": "Wan v2.6 Reference-to-Video", + "capabilities": ["reasoning", "video-generation"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "wan-v2-6-r2v-flash", + "name": "Wan v2.6 Reference-to-Video Flash", + "capabilities": ["reasoning", "video-generation"], + "pricing": { + "input": { 
+ "currency": "USD", + "perMillionTokens": 0 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "wan-v2-6-t2v", + "name": "Wan v2.6 Text-to-Video", + "capabilities": ["video-generation"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "veo-3-1-fast-generate-001", + "name": "Veo 3.1 Fast Generate", + "capabilities": ["video-generation"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "ownedBy": "google" + }, + { + "id": "veo-3-1-generate-001", + "name": "Veo 3.1", + "capabilities": ["video-generation"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "ownedBy": "google" + }, + { + "id": "kling-v2-5-turbo-i2v", + "name": "Kling v2.5 Turbo Image-to-Video", + "capabilities": ["video-generation"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "ownedBy": "kling" + }, + { + "id": "kling-v2-5-turbo-t2v", + "name": "Kling v2.5 Turbo Text-to-Video", + "capabilities": ["video-generation"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "ownedBy": "kling" + }, + { + "id": "kling-v2-6-i2v", + "name": "Kling v2.6 Image-to-Video", + "capabilities": ["video-generation"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "ownedBy": "kling" + }, + { + "id": "kling-v2-6-t2v", + "name": "Kling v2.6 Text-to-Video", + "capabilities": ["video-generation"], + "pricing": { + "input": { + "currency": 
"USD", + "perMillionTokens": 0 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "ownedBy": "kling" + }, + { + "id": "kling-v3-0-i2v", + "name": "Kling v3.0 Image-to-Video", + "capabilities": ["video-generation"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "ownedBy": "kling" + }, + { + "id": "kling-v3-0-t2v", + "name": "Kling v3.0 Text-to-Video", + "capabilities": ["video-generation"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "ownedBy": "kling" + }, + { + "id": "recraft-v4", + "name": "Recraft V4", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "ownedBy": "recraft" + }, + { + "id": "recraft-v4-pro", + "name": "Recraft V4 Pro", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "ownedBy": "recraft" + }, + { + "id": "qwen3-asr", + "name": "qwen3-asr", + "capabilities": ["function-call", "reasoning"], + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 38912 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "z-image", + "name": "z-image", + "capabilities": ["image-generation"], + "ownedBy": "gitee-ai" + }, + { + "id": "qwen3-tts", + "name": "qwen3-tts", + "capabilities": ["function-call", "reasoning"], + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 38912 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "legalone", + "name": "legalone", + "ownedBy": "gitee-ai" + }, + { + "id": "qwen-image-layered", + "name": "qwen-image-layered", + "capabilities": ["function-call", "image-generation"], + 
"ownedBy": "alibaba" + }, + { + "id": "qwen3-vl-reranker", + "name": "qwen3-vl-reranker", + "capabilities": ["function-call", "rerank", "image-recognition", "reasoning"], + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 38912 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "ltx-2", + "name": "ltx-2", + "ownedBy": "gitee-ai" + }, + { + "id": "mai-ui", + "name": "mai-ui", + "ownedBy": "gitee-ai" + }, + { + "id": "qwen-image-2512", + "name": "qwen-image-2512", + "capabilities": ["function-call", "image-generation"], + "ownedBy": "alibaba" + }, + { + "id": "sinong1-0", + "name": "sinong1-0", + "ownedBy": "gitee-ai" + }, + { + "id": "hy-mt1-5", + "name": "hy-mt1-5", + "ownedBy": "gitee-ai" + }, + { + "id": "vajrav1", + "name": "vajrav1", + "ownedBy": "gitee-ai" + }, + { + "id": "cosyvoice3", + "name": "cosyvoice3", + "ownedBy": "gitee-ai" + }, + { + "id": "sam3", + "name": "sam3", + "ownedBy": "gitee-ai" + }, + { + "id": "fun-asr-nano-2512", + "name": "fun-asr-nano-2512", + "ownedBy": "gitee-ai" + }, + { + "id": "qwen-image-edit-2511", + "name": "qwen-image-edit-2511", + "capabilities": ["function-call", "image-generation"], + "ownedBy": "alibaba" + }, + { + "id": "flux.2-dev", + "name": "flux.2-dev", + "capabilities": ["image-generation"], + "ownedBy": "bfl" + }, + { + "id": "infinitetalk", + "name": "infinitetalk", + "ownedBy": "gitee-ai" + }, + { + "id": "glm-asr", + "name": "glm-asr", + "ownedBy": "zhipu" + }, + { + "id": "longcat-image-edit", + "name": "longcat-image-edit", + "capabilities": ["image-generation"], + "ownedBy": "meituan" + }, + { + "id": "longcat-image", + "name": "longcat-image", + "capabilities": ["image-generation"], + "ownedBy": "meituan" + }, + { + "id": "hulu-med", + "name": "hulu-med", + "ownedBy": "gitee-ai" + }, + { + "id": "longcat-video", + "name": "longcat-video", + "capabilities": ["video-generation"], + "ownedBy": "meituan" + }, + { + "id": "hunyuanocr", + 
"name": "hunyuanocr", + "capabilities": ["function-call"], + "ownedBy": "tencent" + }, + { + "id": "ernie-5-0", + "name": "ernie-5-0", + "ownedBy": "baidu" + }, + { + "id": "huatuogpt-o1", + "name": "huatuogpt-o1", + "capabilities": ["function-call", "image-recognition"], + "ownedBy": "gitee-ai" + }, + { + "id": "hunyuanvideo-1-5", + "name": "hunyuanvideo-1-5", + "capabilities": ["function-call", "video-generation"], + "ownedBy": "tencent" + }, + { + "id": "search", + "name": "search", + "ownedBy": "gitee-ai" + }, + { + "id": "seedvr2", + "name": "seedvr2", + "ownedBy": "bytedance" + }, + { + "id": "comfyui", + "name": "comfyui", + "ownedBy": "gitee-ai" + }, + { + "id": "audiofly", + "name": "audiofly", + "ownedBy": "gitee-ai" + }, + { + "id": "youtu-embedding", + "name": "youtu-embedding", + "capabilities": ["embedding"], + "ownedBy": "gitee-ai" + }, + { + "id": "mineru2-5", + "name": "mineru2-5", + "ownedBy": "gitee-ai" + }, + { + "id": "deepseek-v3_1-terminus", + "name": "deepseek-v3_1-terminus", + "capabilities": ["function-call", "reasoning"], + "reasoning": { + "supportedEfforts": ["none"] + }, + "ownedBy": "deepseek" + }, + { + "id": "vibevoice-large", + "name": "vibevoice-large", + "ownedBy": "gitee-ai" + }, + { + "id": "hunyuan-mt-chimera", + "name": "hunyuan-mt-chimera", + "capabilities": ["function-call"], + "ownedBy": "tencent" + }, + { + "id": "deepseek-v3_1", + "name": "deepseek-v3_1", + "capabilities": ["function-call", "reasoning"], + "reasoning": { + "supportedEfforts": ["none"] + }, + "ownedBy": "deepseek" + }, + { + "id": "codesage-large-v2", + "name": "codesage-large-v2", + "ownedBy": "gitee-ai" + }, + { + "id": "glm-4_5v", + "name": "glm-4_5v", + "capabilities": ["function-call"], + "ownedBy": "zhipu" + }, + { + "id": "nonescape-v0", + "name": "nonescape-v0", + "ownedBy": "gitee-ai" + }, + { + "id": "flux_1-krea-dev", + "name": "flux_1-krea-dev", + "capabilities": ["image-generation"], + "ownedBy": "bfl" + }, + { + "id": "wan2_2-i2v-a14b", + 
"name": "wan2_2-i2v-a14b", + "capabilities": ["video-generation"], + "ownedBy": "alibaba" + }, + { + "id": "cogview4_6b", + "name": "cogview4_6b", + "ownedBy": "zhipu" + }, + { + "id": "glm-4_5-air", + "name": "glm-4_5-air", + "capabilities": ["function-call"], + "ownedBy": "zhipu" + }, + { + "id": "glm-4_5", + "name": "glm-4_5", + "capabilities": ["function-call"], + "ownedBy": "zhipu" + }, + { + "id": "seed-x-ppo", + "name": "seed-x-ppo", + "ownedBy": "bytedance" + }, + { + "id": "f5-tts", + "name": "f5-tts", + "ownedBy": "gitee-ai" + }, + { + "id": "bge-reranker-large", + "name": "bge-reranker-large", + "capabilities": ["embedding", "rerank"], + "ownedBy": "baai" + }, + { + "id": "clip-vit", + "name": "clip-vit", + "ownedBy": "gitee-ai" + }, + { + "id": "resnet-50", + "name": "resnet-50", + "ownedBy": "gitee-ai" + }, + { + "id": "flux.1-kontext-dev", + "name": "flux.1-kontext-dev", + "capabilities": ["image-generation"], + "ownedBy": "bfl" + }, + { + "id": "funasr", + "name": "funasr", + "ownedBy": "gitee-ai" + }, + { + "id": "yolov8", + "name": "yolov8", + "ownedBy": "gitee-ai" + }, + { + "id": "animesharp", + "name": "animesharp", + "ownedBy": "gitee-ai" + }, + { + "id": "duix.heygem", + "name": "duix.heygem", + "ownedBy": "gitee-ai" + }, + { + "id": "ernie-4-5-turbo", + "name": "ernie-4-5-turbo", + "ownedBy": "baidu" + }, + { + "id": "indextts-1-5", + "name": "indextts-1-5", + "capabilities": ["audio-generation"], + "ownedBy": "gitee-ai" + }, + { + "id": "lingshu", + "name": "lingshu", + "ownedBy": "bailing" + }, + { + "id": "hellomeme", + "name": "hellomeme", + "ownedBy": "gitee-ai" + }, + { + "id": "instantcharacter", + "name": "instantcharacter", + "ownedBy": "gitee-ai" + }, + { + "id": "omniconsistency", + "name": "omniconsistency", + "ownedBy": "gitee-ai" + }, + { + "id": "real-esrgan", + "name": "real-esrgan", + "ownedBy": "gitee-ai" + }, + { + "id": "medgemma-it", + "name": "medgemma-it", + "ownedBy": "gitee-ai" + }, + { + "id": "dreamo", + "name": 
"dreamo", + "ownedBy": "gitee-ai" + }, + { + "id": "step1x-3d", + "name": "step1x-3d", + "ownedBy": "stepfun" + }, + { + "id": "ace-step-v1", + "name": "ace-step-v1", + "ownedBy": "gitee-ai" + }, + { + "id": "hidream-e1-full", + "name": "hidream-e1-full", + "ownedBy": "gitee-ai" + }, + { + "id": "hi3dgen", + "name": "hi3dgen", + "ownedBy": "gitee-ai" + }, + { + "id": "hidream-i1-full", + "name": "hidream-i1-full", + "ownedBy": "gitee-ai" + }, + { + "id": "stable-diffusion-v1-5", + "name": "stable-diffusion-v1-5", + "capabilities": ["image-generation"], + "ownedBy": "stability" + }, + { + "id": "healthgpt-l14", + "name": "healthgpt-l14", + "ownedBy": "gitee-ai" + }, + { + "id": "dianjin-r1", + "name": "dianjin-r1", + "capabilities": ["reasoning"], + "ownedBy": "gitee-ai" + }, + { + "id": "nomic-embed-code", + "name": "nomic-embed-code", + "capabilities": ["embedding"], + "ownedBy": "gitee-ai" + }, + { + "id": "spark-tts", + "name": "spark-tts", + "ownedBy": "gitee-ai" + }, + { + "id": "moark-text-moderation", + "name": "moark-text-moderation", + "ownedBy": "gitee-ai" + }, + { + "id": "megatts3", + "name": "megatts3", + "ownedBy": "gitee-ai" + }, + { + "id": "hunyuan3d-2", + "name": "hunyuan3d-2", + "capabilities": ["function-call"], + "ownedBy": "tencent" + }, + { + "id": "moark-m1", + "name": "moark-m1", + "ownedBy": "gitee-ai" + }, + { + "id": "fin-r1", + "name": "fin-r1", + "capabilities": ["reasoning"], + "ownedBy": "gitee-ai" + }, + { + "id": "pdf-extract-kit-1-0", + "name": "pdf-extract-kit-1-0", + "ownedBy": "gitee-ai" + }, + { + "id": "internlm3-instruct", + "name": "internlm3-instruct", + "ownedBy": "intern" + }, + { + "id": "wan2-1-i2v-480p", + "name": "wan2-1-i2v-480p", + "capabilities": ["video-generation"], + "ownedBy": "alibaba" + }, + { + "id": "wan2-1-i2v-720p", + "name": "wan2-1-i2v-720p", + "capabilities": ["video-generation"], + "ownedBy": "alibaba" + }, + { + "id": "wan2-1-t2v", + "name": "wan2-1-t2v", + "capabilities": ["video-generation"], + 
"ownedBy": "alibaba" + }, + { + "id": "step-audio-tts", + "name": "step-audio-tts", + "ownedBy": "stepfun" + }, + { + "id": "stepvideo-t2v", + "name": "stepvideo-t2v", + "capabilities": ["video-generation"], + "ownedBy": "stepfun" + }, + { + "id": "align-ds-v", + "name": "align-ds-v", + "ownedBy": "gitee-ai" + }, + { + "id": "florence-2-large", + "name": "florence-2-large", + "ownedBy": "gitee-ai" + }, + { + "id": "security-semantic-filtering", + "name": "security-semantic-filtering", + "ownedBy": "gitee-ai" + }, + { + "id": "nsfw-classifier", + "name": "nsfw-classifier", + "ownedBy": "gitee-ai" + }, + { + "id": "got-ocr2_0", + "name": "got-ocr2_0", + "ownedBy": "gitee-ai" + }, + { + "id": "internvl2-5", + "name": "internvl2-5", + "capabilities": ["image-recognition"], + "ownedBy": "intern" + }, + { + "id": "uvdoc", + "name": "uvdoc", + "ownedBy": "gitee-ai" + }, + { + "id": "hunyuandit-v1-2-diffusers-distilled", + "name": "hunyuandit-v1-2-diffusers-distilled", + "capabilities": ["function-call"], + "ownedBy": "tencent" + }, + { + "id": "rmbg-2-0", + "name": "rmbg-2-0", + "ownedBy": "gitee-ai" + }, + { + "id": "qwen2-vl", + "name": "qwen2-vl", + "capabilities": ["function-call", "image-recognition"], + "ownedBy": "alibaba" + }, + { + "id": "internvl2", + "name": "internvl2", + "capabilities": ["image-recognition"], + "ownedBy": "intern" + }, + { + "id": "stable-diffusion-3-5-large-turbo", + "name": "stable-diffusion-3-5-large-turbo", + "capabilities": ["image-generation"], + "ownedBy": "stability" + }, + { + "id": "stable-diffusion-v1-4", + "name": "stable-diffusion-v1-4", + "capabilities": ["image-generation"], + "ownedBy": "stability" + }, + { + "id": "flux-1-schnell", + "name": "flux-1-schnell", + "capabilities": ["image-generation"], + "ownedBy": "bfl" + }, + { + "id": "fish-speech-1-2-sft", + "name": "fish-speech-1-2-sft", + "ownedBy": "gitee-ai" + }, + { + "id": "yi-chat", + "name": "yi-chat", + "ownedBy": "01ai" + }, + { + "id": "funaudiollm-cosyvoice-300m", 
+ "name": "funaudiollm-cosyvoice-300m", + "ownedBy": "gitee-ai" + }, + { + "id": "code-raccoon-v1", + "name": "code-raccoon-v1", + "ownedBy": "gitee-ai" + }, + { + "id": "codegeex4-all", + "name": "codegeex4-all", + "ownedBy": "gitee-ai" + }, + { + "id": "stable-diffusion-3", + "name": "stable-diffusion-3", + "capabilities": ["image-generation"], + "ownedBy": "stability" + }, + { + "id": "chattts", + "name": "chattts", + "ownedBy": "gitee-ai" + }, + { + "id": "bge-small-zh-v1-5", + "name": "bge-small-zh-v1-5", + "capabilities": ["embedding"], + "ownedBy": "baai" + }, + { + "id": "whisper-large", + "name": "whisper-large", + "capabilities": ["audio-transcript"], + "ownedBy": "openai" + }, + { + "id": "speecht5_tts", + "name": "speecht5_tts", + "ownedBy": "gitee-ai" + }, + { + "id": "whisper-base", + "name": "whisper-base", + "capabilities": ["audio-transcript"], + "ownedBy": "openai" + }, + { + "id": "stable-diffusion-xl-base-1-0", + "name": "stable-diffusion-xl-base-1-0", + "capabilities": ["image-generation"], + "ownedBy": "stability" + }, + { + "id": "qwen3-5", + "name": "Qwen3.5-27B", + "capabilities": ["reasoning"], + "contextWindow": 262144, + "maxOutputTokens": 65536, + "pricing": { + "input": { + "currency": "CNY", + "perMillionTokens": 1.8 + }, + "output": { + "currency": "CNY", + "perMillionTokens": 14.4 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 38912 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "qwen3-5-a10b", + "name": "Qwen3.5-122B-A10B", + "capabilities": ["reasoning"], + "contextWindow": 262144, + "maxOutputTokens": 65536, + "pricing": { + "input": { + "currency": "CNY", + "perMillionTokens": 2 + }, + "output": { + "currency": "CNY", + "perMillionTokens": 16 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 38912 + } + }, + "ownedBy": "alibaba" + }, + { + "id": 
"qwen3-5-a3b", + "name": "Qwen3.5-35B-A3B", + "capabilities": ["reasoning"], + "contextWindow": 262144, + "maxOutputTokens": 65536, + "pricing": { + "input": { + "currency": "CNY", + "perMillionTokens": 1.6 + }, + "output": { + "currency": "CNY", + "perMillionTokens": 12.8 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 38912 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "gpt-5-4", + "name": "gpt-5-4", + "ownedBy": "openai" + }, + { + "id": "gpt-5-4-pro", + "name": "OpenAI: GPT-5.4 Pro", + "contextWindow": 1050000, + "maxOutputTokens": 128000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 30 + }, + "output": { + "currency": "USD", + "perMillionTokens": 180 + } + }, + "ownedBy": "openai" + }, + { + "id": "mercury-2", + "name": "Inception: Mercury 2", + "contextWindow": 128000, + "maxOutputTokens": 50000, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.25 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.75 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.024999999999999998 + } + }, + "ownedBy": "inception" + }, + { + "id": "gpt-5-3-chat", + "name": "OpenAI: GPT-5.3 Chat", + "contextWindow": 128000, + "maxOutputTokens": 16384, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.75 + }, + "output": { + "currency": "USD", + "perMillionTokens": 14 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.175 + } + }, + "ownedBy": "openai" + }, + { + "id": "gemini-3-1-flash-lite-preview", + "name": "Google: Gemini 3.1 Flash Lite Preview", + "contextWindow": 1048576, + "maxOutputTokens": 65536, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.25 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.5 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.024999999999999998 + } + }, + "ownedBy": "google" + }, + { + "id": 
"seed-2-0-mini", + "name": "ByteDance Seed: Seed-2.0-Mini", + "contextWindow": 262144, + "maxOutputTokens": 131072, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.09999999999999999 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.39999999999999997 + } + }, + "ownedBy": "bytedance" + }, + { + "id": "gemini-3-1-flash-image-preview", + "name": "Google: Nano Banana 2 (Gemini 3.1 Flash Image Preview)", + "contextWindow": 65536, + "maxOutputTokens": 65536, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 3 + } + }, + "ownedBy": "google" + }, + { + "id": "qwen3-5-flash-02-23", + "name": "Qwen: Qwen3.5-Flash", + "capabilities": ["reasoning"], + "contextWindow": 1000000, + "maxOutputTokens": 65536, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.09999999999999999 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.39999999999999997 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 38912 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "lfm-2-a2b", + "name": "LiquidAI: LFM2-24B-A2B", + "contextWindow": 32768, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.03 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.12 + } + }, + "ownedBy": "liquidai" + }, + { + "id": "aion-2-0", + "name": "AionLabs: Aion-2.0", + "contextWindow": 131072, + "maxOutputTokens": 32768, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.7999999999999999 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.5999999999999999 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.19999999999999998 + } + }, + "ownedBy": "aion" + }, + { + "id": "gliner-pii", + "name": "gliner-pii", + "ownedBy": "nvidia" + }, + { + "id": "llama-nemotron-embed-v2", + "name": "llama-nemotron-embed-v2", 
+ "ownedBy": "meta" + }, + { + "id": "nemotron-content-safety-reasoning", + "name": "nemotron-content-safety-reasoning", + "ownedBy": "nvidia" + }, + { + "id": "tiny-aya-global", + "name": "tiny-aya-global", + "ownedBy": "huggingface" + }, + { + "id": "tiny-aya-fire", + "name": "tiny-aya-fire", + "ownedBy": "huggingface" + }, + { + "id": "tiny-aya-water", + "name": "tiny-aya-water", + "ownedBy": "huggingface" + }, + { + "id": "tiny-aya-earth", + "name": "tiny-aya-earth", + "ownedBy": "huggingface" + }, + { + "id": "qwen3-5-flash", + "name": "Qwen 3.5 Flash", + "capabilities": ["reasoning"], + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.09999999999999999 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.39999999999999997 + } + }, + "reasoning": { + "supportedEfforts": ["none", "low", "medium", "high"], + "thinkingTokenLimits": { + "min": 1024, + "max": 38912 + } + }, + "ownedBy": "alibaba" + }, + { + "id": "voyage-4", + "name": "voyage-4", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.06 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "ownedBy": "voyage" + }, + { + "id": "voyage-4-large", + "name": "voyage-4-large", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.12 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "ownedBy": "voyage" + }, + { + "id": "voyage-4-lite", + "name": "voyage-4-lite", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.02 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "ownedBy": "voyage" + } + ] +} diff --git a/packages/provider-registry/data/provider-models.json b/packages/provider-registry/data/provider-models.json new file mode 100644 index 00000000000..80184871628 --- /dev/null +++ b/packages/provider-registry/data/provider-models.json @@ -0,0 +1,17244 @@ +{ + "version": "2026.03.09", + "overrides": [ + { + "providerId": "deepseek", + "modelId": 
"deepseek-chat" + }, + { + "providerId": "deepseek", + "modelId": "deepseek-reasoner" + }, + { + "providerId": "gateway", + "modelId": "claude-3-5-haiku", + "apiModelId": "anthropic/claude-3.5-haiku", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.7999999999999999 + }, + "output": { + "currency": "USD", + "perMillionTokens": 4 + } + } + }, + { + "providerId": "gateway", + "modelId": "claude-3-5-sonnet", + "apiModelId": "anthropic/claude-3.5-sonnet", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 15 + } + } + }, + { + "providerId": "gateway", + "modelId": "claude-3-5-sonnet-20240620", + "apiModelId": "anthropic/claude-3.5-sonnet-20240620", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 15 + } + } + }, + { + "providerId": "gateway", + "modelId": "claude-3-7-sonnet", + "apiModelId": "anthropic/claude-3.7-sonnet", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 15 + } + } + }, + { + "providerId": "gateway", + "modelId": "claude-3-haiku", + "apiModelId": "anthropic/claude-3-haiku", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.25 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.25 + } + } + }, + { + "providerId": "gateway", + "modelId": "claude-3-opus", + "apiModelId": "anthropic/claude-3-opus", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 15 + }, + "output": { + "currency": "USD", + "perMillionTokens": 75 + } + } + }, + { + "providerId": "gateway", + "modelId": "claude-haiku-4-5", + "apiModelId": "anthropic/claude-haiku-4.5", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1 + }, + "output": { + "currency": "USD", + "perMillionTokens": 5 + } + } + }, + { + "providerId": "gateway", + "modelId": 
"claude-opus-4", + "apiModelId": "anthropic/claude-opus-4", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 15 + }, + "output": { + "currency": "USD", + "perMillionTokens": 75 + } + } + }, + { + "providerId": "gateway", + "modelId": "claude-opus-4-1", + "apiModelId": "anthropic/claude-opus-4.1", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 15 + }, + "output": { + "currency": "USD", + "perMillionTokens": 75 + } + } + }, + { + "providerId": "gateway", + "modelId": "claude-opus-4-5", + "apiModelId": "anthropic/claude-opus-4.5", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 25 + } + } + }, + { + "providerId": "gateway", + "modelId": "claude-opus-4-6", + "apiModelId": "anthropic/claude-opus-4.6", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 25 + } + } + }, + { + "providerId": "gateway", + "modelId": "claude-sonnet-4", + "apiModelId": "anthropic/claude-sonnet-4", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 15 + } + } + }, + { + "providerId": "gateway", + "modelId": "claude-sonnet-4-5", + "apiModelId": "anthropic/claude-sonnet-4.5", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 15 + } + } + }, + { + "providerId": "gateway", + "modelId": "claude-sonnet-4-6", + "apiModelId": "anthropic/claude-sonnet-4.6", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 15 + } + } + }, + { + "providerId": "gateway", + "modelId": "codestral", + "apiModelId": "mistral/codestral", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.3 + }, + "output": { + "currency": "USD", + 
"perMillionTokens": 0.8999999999999999 + } + } + }, + { + "providerId": "gateway", + "modelId": "codestral-embed", + "apiModelId": "mistral/codestral-embed", + "endpointTypes": ["openai-embeddings"] + }, + { + "providerId": "gateway", + "modelId": "codex-mini", + "apiModelId": "openai/codex-mini", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 6 + } + } + }, + { + "providerId": "gateway", + "modelId": "command-a", + "apiModelId": "cohere/command-a" + }, + { + "providerId": "gateway", + "modelId": "deepseek-r1", + "apiModelId": "deepseek/deepseek-r1", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2.1500000000000004 + } + } + }, + { + "providerId": "gateway", + "modelId": "deepseek-v3", + "apiModelId": "deepseek/deepseek-v3", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.77 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.77 + } + } + }, + { + "providerId": "gateway", + "modelId": "deepseek-v3-1", + "apiModelId": "deepseek/deepseek-v3.1", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.21 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.7899999999999999 + } + } + }, + { + "providerId": "gateway", + "modelId": "deepseek-v3-1-terminus", + "apiModelId": "deepseek/deepseek-v3.1-terminus", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.27 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1 + } + } + }, + { + "providerId": "gateway", + "modelId": "deepseek-v3-2", + "apiModelId": "deepseek/deepseek-v3.2", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.26 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.38 + } + } + }, + { + "providerId": "gateway", + "modelId": "deepseek-v3-2", + "apiModelId": "deepseek/deepseek-v3.2-thinking", + 
"modelVariant": "thinking", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.28 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.42 + } + } + }, + { + "providerId": "gateway", + "modelId": "devstral-2", + "apiModelId": "mistral/devstral-2", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + } + }, + { + "providerId": "gateway", + "modelId": "devstral-small", + "apiModelId": "mistral/devstral-small", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.09999999999999999 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.3 + } + } + }, + { + "providerId": "gateway", + "modelId": "devstral-small-2", + "apiModelId": "mistral/devstral-small-2", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + } + }, + { + "providerId": "gateway", + "modelId": "embed-v4-0", + "apiModelId": "cohere/embed-v4.0", + "endpointTypes": ["openai-embeddings"] + }, + { + "providerId": "gateway", + "modelId": "flux-kontext-max", + "apiModelId": "bfl/flux-kontext-max", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "endpointTypes": ["openai-image-generation"] + }, + { + "providerId": "gateway", + "modelId": "flux-kontext-pro", + "apiModelId": "bfl/flux-kontext-pro", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "endpointTypes": ["openai-image-generation"] + }, + { + "providerId": "gateway", + "modelId": "flux-pro-1-0-fill", + "apiModelId": "bfl/flux-pro-1.0-fill", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "endpointTypes": 
["openai-image-generation"] + }, + { + "providerId": "gateway", + "modelId": "flux-pro-1-1", + "apiModelId": "bfl/flux-pro-1.1", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "endpointTypes": ["openai-image-generation"] + }, + { + "providerId": "gateway", + "modelId": "flux-pro-1-1-ultra", + "apiModelId": "bfl/flux-pro-1.1-ultra", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "endpointTypes": ["openai-image-generation"] + }, + { + "providerId": "gateway", + "modelId": "gemini-2-0-flash", + "apiModelId": "google/gemini-2.0-flash", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.09999999999999999 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.39999999999999997 + } + } + }, + { + "providerId": "gateway", + "modelId": "gemini-2-0-flash-lite", + "apiModelId": "google/gemini-2.0-flash-lite", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.075 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.3 + } + } + }, + { + "providerId": "gateway", + "modelId": "gemini-2-5-flash", + "apiModelId": "google/gemini-2.5-flash", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2.5 + } + } + }, + { + "providerId": "gateway", + "modelId": "gemini-2-5-flash-image", + "apiModelId": "google/gemini-2.5-flash-image", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2.5 + } + } + }, + { + "providerId": "gateway", + "modelId": "gemini-2-5-flash-lite", + "apiModelId": "google/gemini-2.5-flash-lite", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.09999999999999999 + }, + "output": { + "currency": "USD", + 
"perMillionTokens": 0.39999999999999997 + } + } + }, + { + "providerId": "gateway", + "modelId": "gemini-2-5-flash-lite-preview-09-2025", + "apiModelId": "google/gemini-2.5-flash-lite-preview-09-2025", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.09999999999999999 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.39999999999999997 + } + } + }, + { + "providerId": "gateway", + "modelId": "gemini-2-5-flash-preview-09-2025", + "apiModelId": "google/gemini-2.5-flash-preview-09-2025", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2.5 + } + } + }, + { + "providerId": "gateway", + "modelId": "gemini-2-5-pro", + "apiModelId": "google/gemini-2.5-pro", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.25 + }, + "output": { + "currency": "USD", + "perMillionTokens": 10 + } + } + }, + { + "providerId": "gateway", + "modelId": "gemini-3-1-flash-image-preview", + "apiModelId": "google/gemini-3.1-flash-image-preview" + }, + { + "providerId": "gateway", + "modelId": "gemini-3-1-flash-lite-preview", + "apiModelId": "google/gemini-3.1-flash-lite-preview", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.25 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.5 + } + } + }, + { + "providerId": "gateway", + "modelId": "gemini-3-1-pro-preview", + "apiModelId": "google/gemini-3.1-pro-preview", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 12 + } + } + }, + { + "providerId": "gateway", + "modelId": "gemini-3-flash", + "apiModelId": "google/gemini-3-flash", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 3 + } + } + }, + { + "providerId": "gateway", + "modelId": "gemini-3-pro-image", + "apiModelId": 
"google/gemini-3-pro-image", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 12 + } + } + }, + { + "providerId": "gateway", + "modelId": "gemini-3-pro-preview", + "apiModelId": "google/gemini-3-pro-preview", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 12 + } + } + }, + { + "providerId": "gateway", + "modelId": "gemini-embedding-001", + "apiModelId": "google/gemini-embedding-001", + "endpointTypes": ["openai-embeddings"] + }, + { + "providerId": "gateway", + "modelId": "glm-4-5", + "apiModelId": "zai/glm-4.5", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.6 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2.2 + } + } + }, + { + "providerId": "gateway", + "modelId": "glm-4-5-air", + "apiModelId": "zai/glm-4.5-air", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.19999999999999998 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.1 + } + } + }, + { + "providerId": "gateway", + "modelId": "glm-4-5v", + "apiModelId": "zai/glm-4.5v", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.6 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.7999999999999998 + } + } + }, + { + "providerId": "gateway", + "modelId": "glm-4-6", + "apiModelId": "zai/glm-4.6", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.44999999999999996 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.7999999999999998 + } + } + }, + { + "providerId": "gateway", + "modelId": "glm-4-6v", + "apiModelId": "zai/glm-4.6v", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.8999999999999999 + } + } + }, + { + "providerId": "gateway", + "modelId": "glm-4-6v-flash", + "apiModelId": "zai/glm-4.6v-flash", + 
"pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + } + }, + { + "providerId": "gateway", + "modelId": "glm-4-7", + "apiModelId": "zai/glm-4.7", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.43 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.75 + } + } + }, + { + "providerId": "gateway", + "modelId": "glm-4-7-flash", + "apiModelId": "zai/glm-4.7-flash", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.07 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.39999999999999997 + } + } + }, + { + "providerId": "gateway", + "modelId": "glm-4-7-flashx", + "apiModelId": "zai/glm-4.7-flashx", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.06 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.39999999999999997 + } + } + }, + { + "providerId": "gateway", + "modelId": "glm-5", + "apiModelId": "zai/glm-5", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.7999999999999999 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2.56 + } + } + }, + { + "providerId": "gateway", + "modelId": "gpt-3-5-turbo", + "apiModelId": "openai/gpt-3.5-turbo", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.5 + } + } + }, + { + "providerId": "gateway", + "modelId": "gpt-3-5-turbo-instruct", + "apiModelId": "openai/gpt-3.5-turbo-instruct" + }, + { + "providerId": "gateway", + "modelId": "gpt-4-1", + "apiModelId": "openai/gpt-4.1", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 8 + } + } + }, + { + "providerId": "gateway", + "modelId": "gpt-4-1-mini", + "apiModelId": "openai/gpt-4.1-mini", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.39999999999999997 + }, + 
"output": { + "currency": "USD", + "perMillionTokens": 1.5999999999999999 + } + } + }, + { + "providerId": "gateway", + "modelId": "gpt-4-1-nano", + "apiModelId": "openai/gpt-4.1-nano", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.09999999999999999 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.39999999999999997 + } + } + }, + { + "providerId": "gateway", + "modelId": "gpt-4-turbo", + "apiModelId": "openai/gpt-4-turbo" + }, + { + "providerId": "gateway", + "modelId": "gpt-4o", + "apiModelId": "openai/gpt-4o", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2.5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 10 + } + } + }, + { + "providerId": "gateway", + "modelId": "gpt-4o-mini", + "apiModelId": "openai/gpt-4o-mini", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.15 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.6 + } + } + }, + { + "providerId": "gateway", + "modelId": "gpt-4o-mini-search-preview", + "apiModelId": "openai/gpt-4o-mini-search-preview", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.15 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.6 + } + } + }, + { + "providerId": "gateway", + "modelId": "gpt-5", + "apiModelId": "openai/gpt-5", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.25 + }, + "output": { + "currency": "USD", + "perMillionTokens": 10 + } + } + }, + { + "providerId": "gateway", + "modelId": "gpt-5-1", + "apiModelId": "openai/gpt-5.1-thinking", + "modelVariant": "thinking", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.25 + }, + "output": { + "currency": "USD", + "perMillionTokens": 10 + } + } + }, + { + "providerId": "gateway", + "modelId": "gpt-5-1-codex", + "apiModelId": "openai/gpt-5.1-codex", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.25 + }, + "output": { + "currency": "USD", + 
"perMillionTokens": 10 + } + } + }, + { + "providerId": "gateway", + "modelId": "gpt-5-1-codex-max", + "apiModelId": "openai/gpt-5.1-codex-max", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.25 + }, + "output": { + "currency": "USD", + "perMillionTokens": 10 + } + } + }, + { + "providerId": "gateway", + "modelId": "gpt-5-1-codex-mini", + "apiModelId": "openai/gpt-5.1-codex-mini", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.25 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2 + } + } + }, + { + "providerId": "gateway", + "modelId": "gpt-5-1-instant", + "apiModelId": "openai/gpt-5.1-instant", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.25 + }, + "output": { + "currency": "USD", + "perMillionTokens": 10 + } + } + }, + { + "providerId": "gateway", + "modelId": "gpt-5-2", + "apiModelId": "openai/gpt-5.2", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.75 + }, + "output": { + "currency": "USD", + "perMillionTokens": 14 + } + } + }, + { + "providerId": "gateway", + "modelId": "gpt-5-2-chat", + "apiModelId": "openai/gpt-5.2-chat", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.75 + }, + "output": { + "currency": "USD", + "perMillionTokens": 14 + } + } + }, + { + "providerId": "gateway", + "modelId": "gpt-5-2-codex", + "apiModelId": "openai/gpt-5.2-codex", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.75 + }, + "output": { + "currency": "USD", + "perMillionTokens": 14 + } + } + }, + { + "providerId": "gateway", + "modelId": "gpt-5-2-pro", + "apiModelId": "openai/gpt-5.2-pro", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 21 + }, + "output": { + "currency": "USD", + "perMillionTokens": 168 + } + } + }, + { + "providerId": "gateway", + "modelId": "gpt-5-3-chat", + "apiModelId": "openai/gpt-5.3-chat", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.75 + 
}, + "output": { + "currency": "USD", + "perMillionTokens": 14 + } + } + }, + { + "providerId": "gateway", + "modelId": "gpt-5-3-codex", + "apiModelId": "openai/gpt-5.3-codex", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.75 + }, + "output": { + "currency": "USD", + "perMillionTokens": 14 + } + } + }, + { + "providerId": "gateway", + "modelId": "gpt-5-4", + "apiModelId": "openai/gpt-5.4", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2.5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 15 + } + } + }, + { + "providerId": "gateway", + "modelId": "gpt-5-4-pro", + "apiModelId": "openai/gpt-5.4-pro" + }, + { + "providerId": "gateway", + "modelId": "gpt-5-chat", + "apiModelId": "openai/gpt-5-chat", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.25 + }, + "output": { + "currency": "USD", + "perMillionTokens": 10 + } + } + }, + { + "providerId": "gateway", + "modelId": "gpt-5-codex", + "apiModelId": "openai/gpt-5-codex", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.25 + }, + "output": { + "currency": "USD", + "perMillionTokens": 10 + } + } + }, + { + "providerId": "gateway", + "modelId": "gpt-5-mini", + "apiModelId": "openai/gpt-5-mini", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.25 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2 + } + } + }, + { + "providerId": "gateway", + "modelId": "gpt-5-nano", + "apiModelId": "openai/gpt-5-nano", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.049999999999999996 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.39999999999999997 + } + } + }, + { + "providerId": "gateway", + "modelId": "gpt-5-pro", + "apiModelId": "openai/gpt-5-pro", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 15 + }, + "output": { + "currency": "USD", + "perMillionTokens": 120 + } + } + }, + { + "providerId": "gateway", + "modelId": 
"gpt-image-1", + "apiModelId": "openai/gpt-image-1", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 40 + } + }, + "endpointTypes": ["openai-image-generation"] + }, + { + "providerId": "gateway", + "modelId": "gpt-image-1-5", + "apiModelId": "openai/gpt-image-1.5", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 32 + } + }, + "endpointTypes": ["openai-image-generation"] + }, + { + "providerId": "gateway", + "modelId": "gpt-image-1-mini", + "apiModelId": "openai/gpt-image-1-mini", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 8 + } + }, + "endpointTypes": ["openai-image-generation"] + }, + { + "providerId": "gateway", + "modelId": "gpt-oss", + "apiModelId": "openai/gpt-oss-120b", + "modelVariant": "120b", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.09999999999999999 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.5 + } + } + }, + { + "providerId": "gateway", + "modelId": "gpt-oss", + "apiModelId": "openai/gpt-oss-20b", + "modelVariant": "20b", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.07 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.3 + } + } + }, + { + "providerId": "gateway", + "modelId": "gpt-oss-safeguard", + "apiModelId": "openai/gpt-oss-safeguard-20b", + "modelVariant": "20b", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.075 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.3 + } + } + }, + { + "providerId": "gateway", + "modelId": "grok-2-vision", + "apiModelId": "xai/grok-2-vision", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 10 + } + } + }, + { + "providerId": "gateway", + 
"modelId": "grok-3", + "apiModelId": "xai/grok-3", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 15 + } + } + }, + { + "providerId": "gateway", + "modelId": "grok-3-fast", + "apiModelId": "xai/grok-3-fast", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 25 + } + } + }, + { + "providerId": "gateway", + "modelId": "grok-3-mini", + "apiModelId": "xai/grok-3-mini", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.5 + } + } + }, + { + "providerId": "gateway", + "modelId": "grok-3-mini-fast", + "apiModelId": "xai/grok-3-mini-fast", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.6 + }, + "output": { + "currency": "USD", + "perMillionTokens": 4 + } + } + }, + { + "providerId": "gateway", + "modelId": "grok-4", + "apiModelId": "xai/grok-4", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 15 + } + } + }, + { + "providerId": "gateway", + "modelId": "grok-4-1-fast-non-reasoning", + "apiModelId": "xai/grok-4.1-fast-non-reasoning", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.19999999999999998 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.5 + } + } + }, + { + "providerId": "gateway", + "modelId": "grok-4-1-fast-reasoning", + "apiModelId": "xai/grok-4.1-fast-reasoning", + "modelVariant": "reasoning", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.19999999999999998 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.5 + } + } + }, + { + "providerId": "gateway", + "modelId": "grok-4-fast-non-reasoning", + "apiModelId": "xai/grok-4-fast-non-reasoning", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 
0.19999999999999998 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.5 + } + } + }, + { + "providerId": "gateway", + "modelId": "grok-4-fast-reasoning", + "apiModelId": "xai/grok-4-fast-reasoning", + "modelVariant": "reasoning", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.19999999999999998 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.5 + } + } + }, + { + "providerId": "gateway", + "modelId": "grok-code-fast-1", + "apiModelId": "xai/grok-code-fast-1", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.19999999999999998 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.5 + } + } + }, + { + "providerId": "gateway", + "modelId": "grok-imagine-image", + "apiModelId": "xai/grok-imagine-image", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "endpointTypes": ["openai-image-generation"] + }, + { + "providerId": "gateway", + "modelId": "grok-imagine-image-pro", + "apiModelId": "xai/grok-imagine-image-pro", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "endpointTypes": ["openai-image-generation"] + }, + { + "providerId": "gateway", + "modelId": "imagen-4-0-fast-generate-001", + "apiModelId": "google/imagen-4.0-fast-generate-001", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "endpointTypes": ["openai-image-generation"] + }, + { + "providerId": "gateway", + "modelId": "imagen-4-0-generate-001", + "apiModelId": "google/imagen-4.0-generate-001", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "endpointTypes": ["openai-image-generation"] + }, + { + "providerId": "gateway", + "modelId": 
"imagen-4-0-ultra-generate-001", + "apiModelId": "google/imagen-4.0-ultra-generate-001", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "endpointTypes": ["openai-image-generation"] + }, + { + "providerId": "gateway", + "modelId": "intellect-3", + "apiModelId": "prime-intellect/intellect-3", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.19999999999999998 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.1 + } + } + }, + { + "providerId": "gateway", + "modelId": "kat-coder-pro-v1", + "apiModelId": "kwaipilot/kat-coder-pro-v1", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.03 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.2 + } + } + }, + { + "providerId": "gateway", + "modelId": "kimi-k2", + "apiModelId": "moonshotai/kimi-k2", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2 + } + } + }, + { + "providerId": "gateway", + "modelId": "kimi-k2", + "apiModelId": "moonshotai/kimi-k2-thinking", + "modelVariant": "thinking", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.47 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2 + } + } + }, + { + "providerId": "gateway", + "modelId": "kimi-k2-0905", + "apiModelId": "moonshotai/kimi-k2-0905", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.6 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2.5 + } + } + }, + { + "providerId": "gateway", + "modelId": "kimi-k2-5", + "apiModelId": "moonshotai/kimi-k2.5", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2.8 + } + } + }, + { + "providerId": "gateway", + "modelId": "kimi-k2-thinking-turbo", + "apiModelId": "moonshotai/kimi-k2-thinking-turbo", + 
"pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.15 + }, + "output": { + "currency": "USD", + "perMillionTokens": 8 + } + } + }, + { + "providerId": "gateway", + "modelId": "kimi-k2-turbo", + "apiModelId": "moonshotai/kimi-k2-turbo" + }, + { + "providerId": "gateway", + "modelId": "llama-3-1", + "apiModelId": "meta/llama-3.1-70b", + "modelVariant": "70b", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.39999999999999997 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.39999999999999997 + } + } + }, + { + "providerId": "gateway", + "modelId": "llama-3-1", + "apiModelId": "meta/llama-3.1-8b", + "modelVariant": "8b", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.03 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.049999999999999996 + } + } + }, + { + "providerId": "gateway", + "modelId": "llama-3-2", + "apiModelId": "meta/llama-3.2-11b", + "modelVariant": "11b", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.16 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.16 + } + } + }, + { + "providerId": "gateway", + "modelId": "llama-3-2", + "apiModelId": "meta/llama-3.2-1b", + "modelVariant": "1b", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.09999999999999999 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.09999999999999999 + } + } + }, + { + "providerId": "gateway", + "modelId": "llama-3-2", + "apiModelId": "meta/llama-3.2-3b", + "modelVariant": "3b", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.15 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.15 + } + } + }, + { + "providerId": "gateway", + "modelId": "llama-3-2", + "apiModelId": "meta/llama-3.2-90b", + "modelVariant": "90b" + }, + { + "providerId": "gateway", + "modelId": "llama-3-3", + "apiModelId": "meta/llama-3.3-70b", + "modelVariant": "70b", + "pricing": { + "input": { + "currency": 
"USD", + "perMillionTokens": 0.72 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.72 + } + } + }, + { + "providerId": "gateway", + "modelId": "llama-4-maverick", + "apiModelId": "meta/llama-4-maverick" + }, + { + "providerId": "gateway", + "modelId": "llama-4-scout", + "apiModelId": "meta/llama-4-scout" + }, + { + "providerId": "gateway", + "modelId": "longcat-flash", + "apiModelId": "meituan/longcat-flash-thinking", + "modelVariant": "thinking" + }, + { + "providerId": "gateway", + "modelId": "longcat-flash-chat", + "apiModelId": "meituan/longcat-flash-chat", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + } + }, + { + "providerId": "gateway", + "modelId": "magistral", + "apiModelId": "mistral/magistral-medium", + "modelVariant": "medium" + }, + { + "providerId": "gateway", + "modelId": "magistral-small", + "apiModelId": "mistral/magistral-small" + }, + { + "providerId": "gateway", + "modelId": "mercury-2", + "apiModelId": "inception/mercury-2", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.25 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.75 + } + } + }, + { + "providerId": "gateway", + "modelId": "mercury-coder-small", + "apiModelId": "inception/mercury-coder-small" + }, + { + "providerId": "gateway", + "modelId": "mimo-v2-flash", + "apiModelId": "xiaomi/mimo-v2-flash", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.09 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.29 + } + } + }, + { + "providerId": "gateway", + "modelId": "minimax-m2", + "apiModelId": "minimax/minimax-m2", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.2 + } + } + }, + { + "providerId": "gateway", + "modelId": "minimax-m2-1", + "apiModelId": "minimax/minimax-m2.1", + "pricing": { + "input": { + "currency": 
"USD", + "perMillionTokens": 0.3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.2 + } + } + }, + { + "providerId": "gateway", + "modelId": "minimax-m2-1-lightning", + "apiModelId": "minimax/minimax-m2.1-lightning", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2.4 + } + } + }, + { + "providerId": "gateway", + "modelId": "minimax-m2-5", + "apiModelId": "minimax/minimax-m2.5", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.2 + } + } + }, + { + "providerId": "gateway", + "modelId": "minimax-m2-5-highspeed", + "apiModelId": "minimax/minimax-m2.5-highspeed", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.6 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2.4 + } + } + }, + { + "providerId": "gateway", + "modelId": "ministral", + "apiModelId": "mistral/ministral-14b", + "modelVariant": "14b", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.19999999999999998 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.19999999999999998 + } + } + }, + { + "providerId": "gateway", + "modelId": "ministral", + "apiModelId": "mistral/ministral-3b", + "modelVariant": "3b", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.04 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.04 + } + } + }, + { + "providerId": "gateway", + "modelId": "ministral", + "apiModelId": "mistral/ministral-8b", + "modelVariant": "8b", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.09999999999999999 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.09999999999999999 + } + } + }, + { + "providerId": "gateway", + "modelId": "mistral", + "apiModelId": "mistral/mistral-medium", + "modelVariant": "medium", + "pricing": { + "input": { + "currency": "USD", + 
"perMillionTokens": 0.39999999999999997 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2 + } + } + }, + { + "providerId": "gateway", + "modelId": "mistral-embed", + "apiModelId": "mistral/mistral-embed", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.09999999999999999 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "endpointTypes": ["openai-embeddings"] + }, + { + "providerId": "gateway", + "modelId": "mistral-large-3", + "apiModelId": "mistral/mistral-large-3" + }, + { + "providerId": "gateway", + "modelId": "mistral-nemo", + "apiModelId": "mistral/mistral-nemo", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.02 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.04 + } + } + }, + { + "providerId": "gateway", + "modelId": "mistral-small", + "apiModelId": "mistral/mistral-small", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.09999999999999999 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.3 + } + } + }, + { + "providerId": "gateway", + "modelId": "mixtral-8x22b-instruct", + "apiModelId": "mistral/mixtral-8x22b-instruct", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.2 + } + } + }, + { + "providerId": "gateway", + "modelId": "morph-v3-fast", + "apiModelId": "morph/morph-v3-fast", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.7999999999999999 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.2 + } + } + }, + { + "providerId": "gateway", + "modelId": "morph-v3-large", + "apiModelId": "morph/morph-v3-large", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.8999999999999999 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.9 + } + } + }, + { + "providerId": "gateway", + "modelId": "nemotron-3-nano-a3b", + "apiModelId": "nvidia/nemotron-3-nano-30b-a3b", + 
"modelVariant": "30b", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.06 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.24 + } + } + }, + { + "providerId": "gateway", + "modelId": "nemotron-nano-v2", + "apiModelId": "nvidia/nemotron-nano-9b-v2", + "modelVariant": "9b", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.04 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.16 + } + } + }, + { + "providerId": "gateway", + "modelId": "nemotron-nano-v2-vl", + "apiModelId": "nvidia/nemotron-nano-12b-v2-vl", + "modelVariant": "12b", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.19999999999999998 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.6 + } + } + }, + { + "providerId": "gateway", + "modelId": "nova-2-lite", + "apiModelId": "amazon/nova-2-lite" + }, + { + "providerId": "gateway", + "modelId": "nova-lite", + "apiModelId": "amazon/nova-lite", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.06 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.24 + } + } + }, + { + "providerId": "gateway", + "modelId": "nova-micro", + "apiModelId": "amazon/nova-micro", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.035 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.14 + } + } + }, + { + "providerId": "gateway", + "modelId": "nova-pro", + "apiModelId": "amazon/nova-pro", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.7999999999999999 + }, + "output": { + "currency": "USD", + "perMillionTokens": 3.1999999999999997 + } + } + }, + { + "providerId": "gateway", + "modelId": "o1", + "apiModelId": "openai/o1", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 15 + }, + "output": { + "currency": "USD", + "perMillionTokens": 60 + } + } + }, + { + "providerId": "gateway", + "modelId": "o3", + "apiModelId": "openai/o3", + "pricing": { + "input": { + 
"currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 8 + } + } + }, + { + "providerId": "gateway", + "modelId": "o3-deep-research", + "apiModelId": "openai/o3-deep-research", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 10 + }, + "output": { + "currency": "USD", + "perMillionTokens": 40 + } + } + }, + { + "providerId": "gateway", + "modelId": "o3-mini", + "apiModelId": "openai/o3-mini", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.1 + }, + "output": { + "currency": "USD", + "perMillionTokens": 4.4 + } + } + }, + { + "providerId": "gateway", + "modelId": "o3-pro", + "apiModelId": "openai/o3-pro", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 20 + }, + "output": { + "currency": "USD", + "perMillionTokens": 80 + } + } + }, + { + "providerId": "gateway", + "modelId": "o4-mini", + "apiModelId": "openai/o4-mini", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.1 + }, + "output": { + "currency": "USD", + "perMillionTokens": 4.4 + } + } + }, + { + "providerId": "gateway", + "modelId": "pixtral", + "apiModelId": "mistral/pixtral-12b", + "modelVariant": "12b" + }, + { + "providerId": "gateway", + "modelId": "pixtral-large", + "apiModelId": "mistral/pixtral-large" + }, + { + "providerId": "gateway", + "modelId": "qwen-3", + "apiModelId": "alibaba/qwen-3-14b", + "modelVariant": "14b", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.06 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.24 + } + } + }, + { + "providerId": "gateway", + "modelId": "qwen-3", + "apiModelId": "alibaba/qwen-3-235b", + "modelVariant": "235b", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.071 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.463 + } + } + }, + { + "providerId": "gateway", + "modelId": "qwen-3", + "apiModelId": "alibaba/qwen-3-30b", + "modelVariant": "30b", 
+ "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.08 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.29 + } + } + }, + { + "providerId": "gateway", + "modelId": "qwen-3", + "apiModelId": "alibaba/qwen-3-32b", + "modelVariant": "32b", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.09999999999999999 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.3 + } + } + }, + { + "providerId": "gateway", + "modelId": "qwen3-5-flash", + "apiModelId": "alibaba/qwen3.5-flash" + }, + { + "providerId": "gateway", + "modelId": "qwen3-5-plus", + "apiModelId": "alibaba/qwen3.5-plus", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.39999999999999997 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2.4 + } + } + }, + { + "providerId": "gateway", + "modelId": "qwen3-a22b", + "apiModelId": "alibaba/qwen3-235b-a22b-thinking", + "modelVariant": "235b-thinking", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2.9000000000000004 + } + } + }, + { + "providerId": "gateway", + "modelId": "qwen3-coder", + "apiModelId": "alibaba/qwen3-coder", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.39999999999999997 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.5999999999999999 + } + } + }, + { + "providerId": "gateway", + "modelId": "qwen3-coder-a3b", + "apiModelId": "alibaba/qwen3-coder-30b-a3b", + "modelVariant": "30b" + }, + { + "providerId": "gateway", + "modelId": "qwen3-coder-next", + "apiModelId": "alibaba/qwen3-coder-next", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.2 + } + } + }, + { + "providerId": "gateway", + "modelId": "qwen3-coder-plus", + "apiModelId": "alibaba/qwen3-coder-plus", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1 + 
}, + "output": { + "currency": "USD", + "perMillionTokens": 5 + } + } + }, + { + "providerId": "gateway", + "modelId": "qwen3-embedding", + "apiModelId": "alibaba/qwen3-embedding-4b", + "modelVariant": "4b", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.02 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "endpointTypes": ["openai-embeddings"] + }, + { + "providerId": "gateway", + "modelId": "qwen3-embedding", + "apiModelId": "alibaba/qwen3-embedding-0.6b", + "modelVariant": "6b", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.01 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "endpointTypes": ["openai-embeddings"] + }, + { + "providerId": "gateway", + "modelId": "qwen3-embedding", + "apiModelId": "alibaba/qwen3-embedding-8b", + "modelVariant": "8b", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.049999999999999996 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "endpointTypes": ["openai-embeddings"] + }, + { + "providerId": "gateway", + "modelId": "qwen3-max", + "apiModelId": "alibaba/qwen3-max", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.845 + }, + "output": { + "currency": "USD", + "perMillionTokens": 3.38 + } + } + }, + { + "providerId": "gateway", + "modelId": "qwen3-max", + "apiModelId": "alibaba/qwen3-max-thinking", + "modelVariant": "thinking", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 6 + } + } + }, + { + "providerId": "gateway", + "modelId": "qwen3-max-preview", + "apiModelId": "alibaba/qwen3-max-preview", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 6 + } + } + }, + { + "providerId": "gateway", + "modelId": "qwen3-next-a3b", + "apiModelId": "alibaba/qwen3-next-80b-a3b-thinking", + 
"modelVariant": "80b-thinking", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.15 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.5 + } + } + }, + { + "providerId": "gateway", + "modelId": "qwen3-next-a3b-instruct", + "apiModelId": "alibaba/qwen3-next-80b-a3b-instruct", + "modelVariant": "80b", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.09 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.1 + } + } + }, + { + "providerId": "gateway", + "modelId": "qwen3-vl", + "apiModelId": "alibaba/qwen3-vl-thinking", + "modelVariant": "thinking", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.22 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.88 + } + } + }, + { + "providerId": "gateway", + "modelId": "qwen3-vl-instruct", + "apiModelId": "alibaba/qwen3-vl-instruct", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.22 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.88 + } + } + }, + { + "providerId": "gateway", + "modelId": "recraft-v2", + "apiModelId": "recraft/recraft-v2", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "endpointTypes": ["openai-image-generation"] + }, + { + "providerId": "gateway", + "modelId": "recraft-v3", + "apiModelId": "recraft/recraft-v3", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "endpointTypes": ["openai-image-generation"] + }, + { + "providerId": "gateway", + "modelId": "recraft-v4", + "apiModelId": "recraft/recraft-v4", + "endpointTypes": ["openai-image-generation"] + }, + { + "providerId": "gateway", + "modelId": "recraft-v4-pro", + "apiModelId": "recraft/recraft-v4-pro", + "endpointTypes": ["openai-image-generation"] + }, + { + "providerId": "gateway", + "modelId": "seed-1-6", + 
"apiModelId": "bytedance/seed-1.6", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.25 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2 + } + } + }, + { + "providerId": "gateway", + "modelId": "seed-1-8", + "apiModelId": "bytedance/seed-1.8", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.25 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2 + } + } + }, + { + "providerId": "gateway", + "modelId": "sonar-pro", + "apiModelId": "perplexity/sonar-pro" + }, + { + "providerId": "gateway", + "modelId": "sonar-reasoning", + "apiModelId": "perplexity/sonar", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1 + } + } + }, + { + "providerId": "gateway", + "modelId": "sonar-reasoning", + "apiModelId": "perplexity/sonar-reasoning", + "modelVariant": "reasoning" + }, + { + "providerId": "gateway", + "modelId": "sonar-reasoning-pro", + "apiModelId": "perplexity/sonar-reasoning-pro" + }, + { + "providerId": "gateway", + "modelId": "text-embedding-005", + "apiModelId": "google/text-embedding-005", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.024999999999999998 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "endpointTypes": ["openai-embeddings"] + }, + { + "providerId": "gateway", + "modelId": "text-embedding-3-large", + "apiModelId": "openai/text-embedding-3-large", + "endpointTypes": ["openai-embeddings"] + }, + { + "providerId": "gateway", + "modelId": "text-embedding-3-small", + "apiModelId": "openai/text-embedding-3-small", + "endpointTypes": ["openai-embeddings"] + }, + { + "providerId": "gateway", + "modelId": "text-embedding-ada-002", + "apiModelId": "openai/text-embedding-ada-002", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.09999999999999999 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + 
"endpointTypes": ["openai-embeddings"] + }, + { + "providerId": "gateway", + "modelId": "text-multilingual-embedding-002", + "apiModelId": "google/text-multilingual-embedding-002", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.024999999999999998 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "endpointTypes": ["openai-embeddings"] + }, + { + "providerId": "gateway", + "modelId": "titan-embed-text-v2", + "apiModelId": "amazon/titan-embed-text-v2", + "endpointTypes": ["openai-embeddings"] + }, + { + "providerId": "gateway", + "modelId": "trinity-large-preview", + "apiModelId": "arcee-ai/trinity-large-preview" + }, + { + "providerId": "gateway", + "modelId": "trinity-mini", + "apiModelId": "arcee-ai/trinity-mini", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.045 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.15 + } + } + }, + { + "providerId": "gateway", + "modelId": "v0-1-0-md", + "apiModelId": "vercel/v0-1.0-md" + }, + { + "providerId": "gateway", + "modelId": "v0-1-5-md", + "apiModelId": "vercel/v0-1.5-md" + }, + { + "providerId": "gateway", + "modelId": "voyage-3-5", + "apiModelId": "voyage/voyage-3.5", + "endpointTypes": ["openai-embeddings"] + }, + { + "providerId": "gateway", + "modelId": "voyage-3-5-lite", + "apiModelId": "voyage/voyage-3.5-lite", + "endpointTypes": ["openai-embeddings"] + }, + { + "providerId": "gateway", + "modelId": "voyage-3-large", + "apiModelId": "voyage/voyage-3-large", + "endpointTypes": ["openai-embeddings"] + }, + { + "providerId": "gateway", + "modelId": "voyage-4", + "apiModelId": "voyage/voyage-4", + "endpointTypes": ["openai-embeddings"] + }, + { + "providerId": "gateway", + "modelId": "voyage-4-large", + "apiModelId": "voyage/voyage-4-large", + "endpointTypes": ["openai-embeddings"] + }, + { + "providerId": "gateway", + "modelId": "voyage-4-lite", + "apiModelId": "voyage/voyage-4-lite", + "endpointTypes": ["openai-embeddings"] + }, 
+ { + "providerId": "gateway", + "modelId": "voyage-code-2", + "apiModelId": "voyage/voyage-code-2", + "endpointTypes": ["openai-embeddings"] + }, + { + "providerId": "gateway", + "modelId": "voyage-code-3", + "apiModelId": "voyage/voyage-code-3", + "endpointTypes": ["openai-embeddings"] + }, + { + "providerId": "gateway", + "modelId": "voyage-finance-2", + "apiModelId": "voyage/voyage-finance-2", + "endpointTypes": ["openai-embeddings"] + }, + { + "providerId": "gateway", + "modelId": "voyage-law-2", + "apiModelId": "voyage/voyage-law-2", + "endpointTypes": ["openai-embeddings"] + }, + { + "providerId": "github", + "modelId": "codestral-2501", + "apiModelId": "mistral-ai/codestral-2501", + "limits": { + "contextWindow": 256000, + "maxOutputTokens": 4096 + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "github", + "modelId": "command-a", + "apiModelId": "cohere/cohere-command-a", + "limits": { + "contextWindow": 131072, + "maxOutputTokens": 4096 + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "github", + "modelId": "command-r-08-2024", + "apiModelId": "cohere/cohere-command-r-08-2024", + "limits": { + "contextWindow": 131072, + "maxOutputTokens": 4096 + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "github", + "modelId": "command-r-plus-08-2024", + "apiModelId": "cohere/cohere-command-r-plus-08-2024", + "limits": { + "contextWindow": 131072, + "maxOutputTokens": 4096 + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "github", + "modelId": "deepseek-r1", + "apiModelId": "deepseek/deepseek-r1", + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "github", + "modelId": "deepseek-r1-0528", + "apiModelId": "deepseek/deepseek-r1-0528", + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "github", + "modelId": "deepseek-v3-0324", + 
"apiModelId": "deepseek/deepseek-v3-0324", + "limits": { + "contextWindow": 128000, + "maxOutputTokens": 4096 + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "github", + "modelId": "gpt-4-1", + "apiModelId": "openai/gpt-4.1", + "limits": { + "contextWindow": 1048576 + }, + "inputModalities": ["text", "image"], + "outputModalities": ["text"] + }, + { + "providerId": "github", + "modelId": "gpt-4-1-mini", + "apiModelId": "openai/gpt-4.1-mini", + "limits": { + "contextWindow": 1048576 + }, + "inputModalities": ["text", "image"], + "outputModalities": ["text"] + }, + { + "providerId": "github", + "modelId": "gpt-4-1-nano", + "apiModelId": "openai/gpt-4.1-nano", + "limits": { + "contextWindow": 1048576 + }, + "inputModalities": ["text", "image"], + "outputModalities": ["text"] + }, + { + "providerId": "github", + "modelId": "gpt-4o", + "apiModelId": "openai/gpt-4o", + "limits": { + "contextWindow": 131072 + }, + "inputModalities": ["text", "image", "audio"], + "outputModalities": ["text"] + }, + { + "providerId": "github", + "modelId": "gpt-4o-mini", + "apiModelId": "openai/gpt-4o-mini", + "limits": { + "contextWindow": 131072, + "maxOutputTokens": 4096 + }, + "inputModalities": ["text", "image", "audio"], + "outputModalities": ["text"] + }, + { + "providerId": "github", + "modelId": "gpt-5", + "apiModelId": "openai/gpt-5", + "limits": { + "contextWindow": 200000, + "maxOutputTokens": 100000 + }, + "inputModalities": ["text", "image"], + "outputModalities": ["text"] + }, + { + "providerId": "github", + "modelId": "gpt-5-chat", + "apiModelId": "openai/gpt-5-chat", + "limits": { + "contextWindow": 200000, + "maxOutputTokens": 100000 + }, + "inputModalities": ["text", "image"], + "outputModalities": ["text"] + }, + { + "providerId": "github", + "modelId": "gpt-5-mini", + "apiModelId": "openai/gpt-5-mini", + "limits": { + "contextWindow": 200000, + "maxOutputTokens": 100000 + }, + "inputModalities": ["text", "image"], + 
"outputModalities": ["text"] + }, + { + "providerId": "github", + "modelId": "gpt-5-nano", + "apiModelId": "openai/gpt-5-nano", + "limits": { + "contextWindow": 200000, + "maxOutputTokens": 100000 + }, + "inputModalities": ["text", "image"], + "outputModalities": ["text"] + }, + { + "providerId": "github", + "modelId": "grok-3", + "apiModelId": "xai/grok-3", + "limits": { + "maxOutputTokens": 4096 + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "github", + "modelId": "grok-3-mini", + "apiModelId": "xai/grok-3-mini", + "limits": { + "maxOutputTokens": 4096 + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "github", + "modelId": "jamba-1-5-large", + "apiModelId": "ai21-labs/ai21-jamba-1.5-large", + "limits": { + "contextWindow": 262144 + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "github", + "modelId": "llama-3-1-instruct", + "apiModelId": "meta/meta-llama-3.1-405b-instruct", + "modelVariant": "405b", + "limits": { + "contextWindow": 131072 + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "github", + "modelId": "llama-3-1-instruct", + "apiModelId": "meta/meta-llama-3.1-8b-instruct", + "modelVariant": "8b", + "limits": { + "contextWindow": 131072 + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "github", + "modelId": "llama-3-2-vision-instruct", + "apiModelId": "meta/llama-3.2-11b-vision-instruct", + "modelVariant": "11b", + "inputModalities": ["text", "image", "audio"], + "outputModalities": ["text"] + }, + { + "providerId": "github", + "modelId": "llama-3-2-vision-instruct", + "apiModelId": "meta/llama-3.2-90b-vision-instruct", + "modelVariant": "90b", + "inputModalities": ["text", "image", "audio"], + "outputModalities": ["text"] + }, + { + "providerId": "github", + "modelId": "llama-3-3-instruct", + "apiModelId": "meta/llama-3.3-70b-instruct", + 
"modelVariant": "70b", + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "github", + "modelId": "llama-4-maverick-128e-instruct-fp8", + "apiModelId": "meta/llama-4-maverick-17b-128e-instruct-fp8", + "modelVariant": "17b", + "limits": { + "maxOutputTokens": 4096 + }, + "inputModalities": ["text", "image"], + "outputModalities": ["text"] + }, + { + "providerId": "github", + "modelId": "llama-4-scout-16e-instruct", + "apiModelId": "meta/llama-4-scout-17b-16e-instruct", + "modelVariant": "17b", + "limits": { + "contextWindow": 10000000 + }, + "inputModalities": ["text", "image"], + "outputModalities": ["text"] + }, + { + "providerId": "github", + "modelId": "mai-ds-r1", + "apiModelId": "microsoft/mai-ds-r1", + "limits": { + "contextWindow": 128000, + "maxOutputTokens": 4096 + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "github", + "modelId": "ministral", + "apiModelId": "mistral-ai/ministral-3b", + "modelVariant": "3b", + "limits": { + "contextWindow": 131072, + "maxOutputTokens": 4096 + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "github", + "modelId": "mistral-medium-2505", + "apiModelId": "mistral-ai/mistral-medium-2505", + "limits": { + "contextWindow": 128000, + "maxOutputTokens": 4096 + }, + "inputModalities": ["text", "image"], + "outputModalities": ["text"] + }, + { + "providerId": "github", + "modelId": "mistral-small-2503", + "apiModelId": "mistral-ai/mistral-small-2503", + "limits": { + "maxOutputTokens": 4096 + }, + "inputModalities": ["text", "image"], + "outputModalities": ["text"] + }, + { + "providerId": "github", + "modelId": "o1", + "apiModelId": "openai/o1", + "inputModalities": ["text", "image"], + "outputModalities": ["text"] + }, + { + "providerId": "github", + "modelId": "o1-mini", + "apiModelId": "openai/o1-mini", + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "github", + 
"modelId": "o1-preview", + "apiModelId": "openai/o1-preview", + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "github", + "modelId": "o3", + "apiModelId": "openai/o3", + "limits": { + "contextWindow": 200000, + "maxOutputTokens": 100000 + }, + "inputModalities": ["text", "image"], + "outputModalities": ["text"] + }, + { + "providerId": "github", + "modelId": "o3-mini", + "apiModelId": "openai/o3-mini", + "limits": { + "contextWindow": 200000, + "maxOutputTokens": 100000 + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "github", + "modelId": "o4-mini", + "apiModelId": "openai/o4-mini", + "inputModalities": ["text", "image"], + "outputModalities": ["text"] + }, + { + "providerId": "github", + "modelId": "phi-4", + "apiModelId": "microsoft/phi-4", + "limits": { + "contextWindow": 16384, + "maxOutputTokens": 16384 + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "github", + "modelId": "phi-4", + "apiModelId": "microsoft/phi-4-reasoning", + "modelVariant": "reasoning", + "limits": { + "contextWindow": 32768 + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "github", + "modelId": "phi-4-mini-instruct", + "apiModelId": "microsoft/phi-4-mini-instruct", + "limits": { + "contextWindow": 128000, + "maxOutputTokens": 4096 + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "github", + "modelId": "phi-4-mini-reasoning", + "apiModelId": "microsoft/phi-4-mini-reasoning", + "modelVariant": "reasoning", + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "github", + "modelId": "phi-4-multimodal-instruct", + "apiModelId": "microsoft/phi-4-multimodal-instruct", + "limits": { + "contextWindow": 128000, + "maxOutputTokens": 4096 + }, + "inputModalities": ["audio", "image", "text"], + "outputModalities": ["text"] + }, + { + "providerId": "github", + 
"modelId": "text-embedding-3-large", + "apiModelId": "openai/text-embedding-3-large", + "limits": { + "contextWindow": 8191 + }, + "endpointTypes": ["openai-embeddings"], + "inputModalities": ["text"], + "outputModalities": ["vector"] + }, + { + "providerId": "github", + "modelId": "text-embedding-3-small", + "apiModelId": "openai/text-embedding-3-small", + "limits": { + "contextWindow": 8191 + }, + "endpointTypes": ["openai-embeddings"], + "inputModalities": ["text"], + "outputModalities": ["vector"] + }, + { + "providerId": "huggingface", + "modelId": "apertus-instruct-2509", + "apiModelId": "swiss-ai/Apertus-70B-Instruct-2509", + "modelVariant": "70b" + }, + { + "providerId": "huggingface", + "modelId": "apertus-instruct-2509", + "apiModelId": "swiss-ai/Apertus-8B-Instruct-2509", + "modelVariant": "8b" + }, + { + "providerId": "huggingface", + "modelId": "apriel-1-6-thinker", + "apiModelId": "ServiceNow-AI/Apriel-1.6-15b-Thinker", + "modelVariant": "15b" + }, + { + "providerId": "huggingface", + "modelId": "arch-router", + "apiModelId": "katanemo/Arch-Router-1.5B", + "modelVariant": "5b" + }, + { + "providerId": "huggingface", + "modelId": "autoglm-phone-multilingual", + "apiModelId": "zai-org/AutoGLM-Phone-9B-Multilingual", + "modelVariant": "9b" + }, + { + "providerId": "huggingface", + "modelId": "aya-expanse", + "apiModelId": "CohereLabs/aya-expanse-32b", + "modelVariant": "32b" + }, + { + "providerId": "huggingface", + "modelId": "aya-vision", + "apiModelId": "CohereLabs/aya-vision-32b", + "modelVariant": "32b" + }, + { + "providerId": "huggingface", + "modelId": "c4ai-command-a-03-2025", + "apiModelId": "CohereLabs/c4ai-command-a-03-2025" + }, + { + "providerId": "huggingface", + "modelId": "c4ai-command-r-08-2024", + "apiModelId": "CohereLabs/c4ai-command-r-08-2024" + }, + { + "providerId": "huggingface", + "modelId": "c4ai-command-r7b-12-2024", + "apiModelId": "CohereLabs/c4ai-command-r7b-12-2024" + }, + { + "providerId": "huggingface", + "modelId": 
"c4ai-command-r7b-arabic-02-2025", + "apiModelId": "CohereLabs/c4ai-command-r7b-arabic-02-2025" + }, + { + "providerId": "huggingface", + "modelId": "cogito-v2-1", + "apiModelId": "deepcogito/cogito-671b-v2.1", + "modelVariant": "671b" + }, + { + "providerId": "huggingface", + "modelId": "cogito-v2-1-fp8", + "apiModelId": "deepcogito/cogito-671b-v2.1-FP8", + "modelVariant": "671b" + }, + { + "providerId": "huggingface", + "modelId": "command-a-reasoning-08-2025", + "apiModelId": "CohereLabs/command-a-reasoning-08-2025" + }, + { + "providerId": "huggingface", + "modelId": "command-a-translate-08-2025", + "apiModelId": "CohereLabs/command-a-translate-08-2025" + }, + { + "providerId": "huggingface", + "modelId": "command-a-vision-07-2025", + "apiModelId": "CohereLabs/command-a-vision-07-2025" + }, + { + "providerId": "huggingface", + "modelId": "deepseek-prover-v2", + "apiModelId": "deepseek-ai/DeepSeek-Prover-V2-671B", + "modelVariant": "671b" + }, + { + "providerId": "huggingface", + "modelId": "deepseek-r1", + "apiModelId": "deepseek-ai/DeepSeek-R1" + }, + { + "providerId": "huggingface", + "modelId": "deepseek-r1-0528", + "apiModelId": "deepseek-ai/DeepSeek-R1-0528" + }, + { + "providerId": "huggingface", + "modelId": "deepseek-r1-distill-llama", + "apiModelId": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", + "modelVariant": "70b" + }, + { + "providerId": "huggingface", + "modelId": "deepseek-r1-distill-llama", + "apiModelId": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", + "modelVariant": "8b" + }, + { + "providerId": "huggingface", + "modelId": "deepseek-r1-distill-qwen", + "apiModelId": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", + "modelVariant": "32b" + }, + { + "providerId": "huggingface", + "modelId": "deepseek-r1-distill-qwen", + "apiModelId": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", + "modelVariant": "5b" + }, + { + "providerId": "huggingface", + "modelId": "deepseek-r1-distill-qwen", + "apiModelId": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", + 
"modelVariant": "7b" + }, + { + "providerId": "huggingface", + "modelId": "deepseek-v3", + "apiModelId": "deepseek-ai/DeepSeek-V3" + }, + { + "providerId": "huggingface", + "modelId": "deepseek-v3-0324", + "apiModelId": "deepseek-ai/DeepSeek-V3-0324" + }, + { + "providerId": "huggingface", + "modelId": "deepseek-v3-1", + "apiModelId": "deepseek-ai/DeepSeek-V3.1" + }, + { + "providerId": "huggingface", + "modelId": "deepseek-v3-1-terminus", + "apiModelId": "deepseek-ai/DeepSeek-V3.1-Terminus" + }, + { + "providerId": "huggingface", + "modelId": "deepseek-v3-2", + "apiModelId": "deepseek-ai/DeepSeek-V3.2" + }, + { + "providerId": "huggingface", + "modelId": "deepseek-v3-2-exp", + "apiModelId": "deepseek-ai/DeepSeek-V3.2-Exp" + }, + { + "providerId": "huggingface", + "modelId": "dictalm-3-0", + "apiModelId": "dicta-il/DictaLM-3.0-24B-Thinking", + "modelVariant": "24b-thinking" + }, + { + "providerId": "huggingface", + "modelId": "ernie-4-5-a3b-pt", + "apiModelId": "baidu/ERNIE-4.5-21B-A3B-PT", + "modelVariant": "21b" + }, + { + "providerId": "huggingface", + "modelId": "ernie-4-5-a47b-base-pt", + "apiModelId": "baidu/ERNIE-4.5-300B-A47B-Base-PT", + "modelVariant": "300b" + }, + { + "providerId": "huggingface", + "modelId": "ernie-4-5-vl-a3b-pt", + "apiModelId": "baidu/ERNIE-4.5-VL-28B-A3B-PT", + "modelVariant": "28b" + }, + { + "providerId": "huggingface", + "modelId": "ernie-4-5-vl-a47b-base-pt", + "apiModelId": "baidu/ERNIE-4.5-VL-424B-A47B-Base-PT", + "modelVariant": "424b" + }, + { + "providerId": "huggingface", + "modelId": "eurollm-instruct-2512", + "apiModelId": "utter-project/EuroLLM-22B-Instruct-2512", + "modelVariant": "22b" + }, + { + "providerId": "huggingface", + "modelId": "gemma-3-it", + "apiModelId": "google/gemma-3-27b-it", + "modelVariant": "27b" + }, + { + "providerId": "huggingface", + "modelId": "gemma-3n-e4b-it", + "apiModelId": "google/gemma-3n-E4B-it" + }, + { + "providerId": "huggingface", + "modelId": "gemma-sea-lion-v4-it", + "apiModelId": 
"aisingapore/Gemma-SEA-LION-v4-27B-IT", + "modelVariant": "27b" + }, + { + "providerId": "huggingface", + "modelId": "glm-4-0414", + "apiModelId": "zai-org/GLM-4-32B-0414", + "modelVariant": "32b" + }, + { + "providerId": "huggingface", + "modelId": "glm-4-5", + "apiModelId": "zai-org/GLM-4.5" + }, + { + "providerId": "huggingface", + "modelId": "glm-4-5-air", + "apiModelId": "zai-org/GLM-4.5-Air" + }, + { + "providerId": "huggingface", + "modelId": "glm-4-5-air-fp8", + "apiModelId": "zai-org/GLM-4.5-Air-FP8" + }, + { + "providerId": "huggingface", + "modelId": "glm-4-5v", + "apiModelId": "zai-org/GLM-4.5V" + }, + { + "providerId": "huggingface", + "modelId": "glm-4-5v-fp8", + "apiModelId": "zai-org/GLM-4.5V-FP8" + }, + { + "providerId": "huggingface", + "modelId": "glm-4-6", + "apiModelId": "zai-org/GLM-4.6" + }, + { + "providerId": "huggingface", + "modelId": "glm-4-6-fp8", + "apiModelId": "zai-org/GLM-4.6-FP8" + }, + { + "providerId": "huggingface", + "modelId": "glm-4-6v", + "apiModelId": "zai-org/GLM-4.6V" + }, + { + "providerId": "huggingface", + "modelId": "glm-4-6v-flash", + "apiModelId": "zai-org/GLM-4.6V-Flash" + }, + { + "providerId": "huggingface", + "modelId": "glm-4-6v-fp8", + "apiModelId": "zai-org/GLM-4.6V-FP8" + }, + { + "providerId": "huggingface", + "modelId": "glm-4-7", + "apiModelId": "zai-org/GLM-4.7" + }, + { + "providerId": "huggingface", + "modelId": "glm-4-7-flash", + "apiModelId": "zai-org/GLM-4.7-Flash" + }, + { + "providerId": "huggingface", + "modelId": "glm-4-7-fp8", + "apiModelId": "zai-org/GLM-4.7-FP8" + }, + { + "providerId": "huggingface", + "modelId": "glm-5", + "apiModelId": "zai-org/GLM-5" + }, + { + "providerId": "huggingface", + "modelId": "gpt-oss", + "apiModelId": "openai/gpt-oss-120b", + "modelVariant": "120b" + }, + { + "providerId": "huggingface", + "modelId": "gpt-oss", + "apiModelId": "openai/gpt-oss-20b", + "modelVariant": "20b" + }, + { + "providerId": "huggingface", + "modelId": "gpt-oss-safeguard", + "apiModelId": 
"openai/gpt-oss-safeguard-20b", + "modelVariant": "20b" + }, + { + "providerId": "huggingface", + "modelId": "hermes-2-pro-llama-3", + "apiModelId": "NousResearch/Hermes-2-Pro-Llama-3-8B", + "modelVariant": "8b" + }, + { + "providerId": "huggingface", + "modelId": "kimi-k2", + "apiModelId": "moonshotai/Kimi-K2-Thinking", + "modelVariant": "thinking" + }, + { + "providerId": "huggingface", + "modelId": "kimi-k2-5", + "apiModelId": "moonshotai/Kimi-K2.5" + }, + { + "providerId": "huggingface", + "modelId": "kimi-k2-instruct", + "apiModelId": "moonshotai/Kimi-K2-Instruct" + }, + { + "providerId": "huggingface", + "modelId": "kimi-k2-instruct-0905", + "apiModelId": "moonshotai/Kimi-K2-Instruct-0905" + }, + { + "providerId": "huggingface", + "modelId": "l3-euryale-v2-1", + "apiModelId": "Sao10K/L3-70B-Euryale-v2.1", + "modelVariant": "70b" + }, + { + "providerId": "huggingface", + "modelId": "l3-lunaris-v1", + "apiModelId": "Sao10K/L3-8B-Lunaris-v1", + "modelVariant": "8b" + }, + { + "providerId": "huggingface", + "modelId": "l3-stheno-v3-2", + "apiModelId": "Sao10K/L3-8B-Stheno-v3.2", + "modelVariant": "8b" + }, + { + "providerId": "huggingface", + "modelId": "llama-3-1-instruct", + "apiModelId": "meta-llama/Llama-3.1-70B-Instruct", + "modelVariant": "70b" + }, + { + "providerId": "huggingface", + "modelId": "llama-3-1-instruct", + "apiModelId": "meta-llama/Llama-3.1-8B-Instruct", + "modelVariant": "8b" + }, + { + "providerId": "huggingface", + "modelId": "llama-3-2-instruct", + "apiModelId": "meta-llama/Llama-3.2-1B-Instruct", + "modelVariant": "1b" + }, + { + "providerId": "huggingface", + "modelId": "llama-3-3-instruct", + "apiModelId": "meta-llama/Llama-3.3-70B-Instruct", + "modelVariant": "70b" + }, + { + "providerId": "huggingface", + "modelId": "llama-3-instruct", + "apiModelId": "meta-llama/Meta-Llama-3-70B-Instruct", + "modelVariant": "70b" + }, + { + "providerId": "huggingface", + "modelId": "llama-3-instruct", + "apiModelId": 
"meta-llama/Meta-Llama-3-8B-Instruct", + "modelVariant": "8b" + }, + { + "providerId": "huggingface", + "modelId": "llama-4-maverick-128e-instruct", + "apiModelId": "meta-llama/Llama-4-Maverick-17B-128E-Instruct", + "modelVariant": "17b" + }, + { + "providerId": "huggingface", + "modelId": "llama-4-maverick-128e-instruct-fp8", + "apiModelId": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", + "modelVariant": "17b" + }, + { + "providerId": "huggingface", + "modelId": "llama-4-scout-16e-instruct", + "apiModelId": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "modelVariant": "17b" + }, + { + "providerId": "huggingface", + "modelId": "llama-guard-4", + "apiModelId": "meta-llama/Llama-Guard-4-12B", + "modelVariant": "12b" + }, + { + "providerId": "huggingface", + "modelId": "mimo-v2-flash", + "apiModelId": "XiaomiMiMo/MiMo-V2-Flash" + }, + { + "providerId": "huggingface", + "modelId": "minimax-m1-80k", + "apiModelId": "MiniMaxAI/MiniMax-M1-80k" + }, + { + "providerId": "huggingface", + "modelId": "minimax-m2", + "apiModelId": "MiniMaxAI/MiniMax-M2" + }, + { + "providerId": "huggingface", + "modelId": "minimax-m2-1", + "apiModelId": "MiniMaxAI/MiniMax-M2.1" + }, + { + "providerId": "huggingface", + "modelId": "minimax-m2-5", + "apiModelId": "MiniMaxAI/MiniMax-M2.5" + }, + { + "providerId": "huggingface", + "modelId": "molmo2", + "apiModelId": "allenai/Molmo2-8B", + "modelVariant": "8b" + }, + { + "providerId": "huggingface", + "modelId": "olmo-3-1-instruct", + "apiModelId": "allenai/Olmo-3.1-32B-Instruct", + "modelVariant": "32b" + }, + { + "providerId": "huggingface", + "modelId": "olmo-3-1-think", + "apiModelId": "allenai/Olmo-3.1-32B-Think", + "modelVariant": "32b-think" + }, + { + "providerId": "huggingface", + "modelId": "olmo-3-instruct", + "apiModelId": "allenai/Olmo-3-7B-Instruct", + "modelVariant": "7b" + }, + { + "providerId": "huggingface", + "modelId": "qwen-sea-lion-v4-it", + "apiModelId": "aisingapore/Qwen-SEA-LION-v4-32B-IT", + "modelVariant": "32b" + 
}, + { + "providerId": "huggingface", + "modelId": "qwen2-5-coder-instruct", + "apiModelId": "Qwen/Qwen2.5-Coder-32B-Instruct", + "modelVariant": "32b" + }, + { + "providerId": "huggingface", + "modelId": "qwen2-5-coder-instruct", + "apiModelId": "Qwen/Qwen2.5-Coder-3B-Instruct", + "modelVariant": "3b" + }, + { + "providerId": "huggingface", + "modelId": "qwen2-5-coder-instruct", + "apiModelId": "Qwen/Qwen2.5-Coder-7B-Instruct", + "modelVariant": "7b" + }, + { + "providerId": "huggingface", + "modelId": "qwen2-5-instruct", + "apiModelId": "Qwen/Qwen2.5-72B-Instruct", + "modelVariant": "72b" + }, + { + "providerId": "huggingface", + "modelId": "qwen2-5-instruct", + "apiModelId": "Qwen/Qwen2.5-7B-Instruct", + "modelVariant": "7b" + }, + { + "providerId": "huggingface", + "modelId": "qwen2-5-vl-instruct", + "apiModelId": "Qwen/Qwen2.5-VL-72B-Instruct", + "modelVariant": "72b" + }, + { + "providerId": "huggingface", + "modelId": "qwen2-5-vl-instruct", + "apiModelId": "Qwen/Qwen2.5-VL-7B-Instruct", + "modelVariant": "7b" + }, + { + "providerId": "huggingface", + "modelId": "qwen3", + "apiModelId": "Qwen/Qwen3-14B", + "modelVariant": "14b" + }, + { + "providerId": "huggingface", + "modelId": "qwen3", + "apiModelId": "Qwen/Qwen3-32B", + "modelVariant": "32b" + }, + { + "providerId": "huggingface", + "modelId": "qwen3", + "apiModelId": "Qwen/Qwen3-8B", + "modelVariant": "8b" + }, + { + "providerId": "huggingface", + "modelId": "qwen3-5", + "apiModelId": "Qwen/Qwen3.5-27B", + "modelVariant": "27b" + }, + { + "providerId": "huggingface", + "modelId": "qwen3-5-a10b", + "apiModelId": "Qwen/Qwen3.5-122B-A10B", + "modelVariant": "122b" + }, + { + "providerId": "huggingface", + "modelId": "qwen3-5-a17b", + "apiModelId": "Qwen/Qwen3.5-397B-A17B", + "modelVariant": "397b" + }, + { + "providerId": "huggingface", + "modelId": "qwen3-5-a3b", + "apiModelId": "Qwen/Qwen3.5-35B-A3B", + "modelVariant": "35b" + }, + { + "providerId": "huggingface", + "modelId": "qwen3-a22b", + 
"apiModelId": "Qwen/Qwen3-235B-A22B", + "modelVariant": "235b" + }, + { + "providerId": "huggingface", + "modelId": "qwen3-a22b-instruct-2507", + "apiModelId": "Qwen/Qwen3-235B-A22B-Instruct-2507", + "modelVariant": "235b" + }, + { + "providerId": "huggingface", + "modelId": "qwen3-a22b-thinking-2507", + "apiModelId": "Qwen/Qwen3-235B-A22B-Thinking-2507", + "modelVariant": "235b" + }, + { + "providerId": "huggingface", + "modelId": "qwen3-a3b", + "apiModelId": "Qwen/Qwen3-30B-A3B", + "modelVariant": "30b" + }, + { + "providerId": "huggingface", + "modelId": "qwen3-coder-a35b-instruct", + "apiModelId": "Qwen/Qwen3-Coder-480B-A35B-Instruct", + "modelVariant": "480b" + }, + { + "providerId": "huggingface", + "modelId": "qwen3-coder-a35b-instruct-fp8", + "apiModelId": "Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8", + "modelVariant": "480b" + }, + { + "providerId": "huggingface", + "modelId": "qwen3-coder-a3b-instruct", + "apiModelId": "Qwen/Qwen3-Coder-30B-A3B-Instruct", + "modelVariant": "30b" + }, + { + "providerId": "huggingface", + "modelId": "qwen3-coder-next", + "apiModelId": "Qwen/Qwen3-Coder-Next" + }, + { + "providerId": "huggingface", + "modelId": "qwen3-coder-next-fp8", + "apiModelId": "Qwen/Qwen3-Coder-Next-FP8" + }, + { + "providerId": "huggingface", + "modelId": "qwen3-instruct-2507", + "apiModelId": "Qwen/Qwen3-4B-Instruct-2507", + "modelVariant": "4b" + }, + { + "providerId": "huggingface", + "modelId": "qwen3-next-a3b", + "apiModelId": "Qwen/Qwen3-Next-80B-A3B-Thinking", + "modelVariant": "80b-thinking" + }, + { + "providerId": "huggingface", + "modelId": "qwen3-next-a3b-instruct", + "apiModelId": "Qwen/Qwen3-Next-80B-A3B-Instruct", + "modelVariant": "80b" + }, + { + "providerId": "huggingface", + "modelId": "qwen3-thinking-2507", + "apiModelId": "Qwen/Qwen3-4B-Thinking-2507", + "modelVariant": "4b" + }, + { + "providerId": "huggingface", + "modelId": "qwen3-vl-a22b", + "apiModelId": "Qwen/Qwen3-VL-235B-A22B-Thinking", + "modelVariant": "235b-thinking" + }, 
+ { + "providerId": "huggingface", + "modelId": "qwen3-vl-a22b-instruct", + "apiModelId": "Qwen/Qwen3-VL-235B-A22B-Instruct", + "modelVariant": "235b" + }, + { + "providerId": "huggingface", + "modelId": "qwen3-vl-a3b", + "apiModelId": "Qwen/Qwen3-VL-30B-A3B-Thinking", + "modelVariant": "30b-thinking" + }, + { + "providerId": "huggingface", + "modelId": "qwen3-vl-a3b-instruct", + "apiModelId": "Qwen/Qwen3-VL-30B-A3B-Instruct", + "modelVariant": "30b" + }, + { + "providerId": "huggingface", + "modelId": "qwen3-vl-instruct", + "apiModelId": "Qwen/Qwen3-VL-8B-Instruct", + "modelVariant": "8b" + }, + { + "providerId": "huggingface", + "modelId": "rnj-1-instruct", + "apiModelId": "EssentialAI/rnj-1-instruct" + }, + { + "providerId": "huggingface", + "modelId": "tiny-aya-earth", + "apiModelId": "CohereLabs/tiny-aya-earth" + }, + { + "providerId": "huggingface", + "modelId": "tiny-aya-fire", + "apiModelId": "CohereLabs/tiny-aya-fire" + }, + { + "providerId": "huggingface", + "modelId": "tiny-aya-global", + "apiModelId": "CohereLabs/tiny-aya-global" + }, + { + "providerId": "huggingface", + "modelId": "tiny-aya-water", + "apiModelId": "CohereLabs/tiny-aya-water" + }, + { + "providerId": "huggingface", + "modelId": "trinity-mini", + "apiModelId": "arcee-ai/Trinity-Mini" + }, + { + "providerId": "huggingface", + "modelId": "wizardlm-2-8x22b", + "apiModelId": "alpindale/WizardLM-2-8x22B" + }, + { + "providerId": "jina", + "modelId": "jina-clip-v1", + "apiModelId": "jina-ai/jina-clip-v1", + "limits": { + "contextWindow": 8192, + "maxOutputTokens": 768 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.049999999999999996 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + } + }, + { + "providerId": "jina", + "modelId": "jina-clip-v2", + "apiModelId": "jina-ai/jina-clip-v2", + "limits": { + "contextWindow": 8192, + "maxOutputTokens": 1024 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.049999999999999996 
+ }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + } + }, + { + "providerId": "jina", + "modelId": "jina-code-embeddings", + "apiModelId": "jina-ai/jina-code-embeddings-0.5b", + "modelVariant": "5b" + }, + { + "providerId": "jina", + "modelId": "jina-code-embeddings", + "apiModelId": "jina-ai/jina-code-embeddings-1.5b", + "modelVariant": "5b", + "limits": { + "maxOutputTokens": 1536 + } + }, + { + "providerId": "jina", + "modelId": "jina-colbert-v1-en", + "apiModelId": "jina-ai/jina-colbert-v1-en", + "limits": { + "contextWindow": 8192, + "maxOutputTokens": 128 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.049999999999999996 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + } + }, + { + "providerId": "jina", + "modelId": "jina-colbert-v2", + "apiModelId": "jina-ai/jina-colbert-v2", + "limits": { + "contextWindow": 8192, + "maxOutputTokens": 128 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.049999999999999996 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + } + }, + { + "providerId": "jina", + "modelId": "jina-embedding-b-en-v1", + "apiModelId": "jina-ai/jina-embedding-b-en-v1" + }, + { + "providerId": "jina", + "modelId": "jina-embeddings-v2-base-code", + "apiModelId": "jina-ai/jina-embeddings-v2-base-code", + "limits": { + "contextWindow": 8192, + "maxOutputTokens": 768 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.049999999999999996 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + } + }, + { + "providerId": "jina", + "modelId": "jina-embeddings-v2-base-de", + "apiModelId": "jina-ai/jina-embeddings-v2-base-de", + "limits": { + "contextWindow": 8192, + "maxOutputTokens": 768 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.049999999999999996 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + } + }, + { + "providerId": "jina", + "modelId": 
"jina-embeddings-v2-base-en", + "apiModelId": "jina-ai/jina-embeddings-v2-base-en", + "limits": { + "contextWindow": 8192, + "maxOutputTokens": 768 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.049999999999999996 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + } + }, + { + "providerId": "jina", + "modelId": "jina-embeddings-v2-base-es", + "apiModelId": "jina-ai/jina-embeddings-v2-base-es", + "limits": { + "contextWindow": 8192, + "maxOutputTokens": 768 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.049999999999999996 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + } + }, + { + "providerId": "jina", + "modelId": "jina-embeddings-v2-base-zh", + "apiModelId": "jina-ai/jina-embeddings-v2-base-zh", + "limits": { + "contextWindow": 8192, + "maxOutputTokens": 768 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.049999999999999996 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + } + }, + { + "providerId": "jina", + "modelId": "jina-embeddings-v3", + "apiModelId": "jina-ai/jina-embeddings-v3", + "limits": { + "contextWindow": 8192, + "maxOutputTokens": 1024 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.049999999999999996 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + } + }, + { + "providerId": "jina", + "modelId": "jina-embeddings-v4", + "apiModelId": "jina-ai/jina-embeddings-v4", + "limits": { + "contextWindow": 32768, + "maxOutputTokens": 2048 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.049999999999999996 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + } + }, + { + "providerId": "jina", + "modelId": "jina-embeddings-v5-text-nano", + "apiModelId": "jina-ai/jina-embeddings-v5-text-nano", + "limits": { + "contextWindow": 8192, + "maxOutputTokens": 768 + }, + "pricing": { + "input": { + "currency": "USD", + 
"perMillionTokens": 0.02 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + } + }, + { + "providerId": "jina", + "modelId": "jina-embeddings-v5-text-small", + "apiModelId": "jina-ai/jina-embeddings-v5-text-small", + "limits": { + "contextWindow": 32768, + "maxOutputTokens": 1024 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.049999999999999996 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + } + }, + { + "providerId": "jina", + "modelId": "jina-reranker-m0", + "apiModelId": "jina-ai/jina-reranker-m0", + "limits": { + "contextWindow": 10240 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.049999999999999996 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "endpointTypes": ["jina-rerank"] + }, + { + "providerId": "jina", + "modelId": "jina-reranker-v1-base-en", + "apiModelId": "jina-ai/jina-reranker-v1-base-en", + "limits": { + "contextWindow": 8192 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.049999999999999996 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "endpointTypes": ["jina-rerank"] + }, + { + "providerId": "jina", + "modelId": "jina-reranker-v1-tiny-en", + "apiModelId": "jina-ai/jina-reranker-v1-tiny-en", + "limits": { + "contextWindow": 8192 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.049999999999999996 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "endpointTypes": ["jina-rerank"] + }, + { + "providerId": "jina", + "modelId": "jina-reranker-v1-turbo-en", + "apiModelId": "jina-ai/jina-reranker-v1-turbo-en", + "limits": { + "contextWindow": 8192 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.049999999999999996 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "endpointTypes": ["jina-rerank"] + }, + { + "providerId": "jina", + "modelId": 
"jina-reranker-v2-base-multilingual", + "apiModelId": "jina-ai/jina-reranker-v2-base-multilingual", + "limits": { + "contextWindow": 1024 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.049999999999999996 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "endpointTypes": ["jina-rerank"] + }, + { + "providerId": "jina", + "modelId": "jina-reranker-v3", + "apiModelId": "jina-ai/jina-reranker-v3", + "limits": { + "contextWindow": 134144, + "maxOutputTokens": 256 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.049999999999999996 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "endpointTypes": ["jina-rerank"] + }, + { + "providerId": "jina", + "modelId": "jina-vlm", + "apiModelId": "jina-ai/jina-vlm" + }, + { + "providerId": "jina", + "modelId": "reader-lm", + "apiModelId": "jina-ai/reader-lm-0.5b", + "modelVariant": "5b" + }, + { + "providerId": "jina", + "modelId": "reader-lm", + "apiModelId": "jina-ai/reader-lm-1.5b", + "modelVariant": "5b" + }, + { + "providerId": "jina", + "modelId": "readerlm-v2", + "apiModelId": "jina-ai/ReaderLM-v2" + }, + { + "providerId": "lanyun", + "modelId": "baichuan-m2", + "apiModelId": "/maas/baichuan/Baichuan-M2-32B", + "modelVariant": "32b" + }, + { + "providerId": "lanyun", + "modelId": "deepseek-r1-0528" + }, + { + "providerId": "lanyun", + "modelId": "deepseek-v3-1", + "apiModelId": "deepseek-v3.1" + }, + { + "providerId": "lanyun", + "modelId": "deepseek-v3-2", + "apiModelId": "/maas/deepseek-ai/DeepSeek-V3.2" + }, + { + "providerId": "lanyun", + "modelId": "deepseek-v3-2-exp", + "apiModelId": "deepseek-v3.2-exp" + }, + { + "providerId": "lanyun", + "modelId": "gemma3", + "apiModelId": "/maas/google/Gemma3-27B", + "modelVariant": "27b" + }, + { + "providerId": "lanyun", + "modelId": "glm-4-7", + "apiModelId": "/maas/zhipuai/GLM-4.7" + }, + { + "providerId": "lanyun", + "modelId": "glm-5", + "apiModelId": 
"/maas/zhipuai/GLM-5" + }, + { + "providerId": "lanyun", + "modelId": "kimi-k2-thinking-251104" + }, + { + "providerId": "lanyun", + "modelId": "minimax-hailuo-02", + "apiModelId": "MiniMax-Hailuo-02" + }, + { + "providerId": "lanyun", + "modelId": "minimax-hailuo-2-3", + "apiModelId": "MiniMax-Hailuo-2.3" + }, + { + "providerId": "lanyun", + "modelId": "minimax-hailuo-2-3-fast", + "apiModelId": "MiniMax-Hailuo-2.3-Fast" + }, + { + "providerId": "lanyun", + "modelId": "minimax-m2-1", + "apiModelId": "MiniMax-M2.1" + }, + { + "providerId": "lanyun", + "modelId": "minimax-m2-5", + "apiModelId": "/maas/minimax/MiniMax-M2.5" + }, + { + "providerId": "lanyun", + "modelId": "qwen2-5-instruct", + "apiModelId": "/maas/qwen/Qwen2.5-72B-Instruct", + "modelVariant": "72b" + }, + { + "providerId": "lanyun", + "modelId": "qwen3", + "apiModelId": "qwen3-32b", + "modelVariant": "32b" + }, + { + "providerId": "lanyun", + "modelId": "qwen3-a22b", + "apiModelId": "/maas/qwen/Qwen3-235B-A22B", + "modelVariant": "235b" + }, + { + "providerId": "lanyun", + "modelId": "qwen3-vl-instruct", + "apiModelId": "/maas/qwen/Qwen3-VL-32B-Instruct", + "modelVariant": "32b" + }, + { + "providerId": "lanyun", + "modelId": "qwq", + "apiModelId": "/maas/qwen/QwQ-32B", + "modelVariant": "32b" + }, + { + "providerId": "lanyun", + "modelId": "speech_paraformer-large", + "apiModelId": "/maas/alibaba/speech_paraformer-large" + }, + { + "providerId": "lanyun", + "modelId": "step-3-5-flash", + "apiModelId": "/maas/jieyue/step-3.5-flash" + }, + { + "providerId": "modelscope", + "modelId": "c4ai-command-r-plus-08-2024", + "apiModelId": "LLM-Research/c4ai-command-r-plus-08-2024" + }, + { + "providerId": "modelscope", + "modelId": "compassjudger-1-instruct", + "apiModelId": "opencompass/CompassJudger-1-32B-Instruct", + "modelVariant": "32b" + }, + { + "providerId": "modelscope", + "modelId": "deepseek-r1-0528", + "apiModelId": "deepseek-ai/DeepSeek-R1-0528" + }, + { + "providerId": "modelscope", + "modelId": 
"deepseek-r1-distill-llama", + "apiModelId": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", + "modelVariant": "70b" + }, + { + "providerId": "modelscope", + "modelId": "deepseek-r1-distill-llama", + "apiModelId": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", + "modelVariant": "8b" + }, + { + "providerId": "modelscope", + "modelId": "deepseek-r1-distill-qwen", + "apiModelId": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", + "modelVariant": "14b" + }, + { + "providerId": "modelscope", + "modelId": "deepseek-r1-distill-qwen", + "apiModelId": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", + "modelVariant": "32b" + }, + { + "providerId": "modelscope", + "modelId": "deepseek-r1-distill-qwen", + "apiModelId": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", + "modelVariant": "5b" + }, + { + "providerId": "modelscope", + "modelId": "deepseek-r1-distill-qwen", + "apiModelId": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", + "modelVariant": "7b" + }, + { + "providerId": "modelscope", + "modelId": "deepseek-v3-2", + "apiModelId": "deepseek-ai/DeepSeek-V3.2" + }, + { + "providerId": "modelscope", + "modelId": "ernie-4-5-a3b-pt", + "apiModelId": "PaddlePaddle/ERNIE-4.5-21B-A3B-PT", + "modelVariant": "21b" + }, + { + "providerId": "modelscope", + "modelId": "ernie-4-5-a47b-pt", + "apiModelId": "PaddlePaddle/ERNIE-4.5-300B-A47B-PT", + "modelVariant": "300b" + }, + { + "providerId": "modelscope", + "modelId": "ernie-4-5-pt", + "apiModelId": "PaddlePaddle/ERNIE-4.5-0.3B-PT", + "modelVariant": "3b" + }, + { + "providerId": "modelscope", + "modelId": "ernie-4-5-vl-a3b-pt", + "apiModelId": "PaddlePaddle/ERNIE-4.5-VL-28B-A3B-PT", + "modelVariant": "28b" + }, + { + "providerId": "modelscope", + "modelId": "glm-4-7-flash", + "apiModelId": "ZhipuAI/GLM-4.7-Flash" + }, + { + "providerId": "modelscope", + "modelId": "glm-5", + "apiModelId": "ZhipuAI/GLM-5" + }, + { + "providerId": "modelscope", + "modelId": "intern-s1", + "apiModelId": "Shanghai_AI_Laboratory/Intern-S1" + }, + { + "providerId": "modelscope", 
+ "modelId": "intern-s1-mini", + "apiModelId": "Shanghai_AI_Laboratory/Intern-S1-mini" + }, + { + "providerId": "modelscope", + "modelId": "internvl3_5-a28b", + "apiModelId": "OpenGVLab/InternVL3_5-241B-A28B", + "modelVariant": "241b" + }, + { + "providerId": "modelscope", + "modelId": "kimi-k2-5", + "apiModelId": "moonshotai/Kimi-K2.5" + }, + { + "providerId": "modelscope", + "modelId": "llama-4-maverick-128e-instruct", + "apiModelId": "LLM-Research/Llama-4-Maverick-17B-128E-Instruct", + "modelVariant": "17b" + }, + { + "providerId": "modelscope", + "modelId": "longcat-flash-lite", + "apiModelId": "meituan-longcat/LongCat-Flash-Lite" + }, + { + "providerId": "modelscope", + "modelId": "mimo-v2-flash", + "apiModelId": "XiaomiMiMo/MiMo-V2-Flash" + }, + { + "providerId": "modelscope", + "modelId": "minimax-m1-80k", + "apiModelId": "MiniMax/MiniMax-M1-80k" + }, + { + "providerId": "modelscope", + "modelId": "minimax-m2-5", + "apiModelId": "MiniMax/MiniMax-M2.5" + }, + { + "providerId": "modelscope", + "modelId": "ministral-instruct-2410", + "apiModelId": "mistralai/Ministral-8B-Instruct-2410", + "modelVariant": "8b" + }, + { + "providerId": "modelscope", + "modelId": "mistral-large-instruct-2407", + "apiModelId": "mistralai/Mistral-Large-Instruct-2407" + }, + { + "providerId": "modelscope", + "modelId": "mistral-small-instruct-2409", + "apiModelId": "mistralai/Mistral-Small-Instruct-2409" + }, + { + "providerId": "modelscope", + "modelId": "qvq-preview", + "apiModelId": "Qwen/QVQ-72B-Preview", + "modelVariant": "72b" + }, + { + "providerId": "modelscope", + "modelId": "qwen-image-edit", + "apiModelId": "MusePublic/Qwen-Image-Edit" + }, + { + "providerId": "modelscope", + "modelId": "qwen-image-edit", + "apiModelId": "Qwen/Qwen-Image-Edit" + }, + { + "providerId": "modelscope", + "modelId": "qwen3", + "apiModelId": "Qwen/Qwen3-14B", + "modelVariant": "14b" + }, + { + "providerId": "modelscope", + "modelId": "qwen3", + "apiModelId": "Qwen/Qwen3-32B", + "modelVariant": 
"32b" + }, + { + "providerId": "modelscope", + "modelId": "qwen3", + "apiModelId": "Qwen/Qwen3-4B", + "modelVariant": "4b" + }, + { + "providerId": "modelscope", + "modelId": "qwen3", + "apiModelId": "Qwen/Qwen3-0.6B", + "modelVariant": "6b" + }, + { + "providerId": "modelscope", + "modelId": "qwen3", + "apiModelId": "Qwen/Qwen3-1.7B", + "modelVariant": "7b" + }, + { + "providerId": "modelscope", + "modelId": "qwen3", + "apiModelId": "Qwen/Qwen3-8B", + "modelVariant": "8b" + }, + { + "providerId": "modelscope", + "modelId": "qwen3-5", + "apiModelId": "Qwen/Qwen3.5-27B", + "modelVariant": "27b" + }, + { + "providerId": "modelscope", + "modelId": "qwen3-5-a10b", + "apiModelId": "Qwen/Qwen3.5-122B-A10B", + "modelVariant": "122b" + }, + { + "providerId": "modelscope", + "modelId": "qwen3-5-a17b", + "apiModelId": "Qwen/Qwen3.5-397B-A17B", + "modelVariant": "397b" + }, + { + "providerId": "modelscope", + "modelId": "qwen3-5-a3b", + "apiModelId": "Qwen/Qwen3.5-35B-A3B", + "modelVariant": "35b" + }, + { + "providerId": "modelscope", + "modelId": "qwen3-a22b", + "apiModelId": "Qwen/Qwen3-235B-A22B", + "modelVariant": "235b" + }, + { + "providerId": "modelscope", + "modelId": "qwen3-a22b-instruct-2507", + "apiModelId": "Qwen/Qwen3-235B-A22B-Instruct-2507", + "modelVariant": "235b" + }, + { + "providerId": "modelscope", + "modelId": "qwen3-a22b-thinking-2507", + "apiModelId": "Qwen/Qwen3-235B-A22B-Thinking-2507", + "modelVariant": "235b" + }, + { + "providerId": "modelscope", + "modelId": "qwen3-a3b", + "apiModelId": "Qwen/Qwen3-30B-A3B", + "modelVariant": "30b" + }, + { + "providerId": "modelscope", + "modelId": "qwen3-a3b-thinking-2507", + "apiModelId": "Qwen/Qwen3-30B-A3B-Thinking-2507", + "modelVariant": "30b" + }, + { + "providerId": "modelscope", + "modelId": "qwen3-coder-a35b-instruct", + "apiModelId": "Qwen/Qwen3-Coder-480B-A35B-Instruct", + "modelVariant": "480b" + }, + { + "providerId": "modelscope", + "modelId": "qwen3-coder-a3b-instruct", + "apiModelId": 
"Qwen/Qwen3-Coder-30B-A3B-Instruct", + "modelVariant": "30b" + }, + { + "providerId": "modelscope", + "modelId": "qwen3-next-a3b", + "apiModelId": "Qwen/Qwen3-Next-80B-A3B-Thinking", + "modelVariant": "80b-thinking" + }, + { + "providerId": "modelscope", + "modelId": "qwen3-next-a3b-instruct", + "apiModelId": "Qwen/Qwen3-Next-80B-A3B-Instruct", + "modelVariant": "80b" + }, + { + "providerId": "modelscope", + "modelId": "qwen3-vl", + "apiModelId": "Qwen/Qwen3-VL-8B-Thinking", + "modelVariant": "8b-thinking" + }, + { + "providerId": "modelscope", + "modelId": "qwen3-vl-a22b-instruct", + "apiModelId": "Qwen/Qwen3-VL-235B-A22B-Instruct", + "modelVariant": "235b" + }, + { + "providerId": "modelscope", + "modelId": "qwen3-vl-instruct", + "apiModelId": "Qwen/Qwen3-VL-8B-Instruct", + "modelVariant": "8b" + }, + { + "providerId": "modelscope", + "modelId": "qwq", + "apiModelId": "Qwen/QwQ-32B", + "modelVariant": "32b" + }, + { + "providerId": "modelscope", + "modelId": "qwq-preview", + "apiModelId": "Qwen/QwQ-32B-Preview", + "modelVariant": "32b" + }, + { + "providerId": "modelscope", + "modelId": "step-3-5-flash", + "apiModelId": "stepfun-ai/Step-3.5-Flash" + }, + { + "providerId": "modelscope", + "modelId": "xiyansql-qwencoder-2412", + "apiModelId": "XGenerationLab/XiYanSQL-QwenCoder-32B-2412", + "modelVariant": "32b" + }, + { + "providerId": "modelscope", + "modelId": "xiyansql-qwencoder-2504", + "apiModelId": "XGenerationLab/XiYanSQL-QwenCoder-32B-2504", + "modelVariant": "32b" + }, + { + "providerId": "nvidia", + "modelId": "arctic-embed-l", + "apiModelId": "snowflake/arctic-embed-l" + }, + { + "providerId": "nvidia", + "modelId": "baichuan2-chat", + "apiModelId": "baichuan-inc/baichuan2-13b-chat", + "modelVariant": "13b" + }, + { + "providerId": "nvidia", + "modelId": "bge-m3", + "apiModelId": "baai/bge-m3" + }, + { + "providerId": "nvidia", + "modelId": "bielik-v2-3-instruct", + "apiModelId": "speakleash/bielik-11b-v2.3-instruct", + "modelVariant": "11b" + }, + { + 
"providerId": "nvidia", + "modelId": "bielik-v2-6-instruct", + "apiModelId": "speakleash/bielik-11b-v2.6-instruct", + "modelVariant": "11b" + }, + { + "providerId": "nvidia", + "modelId": "breeze-instruct", + "apiModelId": "mediatek/breeze-7b-instruct", + "modelVariant": "7b" + }, + { + "providerId": "nvidia", + "modelId": "chatglm3", + "apiModelId": "thudm/chatglm3-6b", + "modelVariant": "6b" + }, + { + "providerId": "nvidia", + "modelId": "codegemma", + "apiModelId": "google/codegemma-7b", + "modelVariant": "7b" + }, + { + "providerId": "nvidia", + "modelId": "codegemma-1-1", + "apiModelId": "google/codegemma-1.1-7b", + "modelVariant": "7b" + }, + { + "providerId": "nvidia", + "modelId": "codellama", + "apiModelId": "meta/codellama-70b", + "modelVariant": "70b" + }, + { + "providerId": "nvidia", + "modelId": "codestral-instruct-v0-1", + "apiModelId": "mistralai/codestral-22b-instruct-v0.1", + "modelVariant": "22b" + }, + { + "providerId": "nvidia", + "modelId": "colosseum_355b_instruct_16k", + "apiModelId": "igenius/colosseum_355b_instruct_16k" + }, + { + "providerId": "nvidia", + "modelId": "cosmos-reason2", + "apiModelId": "nvidia/cosmos-reason2-8b", + "modelVariant": "8b" + }, + { + "providerId": "nvidia", + "modelId": "dbrx-instruct", + "apiModelId": "databricks/dbrx-instruct" + }, + { + "providerId": "nvidia", + "modelId": "deepseek-coder-instruct", + "apiModelId": "deepseek-ai/deepseek-coder-6.7b-instruct", + "modelVariant": "7b" + }, + { + "providerId": "nvidia", + "modelId": "deepseek-r1-distill-llama", + "apiModelId": "deepseek-ai/deepseek-r1-distill-llama-8b", + "modelVariant": "8b" + }, + { + "providerId": "nvidia", + "modelId": "deepseek-r1-distill-qwen", + "apiModelId": "deepseek-ai/deepseek-r1-distill-qwen-14b", + "modelVariant": "14b" + }, + { + "providerId": "nvidia", + "modelId": "deepseek-r1-distill-qwen", + "apiModelId": "deepseek-ai/deepseek-r1-distill-qwen-32b", + "modelVariant": "32b" + }, + { + "providerId": "nvidia", + "modelId": 
"deepseek-r1-distill-qwen", + "apiModelId": "deepseek-ai/deepseek-r1-distill-qwen-7b", + "modelVariant": "7b" + }, + { + "providerId": "nvidia", + "modelId": "deepseek-v3-1", + "apiModelId": "deepseek-ai/deepseek-v3.1" + }, + { + "providerId": "nvidia", + "modelId": "deepseek-v3-1-terminus", + "apiModelId": "deepseek-ai/deepseek-v3.1-terminus" + }, + { + "providerId": "nvidia", + "modelId": "deepseek-v3-2", + "apiModelId": "deepseek-ai/deepseek-v3.2" + }, + { + "providerId": "nvidia", + "modelId": "deplot", + "apiModelId": "google/deplot" + }, + { + "providerId": "nvidia", + "modelId": "devstral-2-instruct-2512", + "apiModelId": "mistralai/devstral-2-123b-instruct-2512", + "modelVariant": "123b" + }, + { + "providerId": "nvidia", + "modelId": "dracarys-llama-3-1-instruct", + "apiModelId": "abacusai/dracarys-llama-3.1-70b-instruct", + "modelVariant": "70b" + }, + { + "providerId": "nvidia", + "modelId": "embed-qa-4", + "apiModelId": "nvidia/embed-qa-4" + }, + { + "providerId": "nvidia", + "modelId": "eurollm-instruct", + "apiModelId": "utter-project/eurollm-9b-instruct", + "modelVariant": "9b" + }, + { + "providerId": "nvidia", + "modelId": "falcon3-instruct", + "apiModelId": "tiiuae/falcon3-7b-instruct", + "modelVariant": "7b" + }, + { + "providerId": "nvidia", + "modelId": "fuyu", + "apiModelId": "adept/fuyu-8b", + "modelVariant": "8b" + }, + { + "providerId": "nvidia", + "modelId": "gemma", + "apiModelId": "google/gemma-2b", + "modelVariant": "2b" + }, + { + "providerId": "nvidia", + "modelId": "gemma", + "apiModelId": "google/gemma-7b", + "modelVariant": "7b" + }, + { + "providerId": "nvidia", + "modelId": "gemma-2-cpt-sahabatai-instruct", + "apiModelId": "gotocompany/gemma-2-9b-cpt-sahabatai-instruct", + "modelVariant": "9b" + }, + { + "providerId": "nvidia", + "modelId": "gemma-2-it", + "apiModelId": "google/gemma-2-27b-it", + "modelVariant": "27b" + }, + { + "providerId": "nvidia", + "modelId": "gemma-2-it", + "apiModelId": "google/gemma-2-2b-it", + 
"modelVariant": "2b" + }, + { + "providerId": "nvidia", + "modelId": "gemma-2-it", + "apiModelId": "google/gemma-2-9b-it", + "modelVariant": "9b" + }, + { + "providerId": "nvidia", + "modelId": "gemma-3-it", + "apiModelId": "google/gemma-3-12b-it", + "modelVariant": "12b" + }, + { + "providerId": "nvidia", + "modelId": "gemma-3-it", + "apiModelId": "google/gemma-3-1b-it", + "modelVariant": "1b" + }, + { + "providerId": "nvidia", + "modelId": "gemma-3-it", + "apiModelId": "google/gemma-3-27b-it", + "modelVariant": "27b" + }, + { + "providerId": "nvidia", + "modelId": "gemma-3-it", + "apiModelId": "google/gemma-3-4b-it", + "modelVariant": "4b" + }, + { + "providerId": "nvidia", + "modelId": "gemma-3n-e2b-it", + "apiModelId": "google/gemma-3n-e2b-it" + }, + { + "providerId": "nvidia", + "modelId": "gemma-3n-e4b-it", + "apiModelId": "google/gemma-3n-e4b-it" + }, + { + "providerId": "nvidia", + "modelId": "gliner-pii", + "apiModelId": "nvidia/gliner-pii" + }, + { + "providerId": "nvidia", + "modelId": "glm4-7", + "apiModelId": "z-ai/glm4.7" + }, + { + "providerId": "nvidia", + "modelId": "glm5", + "apiModelId": "z-ai/glm5" + }, + { + "providerId": "nvidia", + "modelId": "gpt-oss", + "apiModelId": "openai/gpt-oss-120b", + "modelVariant": "120b" + }, + { + "providerId": "nvidia", + "modelId": "gpt-oss", + "apiModelId": "openai/gpt-oss-120b", + "modelVariant": "120b" + }, + { + "providerId": "nvidia", + "modelId": "gpt-oss", + "apiModelId": "openai/gpt-oss-20b", + "modelVariant": "20b" + }, + { + "providerId": "nvidia", + "modelId": "gpt-oss", + "apiModelId": "openai/gpt-oss-20b", + "modelVariant": "20b" + }, + { + "providerId": "nvidia", + "modelId": "granite-3-0-a800m-instruct", + "apiModelId": "ibm/granite-3.0-3b-a800m-instruct", + "modelVariant": "3b" + }, + { + "providerId": "nvidia", + "modelId": "granite-3-0-instruct", + "apiModelId": "ibm/granite-3.0-8b-instruct", + "modelVariant": "8b" + }, + { + "providerId": "nvidia", + "modelId": "granite-3-3-instruct", + 
"apiModelId": "ibm/granite-3.3-8b-instruct", + "modelVariant": "8b" + }, + { + "providerId": "nvidia", + "modelId": "granite-code-instruct", + "apiModelId": "ibm/granite-34b-code-instruct", + "modelVariant": "34b" + }, + { + "providerId": "nvidia", + "modelId": "granite-code-instruct", + "apiModelId": "ibm/granite-8b-code-instruct", + "modelVariant": "8b" + }, + { + "providerId": "nvidia", + "modelId": "granite-guardian-3-0", + "apiModelId": "ibm/granite-guardian-3.0-8b", + "modelVariant": "8b" + }, + { + "providerId": "nvidia", + "modelId": "italia_10b_instruct_16k", + "apiModelId": "igenius/italia_10b_instruct_16k" + }, + { + "providerId": "nvidia", + "modelId": "jamba-1-5-large-instruct", + "apiModelId": "ai21labs/jamba-1.5-large-instruct" + }, + { + "providerId": "nvidia", + "modelId": "jamba-1-5-mini-instruct", + "apiModelId": "ai21labs/jamba-1.5-mini-instruct" + }, + { + "providerId": "nvidia", + "modelId": "kimi-k2", + "apiModelId": "moonshotai/kimi-k2-thinking", + "modelVariant": "thinking" + }, + { + "providerId": "nvidia", + "modelId": "kimi-k2-5", + "apiModelId": "moonshotai/kimi-k2.5" + }, + { + "providerId": "nvidia", + "modelId": "kimi-k2-instruct", + "apiModelId": "moonshotai/kimi-k2-instruct" + }, + { + "providerId": "nvidia", + "modelId": "kimi-k2-instruct-0905", + "apiModelId": "moonshotai/kimi-k2-instruct-0905" + }, + { + "providerId": "nvidia", + "modelId": "kosmos-2", + "apiModelId": "microsoft/kosmos-2" + }, + { + "providerId": "nvidia", + "modelId": "llama-3-1-instruct", + "apiModelId": "meta/llama-3.1-405b-instruct", + "modelVariant": "405b" + }, + { + "providerId": "nvidia", + "modelId": "llama-3-1-instruct", + "apiModelId": "meta/llama-3.1-70b-instruct", + "modelVariant": "70b" + }, + { + "providerId": "nvidia", + "modelId": "llama-3-1-instruct", + "apiModelId": "meta/llama-3.1-8b-instruct", + "modelVariant": "8b" + }, + { + "providerId": "nvidia", + "modelId": "llama-3-1-nemoguard-content-safety", + "apiModelId": 
"nvidia/llama-3.1-nemoguard-8b-content-safety", + "modelVariant": "8b" + }, + { + "providerId": "nvidia", + "modelId": "llama-3-1-nemoguard-topic-control", + "apiModelId": "nvidia/llama-3.1-nemoguard-8b-topic-control", + "modelVariant": "8b" + }, + { + "providerId": "nvidia", + "modelId": "llama-3-1-nemotron-instruct", + "apiModelId": "nvidia/llama-3.1-nemotron-51b-instruct", + "modelVariant": "51b" + }, + { + "providerId": "nvidia", + "modelId": "llama-3-1-nemotron-instruct", + "apiModelId": "nvidia/llama-3.1-nemotron-70b-instruct", + "modelVariant": "70b" + }, + { + "providerId": "nvidia", + "modelId": "llama-3-1-nemotron-nano-v1", + "apiModelId": "nvidia/llama-3.1-nemotron-nano-8b-v1", + "modelVariant": "8b" + }, + { + "providerId": "nvidia", + "modelId": "llama-3-1-nemotron-nano-v1-1", + "apiModelId": "nvidia/llama-3.1-nemotron-nano-4b-v1.1", + "modelVariant": "4b" + }, + { + "providerId": "nvidia", + "modelId": "llama-3-1-nemotron-nano-vl-v1", + "apiModelId": "nvidia/llama-3.1-nemotron-nano-vl-8b-v1", + "modelVariant": "8b" + }, + { + "providerId": "nvidia", + "modelId": "llama-3-1-nemotron-reward", + "apiModelId": "nvidia/llama-3.1-nemotron-70b-reward", + "modelVariant": "70b" + }, + { + "providerId": "nvidia", + "modelId": "llama-3-1-nemotron-safety-guard-v3", + "apiModelId": "nvidia/llama-3.1-nemotron-safety-guard-8b-v3", + "modelVariant": "8b" + }, + { + "providerId": "nvidia", + "modelId": "llama-3-1-nemotron-ultra-v1", + "apiModelId": "nvidia/llama-3.1-nemotron-ultra-253b-v1", + "modelVariant": "253b" + }, + { + "providerId": "nvidia", + "modelId": "llama-3-1-swallow-instruct-v0-1", + "apiModelId": "institute-of-science-tokyo/llama-3.1-swallow-70b-instruct-v0.1", + "modelVariant": "70b" + }, + { + "providerId": "nvidia", + "modelId": "llama-3-1-swallow-instruct-v0-1", + "apiModelId": "institute-of-science-tokyo/llama-3.1-swallow-8b-instruct-v0.1", + "modelVariant": "8b" + }, + { + "providerId": "nvidia", + "modelId": "llama-3-2-instruct", + "apiModelId": 
"meta/llama-3.2-1b-instruct", + "modelVariant": "1b" + }, + { + "providerId": "nvidia", + "modelId": "llama-3-2-instruct", + "apiModelId": "meta/llama-3.2-3b-instruct", + "modelVariant": "3b" + }, + { + "providerId": "nvidia", + "modelId": "llama-3-2-nemoretriever-300m-embed-v1", + "apiModelId": "nvidia/llama-3.2-nemoretriever-300m-embed-v1" + }, + { + "providerId": "nvidia", + "modelId": "llama-3-2-nemoretriever-vlm-embed-v1", + "apiModelId": "nvidia/llama-3.2-nemoretriever-1b-vlm-embed-v1", + "modelVariant": "1b" + }, + { + "providerId": "nvidia", + "modelId": "llama-3-2-nv-embedqa-v1", + "apiModelId": "nvidia/llama-3.2-nv-embedqa-1b-v1", + "modelVariant": "1b" + }, + { + "providerId": "nvidia", + "modelId": "llama-3-2-nv-embedqa-v2", + "apiModelId": "nvidia/llama-3.2-nv-embedqa-1b-v2", + "modelVariant": "1b" + }, + { + "providerId": "nvidia", + "modelId": "llama-3-2-vision-instruct", + "apiModelId": "meta/llama-3.2-11b-vision-instruct", + "modelVariant": "11b" + }, + { + "providerId": "nvidia", + "modelId": "llama-3-2-vision-instruct", + "apiModelId": "meta/llama-3.2-90b-vision-instruct", + "modelVariant": "90b" + }, + { + "providerId": "nvidia", + "modelId": "llama-3-3-instruct", + "apiModelId": "meta/llama-3.3-70b-instruct", + "modelVariant": "70b" + }, + { + "providerId": "nvidia", + "modelId": "llama-3-3-nemotron-super-v1", + "apiModelId": "nvidia/llama-3.3-nemotron-super-49b-v1", + "modelVariant": "49b" + }, + { + "providerId": "nvidia", + "modelId": "llama-3-3-nemotron-super-v1-5", + "apiModelId": "nvidia/llama-3.3-nemotron-super-49b-v1.5", + "modelVariant": "49b" + }, + { + "providerId": "nvidia", + "modelId": "llama-3-swallow-instruct-v0-1", + "apiModelId": "tokyotech-llm/llama-3-swallow-70b-instruct-v0.1", + "modelVariant": "70b" + }, + { + "providerId": "nvidia", + "modelId": "llama-3-taiwan-instruct", + "apiModelId": "yentinglin/llama-3-taiwan-70b-instruct", + "modelVariant": "70b" + }, + { + "providerId": "nvidia", + "modelId": 
"llama-4-maverick-128e-instruct", + "apiModelId": "meta/llama-4-maverick-17b-128e-instruct", + "modelVariant": "17b" + }, + { + "providerId": "nvidia", + "modelId": "llama-4-scout-16e-instruct", + "apiModelId": "meta/llama-4-scout-17b-16e-instruct", + "modelVariant": "17b" + }, + { + "providerId": "nvidia", + "modelId": "llama-guard-4", + "apiModelId": "meta/llama-guard-4-12b", + "modelVariant": "12b" + }, + { + "providerId": "nvidia", + "modelId": "llama-nemotron-embed-v2", + "apiModelId": "nvidia/llama-nemotron-embed-1b-v2", + "modelVariant": "1b" + }, + { + "providerId": "nvidia", + "modelId": "llama-nemotron-embed-vl-v2", + "apiModelId": "nvidia/llama-nemotron-embed-vl-1b-v2", + "modelVariant": "1b" + }, + { + "providerId": "nvidia", + "modelId": "llama2", + "apiModelId": "meta/llama2-70b", + "modelVariant": "70b" + }, + { + "providerId": "nvidia", + "modelId": "llama3-chatqa-1-5", + "apiModelId": "nvidia/llama3-chatqa-1.5-70b", + "modelVariant": "70b" + }, + { + "providerId": "nvidia", + "modelId": "llama3-chatqa-1-5", + "apiModelId": "nvidia/llama3-chatqa-1.5-8b", + "modelVariant": "8b" + }, + { + "providerId": "nvidia", + "modelId": "llama3-instruct", + "apiModelId": "meta/llama3-70b-instruct", + "modelVariant": "70b" + }, + { + "providerId": "nvidia", + "modelId": "llama3-instruct", + "apiModelId": "meta/llama3-8b-instruct", + "modelVariant": "8b" + }, + { + "providerId": "nvidia", + "modelId": "magistral-small-2506", + "apiModelId": "mistralai/magistral-small-2506" + }, + { + "providerId": "nvidia", + "modelId": "mamba-codestral-v0-1", + "apiModelId": "mistralai/mamba-codestral-7b-v0.1", + "modelVariant": "7b" + }, + { + "providerId": "nvidia", + "modelId": "marin-instruct", + "apiModelId": "marin/marin-8b-instruct", + "modelVariant": "8b" + }, + { + "providerId": "nvidia", + "modelId": "mathstral-v0-1", + "apiModelId": "mistralai/mathstral-7b-v0.1", + "modelVariant": "7b" + }, + { + "providerId": "nvidia", + "modelId": "minimax-m2-1", + "apiModelId": 
"minimaxai/minimax-m2.1" + }, + { + "providerId": "nvidia", + "modelId": "minimax-m2-5", + "apiModelId": "minimaxai/minimax-m2.5" + }, + { + "providerId": "nvidia", + "modelId": "ministral-instruct-2512", + "apiModelId": "mistralai/ministral-14b-instruct-2512", + "modelVariant": "14b" + }, + { + "providerId": "nvidia", + "modelId": "mistral-instruct-v0-2", + "apiModelId": "mistralai/mistral-7b-instruct-v0.2", + "modelVariant": "7b" + }, + { + "providerId": "nvidia", + "modelId": "mistral-instruct-v0-3", + "apiModelId": "mistralai/mistral-7b-instruct-v0.3", + "modelVariant": "7b" + }, + { + "providerId": "nvidia", + "modelId": "mistral-large", + "apiModelId": "mistralai/mistral-large" + }, + { + "providerId": "nvidia", + "modelId": "mistral-large-2-instruct", + "apiModelId": "mistralai/mistral-large-2-instruct" + }, + { + "providerId": "nvidia", + "modelId": "mistral-large-3-instruct-2512", + "apiModelId": "mistralai/mistral-large-3-675b-instruct-2512", + "modelVariant": "675b" + }, + { + "providerId": "nvidia", + "modelId": "mistral-medium-3-instruct", + "apiModelId": "mistralai/mistral-medium-3-instruct" + }, + { + "providerId": "nvidia", + "modelId": "mistral-nemo-instruct", + "apiModelId": "nv-mistralai/mistral-nemo-12b-instruct", + "modelVariant": "12b" + }, + { + "providerId": "nvidia", + "modelId": "mistral-nemo-minitron-8k-instruct", + "apiModelId": "nvidia/mistral-nemo-minitron-8b-8k-instruct", + "modelVariant": "8b" + }, + { + "providerId": "nvidia", + "modelId": "mistral-nemo-minitron-base", + "apiModelId": "nvidia/mistral-nemo-minitron-8b-base", + "modelVariant": "8b" + }, + { + "providerId": "nvidia", + "modelId": "mistral-nemotron", + "apiModelId": "mistralai/mistral-nemotron" + }, + { + "providerId": "nvidia", + "modelId": "mistral-small-3-1-instruct-2503", + "apiModelId": "mistralai/mistral-small-3.1-24b-instruct-2503", + "modelVariant": "24b" + }, + { + "providerId": "nvidia", + "modelId": "mistral-small-instruct", + "apiModelId": 
"mistralai/mistral-small-24b-instruct", + "modelVariant": "24b" + }, + { + "providerId": "nvidia", + "modelId": "mixtral-8x22b-instruct-v0-1", + "apiModelId": "mistralai/mixtral-8x22b-instruct-v0.1" + }, + { + "providerId": "nvidia", + "modelId": "mixtral-8x22b-v0-1", + "apiModelId": "mistralai/mixtral-8x22b-v0.1" + }, + { + "providerId": "nvidia", + "modelId": "mixtral-8x7b-instruct-v0-1", + "apiModelId": "mistralai/mixtral-8x7b-instruct-v0.1" + }, + { + "providerId": "nvidia", + "modelId": "nemoretriever-parse", + "apiModelId": "nvidia/nemoretriever-parse" + }, + { + "providerId": "nvidia", + "modelId": "nemotron-3-nano-a3b", + "apiModelId": "nvidia/nemotron-3-nano-30b-a3b", + "modelVariant": "30b" + }, + { + "providerId": "nvidia", + "modelId": "nemotron-4-instruct", + "apiModelId": "nvidia/nemotron-4-340b-instruct", + "modelVariant": "340b" + }, + { + "providerId": "nvidia", + "modelId": "nemotron-4-mini-hindi-instruct", + "apiModelId": "nvidia/nemotron-4-mini-hindi-4b-instruct", + "modelVariant": "4b" + }, + { + "providerId": "nvidia", + "modelId": "nemotron-4-reward", + "apiModelId": "nvidia/nemotron-4-340b-reward", + "modelVariant": "340b" + }, + { + "providerId": "nvidia", + "modelId": "nemotron-content-safety-reasoning", + "apiModelId": "nvidia/nemotron-content-safety-reasoning-4b", + "modelVariant": "4b" + }, + { + "providerId": "nvidia", + "modelId": "nemotron-mini-instruct", + "apiModelId": "nvidia/nemotron-mini-4b-instruct", + "modelVariant": "4b" + }, + { + "providerId": "nvidia", + "modelId": "nemotron-nano-3-a3b", + "apiModelId": "nvidia/nemotron-nano-3-30b-a3b", + "modelVariant": "30b" + }, + { + "providerId": "nvidia", + "modelId": "nemotron-nano-v2", + "apiModelId": "nvidia/nvidia-nemotron-nano-9b-v2", + "modelVariant": "9b" + }, + { + "providerId": "nvidia", + "modelId": "nemotron-nano-v2-vl", + "apiModelId": "nvidia/nemotron-nano-12b-v2-vl", + "modelVariant": "12b" + }, + { + "providerId": "nvidia", + "modelId": "nemotron-parse", + 
"apiModelId": "nvidia/nemotron-parse" + }, + { + "providerId": "nvidia", + "modelId": "neva", + "apiModelId": "nvidia/neva-22b", + "modelVariant": "22b" + }, + { + "providerId": "nvidia", + "modelId": "nv-embed-v1", + "apiModelId": "nvidia/nv-embed-v1" + }, + { + "providerId": "nvidia", + "modelId": "nv-embedcode-v1", + "apiModelId": "nvidia/nv-embedcode-7b-v1", + "modelVariant": "7b" + }, + { + "providerId": "nvidia", + "modelId": "nv-embedqa-e5-v5", + "apiModelId": "nvidia/nv-embedqa-e5-v5" + }, + { + "providerId": "nvidia", + "modelId": "nv-embedqa-mistral-v2", + "apiModelId": "nvidia/nv-embedqa-mistral-7b-v2", + "modelVariant": "7b" + }, + { + "providerId": "nvidia", + "modelId": "nvclip", + "apiModelId": "nvidia/nvclip" + }, + { + "providerId": "nvidia", + "modelId": "paligemma", + "apiModelId": "google/paligemma" + }, + { + "providerId": "nvidia", + "modelId": "palmyra-creative", + "apiModelId": "writer/palmyra-creative-122b", + "modelVariant": "122b" + }, + { + "providerId": "nvidia", + "modelId": "palmyra-fin-32k", + "apiModelId": "writer/palmyra-fin-70b-32k", + "modelVariant": "70b" + }, + { + "providerId": "nvidia", + "modelId": "palmyra-med", + "apiModelId": "writer/palmyra-med-70b", + "modelVariant": "70b" + }, + { + "providerId": "nvidia", + "modelId": "palmyra-med-32k", + "apiModelId": "writer/palmyra-med-70b-32k", + "modelVariant": "70b" + }, + { + "providerId": "nvidia", + "modelId": "phi-3-5-mini-instruct", + "apiModelId": "microsoft/phi-3.5-mini-instruct" + }, + { + "providerId": "nvidia", + "modelId": "phi-3-5-moe-instruct", + "apiModelId": "microsoft/phi-3.5-moe-instruct" + }, + { + "providerId": "nvidia", + "modelId": "phi-3-5-vision-instruct", + "apiModelId": "microsoft/phi-3.5-vision-instruct" + }, + { + "providerId": "nvidia", + "modelId": "phi-3-medium-128k-instruct", + "apiModelId": "microsoft/phi-3-medium-128k-instruct" + }, + { + "providerId": "nvidia", + "modelId": "phi-3-medium-4k-instruct", + "apiModelId": 
"microsoft/phi-3-medium-4k-instruct" + }, + { + "providerId": "nvidia", + "modelId": "phi-3-mini-128k-instruct", + "apiModelId": "microsoft/phi-3-mini-128k-instruct" + }, + { + "providerId": "nvidia", + "modelId": "phi-3-mini-4k-instruct", + "apiModelId": "microsoft/phi-3-mini-4k-instruct" + }, + { + "providerId": "nvidia", + "modelId": "phi-3-small-128k-instruct", + "apiModelId": "microsoft/phi-3-small-128k-instruct" + }, + { + "providerId": "nvidia", + "modelId": "phi-3-small-8k-instruct", + "apiModelId": "microsoft/phi-3-small-8k-instruct" + }, + { + "providerId": "nvidia", + "modelId": "phi-3-vision-128k-instruct", + "apiModelId": "microsoft/phi-3-vision-128k-instruct" + }, + { + "providerId": "nvidia", + "modelId": "phi-4-mini-flash", + "apiModelId": "microsoft/phi-4-mini-flash-reasoning", + "modelVariant": "reasoning" + }, + { + "providerId": "nvidia", + "modelId": "phi-4-mini-instruct", + "apiModelId": "microsoft/phi-4-mini-instruct" + }, + { + "providerId": "nvidia", + "modelId": "phi-4-multimodal-instruct", + "apiModelId": "microsoft/phi-4-multimodal-instruct" + }, + { + "providerId": "nvidia", + "modelId": "qwen2-5-coder-instruct", + "apiModelId": "qwen/qwen2.5-coder-32b-instruct", + "modelVariant": "32b" + }, + { + "providerId": "nvidia", + "modelId": "qwen2-5-coder-instruct", + "apiModelId": "qwen/qwen2.5-coder-7b-instruct", + "modelVariant": "7b" + }, + { + "providerId": "nvidia", + "modelId": "qwen2-5-instruct", + "apiModelId": "qwen/qwen2.5-7b-instruct", + "modelVariant": "7b" + }, + { + "providerId": "nvidia", + "modelId": "qwen2-instruct", + "apiModelId": "qwen/qwen2-7b-instruct", + "modelVariant": "7b" + }, + { + "providerId": "nvidia", + "modelId": "qwen3-5-a10b", + "apiModelId": "qwen/qwen3.5-122b-a10b", + "modelVariant": "122b" + }, + { + "providerId": "nvidia", + "modelId": "qwen3-5-a17b", + "apiModelId": "qwen/qwen3.5-397b-a17b", + "modelVariant": "397b" + }, + { + "providerId": "nvidia", + "modelId": "qwen3-coder-a35b-instruct", + 
"apiModelId": "qwen/qwen3-coder-480b-a35b-instruct", + "modelVariant": "480b" + }, + { + "providerId": "nvidia", + "modelId": "qwen3-next-a3b", + "apiModelId": "qwen/qwen3-next-80b-a3b-thinking", + "modelVariant": "80b-thinking" + }, + { + "providerId": "nvidia", + "modelId": "qwen3-next-a3b-instruct", + "apiModelId": "qwen/qwen3-next-80b-a3b-instruct", + "modelVariant": "80b" + }, + { + "providerId": "nvidia", + "modelId": "qwq", + "apiModelId": "qwen/qwq-32b", + "modelVariant": "32b" + }, + { + "providerId": "nvidia", + "modelId": "rakutenai-chat", + "apiModelId": "rakuten/rakutenai-7b-chat", + "modelVariant": "7b" + }, + { + "providerId": "nvidia", + "modelId": "rakutenai-instruct", + "apiModelId": "rakuten/rakutenai-7b-instruct", + "modelVariant": "7b" + }, + { + "providerId": "nvidia", + "modelId": "recurrentgemma", + "apiModelId": "google/recurrentgemma-2b", + "modelVariant": "2b" + }, + { + "providerId": "nvidia", + "modelId": "riva-translate-instruct", + "apiModelId": "nvidia/riva-translate-4b-instruct", + "modelVariant": "4b" + }, + { + "providerId": "nvidia", + "modelId": "riva-translate-instruct-v1-1", + "apiModelId": "nvidia/riva-translate-4b-instruct-v1.1", + "modelVariant": "4b" + }, + { + "providerId": "nvidia", + "modelId": "sarvam-m", + "apiModelId": "sarvamai/sarvam-m" + }, + { + "providerId": "nvidia", + "modelId": "sea-lion-instruct", + "apiModelId": "aisingapore/sea-lion-7b-instruct", + "modelVariant": "7b" + }, + { + "providerId": "nvidia", + "modelId": "seed-oss-instruct", + "apiModelId": "bytedance/seed-oss-36b-instruct", + "modelVariant": "36b" + }, + { + "providerId": "nvidia", + "modelId": "shieldgemma", + "apiModelId": "google/shieldgemma-9b", + "modelVariant": "9b" + }, + { + "providerId": "nvidia", + "modelId": "solar-instruct", + "apiModelId": "upstage/solar-10.7b-instruct", + "modelVariant": "7b" + }, + { + "providerId": "nvidia", + "modelId": "starcoder2", + "apiModelId": "bigcode/starcoder2-15b", + "modelVariant": "15b" + }, + { + 
"providerId": "nvidia", + "modelId": "starcoder2", + "apiModelId": "bigcode/starcoder2-7b", + "modelVariant": "7b" + }, + { + "providerId": "nvidia", + "modelId": "step-3-5-flash", + "apiModelId": "stepfun-ai/step-3.5-flash" + }, + { + "providerId": "nvidia", + "modelId": "stockmark-2-instruct", + "apiModelId": "stockmark/stockmark-2-100b-instruct", + "modelVariant": "100b" + }, + { + "providerId": "nvidia", + "modelId": "streampetr", + "apiModelId": "nvidia/streampetr" + }, + { + "providerId": "nvidia", + "modelId": "teuken-instruct-commercial-v0-4", + "apiModelId": "opengpt-x/teuken-7b-instruct-commercial-v0.4", + "modelVariant": "7b" + }, + { + "providerId": "nvidia", + "modelId": "usdcode-llama-3-1-instruct", + "apiModelId": "nvidia/usdcode-llama-3.1-70b-instruct", + "modelVariant": "70b" + }, + { + "providerId": "nvidia", + "modelId": "vila", + "apiModelId": "nvidia/vila" + }, + { + "providerId": "nvidia", + "modelId": "yi-large", + "apiModelId": "01-ai/yi-large" + }, + { + "providerId": "nvidia", + "modelId": "zamba2-instruct", + "apiModelId": "zyphra/zamba2-7b-instruct", + "modelVariant": "7b" + }, + { + "providerId": "openrouter", + "modelId": "aion-1-0", + "apiModelId": "aion-labs/aion-1.0", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "aion-1-0-mini", + "apiModelId": "aion-labs/aion-1.0-mini", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "aion-2-0", + "apiModelId": "aion-labs/aion-2.0", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "aion-rp-llama-3-1", + "apiModelId": "aion-labs/aion-rp-llama-3.1-8b", + "modelVariant": "8b", + "capabilities": { + "add": 
[], + "remove": [], + "force": [] + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "all-minilm-l12-v2", + "apiModelId": "sentence-transformers/all-minilm-l12-v2", + "inputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "all-minilm-l6-v2", + "apiModelId": "sentence-transformers/all-minilm-l6-v2", + "inputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "all-mpnet-base-v2", + "apiModelId": "sentence-transformers/all-mpnet-base-v2", + "inputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "auto", + "apiModelId": "openrouter/auto", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 2000000 + }, + "inputModalities": ["text", "image", "audio", "video"], + "outputModalities": ["text", "image"] + }, + { + "providerId": "openrouter", + "modelId": "bge-base-en-v1-5", + "apiModelId": "baai/bge-base-en-v1.5", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 512 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.005 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "inputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "bge-large-en-v1-5", + "apiModelId": "baai/bge-large-en-v1.5", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 512 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.01 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "inputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "bge-m3", + "apiModelId": "baai/bge-m3", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 8192 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.01 + }, + "output": { + 
"currency": "USD", + "perMillionTokens": 0 + } + }, + "inputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "bodybuilder", + "apiModelId": "openrouter/bodybuilder", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "claude-3-5-haiku", + "apiModelId": "anthropic/claude-3.5-haiku", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.7999999999999999 + }, + "output": { + "currency": "USD", + "perMillionTokens": 4 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.08 + } + }, + "inputModalities": ["text", "image"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "claude-3-5-sonnet", + "apiModelId": "anthropic/claude-3.5-sonnet", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 6 + }, + "output": { + "currency": "USD", + "perMillionTokens": 30 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.6 + } + }, + "inputModalities": ["text", "image"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "claude-3-7-sonnet", + "apiModelId": "anthropic/claude-3.7-sonnet", + "inputModalities": ["text", "image"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "claude-3-7-sonnet", + "apiModelId": "anthropic/claude-3.7-sonnet:thinking", + "modelVariant": "thinking", + "inputModalities": ["text", "image"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "claude-3-haiku", + "apiModelId": "anthropic/claude-3-haiku", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "inputModalities": ["text", "image"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "claude-haiku-4-5", + "apiModelId": "anthropic/claude-haiku-4.5", + "pricing": { + "input": { + "currency": "USD", + 
"perMillionTokens": 1 + }, + "output": { + "currency": "USD", + "perMillionTokens": 5 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.09999999999999999 + } + }, + "inputModalities": ["image", "text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "claude-opus-4", + "apiModelId": "anthropic/claude-opus-4", + "inputModalities": ["image", "text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "claude-opus-4-1", + "apiModelId": "anthropic/claude-opus-4.1", + "inputModalities": ["image", "text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "claude-opus-4-5", + "apiModelId": "anthropic/claude-opus-4.5", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 25 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.5 + } + }, + "inputModalities": ["image", "text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "claude-opus-4-6", + "apiModelId": "anthropic/claude-opus-4.6", + "limits": { + "contextWindow": 1000000 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 25 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.5 + } + }, + "inputModalities": ["text", "image"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "claude-sonnet-4", + "apiModelId": "anthropic/claude-sonnet-4", + "limits": { + "contextWindow": 200000, + "maxOutputTokens": 64000 + }, + "inputModalities": ["image", "text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "claude-sonnet-4-5", + "apiModelId": "anthropic/claude-sonnet-4.5", + "limits": { + "contextWindow": 1000000 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 3 + }, + "output": { + 
"currency": "USD", + "perMillionTokens": 15 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.3 + } + }, + "inputModalities": ["text", "image"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "claude-sonnet-4-6", + "apiModelId": "anthropic/claude-sonnet-4.6", + "limits": { + "contextWindow": 1000000, + "maxOutputTokens": 128000 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 15 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.3 + } + }, + "inputModalities": ["text", "image"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "codellama-instruct-solidity", + "apiModelId": "alfredpros/codellama-7b-instruct-solidity", + "modelVariant": "7b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "coder-large", + "apiModelId": "arcee-ai/coder-large", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "codestral-2508", + "apiModelId": "mistralai/codestral-2508", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.8999999999999999 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "codestral-embed-2505", + "apiModelId": "mistralai/codestral-embed-2505", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "inputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "cogito-v2-1", + "apiModelId": "deepcogito/cogito-v2.1-671b", + "modelVariant": "671b", + 
"capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "command-a", + "apiModelId": "cohere/command-a", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "maxOutputTokens": 8192 + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "command-r-08-2024", + "apiModelId": "cohere/command-r-08-2024", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "command-r-plus-08-2024", + "apiModelId": "cohere/command-r-plus-08-2024", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "command-r7b-12-2024", + "apiModelId": "cohere/command-r7b-12-2024", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "cydonia-v4-1", + "apiModelId": "thedrummer/cydonia-24b-v4.1", + "modelVariant": "24b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "deepseek-chat", + "apiModelId": "deepseek/deepseek-chat", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 163840, + "maxOutputTokens": 163840 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.32 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.8899999999999999 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "deepseek-chat-v3-0324", + 
"apiModelId": "deepseek/deepseek-chat-v3-0324", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 163840, + "maxOutputTokens": 163840 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.19999999999999998 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.77 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.13 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "deepseek-chat-v3-1", + "apiModelId": "deepseek/deepseek-chat-v3.1", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 32768, + "maxOutputTokens": 7168 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.15 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.75 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "deepseek-r1", + "apiModelId": "deepseek/deepseek-r1", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 64000, + "maxOutputTokens": 16000 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.7 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2.5 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "deepseek-r1-0528", + "apiModelId": "deepseek/deepseek-r1-0528", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 163840, + "maxOutputTokens": 65536 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.44999999999999996 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2.1500000000000004 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.22499999999999998 + } + }, + "inputModalities": ["text"], + "outputModalities": 
["text"] + }, + { + "providerId": "openrouter", + "modelId": "deepseek-r1-distill-llama", + "apiModelId": "deepseek/deepseek-r1-distill-llama-70b", + "modelVariant": "70b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 131072, + "maxOutputTokens": 16384 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.7 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.7999999999999999 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "deepseek-r1-distill-qwen", + "apiModelId": "deepseek/deepseek-r1-distill-qwen-32b", + "modelVariant": "32b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 32768, + "maxOutputTokens": 32768 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.29 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.29 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "deepseek-r1t2-chimera", + "apiModelId": "tngtech/deepseek-r1t2-chimera", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "deepseek-v3-1-nex-n1", + "apiModelId": "nex-agi/deepseek-v3.1-nex-n1", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 131072, + "maxOutputTokens": 163840 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.27 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "deepseek-v3-1-terminus", + "apiModelId": "deepseek/deepseek-v3.1-terminus", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": 
{ + "contextWindow": 163840 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.21 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.7899999999999999 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.1300000002 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "deepseek-v3-1-terminus", + "apiModelId": "deepseek/deepseek-v3.1-terminus:exacto", + "modelVariant": "exacto", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 163840 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.21 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.7899999999999999 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.16799999999999998 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "deepseek-v3-2", + "apiModelId": "deepseek/deepseek-v3.2", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 163840, + "maxOutputTokens": 65536 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.25 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.39999999999999997 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "deepseek-v3-2-exp", + "apiModelId": "deepseek/deepseek-v3.2-exp", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "maxOutputTokens": 65536 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.27 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.41 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "deepseek-v3-2-speciale", + "apiModelId": "deepseek/deepseek-v3.2-speciale", + 
"capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "maxOutputTokens": 163840 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.39999999999999997 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.2 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.19999999999999998 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "devstral", + "apiModelId": "mistralai/devstral-medium", + "modelVariant": "medium", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.39999999999999997 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "devstral-2512", + "apiModelId": "mistralai/devstral-2512", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.39999999999999997 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "devstral-small", + "apiModelId": "mistralai/devstral-small", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 131072 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.09999999999999999 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.3 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "dolphin-mistral-venice-edition", + "apiModelId": "cognitivecomputations/dolphin-mistral-24b-venice-edition:free", + "modelVariant": "24b-free", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "pricing": { + 
"input": { + "currency": "USD", + "perMillionTokens": 0 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "e5-base-v2", + "apiModelId": "intfloat/e5-base-v2", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "inputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "e5-large-v2", + "apiModelId": "intfloat/e5-large-v2", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "inputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "ernie-4-5-a3b", + "apiModelId": "baidu/ernie-4.5-21b-a3b", + "modelVariant": "21b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "ernie-4-5-a3b", + "apiModelId": "baidu/ernie-4.5-21b-a3b-thinking", + "modelVariant": "21b-thinking", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 131072, + "maxOutputTokens": 65536 + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "ernie-4-5-a47b", + "apiModelId": "baidu/ernie-4.5-300b-a47b", + "modelVariant": "300b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 123000, + "maxOutputTokens": 12000 + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "ernie-4-5-vl-a3b", + "apiModelId": "baidu/ernie-4.5-vl-28b-a3b", + "modelVariant": "28b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 30000, + "maxOutputTokens": 8000 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.14 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.56 + } + 
}, + "inputModalities": ["text", "image"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "ernie-4-5-vl-a47b", + "apiModelId": "baidu/ernie-4.5-vl-424b-a47b", + "modelVariant": "424b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "inputModalities": ["image", "text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "free", + "apiModelId": "openrouter/free", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "inputModalities": ["text", "image"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "gemini-2-0-flash-001", + "apiModelId": "google/gemini-2.0-flash-001", + "limits": { + "contextWindow": 1048576 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.09999999999999999 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.39999999999999997 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.024999999999999998 + } + }, + "inputModalities": ["text", "image", "audio", "video"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "gemini-2-0-flash-lite-001", + "apiModelId": "google/gemini-2.0-flash-lite-001", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.075 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.3 + } + }, + "inputModalities": ["text", "image", "audio", "video"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "gemini-2-5-flash", + "apiModelId": "google/gemini-2.5-flash", + "limits": { + "maxOutputTokens": 65535 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2.5 + }, + "cacheRead": { + "currency": "USD", + 
"perMillionTokens": 0.03 + } + }, + "inputModalities": ["image", "text", "audio", "video"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "gemini-2-5-flash-image", + "apiModelId": "google/gemini-2.5-flash-image", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2.5 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.03 + } + }, + "inputModalities": ["image", "text"], + "outputModalities": ["image", "text"] + }, + { + "providerId": "openrouter", + "modelId": "gemini-2-5-flash-lite", + "apiModelId": "google/gemini-2.5-flash-lite", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.09999999999999999 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.39999999999999997 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.01 + } + }, + "inputModalities": ["text", "image", "audio", "video"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "gemini-2-5-flash-lite-preview-09-2025", + "apiModelId": "google/gemini-2.5-flash-lite-preview-09-2025", + "limits": { + "contextWindow": 1048576 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.09999999999999999 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.39999999999999997 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.01 + } + }, + "inputModalities": ["text", "image", "audio", "video"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "gemini-2-5-pro", + "apiModelId": "google/gemini-2.5-pro", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.25 + }, + "output": { + "currency": "USD", + "perMillionTokens": 10 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.125 + } + }, + "inputModalities": ["text", "image", "audio", "video"], + "outputModalities": ["text"] + }, + { + 
"providerId": "openrouter", + "modelId": "gemini-2-5-pro-preview", + "apiModelId": "google/gemini-2.5-pro-preview", + "inputModalities": ["image", "text", "audio"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "gemini-2-5-pro-preview-05-06", + "apiModelId": "google/gemini-2.5-pro-preview-05-06", + "limits": { + "maxOutputTokens": 65535 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.25 + }, + "output": { + "currency": "USD", + "perMillionTokens": 10 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.125 + } + }, + "inputModalities": ["text", "image", "audio", "video"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "gemini-3-1-flash-image-preview", + "apiModelId": "google/gemini-3.1-flash-image-preview", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "inputModalities": ["image", "text"], + "outputModalities": ["image", "text"] + }, + { + "providerId": "openrouter", + "modelId": "gemini-3-1-flash-lite-preview", + "apiModelId": "google/gemini-3.1-flash-lite-preview", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "inputModalities": ["text", "image", "video", "audio"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "gemini-3-1-pro-preview", + "apiModelId": "google/gemini-3.1-pro-preview", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 1048576, + "maxOutputTokens": 65536 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 12 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.19999999999999998 + } + }, + "inputModalities": ["audio", "image", "text", "video"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "gemini-3-1-pro-preview-customtools", + "apiModelId": 
"google/gemini-3.1-pro-preview-customtools", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 12 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.19999999999999998 + } + }, + "inputModalities": ["text", "audio", "image", "video"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "gemini-3-flash-preview", + "apiModelId": "google/gemini-3-flash-preview", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 3 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.049999999999999996 + } + }, + "inputModalities": ["text", "image", "audio", "video"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "gemini-3-pro-image-preview", + "apiModelId": "google/gemini-3-pro-image-preview", + "limits": { + "contextWindow": 65536, + "maxOutputTokens": 32768 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 12 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.19999999999999998 + } + }, + "inputModalities": ["image", "text"], + "outputModalities": ["image", "text"] + }, + { + "providerId": "openrouter", + "modelId": "gemini-3-pro-preview", + "apiModelId": "google/gemini-3-pro-preview", + "limits": { + "contextWindow": 1048576, + "maxOutputTokens": 65536 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 12 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.19999999999999998 + } + }, + "inputModalities": ["text", "image", "audio", "video"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "gemini-embedding-001", + 
"apiModelId": "google/gemini-embedding-001", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 20000 + }, + "inputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "gemma-2-it", + "apiModelId": "google/gemma-2-27b-it", + "modelVariant": "27b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 8192, + "maxOutputTokens": 2048 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.65 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.65 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "gemma-2-it", + "apiModelId": "google/gemma-2-9b-it", + "modelVariant": "9b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 8192 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.03 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.09 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "gemma-3-it", + "apiModelId": "google/gemma-3-12b-it", + "modelVariant": "12b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 131072 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.04 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.13 + } + }, + "inputModalities": ["text", "image"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "gemma-3-it", + "apiModelId": "google/gemma-3-12b-it:free", + "modelVariant": "12b-free", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 32768, + "maxOutputTokens": 8192 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0 + }, + "output": { + "currency": "USD", + 
"perMillionTokens": 0 + } + }, + "inputModalities": ["text", "image"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "gemma-3-it", + "apiModelId": "google/gemma-3-27b-it", + "modelVariant": "27b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "maxOutputTokens": 65536 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.03 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.11 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.015 + } + }, + "inputModalities": ["text", "image"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "gemma-3-it", + "apiModelId": "google/gemma-3-27b-it:free", + "modelVariant": "27b-free", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 131072, + "maxOutputTokens": 8192 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "inputModalities": ["text", "image"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "gemma-3-it", + "apiModelId": "google/gemma-3-4b-it", + "modelVariant": "4b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 131072 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.04 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.08 + } + }, + "inputModalities": ["text", "image"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "gemma-3-it", + "apiModelId": "google/gemma-3-4b-it:free", + "modelVariant": "4b-free", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 32768, + "maxOutputTokens": 8192 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0 + }, + "output": { + "currency": "USD", 
+ "perMillionTokens": 0 + } + }, + "inputModalities": ["text", "image"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "gemma-3n-e2b-it", + "apiModelId": "google/gemma-3n-e2b-it:free", + "modelVariant": "free", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 8192, + "maxOutputTokens": 2048 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "gemma-3n-e4b-it", + "apiModelId": "google/gemma-3n-e4b-it", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 32768 + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "gemma-3n-e4b-it", + "apiModelId": "google/gemma-3n-e4b-it:free", + "modelVariant": "free", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 8192, + "maxOutputTokens": 2048 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "glm-4", + "apiModelId": "z-ai/glm-4-32b", + "modelVariant": "32b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "glm-4-5", + "apiModelId": "z-ai/glm-4.5", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 131072 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.6 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2.2 + }, + "cacheRead": { + 
"currency": "USD", + "perMillionTokens": 0.11 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "glm-4-5-air", + "apiModelId": "z-ai/glm-4.5-air", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.13 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.85 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.024999999999999998 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "glm-4-5-air", + "apiModelId": "z-ai/glm-4.5-air:free", + "modelVariant": "free", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "maxOutputTokens": 96000 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "glm-4-5v", + "apiModelId": "z-ai/glm-4.5v", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 65536 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.6 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.7999999999999998 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.11 + } + }, + "inputModalities": ["text", "image"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "glm-4-6", + "apiModelId": "z-ai/glm-4.6", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 204800, + "maxOutputTokens": 204800 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.39 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.9 + } + }, + "inputModalities": ["text"], + "outputModalities": 
["text"] + }, + { + "providerId": "openrouter", + "modelId": "glm-4-6", + "apiModelId": "z-ai/glm-4.6:exacto", + "modelVariant": "exacto", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 204800 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.44 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.76 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.11 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "glm-4-6v", + "apiModelId": "z-ai/glm-4.6v", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 131072, + "maxOutputTokens": 131072 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.8999999999999999 + } + }, + "inputModalities": ["image", "text", "video"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "glm-4-7", + "apiModelId": "z-ai/glm-4.7", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 202752 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.38 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.9800000000000002 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.19 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "glm-4-7-flash", + "apiModelId": "z-ai/glm-4.7-flash", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 202752 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.06 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.39999999999999997 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.0100000002 + } + }, + 
"inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "glm-5", + "apiModelId": "z-ai/glm-5", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 202752 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.7999999999999999 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2.56 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.16 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "goliath", + "apiModelId": "alpindale/goliath-120b", + "modelVariant": "120b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "gpt-3-5-turbo", + "apiModelId": "openai/gpt-3.5-turbo", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.5 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "gpt-3-5-turbo-0613", + "apiModelId": "openai/gpt-3.5-turbo-0613", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 4095, + "maxOutputTokens": 4096 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "gpt-3-5-turbo-16k", + "apiModelId": "openai/gpt-3.5-turbo-16k", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": 
"gpt-3-5-turbo-instruct", + "apiModelId": "openai/gpt-3.5-turbo-instruct", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 4095 + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "gpt-4", + "apiModelId": "openai/gpt-4", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 8191, + "maxOutputTokens": 4096 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 30 + }, + "output": { + "currency": "USD", + "perMillionTokens": 60 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "gpt-4-0314", + "apiModelId": "openai/gpt-4-0314", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "gpt-4-1", + "apiModelId": "openai/gpt-4.1", + "limits": { + "contextWindow": 1047576 + }, + "inputModalities": ["image", "text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "gpt-4-1-mini", + "apiModelId": "openai/gpt-4.1-mini", + "limits": { + "contextWindow": 1047576 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.39999999999999997 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.5999999999999999 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.09999999999999999 + } + }, + "inputModalities": ["image", "text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "gpt-4-1-nano", + "apiModelId": "openai/gpt-4.1-nano", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 1047576 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.09999999999999999 + }, + "output": { + "currency": "USD", + "perMillionTokens": 
0.39999999999999997 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.024999999999999998 + } + }, + "inputModalities": ["image", "text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "gpt-4-1106-preview", + "apiModelId": "openai/gpt-4-1106-preview", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "gpt-4-turbo", + "apiModelId": "openai/gpt-4-turbo", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "inputModalities": ["text", "image"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "gpt-4-turbo-preview", + "apiModelId": "openai/gpt-4-turbo-preview", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "gpt-4o", + "apiModelId": "openai/gpt-4o", + "inputModalities": ["text", "image"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "gpt-4o", + "apiModelId": "openai/gpt-4o:extended", + "modelVariant": "extended", + "limits": { + "maxOutputTokens": 64000 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 6 + }, + "output": { + "currency": "USD", + "perMillionTokens": 18 + } + }, + "inputModalities": ["text", "image"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "gpt-4o-2024-05-13", + "apiModelId": "openai/gpt-4o-2024-05-13", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 15 + } + }, + "inputModalities": ["text", "image"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "gpt-4o-2024-08-06", + "apiModelId": "openai/gpt-4o-2024-08-06", + "inputModalities": ["text", "image"], + 
"outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "gpt-4o-2024-11-20", + "apiModelId": "openai/gpt-4o-2024-11-20", + "inputModalities": ["text", "image"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "gpt-4o-audio-preview", + "apiModelId": "openai/gpt-4o-audio-preview", + "inputModalities": ["audio", "text"], + "outputModalities": ["text", "audio"] + }, + { + "providerId": "openrouter", + "modelId": "gpt-4o-mini", + "apiModelId": "openai/gpt-4o-mini", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.15 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.6 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.075 + } + }, + "inputModalities": ["text", "image"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "gpt-4o-mini-2024-07-18", + "apiModelId": "openai/gpt-4o-mini-2024-07-18", + "inputModalities": ["text", "image"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "gpt-4o-mini-search-preview", + "apiModelId": "openai/gpt-4o-mini-search-preview", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.15 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.6 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "gpt-4o-search-preview", + "apiModelId": "openai/gpt-4o-search-preview", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2.5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 10 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "gpt-5", + "apiModelId": "openai/gpt-5", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.25 + }, + "output": { + "currency": "USD", + "perMillionTokens": 10 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 
0.125 + } + }, + "inputModalities": ["text", "image"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "gpt-5-1", + "apiModelId": "openai/gpt-5.1", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.25 + }, + "output": { + "currency": "USD", + "perMillionTokens": 10 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.125 + } + }, + "inputModalities": ["image", "text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "gpt-5-1-chat", + "apiModelId": "openai/gpt-5.1-chat", + "inputModalities": ["image", "text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "gpt-5-1-codex", + "apiModelId": "openai/gpt-5.1-codex", + "limits": { + "contextWindow": 400000 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.25 + }, + "output": { + "currency": "USD", + "perMillionTokens": 10 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.125 + } + }, + "inputModalities": ["text", "image"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "gpt-5-1-codex-max", + "apiModelId": "openai/gpt-5.1-codex-max", + "limits": { + "contextWindow": 400000 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.25 + }, + "output": { + "currency": "USD", + "perMillionTokens": 10 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.125 + } + }, + "inputModalities": ["text", "image"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "gpt-5-1-codex-mini", + "apiModelId": "openai/gpt-5.1-codex-mini", + "limits": { + "contextWindow": 400000, + "maxOutputTokens": 100000 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.25 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.024999999999999998 + } + }, + 
"inputModalities": ["image", "text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "gpt-5-2", + "apiModelId": "openai/gpt-5.2", + "inputModalities": ["image", "text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "gpt-5-2-chat", + "apiModelId": "openai/gpt-5.2-chat", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.75 + }, + "output": { + "currency": "USD", + "perMillionTokens": 14 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.175 + } + }, + "inputModalities": ["image", "text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "gpt-5-2-codex", + "apiModelId": "openai/gpt-5.2-codex", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.75 + }, + "output": { + "currency": "USD", + "perMillionTokens": 14 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.175 + } + }, + "inputModalities": ["text", "image"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "gpt-5-2-pro", + "apiModelId": "openai/gpt-5.2-pro", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 21 + }, + "output": { + "currency": "USD", + "perMillionTokens": 168 + } + }, + "inputModalities": ["image", "text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "gpt-5-3-chat", + "apiModelId": "openai/gpt-5.3-chat", + "inputModalities": ["text", "image"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "gpt-5-3-codex", + "apiModelId": "openai/gpt-5.3-codex", + "inputModalities": ["text", "image"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "gpt-5-4", + "apiModelId": "openai/gpt-5.4", + "limits": { + "contextWindow": 1050000, + "maxOutputTokens": 128000 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2.5 + }, + "output": { + "currency": 
"USD", + "perMillionTokens": 15 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.25 + } + }, + "inputModalities": ["text", "image"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "gpt-5-4-pro", + "apiModelId": "openai/gpt-5.4-pro", + "inputModalities": ["text", "image"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "gpt-5-chat", + "apiModelId": "openai/gpt-5-chat", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.25 + }, + "output": { + "currency": "USD", + "perMillionTokens": 10 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.125 + } + }, + "inputModalities": ["image", "text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "gpt-5-codex", + "apiModelId": "openai/gpt-5-codex", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.25 + }, + "output": { + "currency": "USD", + "perMillionTokens": 10 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.125 + } + }, + "inputModalities": ["text", "image"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "gpt-5-image", + "apiModelId": "openai/gpt-5-image", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 10 + }, + "output": { + "currency": "USD", + "perMillionTokens": 10 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 1.25 + } + }, + "inputModalities": ["image", "text"], + "outputModalities": ["image", "text"] + }, + { + "providerId": "openrouter", + "modelId": "gpt-5-image-mini", + "apiModelId": "openai/gpt-5-image-mini", + "inputModalities": ["image", "text"], + "outputModalities": ["image", "text"] + }, + { + "providerId": "openrouter", + "modelId": "gpt-5-mini", + "apiModelId": "openai/gpt-5-mini", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.25 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2 + }, + 
"cacheRead": { + "currency": "USD", + "perMillionTokens": 0.024999999999999998 + } + }, + "inputModalities": ["text", "image"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "gpt-5-nano", + "apiModelId": "openai/gpt-5-nano", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.049999999999999996 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.39999999999999997 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.005 + } + }, + "inputModalities": ["text", "image"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "gpt-5-pro", + "apiModelId": "openai/gpt-5-pro", + "limits": { + "maxOutputTokens": 128000 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 15 + }, + "output": { + "currency": "USD", + "perMillionTokens": 120 + } + }, + "inputModalities": ["image", "text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "gpt-audio", + "apiModelId": "openai/gpt-audio", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "inputModalities": ["text", "audio"], + "outputModalities": ["text", "audio"] + }, + { + "providerId": "openrouter", + "modelId": "gpt-audio-mini", + "apiModelId": "openai/gpt-audio-mini", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "inputModalities": ["text", "audio"], + "outputModalities": ["text", "audio"] + }, + { + "providerId": "openrouter", + "modelId": "gpt-oss", + "apiModelId": "openai/gpt-oss-120b", + "modelVariant": "120b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 131072 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.039 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.19 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "gpt-oss", + "apiModelId": 
"openai/gpt-oss-120b:exacto", + "modelVariant": "120b-exacto", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 131072 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.039 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.19 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "gpt-oss", + "apiModelId": "openai/gpt-oss-120b:free", + "modelVariant": "120b-free", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 131072, + "maxOutputTokens": 131072 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "gpt-oss", + "apiModelId": "openai/gpt-oss-20b", + "modelVariant": "20b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 131072 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.03 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.14 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "gpt-oss", + "apiModelId": "openai/gpt-oss-20b:free", + "modelVariant": "20b-free", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 131072, + "maxOutputTokens": 131072 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "gpt-oss-safeguard", + "apiModelId": "openai/gpt-oss-safeguard-20b", + "modelVariant": "20b", + "capabilities": { + "add": [], + 
"remove": [], + "force": [] + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.075 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.3 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.037 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "granite-4-0-h-micro", + "apiModelId": "ibm-granite/granite-4.0-h-micro", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 131000 + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "grok-3", + "apiModelId": "x-ai/grok-3", + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "grok-3-beta", + "apiModelId": "x-ai/grok-3-beta", + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "grok-3-mini", + "apiModelId": "x-ai/grok-3-mini", + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "grok-3-mini-beta", + "apiModelId": "x-ai/grok-3-mini-beta", + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "grok-4", + "apiModelId": "x-ai/grok-4", + "inputModalities": ["image", "text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "grok-4-1-fast-reasoning", + "apiModelId": "x-ai/grok-4.1-fast", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.19999999999999998 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.5 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.049999999999999996 + } + }, + "inputModalities": ["text", "image"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "grok-4-fast-reasoning", + "apiModelId": "x-ai/grok-4-fast", + 
"pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.19999999999999998 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.5 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.049999999999999996 + } + }, + "inputModalities": ["text", "image"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "grok-code-fast-1", + "apiModelId": "x-ai/grok-code-fast-1", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.19999999999999998 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.5 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.02 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "gte-base", + "apiModelId": "thenlper/gte-base", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "inputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "gte-large", + "apiModelId": "thenlper/gte-large", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "inputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "hermes-2-pro-llama-3", + "apiModelId": "nousresearch/hermes-2-pro-llama-3-8b", + "modelVariant": "8b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "hermes-3-llama-3-1", + "apiModelId": "nousresearch/hermes-3-llama-3.1-405b", + "modelVariant": "405b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 131072, + "maxOutputTokens": 16384 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "hermes-3-llama-3-1", + 
"apiModelId": "nousresearch/hermes-3-llama-3.1-405b:free", + "modelVariant": "405b-free", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 131072 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "hermes-3-llama-3-1", + "apiModelId": "nousresearch/hermes-3-llama-3.1-70b", + "modelVariant": "70b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 65536, + "maxOutputTokens": 65536 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.3 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "hermes-4", + "apiModelId": "nousresearch/hermes-4-405b", + "modelVariant": "405b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 131072 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1 + }, + "output": { + "currency": "USD", + "perMillionTokens": 3 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "hermes-4", + "apiModelId": "nousresearch/hermes-4-70b", + "modelVariant": "70b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 131072 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.13 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.39999999999999997 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "hunyuan-a13b-instruct", + "apiModelId": "tencent/hunyuan-a13b-instruct", + "capabilities": { + 
"add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 131072, + "maxOutputTokens": 131072 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.14 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.5700000000000001 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "inflection-3-pi", + "apiModelId": "inflection/inflection-3-pi", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "inflection-3-productivity", + "apiModelId": "inflection/inflection-3-productivity", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "intellect-3", + "apiModelId": "prime-intellect/intellect-3", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.19999999999999998 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.1 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "jamba-large-1-7", + "apiModelId": "ai21/jamba-large-1.7", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "kat-coder-pro", + "apiModelId": "kwaipilot/kat-coder-pro", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.207 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.828 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.0414 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + 
"providerId": "openrouter", + "modelId": "kimi-k2", + "apiModelId": "moonshotai/kimi-k2", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 131000 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.55 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2.2 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "kimi-k2", + "apiModelId": "moonshotai/kimi-k2-thinking", + "modelVariant": "thinking", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 131072 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.47 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.14100000000000001 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "kimi-k2-0905", + "apiModelId": "moonshotai/kimi-k2-0905", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 131072 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.39999999999999997 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.15 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "kimi-k2-0905", + "apiModelId": "moonshotai/kimi-k2-0905:exacto", + "modelVariant": "exacto", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.6 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2.5 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "kimi-k2-5", + "apiModelId": 
"moonshotai/kimi-k2.5", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "maxOutputTokens": 65535 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.44999999999999996 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2.2 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.22499999999999998 + } + }, + "inputModalities": ["text", "image"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "l3-1-euryale", + "apiModelId": "sao10k/l3.1-euryale-70b", + "modelVariant": "70b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "l3-1-hanami-x1", + "apiModelId": "sao10k/l3.1-70b-hanami-x1", + "modelVariant": "70b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "l3-3-euryale", + "apiModelId": "sao10k/l3.3-euryale-70b", + "modelVariant": "70b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "l3-euryale", + "apiModelId": "sao10k/l3-euryale-70b", + "modelVariant": "70b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "l3-lunaris", + "apiModelId": "sao10k/l3-lunaris-8b", + "modelVariant": "8b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.04 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.049999999999999996 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + 
"modelId": "lfm-2-2", + "apiModelId": "liquid/lfm-2.2-6b", + "modelVariant": "6b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "lfm-2-5-instruct", + "apiModelId": "liquid/lfm-2.5-1.2b-instruct:free", + "modelVariant": "2b-free", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 32768 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "lfm-2-5-thinking", + "apiModelId": "liquid/lfm-2.5-1.2b-thinking:free", + "modelVariant": "2b-free", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 32768 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "lfm-2-a2b", + "apiModelId": "liquid/lfm-2-24b-a2b", + "modelVariant": "24b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "lfm2-a1b", + "apiModelId": "liquid/lfm2-8b-a1b", + "modelVariant": "8b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "llama-3-1", + "apiModelId": "meta-llama/llama-3.1-405b", + "modelVariant": "405b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 32768, + "maxOutputTokens": 32768 + }, + "pricing": { + "input": { + "currency": 
"USD", + "perMillionTokens": 4 + }, + "output": { + "currency": "USD", + "perMillionTokens": 4 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "llama-3-1-instruct", + "apiModelId": "meta-llama/llama-3.1-405b-instruct", + "modelVariant": "405b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 131000 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 4 + }, + "output": { + "currency": "USD", + "perMillionTokens": 4 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "llama-3-1-instruct", + "apiModelId": "meta-llama/llama-3.1-70b-instruct", + "modelVariant": "70b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 131072 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.39999999999999997 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.39999999999999997 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "llama-3-1-instruct", + "apiModelId": "meta-llama/llama-3.1-8b-instruct", + "modelVariant": "8b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 16384, + "maxOutputTokens": 16384 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.02 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.049999999999999996 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "llama-3-1-lumimaid", + "apiModelId": "neversleep/llama-3.1-lumimaid-8b", + "modelVariant": "8b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + 
"modelId": "llama-3-1-nemotron-instruct", + "apiModelId": "nvidia/llama-3.1-nemotron-70b-instruct", + "modelVariant": "70b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 131072, + "maxOutputTokens": 16384 + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "llama-3-2-instruct", + "apiModelId": "meta-llama/llama-3.2-1b-instruct", + "modelVariant": "1b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 60000 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.027 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.19999999999999998 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "llama-3-2-instruct", + "apiModelId": "meta-llama/llama-3.2-3b-instruct", + "modelVariant": "3b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 80000 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.051 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.33999999999999997 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "llama-3-2-instruct", + "apiModelId": "meta-llama/llama-3.2-3b-instruct:free", + "modelVariant": "3b-free", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 131072 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "llama-3-2-vision-instruct", + "apiModelId": "meta-llama/llama-3.2-11b-vision-instruct", + "modelVariant": "11b", + "capabilities": { + "add": [], + 
"remove": [], + "force": [] + }, + "limits": { + "contextWindow": 131072, + "maxOutputTokens": 16384 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.049 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.049 + } + }, + "inputModalities": ["text", "image"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "llama-3-3-instruct", + "apiModelId": "meta-llama/llama-3.3-70b-instruct", + "modelVariant": "70b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 131072, + "maxOutputTokens": 16384 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.09999999999999999 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.32 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "llama-3-3-instruct", + "apiModelId": "meta-llama/llama-3.3-70b-instruct:free", + "modelVariant": "70b-free", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "maxOutputTokens": 128000 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "llama-3-3-nemotron-super-v1-5", + "apiModelId": "nvidia/llama-3.3-nemotron-super-49b-v1.5", + "modelVariant": "49b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 131072 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.09999999999999999 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.39999999999999997 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "llama-3-instruct", + "apiModelId": "meta-llama/llama-3-70b-instruct", + 
"modelVariant": "70b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "llama-3-instruct", + "apiModelId": "meta-llama/llama-3-8b-instruct", + "modelVariant": "8b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "maxOutputTokens": 16384 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.03 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.04 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "llama-4-maverick", + "apiModelId": "meta-llama/llama-4-maverick", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 1048576, + "maxOutputTokens": 16384 + }, + "inputModalities": ["text", "image"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "llama-4-scout", + "apiModelId": "meta-llama/llama-4-scout", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 327680, + "maxOutputTokens": 16384 + }, + "inputModalities": ["text", "image"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "llama-guard-2", + "apiModelId": "meta-llama/llama-guard-2-8b", + "modelVariant": "8b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "llama-guard-3", + "apiModelId": "meta-llama/llama-guard-3-8b", + "modelVariant": "8b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 131072 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.02 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.06 + } + }, + "inputModalities": ["text"], + 
"outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "llama-guard-4", + "apiModelId": "meta-llama/llama-guard-4-12b", + "modelVariant": "12b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 163840 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.18 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.18 + } + }, + "inputModalities": ["image", "text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "llama-nemotron-embed-vl-v2", + "apiModelId": "nvidia/llama-nemotron-embed-vl-1b-v2:free", + "modelVariant": "1b-free", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 131072 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "inputModalities": ["text", "image"] + }, + { + "providerId": "openrouter", + "modelId": "llemma_7b", + "apiModelId": "eleutherai/llemma_7b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "longcat-flash-chat", + "apiModelId": "meituan/longcat-flash-chat", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 131072, + "maxOutputTokens": 131072 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.19999999999999998 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.7999999999999999 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.19999999999999998 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "maestro-reasoning", + "apiModelId": "arcee-ai/maestro-reasoning", + "modelVariant": "reasoning", + "capabilities": { + "add": [], + "remove": 
[], + "force": [] + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "magnum-v4", + "apiModelId": "anthracite-org/magnum-v4-72b", + "modelVariant": "72b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "mercury", + "apiModelId": "inception/mercury", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "maxOutputTokens": 32000 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.25 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.75 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.024999999999999998 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "mercury-2", + "apiModelId": "inception/mercury-2", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "mercury-coder", + "apiModelId": "inception/mercury-coder", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "maxOutputTokens": 32000 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.25 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.75 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.024999999999999998 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "mimo-v2-flash", + "apiModelId": "xiaomi/mimo-v2-flash", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 262144, + "maxOutputTokens": 65536 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.09 + }, + "output": { + "currency": "USD", + 
"perMillionTokens": 0.29 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.045 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "minimax-01", + "apiModelId": "minimax/minimax-01", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 1000192, + "maxOutputTokens": 1000192 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.19999999999999998 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.1 + } + }, + "inputModalities": ["text", "image"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "minimax-m1", + "apiModelId": "minimax/minimax-m1", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "maxOutputTokens": 40000 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.39999999999999997 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2.2 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "minimax-m2", + "apiModelId": "minimax/minimax-m2", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 196608, + "maxOutputTokens": 196608 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.255 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.03 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "minimax-m2-1", + "apiModelId": "minimax/minimax-m2.1", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 196608 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.27 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.95 + }, + 
"cacheRead": { + "currency": "USD", + "perMillionTokens": 0.0290000007 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "minimax-m2-5", + "apiModelId": "minimax/minimax-m2.5", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 196608, + "maxOutputTokens": 196608 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.295 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.2 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.03 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "minimax-m2-her", + "apiModelId": "minimax/minimax-m2-her", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "ministral-2512", + "apiModelId": "mistralai/ministral-14b-2512", + "modelVariant": "14b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 262144 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.19999999999999998 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.19999999999999998 + } + }, + "inputModalities": ["text", "image"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "ministral-2512", + "apiModelId": "mistralai/ministral-3b-2512", + "modelVariant": "3b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 131072 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.09999999999999999 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.09999999999999999 + } + }, + "inputModalities": ["text", "image"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": 
"ministral-2512", + "apiModelId": "mistralai/ministral-8b-2512", + "modelVariant": "8b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 262144 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.15 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.15 + } + }, + "inputModalities": ["text", "image"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "mistral-embed-2312", + "apiModelId": "mistralai/mistral-embed-2312", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "inputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "mistral-instruct-v0-1", + "apiModelId": "mistralai/mistral-7b-instruct-v0.1", + "modelVariant": "7b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 2824 + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "mistral-large", + "apiModelId": "mistralai/mistral-large", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "mistral-large-2407", + "apiModelId": "mistralai/mistral-large-2407", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "mistral-large-2411", + "apiModelId": "mistralai/mistral-large-2411", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "mistral-large-2512", + "apiModelId": "mistralai/mistral-large-2512", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 262144 + }, + "pricing": { + "input": { + "currency": 
"USD", + "perMillionTokens": 0.5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.5 + } + }, + "inputModalities": ["text", "image"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "mistral-medium-3", + "apiModelId": "mistralai/mistral-medium-3", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.39999999999999997 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2 + } + }, + "inputModalities": ["text", "image"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "mistral-medium-3-1", + "apiModelId": "mistralai/mistral-medium-3.1", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 131072 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.39999999999999997 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2 + } + }, + "inputModalities": ["text", "image"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "mistral-nemo", + "apiModelId": "mistralai/mistral-nemo", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 131072, + "maxOutputTokens": 16384 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.02 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.04 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "mistral-saba", + "apiModelId": "mistralai/mistral-saba", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.19999999999999998 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.6 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": 
"mistral-small-3-1-instruct", + "apiModelId": "mistralai/mistral-small-3.1-24b-instruct", + "modelVariant": "24b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.35 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.56 + } + }, + "inputModalities": ["text", "image"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "mistral-small-3-1-instruct", + "apiModelId": "mistralai/mistral-small-3.1-24b-instruct:free", + "modelVariant": "24b-free", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "inputModalities": ["text", "image"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "mistral-small-3-2-instruct", + "apiModelId": "mistralai/mistral-small-3.2-24b-instruct", + "modelVariant": "24b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 131072, + "maxOutputTokens": 131072 + }, + "inputModalities": ["image", "text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "mistral-small-creative", + "apiModelId": "mistralai/mistral-small-creative", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "mistral-small-instruct-2501", + "apiModelId": "mistralai/mistral-small-24b-instruct-2501", + "modelVariant": "24b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "maxOutputTokens": 16384 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.049999999999999996 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.08 + } + }, + "inputModalities": ["text"], + 
"outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "mixtral-8x22b-instruct", + "apiModelId": "mistralai/mixtral-8x22b-instruct", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 65536 + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "mixtral-8x7b-instruct", + "apiModelId": "mistralai/mixtral-8x7b-instruct", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "molmo-2", + "apiModelId": "allenai/molmo-2-8b", + "modelVariant": "8b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.19999999999999998 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.19999999999999998 + } + }, + "inputModalities": ["text", "image", "video"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "morph-v3-fast", + "apiModelId": "morph/morph-v3-fast", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 81920, + "maxOutputTokens": 38000 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.7999999999999999 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.2 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "morph-v3-large", + "apiModelId": "morph/morph-v3-large", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 262144, + "maxOutputTokens": 131072 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.8999999999999999 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.9 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] 
+ }, + { + "providerId": "openrouter", + "modelId": "multi-qa-mpnet-base-dot-v1", + "apiModelId": "sentence-transformers/multi-qa-mpnet-base-dot-v1", + "inputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "multilingual-e5-large", + "apiModelId": "intfloat/multilingual-e5-large", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.01 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "inputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "mythomax-l2", + "apiModelId": "gryphe/mythomax-l2-13b", + "modelVariant": "13b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "maxOutputTokens": 4096 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.06 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.06 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "nemotron-3-nano-a3b", + "apiModelId": "nvidia/nemotron-3-nano-30b-a3b", + "modelVariant": "30b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 262144 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.049999999999999996 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.19999999999999998 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "nemotron-3-nano-a3b", + "apiModelId": "nvidia/nemotron-3-nano-30b-a3b:free", + "modelVariant": "30b-free", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 256000 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "inputModalities": ["text"], + "outputModalities": 
["text"] + }, + { + "providerId": "openrouter", + "modelId": "nemotron-nano-v2", + "apiModelId": "nvidia/nemotron-nano-9b-v2", + "modelVariant": "9b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.04 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.16 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "nemotron-nano-v2", + "apiModelId": "nvidia/nemotron-nano-9b-v2:free", + "modelVariant": "9b-free", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 128000 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "nemotron-nano-v2-vl", + "apiModelId": "nvidia/nemotron-nano-12b-v2-vl", + "modelVariant": "12b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.19999999999999998 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.6 + } + }, + "inputModalities": ["image", "text", "video"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "nemotron-nano-v2-vl", + "apiModelId": "nvidia/nemotron-nano-12b-v2-vl:free", + "modelVariant": "12b-free", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 128000, + "maxOutputTokens": 128000 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "inputModalities": ["image", "text", "video"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "noromaid", + "apiModelId": 
"neversleep/noromaid-20b", + "modelVariant": "20b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "nova-2-lite-v1", + "apiModelId": "amazon/nova-2-lite-v1", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "maxOutputTokens": 65535 + }, + "inputModalities": ["text", "image", "video"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "nova-lite-v1", + "apiModelId": "amazon/nova-lite-v1", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "inputModalities": ["text", "image"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "nova-micro-v1", + "apiModelId": "amazon/nova-micro-v1", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "nova-premier-v1", + "apiModelId": "amazon/nova-premier-v1", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "inputModalities": ["text", "image"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "nova-pro-v1", + "apiModelId": "amazon/nova-pro-v1", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "maxOutputTokens": 5120 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.7999999999999999 + }, + "output": { + "currency": "USD", + "perMillionTokens": 3.1999999999999997 + } + }, + "inputModalities": ["text", "image"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "o1", + "apiModelId": "openai/o1", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "inputModalities": ["text", "image"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "o1-pro", + "apiModelId": 
"openai/o1-pro", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 150 + }, + "output": { + "currency": "USD", + "perMillionTokens": 600 + } + }, + "inputModalities": ["text", "image"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "o3", + "apiModelId": "openai/o3", + "limits": { + "contextWindow": 200000, + "maxOutputTokens": 100000 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 8 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.5 + } + }, + "inputModalities": ["image", "text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "o3-deep-research", + "apiModelId": "openai/o3-deep-research", + "inputModalities": ["image", "text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "o3-mini", + "apiModelId": "openai/o3-mini", + "limits": { + "contextWindow": 200000, + "maxOutputTokens": 100000 + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "o3-mini", + "apiModelId": "openai/o3-mini-high", + "modelVariant": "high", + "limits": { + "contextWindow": 200000, + "maxOutputTokens": 100000 + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "o3-pro", + "apiModelId": "openai/o3-pro", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 20 + }, + "output": { + "currency": "USD", + "perMillionTokens": 80 + } + }, + "inputModalities": ["text", "image"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "o4-mini", + "apiModelId": "openai/o4-mini", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.1 + }, + "output": { + "currency": "USD", + "perMillionTokens": 4.4 + }, + 
"cacheRead": { + "currency": "USD", + "perMillionTokens": 0.275 + } + }, + "inputModalities": ["image", "text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "o4-mini", + "apiModelId": "openai/o4-mini-high", + "modelVariant": "high", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.1 + }, + "output": { + "currency": "USD", + "perMillionTokens": 4.4 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.275 + } + }, + "inputModalities": ["image", "text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "o4-mini-deep-research", + "apiModelId": "openai/o4-mini-deep-research", + "inputModalities": ["image", "text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "olmo-2-0325-instruct", + "apiModelId": "allenai/olmo-2-0325-32b-instruct", + "modelVariant": "32b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "olmo-3-1-instruct", + "apiModelId": "allenai/olmo-3.1-32b-instruct", + "modelVariant": "32b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "olmo-3-1-think", + "apiModelId": "allenai/olmo-3.1-32b-think", + "modelVariant": "32b-think", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "olmo-3-instruct", + "apiModelId": "allenai/olmo-3-7b-instruct", + "modelVariant": "7b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "olmo-3-think", + "apiModelId": "allenai/olmo-3-32b-think", + "modelVariant": 
"32b-think", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "olmo-3-think", + "apiModelId": "allenai/olmo-3-7b-think", + "modelVariant": "7b-think", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.12 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.19999999999999998 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "palmyra-x5", + "apiModelId": "writer/palmyra-x5", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "paraphrase-minilm-l6-v2", + "apiModelId": "sentence-transformers/paraphrase-minilm-l6-v2", + "inputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "phi-4", + "apiModelId": "microsoft/phi-4", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 16384, + "maxOutputTokens": 16384 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.06 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.14 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "pixtral-large-2411", + "apiModelId": "mistralai/pixtral-large-2411", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "inputModalities": ["text", "image"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "qwen-2-5-coder-instruct", + "apiModelId": "qwen/qwen-2.5-coder-32b-instruct", + "modelVariant": "32b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.2 + 
}, + "output": { + "currency": "USD", + "perMillionTokens": 0.2 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "qwen-2-5-instruct", + "apiModelId": "qwen/qwen-2.5-72b-instruct", + "modelVariant": "72b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 32768, + "maxOutputTokens": 16384 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.12 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.39 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "qwen-2-5-instruct", + "apiModelId": "qwen/qwen-2.5-7b-instruct", + "modelVariant": "7b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 32768 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.04 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.09999999999999999 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "qwen-2-5-vl-instruct", + "apiModelId": "qwen/qwen-2.5-vl-7b-instruct", + "modelVariant": "7b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "inputModalities": ["text", "image"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "qwen-max", + "apiModelId": "qwen/qwen-max", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.04 + }, + "output": { + "currency": "USD", + "perMillionTokens": 4.16 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.20800000000000002 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "qwen-plus", + "apiModelId": "qwen/qwen-plus", + "capabilities": { + "add": [], 
+ "remove": [], + "force": [] + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.39999999999999997 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.2 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.08 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "qwen-plus-2025-07-28", + "apiModelId": "qwen/qwen-plus-2025-07-28", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.26 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.78 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "qwen-plus-2025-07-28", + "apiModelId": "qwen/qwen-plus-2025-07-28:thinking", + "modelVariant": "thinking", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.26 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.78 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "qwen-turbo", + "apiModelId": "qwen/qwen-turbo", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 131072, + "maxOutputTokens": 8192 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.0325 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.13 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.006500000000000001 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "qwen-vl-max", + "apiModelId": "qwen/qwen-vl-max", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "maxOutputTokens": 32768 + }, + "pricing": { + "input": { + "currency": "USD", + 
"perMillionTokens": 0.7999999999999999 + }, + "output": { + "currency": "USD", + "perMillionTokens": 3.1999999999999997 + } + }, + "inputModalities": ["text", "image"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "qwen-vl-plus", + "apiModelId": "qwen/qwen-vl-plus", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.1365 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.40950000000000003 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.027299999999999998 + } + }, + "inputModalities": ["text", "image"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "qwen2-5-coder-instruct", + "apiModelId": "qwen/qwen2.5-coder-7b-instruct", + "modelVariant": "7b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 32768 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.03 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.09 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "qwen2-5-vl-instruct", + "apiModelId": "qwen/qwen2.5-vl-32b-instruct", + "modelVariant": "32b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 128000 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.19999999999999998 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.6 + } + }, + "inputModalities": ["text", "image"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "qwen2-5-vl-instruct", + "apiModelId": "qwen/qwen2.5-vl-72b-instruct", + "modelVariant": "72b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 32768, + "maxOutputTokens": 32768 + }, + "pricing": { + "input": { + 
"currency": "USD", + "perMillionTokens": 0.7999999999999999 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.7999999999999999 + } + }, + "inputModalities": ["text", "image"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "qwen3", + "apiModelId": "qwen/qwen3-14b", + "modelVariant": "14b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 40960, + "maxOutputTokens": 40960 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.06 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.24 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "qwen3", + "apiModelId": "qwen/qwen3-32b", + "modelVariant": "32b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 40960, + "maxOutputTokens": 40960 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.08 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.24 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.04 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "qwen3", + "apiModelId": "qwen/qwen3-4b:free", + "modelVariant": "4b-free", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 40960 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "qwen3", + "apiModelId": "qwen/qwen3-8b", + "modelVariant": "8b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 40960 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 
0.049999999999999996 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.39999999999999997 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.049999999999999996 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "qwen3-5", + "apiModelId": "qwen/qwen3.5-27b", + "modelVariant": "27b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.195 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.56 + } + }, + "inputModalities": ["text", "image", "video"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "qwen3-5-a10b", + "apiModelId": "qwen/qwen3.5-122b-a10b", + "modelVariant": "122b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.26 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2.08 + } + }, + "inputModalities": ["text", "image", "video"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "qwen3-5-a17b", + "apiModelId": "qwen/qwen3.5-397b-a17b", + "modelVariant": "397b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.39 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2.34 + } + }, + "inputModalities": ["text", "image", "video"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "qwen3-5-a3b", + "apiModelId": "qwen/qwen3.5-35b-a3b", + "modelVariant": "35b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.1625 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.3 + } + }, + "inputModalities": ["text", "image", "video"], + "outputModalities": ["text"] + }, + { 
+ "providerId": "openrouter", + "modelId": "qwen3-5-flash-02-23", + "apiModelId": "qwen/qwen3.5-flash-02-23", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "inputModalities": ["text", "image", "video"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "qwen3-5-plus-02-15", + "apiModelId": "qwen/qwen3.5-plus-02-15", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.26 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.56 + } + }, + "inputModalities": ["text", "image", "video"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "qwen3-a22b", + "apiModelId": "qwen/qwen3-235b-a22b", + "modelVariant": "235b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 131072, + "maxOutputTokens": 8192 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.45499999999999996 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.8199999999999998 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "qwen3-a22b-2507", + "apiModelId": "qwen/qwen3-235b-a22b-2507", + "modelVariant": "235b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.071 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.09999999999999999 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "qwen3-a22b-thinking-2507", + "apiModelId": "qwen/qwen3-235b-a22b-thinking-2507", + "modelVariant": "235b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 262144, + "maxOutputTokens": 262144 + }, + "pricing": { + "input": { + "currency": "USD", + 
"perMillionTokens": 0.11 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.6 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.055 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "qwen3-a3b", + "apiModelId": "qwen/qwen3-30b-a3b", + "modelVariant": "30b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 40960, + "maxOutputTokens": 40960 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.08 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.28 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "qwen3-a3b-instruct-2507", + "apiModelId": "qwen/qwen3-30b-a3b-instruct-2507", + "modelVariant": "30b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 262144, + "maxOutputTokens": 262144 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.09 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.3 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "qwen3-a3b-thinking-2507", + "apiModelId": "qwen/qwen3-30b-a3b-thinking-2507", + "modelVariant": "30b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 32768 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.051 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.33999999999999997 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "qwen3-coder", + "apiModelId": "qwen/qwen3-coder", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.22 + }, + "output": { + 
"currency": "USD", + "perMillionTokens": 1 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.022 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "qwen3-coder", + "apiModelId": "qwen/qwen3-coder:exacto", + "modelVariant": "exacto", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "maxOutputTokens": 65536 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.22 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.7999999999999998 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.022 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "qwen3-coder", + "apiModelId": "qwen/qwen3-coder:free", + "modelVariant": "free", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 262000, + "maxOutputTokens": 262000 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "qwen3-coder-a3b-instruct", + "apiModelId": "qwen/qwen3-coder-30b-a3b-instruct", + "modelVariant": "30b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 160000, + "maxOutputTokens": 32768 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.07 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.27 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "qwen3-coder-flash", + "apiModelId": "qwen/qwen3-coder-flash", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.195 + }, + 
"output": { + "currency": "USD", + "perMillionTokens": 0.975 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.039 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "qwen3-coder-next", + "apiModelId": "qwen/qwen3-coder-next", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.12 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.75 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.06 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "qwen3-coder-plus", + "apiModelId": "qwen/qwen3-coder-plus", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 1000000 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.65 + }, + "output": { + "currency": "USD", + "perMillionTokens": 3.25 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.13 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "qwen3-embedding", + "apiModelId": "qwen/qwen3-embedding-4b", + "modelVariant": "4b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 32768 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.02 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "inputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "qwen3-embedding", + "apiModelId": "qwen/qwen3-embedding-8b", + "modelVariant": "8b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 32000, + "maxOutputTokens": 32000 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.01 + }, + "output": { + "currency": 
"USD", + "perMillionTokens": 0 + } + }, + "inputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "qwen3-max", + "apiModelId": "qwen/qwen3-max", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "maxOutputTokens": 32768 + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "qwen3-max", + "apiModelId": "qwen/qwen3-max-thinking", + "modelVariant": "thinking", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "maxOutputTokens": 32768 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.78 + }, + "output": { + "currency": "USD", + "perMillionTokens": 3.9 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "qwen3-next-a3b", + "apiModelId": "qwen/qwen3-next-80b-a3b-thinking", + "modelVariant": "80b-thinking", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 128000 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.15 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.2 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "qwen3-next-a3b-instruct", + "apiModelId": "qwen/qwen3-next-80b-a3b-instruct", + "modelVariant": "80b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 262144 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.09 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.1 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "qwen3-next-a3b-instruct", + "apiModelId": "qwen/qwen3-next-80b-a3b-instruct:free", + "modelVariant": "80b-free", + "capabilities": { + "add": [], + "remove": [], + "force": 
[] + }, + "limits": { + "contextWindow": 262144 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "qwen3-vl", + "apiModelId": "qwen/qwen3-vl-8b-thinking", + "modelVariant": "8b-thinking", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "maxOutputTokens": 32768 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.117 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.365 + } + }, + "inputModalities": ["image", "text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "qwen3-vl-a22b", + "apiModelId": "qwen/qwen3-vl-235b-a22b-thinking", + "modelVariant": "235b-thinking", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "inputModalities": ["text", "image"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "qwen3-vl-a22b-instruct", + "apiModelId": "qwen/qwen3-vl-235b-a22b-instruct", + "modelVariant": "235b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 262144 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.19999999999999998 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.88 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.11 + } + }, + "inputModalities": ["text", "image"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "qwen3-vl-a3b", + "apiModelId": "qwen/qwen3-vl-30b-a3b-thinking", + "modelVariant": "30b-thinking", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "pricing": { + 
"input": { + "currency": "USD", + "perMillionTokens": 0 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "inputModalities": ["text", "image"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "qwen3-vl-a3b-instruct", + "apiModelId": "qwen/qwen3-vl-30b-a3b-instruct", + "modelVariant": "30b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 131072, + "maxOutputTokens": 32768 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.13 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.52 + } + }, + "inputModalities": ["text", "image"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "qwen3-vl-instruct", + "apiModelId": "qwen/qwen3-vl-32b-instruct", + "modelVariant": "32b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "maxOutputTokens": 32768 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.10400000000000001 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.41600000000000004 + } + }, + "inputModalities": ["text", "image"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "qwen3-vl-instruct", + "apiModelId": "qwen/qwen3-vl-8b-instruct", + "modelVariant": "8b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "maxOutputTokens": 32768 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.08 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.5 + } + }, + "inputModalities": ["image", "text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "qwq", + "apiModelId": "qwen/qwq-32b", + "modelVariant": "32b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + "contextWindow": 32768, + "maxOutputTokens": 32768 + }, + "pricing": { + "input": { + 
"currency": "USD", + "perMillionTokens": 0.15 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.39999999999999997 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "relace-apply-3", + "apiModelId": "relace/relace-apply-3", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "relace-search", + "apiModelId": "relace/relace-search", + "modelVariant": "search", + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "remm-slerp-l2", + "apiModelId": "undi95/remm-slerp-l2-13b", + "modelVariant": "13b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "rnj-1-instruct", + "apiModelId": "essentialai/rnj-1-instruct", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "rocinante", + "apiModelId": "thedrummer/rocinante-12b", + "modelVariant": "12b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "router", + "apiModelId": "switchpoint/router", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.85 + }, + "output": { + "currency": "USD", + "perMillionTokens": 3.4 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "seed-1-6", + "apiModelId": "bytedance-seed/seed-1.6", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "limits": { + 
"contextWindow": 262144, + "maxOutputTokens": 32768 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.25 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2 + } + }, + "inputModalities": ["image", "text", "video"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "seed-1-6-flash", + "apiModelId": "bytedance-seed/seed-1.6-flash", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "inputModalities": ["image", "text", "video"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "seed-2-0-mini", + "apiModelId": "bytedance-seed/seed-2.0-mini", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "inputModalities": ["text", "image", "video"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "skyfall-v2", + "apiModelId": "thedrummer/skyfall-36b-v2", + "modelVariant": "36b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "solar-pro-3", + "apiModelId": "upstage/solar-pro-3", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.15 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.6 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.015 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "sonar-deep-research", + "apiModelId": "perplexity/sonar-deep-research", + "limits": { + "contextWindow": 128000 + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "sonar-pro", + "apiModelId": "perplexity/sonar-pro", + "inputModalities": ["text", "image"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + 
"modelId": "sonar-pro", + "apiModelId": "perplexity/sonar-pro-search", + "modelVariant": "search", + "inputModalities": ["text", "image"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "sonar-reasoning", + "apiModelId": "perplexity/sonar", + "limits": { + "contextWindow": 127072 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1 + } + }, + "inputModalities": ["text", "image"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "sonar-reasoning-pro", + "apiModelId": "perplexity/sonar-reasoning-pro", + "limits": { + "contextWindow": 128000 + }, + "inputModalities": ["text", "image"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "sorcererlm-8x22b", + "apiModelId": "raifle/sorcererlm-8x22b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "spotlight", + "apiModelId": "arcee-ai/spotlight", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "inputModalities": ["image", "text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "step-3-5-flash", + "apiModelId": "stepfun/step-3.5-flash", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.09999999999999999 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.3 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.02 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "step-3-5-flash", + "apiModelId": "stepfun/step-3.5-flash:free", + "modelVariant": "free", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "pricing": { + "input": { + "currency": 
"USD", + "perMillionTokens": 0 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "text-embedding-3-large", + "apiModelId": "openai/text-embedding-3-large", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "inputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "text-embedding-3-small", + "apiModelId": "openai/text-embedding-3-small", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "inputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "text-embedding-ada-002", + "apiModelId": "openai/text-embedding-ada-002", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.09999999999999999 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "inputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "tongyi-deepresearch-a3b", + "apiModelId": "alibaba/tongyi-deepresearch-30b-a3b", + "modelVariant": "30b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "trinity-large-preview", + "apiModelId": "arcee-ai/trinity-large-preview:free", + "modelVariant": "free", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "trinity-mini", + "apiModelId": "arcee-ai/trinity-mini", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.045 + }, + "output": 
{ + "currency": "USD", + "perMillionTokens": 0.15 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "trinity-mini", + "apiModelId": "arcee-ai/trinity-mini:free", + "modelVariant": "free", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "ui-tars-1-5", + "apiModelId": "bytedance/ui-tars-1.5-7b", + "modelVariant": "7b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "inputModalities": ["image", "text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "unslopnemo", + "apiModelId": "thedrummer/unslopnemo-12b", + "modelVariant": "12b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "virtuoso-large", + "apiModelId": "arcee-ai/virtuoso-large", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "voxtral-small-2507", + "apiModelId": "mistralai/voxtral-small-24b-2507", + "modelVariant": "24b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.09999999999999999 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.3 + } + }, + "inputModalities": ["text", "audio"], + "outputModalities": ["text"] + }, + { + "providerId": "openrouter", + "modelId": "weaver", + "apiModelId": "mancer/weaver", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "inputModalities": ["text"], + "outputModalities": 
["text"] + }, + { + "providerId": "openrouter", + "modelId": "wizardlm-2-8x22b", + "apiModelId": "microsoft/wizardlm-2-8x22b", + "capabilities": { + "add": [], + "remove": [], + "force": [] + }, + "inputModalities": ["text"], + "outputModalities": ["text"] + }, + { + "providerId": "ppio", + "modelId": "autoglm-phone-multilingual", + "apiModelId": "zai-org/autoglm-phone-9b-multilingual", + "modelVariant": "9b", + "limits": { + "maxOutputTokens": 32000 + }, + "pricing": { + "input": { + "currency": "CNY", + "perMillionTokens": 0.25 + }, + "output": { + "currency": "CNY", + "perMillionTokens": 1 + } + } + }, + { + "providerId": "ppio", + "modelId": "bge-m3", + "apiModelId": "baai/bge-m3", + "limits": { + "contextWindow": 8192, + "maxOutputTokens": 16000 + }, + "pricing": { + "input": { + "currency": "CNY", + "perMillionTokens": 0.07 + }, + "output": { + "currency": "CNY", + "perMillionTokens": 0 + } + }, + "endpointTypes": ["openai-embeddings"] + }, + { + "providerId": "ppio", + "modelId": "bge-reranker-v2-m3", + "apiModelId": "baai/bge-reranker-v2-m3", + "limits": { + "contextWindow": 8000, + "maxOutputTokens": 8000 + }, + "pricing": { + "input": { + "currency": "CNY", + "perMillionTokens": 0.07 + }, + "output": { + "currency": "CNY", + "perMillionTokens": 0.07 + } + }, + "endpointTypes": ["jina-rerank"] + }, + { + "providerId": "ppio", + "modelId": "community", + "apiModelId": "deepseek/deepseek-v3/community", + "pricing": { + "input": { + "currency": "CNY", + "perMillionTokens": 2 + }, + "output": { + "currency": "CNY", + "perMillionTokens": 8 + } + } + }, + { + "providerId": "ppio", + "modelId": "community", + "apiModelId": "deepseek/deepseek-r1/community", + "pricing": { + "input": { + "currency": "CNY", + "perMillionTokens": 4 + }, + "output": { + "currency": "CNY", + "perMillionTokens": 16 + } + } + }, + { + "providerId": "ppio", + "modelId": "deepseek-ocr-2", + "apiModelId": "deepseek/deepseek-ocr-2", + "pricing": { + "input": { + "currency": "CNY", + 
"perMillionTokens": 0.216 + }, + "output": { + "currency": "CNY", + "perMillionTokens": 0.216 + } + } + }, + { + "providerId": "ppio", + "modelId": "deepseek-prover-v2", + "apiModelId": "deepseek/deepseek-prover-v2-671b", + "modelVariant": "671b", + "pricing": { + "input": { + "currency": "CNY", + "perMillionTokens": 4 + }, + "output": { + "currency": "CNY", + "perMillionTokens": 16 + } + } + }, + { + "providerId": "ppio", + "modelId": "deepseek-r1-0528", + "apiModelId": "deepseek/deepseek-r1-0528", + "limits": { + "contextWindow": 163840, + "maxOutputTokens": 32768 + }, + "pricing": { + "input": { + "currency": "CNY", + "perMillionTokens": 4 + }, + "output": { + "currency": "CNY", + "perMillionTokens": 16 + } + } + }, + { + "providerId": "ppio", + "modelId": "deepseek-r1-distill-llama", + "apiModelId": "deepseek/deepseek-r1-distill-llama-70b", + "modelVariant": "70b", + "limits": { + "contextWindow": 32000, + "maxOutputTokens": 8000 + }, + "pricing": { + "input": { + "currency": "CNY", + "perMillionTokens": 5.8 + }, + "output": { + "currency": "CNY", + "perMillionTokens": 5.8 + } + } + }, + { + "providerId": "ppio", + "modelId": "deepseek-r1-turbo", + "apiModelId": "deepseek/deepseek-r1-turbo", + "pricing": { + "input": { + "currency": "CNY", + "perMillionTokens": 4 + }, + "output": { + "currency": "CNY", + "perMillionTokens": 16 + } + } + }, + { + "providerId": "ppio", + "modelId": "deepseek-v3-0324", + "apiModelId": "deepseek/deepseek-v3-0324", + "pricing": { + "input": { + "currency": "CNY", + "perMillionTokens": 2 + }, + "output": { + "currency": "CNY", + "perMillionTokens": 8 + } + } + }, + { + "providerId": "ppio", + "modelId": "deepseek-v3-1", + "apiModelId": "deepseek/deepseek-v3.1", + "limits": { + "contextWindow": 131072, + "maxOutputTokens": 32768 + }, + "pricing": { + "input": { + "currency": "CNY", + "perMillionTokens": 4 + }, + "output": { + "currency": "CNY", + "perMillionTokens": 12 + } + } + }, + { + "providerId": "ppio", + "modelId": 
"deepseek-v3-1-terminus", + "apiModelId": "deepseek/deepseek-v3.1-terminus", + "limits": { + "contextWindow": 131072, + "maxOutputTokens": 65536 + }, + "pricing": { + "input": { + "currency": "CNY", + "perMillionTokens": 4 + }, + "output": { + "currency": "CNY", + "perMillionTokens": 12 + } + } + }, + { + "providerId": "ppio", + "modelId": "deepseek-v3-2", + "apiModelId": "deepseek/deepseek-v3.2", + "limits": { + "contextWindow": 163840, + "maxOutputTokens": 65536 + }, + "pricing": { + "input": { + "currency": "CNY", + "perMillionTokens": 2 + }, + "output": { + "currency": "CNY", + "perMillionTokens": 3 + } + } + }, + { + "providerId": "ppio", + "modelId": "deepseek-v3-2-exp", + "apiModelId": "deepseek/deepseek-v3.2-exp", + "limits": { + "maxOutputTokens": 65536 + }, + "pricing": { + "input": { + "currency": "CNY", + "perMillionTokens": 2 + }, + "output": { + "currency": "CNY", + "perMillionTokens": 3 + } + } + }, + { + "providerId": "ppio", + "modelId": "deepseek-v3-turbo", + "apiModelId": "deepseek/deepseek-v3-turbo", + "pricing": { + "input": { + "currency": "CNY", + "perMillionTokens": 2 + }, + "output": { + "currency": "CNY", + "perMillionTokens": 8 + } + } + }, + { + "providerId": "ppio", + "modelId": "ernie-4-5", + "apiModelId": "baidu/ernie-4.5-0.3b", + "modelVariant": "3b", + "limits": { + "contextWindow": 120000, + "maxOutputTokens": 8000 + }, + "pricing": { + "input": { + "currency": "CNY", + "perMillionTokens": 0 + }, + "output": { + "currency": "CNY", + "perMillionTokens": 0 + } + } + }, + { + "providerId": "ppio", + "modelId": "ernie-4-5-a3b", + "apiModelId": "baidu/ernie-4.5-21B-a3b", + "modelVariant": "21b", + "pricing": { + "input": { + "currency": "CNY", + "perMillionTokens": 0.5 + }, + "output": { + "currency": "CNY", + "perMillionTokens": 2 + } + } + }, + { + "providerId": "ppio", + "modelId": "ernie-4-5-a3b", + "apiModelId": "baidu/ernie-4.5-21b-a3b-thinking", + "modelVariant": "21b-thinking", + "limits": { + "contextWindow": 131072, + 
"maxOutputTokens": 65536 + }, + "pricing": { + "input": { + "currency": "CNY", + "perMillionTokens": 0.5 + }, + "output": { + "currency": "CNY", + "perMillionTokens": 2 + } + } + }, + { + "providerId": "ppio", + "modelId": "ernie-4-5-a47b-paddle", + "apiModelId": "baidu/ernie-4.5-300b-a47b-paddle", + "modelVariant": "300b", + "pricing": { + "input": { + "currency": "CNY", + "perMillionTokens": 2 + }, + "output": { + "currency": "CNY", + "perMillionTokens": 7 + } + } + }, + { + "providerId": "ppio", + "modelId": "ernie-4-5-vl-a3b", + "apiModelId": "baidu/ernie-4.5-vl-28b-a3b", + "modelVariant": "28b", + "limits": { + "contextWindow": 30000, + "maxOutputTokens": 8000 + }, + "pricing": { + "input": { + "currency": "CNY", + "perMillionTokens": 1 + }, + "output": { + "currency": "CNY", + "perMillionTokens": 4 + } + } + }, + { + "providerId": "ppio", + "modelId": "ernie-4-5-vl-a47b", + "apiModelId": "baidu/ernie-4.5-vl-424b-a47b", + "modelVariant": "424b", + "pricing": { + "input": { + "currency": "CNY", + "perMillionTokens": 3 + }, + "output": { + "currency": "CNY", + "perMillionTokens": 9 + } + } + }, + { + "providerId": "ppio", + "modelId": "glm-4-5", + "apiModelId": "zai-org/glm-4.5", + "limits": { + "contextWindow": 131072 + }, + "pricing": { + "input": { + "currency": "CNY", + "perMillionTokens": 4 + }, + "output": { + "currency": "CNY", + "perMillionTokens": 16 + } + } + }, + { + "providerId": "ppio", + "modelId": "glm-4-5-air", + "apiModelId": "zai-org/glm-4.5-air", + "pricing": { + "input": { + "currency": "CNY", + "perMillionTokens": 1.2 + }, + "output": { + "currency": "CNY", + "perMillionTokens": 2 + } + } + }, + { + "providerId": "ppio", + "modelId": "glm-4-5v", + "apiModelId": "zai-org/glm-4.5v", + "limits": { + "contextWindow": 65536 + }, + "pricing": { + "input": { + "currency": "CNY", + "perMillionTokens": 4 + }, + "output": { + "currency": "CNY", + "perMillionTokens": 12 + } + } + }, + { + "providerId": "ppio", + "modelId": "glm-4-6", + "apiModelId": 
"zai-org/glm-4.6", + "limits": { + "contextWindow": 204800 + }, + "pricing": { + "input": { + "currency": "CNY", + "perMillionTokens": 4 + }, + "output": { + "currency": "CNY", + "perMillionTokens": 16 + } + } + }, + { + "providerId": "ppio", + "modelId": "glm-4-6v", + "apiModelId": "zai-org/glm-4.6v", + "limits": { + "contextWindow": 131072 + }, + "pricing": { + "input": { + "currency": "CNY", + "perMillionTokens": 2 + }, + "output": { + "currency": "CNY", + "perMillionTokens": 6 + } + } + }, + { + "providerId": "ppio", + "modelId": "glm-4-7", + "apiModelId": "zai-org/glm-4.7", + "limits": { + "contextWindow": 204800 + }, + "pricing": { + "input": { + "currency": "CNY", + "perMillionTokens": 4 + }, + "output": { + "currency": "CNY", + "perMillionTokens": 16 + } + } + }, + { + "providerId": "ppio", + "modelId": "glm-4-7-flash", + "apiModelId": "zai-org/glm-4.7-flash", + "limits": { + "maxOutputTokens": 128000 + }, + "pricing": { + "input": { + "currency": "CNY", + "perMillionTokens": 0.5 + }, + "output": { + "currency": "CNY", + "perMillionTokens": 3 + } + } + }, + { + "providerId": "ppio", + "modelId": "glm-5", + "apiModelId": "zai-org/glm-5", + "limits": { + "contextWindow": 202800 + }, + "pricing": { + "input": { + "currency": "CNY", + "perMillionTokens": 6 + }, + "output": { + "currency": "CNY", + "perMillionTokens": 22 + } + } + }, + { + "providerId": "ppio", + "modelId": "kat-coder", + "limits": { + "maxOutputTokens": 128000 + }, + "pricing": { + "input": { + "currency": "CNY", + "perMillionTokens": 2.1 + }, + "output": { + "currency": "CNY", + "perMillionTokens": 8.4 + } + } + }, + { + "providerId": "ppio", + "modelId": "kimi-k2", + "apiModelId": "moonshotai/kimi-k2-thinking", + "modelVariant": "thinking", + "pricing": { + "input": { + "currency": "CNY", + "perMillionTokens": 4 + }, + "output": { + "currency": "CNY", + "perMillionTokens": 16 + } + } + }, + { + "providerId": "ppio", + "modelId": "kimi-k2-0905", + "apiModelId": "moonshotai/kimi-k2-0905", + 
"pricing": { + "input": { + "currency": "CNY", + "perMillionTokens": 4 + }, + "output": { + "currency": "CNY", + "perMillionTokens": 16 + } + } + }, + { + "providerId": "ppio", + "modelId": "kimi-k2-5", + "apiModelId": "moonshotai/kimi-k2.5", + "pricing": { + "input": { + "currency": "CNY", + "perMillionTokens": 4 + }, + "output": { + "currency": "CNY", + "perMillionTokens": 21 + } + } + }, + { + "providerId": "ppio", + "modelId": "kimi-k2-instruct", + "apiModelId": "moonshotai/kimi-k2-instruct", + "limits": { + "contextWindow": 131072, + "maxOutputTokens": 128000 + }, + "pricing": { + "input": { + "currency": "CNY", + "perMillionTokens": 4 + }, + "output": { + "currency": "CNY", + "perMillionTokens": 16 + } + } + }, + { + "providerId": "ppio", + "modelId": "mimo-v2-flash", + "apiModelId": "xiaomimimo/mimo-v2-flash", + "limits": { + "contextWindow": 262144 + }, + "pricing": { + "input": { + "currency": "CNY", + "perMillionTokens": 0.7 + }, + "output": { + "currency": "CNY", + "perMillionTokens": 2.1 + } + } + }, + { + "providerId": "ppio", + "modelId": "minimax-m1-80k", + "apiModelId": "minimaxai/minimax-m1-80k", + "limits": { + "contextWindow": 128000 + }, + "pricing": { + "input": { + "currency": "CNY", + "perMillionTokens": 4 + }, + "output": { + "currency": "CNY", + "perMillionTokens": 16 + } + } + }, + { + "providerId": "ppio", + "modelId": "minimax-m2", + "apiModelId": "minimax/minimax-m2", + "limits": { + "contextWindow": 204800, + "maxOutputTokens": 131072 + }, + "pricing": { + "input": { + "currency": "CNY", + "perMillionTokens": 2.1 + }, + "output": { + "currency": "CNY", + "perMillionTokens": 8.4 + } + } + }, + { + "providerId": "ppio", + "modelId": "minimax-m2-1", + "apiModelId": "minimax/minimax-m2.1", + "limits": { + "contextWindow": 204800 + }, + "pricing": { + "input": { + "currency": "CNY", + "perMillionTokens": 2.1 + }, + "output": { + "currency": "CNY", + "perMillionTokens": 8.4 + } + } + }, + { + "providerId": "ppio", + "modelId": 
"minimax-m2-5", + "apiModelId": "minimax/minimax-m2.5", + "limits": { + "maxOutputTokens": 131100 + }, + "pricing": { + "input": { + "currency": "CNY", + "perMillionTokens": 2.1 + }, + "output": { + "currency": "CNY", + "perMillionTokens": 8.4 + } + } + }, + { + "providerId": "ppio", + "modelId": "minimax-m2-5-highspeed", + "apiModelId": "minimax/minimax-m2.5-highspeed", + "limits": { + "maxOutputTokens": 131100 + }, + "pricing": { + "input": { + "currency": "CNY", + "perMillionTokens": 4.2 + }, + "output": { + "currency": "CNY", + "perMillionTokens": 16.8 + } + } + }, + { + "providerId": "ppio", + "modelId": "qwen-2-5-instruct", + "apiModelId": "qwen/qwen-2.5-72b-instruct", + "modelVariant": "72b", + "limits": { + "maxOutputTokens": 16000 + }, + "pricing": { + "input": { + "currency": "CNY", + "perMillionTokens": 4 + }, + "output": { + "currency": "CNY", + "perMillionTokens": 12 + } + } + }, + { + "providerId": "ppio", + "modelId": "qwen2-5-instruct", + "apiModelId": "qwen/qwen2.5-7b-instruct", + "modelVariant": "7b", + "limits": { + "contextWindow": 32000, + "maxOutputTokens": 32000 + }, + "pricing": { + "input": { + "currency": "CNY", + "perMillionTokens": 0.35 + }, + "output": { + "currency": "CNY", + "perMillionTokens": 0.35 + } + } + }, + { + "providerId": "ppio", + "modelId": "qwen2-5-vl-instruct", + "apiModelId": "qwen/qwen2.5-vl-72b-instruct", + "modelVariant": "72b", + "limits": { + "contextWindow": 32000, + "maxOutputTokens": 32000 + }, + "pricing": { + "input": { + "currency": "CNY", + "perMillionTokens": 4.2 + }, + "output": { + "currency": "CNY", + "perMillionTokens": 4.2 + } + } + }, + { + "providerId": "ppio", + "modelId": "qwen3-5", + "apiModelId": "qwen/qwen3.5-27b", + "modelVariant": "27b" + }, + { + "providerId": "ppio", + "modelId": "qwen3-5-a10b", + "apiModelId": "qwen/qwen3.5-122b-a10b", + "modelVariant": "122b" + }, + { + "providerId": "ppio", + "modelId": "qwen3-5-a17b", + "apiModelId": "qwen/qwen3.5-397b-a17b", + "modelVariant": "397b", + 
"pricing": { + "input": { + "currency": "CNY", + "perMillionTokens": 3 + }, + "output": { + "currency": "CNY", + "perMillionTokens": 18 + } + } + }, + { + "providerId": "ppio", + "modelId": "qwen3-5-a3b", + "apiModelId": "qwen/qwen3.5-35b-a3b", + "modelVariant": "35b" + }, + { + "providerId": "ppio", + "modelId": "qwen3-5-plus", + "apiModelId": "qwen/qwen3.5-plus", + "pricing": { + "input": { + "currency": "CNY", + "perMillionTokens": 4 + }, + "output": { + "currency": "CNY", + "perMillionTokens": 24 + } + } + }, + { + "providerId": "ppio", + "modelId": "qwen3-a22b-fp8", + "apiModelId": "qwen/qwen3-235b-a22b-fp8", + "modelVariant": "235b", + "pricing": { + "input": { + "currency": "CNY", + "perMillionTokens": 1.45 + }, + "output": { + "currency": "CNY", + "perMillionTokens": 5.8 + } + } + }, + { + "providerId": "ppio", + "modelId": "qwen3-a22b-instruct-2507", + "apiModelId": "qwen/qwen3-235b-a22b-instruct-2507", + "modelVariant": "235b", + "limits": { + "contextWindow": 262144, + "maxOutputTokens": 260000 + }, + "pricing": { + "input": { + "currency": "CNY", + "perMillionTokens": 1.45 + }, + "output": { + "currency": "CNY", + "perMillionTokens": 5.8 + } + } + }, + { + "providerId": "ppio", + "modelId": "qwen3-a22b-thinking-2507", + "apiModelId": "qwen/qwen3-235b-a22b-thinking-2507", + "modelVariant": "235b", + "limits": { + "maxOutputTokens": 114688 + }, + "pricing": { + "input": { + "currency": "CNY", + "perMillionTokens": 2 + }, + "output": { + "currency": "CNY", + "perMillionTokens": 20 + } + } + }, + { + "providerId": "ppio", + "modelId": "qwen3-a3b-fp8", + "apiModelId": "qwen/qwen3-30b-a3b-fp8", + "modelVariant": "30b", + "limits": { + "contextWindow": 128000 + }, + "pricing": { + "input": { + "currency": "CNY", + "perMillionTokens": 0.72 + }, + "output": { + "currency": "CNY", + "perMillionTokens": 3.26 + } + } + }, + { + "providerId": "ppio", + "modelId": "qwen3-coder-a35b-instruct", + "apiModelId": "qwen/qwen3-coder-480b-a35b-instruct", + "modelVariant": 
"480b", + "pricing": { + "input": { + "currency": "CNY", + "perMillionTokens": 4 + }, + "output": { + "currency": "CNY", + "perMillionTokens": 16 + } + } + }, + { + "providerId": "ppio", + "modelId": "qwen3-coder-a3b-instruct", + "apiModelId": "qwen/qwen3-coder-30b-a3b-instruct", + "modelVariant": "30b", + "pricing": { + "input": { + "currency": "CNY", + "perMillionTokens": 2.25 + }, + "output": { + "currency": "CNY", + "perMillionTokens": 9 + } + } + }, + { + "providerId": "ppio", + "modelId": "qwen3-coder-next", + "apiModelId": "qwen/qwen3-coder-next", + "pricing": { + "input": { + "currency": "CNY", + "perMillionTokens": 1.4 + }, + "output": { + "currency": "CNY", + "perMillionTokens": 10.5 + } + } + }, + { + "providerId": "ppio", + "modelId": "qwen3-embedding", + "apiModelId": "qwen/qwen3-embedding-0.6b", + "modelVariant": "6b", + "limits": { + "contextWindow": 32768, + "maxOutputTokens": 32768 + }, + "pricing": { + "input": { + "currency": "CNY", + "perMillionTokens": 0.07 + }, + "output": { + "currency": "CNY", + "perMillionTokens": 0.07 + } + }, + "endpointTypes": ["openai-embeddings"] + }, + { + "providerId": "ppio", + "modelId": "qwen3-embedding", + "apiModelId": "qwen/qwen3-embedding-8b", + "modelVariant": "8b", + "limits": { + "contextWindow": 32768, + "maxOutputTokens": 4096 + }, + "pricing": { + "input": { + "currency": "CNY", + "perMillionTokens": 0.35 + }, + "output": { + "currency": "CNY", + "perMillionTokens": 0.35 + } + }, + "endpointTypes": ["openai-embeddings"] + }, + { + "providerId": "ppio", + "modelId": "qwen3-fp8", + "apiModelId": "qwen/qwen3-32b-fp8", + "modelVariant": "32b", + "limits": { + "contextWindow": 128000 + }, + "pricing": { + "input": { + "currency": "CNY", + "perMillionTokens": 0.72 + }, + "output": { + "currency": "CNY", + "perMillionTokens": 3.26 + } + } + }, + { + "providerId": "ppio", + "modelId": "qwen3-fp8", + "apiModelId": "qwen/qwen3-4b-fp8", + "modelVariant": "4b", + "limits": { + "contextWindow": 128000 + }, + 
"pricing": { + "input": { + "currency": "CNY", + "perMillionTokens": 0.2145 + }, + "output": { + "currency": "CNY", + "perMillionTokens": 0.2145 + } + } + }, + { + "providerId": "ppio", + "modelId": "qwen3-next-a3b", + "apiModelId": "qwen/qwen3-next-80b-a3b-thinking", + "modelVariant": "80b-thinking", + "limits": { + "contextWindow": 65536, + "maxOutputTokens": 65536 + }, + "pricing": { + "input": { + "currency": "CNY", + "perMillionTokens": 1 + }, + "output": { + "currency": "CNY", + "perMillionTokens": 10 + } + } + }, + { + "providerId": "ppio", + "modelId": "qwen3-next-a3b-instruct", + "apiModelId": "qwen/qwen3-next-80b-a3b-instruct", + "modelVariant": "80b", + "limits": { + "contextWindow": 65536, + "maxOutputTokens": 65536 + }, + "pricing": { + "input": { + "currency": "CNY", + "perMillionTokens": 1 + }, + "output": { + "currency": "CNY", + "perMillionTokens": 4 + } + } + }, + { + "providerId": "ppio", + "modelId": "qwen3-omni-a3b", + "apiModelId": "qwen/qwen3-omni-30b-a3b-thinking", + "modelVariant": "30b-thinking", + "pricing": { + "input": { + "currency": "CNY", + "perMillionTokens": 0 + }, + "output": { + "currency": "CNY", + "perMillionTokens": 0 + } + } + }, + { + "providerId": "ppio", + "modelId": "qwen3-omni-a3b-instruct", + "apiModelId": "qwen/qwen3-omni-30b-a3b-instruct", + "modelVariant": "30b", + "pricing": { + "input": { + "currency": "CNY", + "perMillionTokens": 0 + }, + "output": { + "currency": "CNY", + "perMillionTokens": 0 + } + } + }, + { + "providerId": "ppio", + "modelId": "qwen3-vl-a22b", + "apiModelId": "qwen/qwen3-vl-235b-a22b-thinking", + "modelVariant": "235b-thinking", + "pricing": { + "input": { + "currency": "CNY", + "perMillionTokens": 2 + }, + "output": { + "currency": "CNY", + "perMillionTokens": 20 + } + } + }, + { + "providerId": "ppio", + "modelId": "qwen3-vl-a22b-instruct", + "apiModelId": "qwen/qwen3-vl-235b-a22b-instruct", + "modelVariant": "235b", + "pricing": { + "input": { + "currency": "CNY", + "perMillionTokens": 2 + 
}, + "output": { + "currency": "CNY", + "perMillionTokens": 8 + } + } + }, + { + "providerId": "ppio", + "modelId": "qwen3-vl-a3b", + "apiModelId": "qwen/qwen3-vl-30b-a3b-thinking", + "modelVariant": "30b-thinking", + "pricing": { + "input": { + "currency": "CNY", + "perMillionTokens": 0.75 + }, + "output": { + "currency": "CNY", + "perMillionTokens": 7.5 + } + } + }, + { + "providerId": "ppio", + "modelId": "qwen3-vl-a3b-instruct", + "apiModelId": "qwen/qwen3-vl-30b-a3b-instruct", + "modelVariant": "30b", + "limits": { + "contextWindow": 131072, + "maxOutputTokens": 32768 + }, + "pricing": { + "input": { + "currency": "CNY", + "perMillionTokens": 0.75 + }, + "output": { + "currency": "CNY", + "perMillionTokens": 3 + } + } + }, + { + "providerId": "ppio", + "modelId": "qwen3-vl-instruct", + "apiModelId": "qwen/qwen3-vl-8b-instruct", + "modelVariant": "8b", + "limits": { + "maxOutputTokens": 32768 + }, + "pricing": { + "input": { + "currency": "CNY", + "perMillionTokens": 0.5 + }, + "output": { + "currency": "CNY", + "perMillionTokens": 2 + } + } + }, + { + "providerId": "qiniu", + "modelId": "deepseek-r1" + }, + { + "providerId": "qiniu", + "modelId": "deepseek-r1-0528" + }, + { + "providerId": "qiniu", + "modelId": "deepseek-v3" + }, + { + "providerId": "qiniu", + "modelId": "deepseek-v3-0324" + }, + { + "providerId": "qiniu", + "modelId": "deepseek-v3-1", + "apiModelId": "deepseek-v3.1" + }, + { + "providerId": "qiniu", + "modelId": "deepseek-v3-1-terminus", + "apiModelId": "deepseek/deepseek-v3.1-terminus" + }, + { + "providerId": "qiniu", + "modelId": "deepseek-v3-1-terminus", + "apiModelId": "deepseek/deepseek-v3.1-terminus-thinking", + "modelVariant": "thinking" + }, + { + "providerId": "qiniu", + "modelId": "deepseek-v3-2-251201", + "apiModelId": "deepseek/deepseek-v3.2-251201" + }, + { + "providerId": "qiniu", + "modelId": "deepseek-v3-2-exp", + "apiModelId": "deepseek/deepseek-v3.2-exp" + }, + { + "providerId": "qiniu", + "modelId": "deepseek-v3-2-exp", + 
"apiModelId": "deepseek/deepseek-v3.2-exp-thinking", + "modelVariant": "thinking" + }, + { + "providerId": "qiniu", + "modelId": "doubao-1-5-pro-32k", + "apiModelId": "doubao-1.5-pro-32k" + }, + { + "providerId": "qiniu", + "modelId": "doubao-1-5-thinking-pro", + "apiModelId": "doubao-1.5-thinking-pro" + }, + { + "providerId": "qiniu", + "modelId": "doubao-1-5-vision-pro", + "apiModelId": "doubao-1.5-vision-pro" + }, + { + "providerId": "qiniu", + "modelId": "doubao-seed-1-6", + "apiModelId": "doubao-seed-1.6" + }, + { + "providerId": "qiniu", + "modelId": "doubao-seed-1-6", + "apiModelId": "doubao-seed-1.6-thinking", + "modelVariant": "thinking" + }, + { + "providerId": "qiniu", + "modelId": "doubao-seed-1-6-flash", + "apiModelId": "doubao-seed-1.6-flash" + }, + { + "providerId": "qiniu", + "modelId": "glm-4-5", + "apiModelId": "glm-4.5" + }, + { + "providerId": "qiniu", + "modelId": "glm-4-5-air", + "apiModelId": "glm-4.5-air" + }, + { + "providerId": "qiniu", + "modelId": "glm-4-6", + "apiModelId": "z-ai/glm-4.6" + }, + { + "providerId": "qiniu", + "modelId": "glm-4-7", + "apiModelId": "z-ai/glm-4.7" + }, + { + "providerId": "qiniu", + "modelId": "glm-5", + "apiModelId": "z-ai/glm-5" + }, + { + "providerId": "qiniu", + "modelId": "gpt-5-4", + "apiModelId": "openai/gpt-5.4" + }, + { + "providerId": "qiniu", + "modelId": "kimi-k2" + }, + { + "providerId": "qiniu", + "modelId": "kimi-k2", + "apiModelId": "moonshotai/kimi-k2-thinking", + "modelVariant": "thinking" + }, + { + "providerId": "qiniu", + "modelId": "kimi-k2-0905", + "apiModelId": "moonshotai/kimi-k2-0905" + }, + { + "providerId": "qiniu", + "modelId": "kimi-k2-5", + "apiModelId": "moonshotai/kimi-k2.5" + }, + { + "providerId": "qiniu", + "modelId": "longcat-flash-lite", + "apiModelId": "meituan/longcat-flash-lite" + }, + { + "providerId": "qiniu", + "modelId": "mimo-v2-flash", + "apiModelId": "xiaomi/mimo-v2-flash" + }, + { + "providerId": "qiniu", + "modelId": "minimax-m1", + "apiModelId": "MiniMax-M1" 
+ }, + { + "providerId": "qiniu", + "modelId": "minimax-m2", + "apiModelId": "minimax/minimax-m2" + }, + { + "providerId": "qiniu", + "modelId": "minimax-m2-1", + "apiModelId": "minimax/minimax-m2.1" + }, + { + "providerId": "qiniu", + "modelId": "minimax-m2-5", + "apiModelId": "minimax/minimax-m2.5" + }, + { + "providerId": "qiniu", + "modelId": "qwen-max-2025-01-25" + }, + { + "providerId": "qiniu", + "modelId": "qwen-turbo" + }, + { + "providerId": "qiniu", + "modelId": "qwen-vl-max-2025-01-25" + }, + { + "providerId": "qiniu", + "modelId": "qwen2-5-vl-instruct", + "apiModelId": "qwen2.5-vl-72b-instruct", + "modelVariant": "72b" + }, + { + "providerId": "qiniu", + "modelId": "qwen2-5-vl-instruct", + "apiModelId": "qwen2.5-vl-7b-instruct", + "modelVariant": "7b" + }, + { + "providerId": "qiniu", + "modelId": "qwen3", + "apiModelId": "qwen3-32b", + "modelVariant": "32b" + }, + { + "providerId": "qiniu", + "modelId": "qwen3-a22b", + "apiModelId": "qwen3-235b-a22b", + "modelVariant": "235b" + }, + { + "providerId": "qiniu", + "modelId": "qwen3-a22b-instruct-2507", + "apiModelId": "qwen3-235b-a22b-instruct-2507", + "modelVariant": "235b" + }, + { + "providerId": "qiniu", + "modelId": "qwen3-a22b-thinking-2507", + "apiModelId": "qwen3-235b-a22b-thinking-2507", + "modelVariant": "235b" + }, + { + "providerId": "qiniu", + "modelId": "qwen3-a3b", + "apiModelId": "qwen3-30b-a3b", + "modelVariant": "30b" + }, + { + "providerId": "qiniu", + "modelId": "qwen3-a3b-instruct-2507", + "apiModelId": "qwen3-30b-a3b-instruct-2507", + "modelVariant": "30b" + }, + { + "providerId": "qiniu", + "modelId": "qwen3-a3b-thinking-2507", + "apiModelId": "qwen3-30b-a3b-thinking-2507", + "modelVariant": "30b" + }, + { + "providerId": "qiniu", + "modelId": "qwen3-coder-a35b-instruct", + "apiModelId": "qwen3-coder-480b-a35b-instruct", + "modelVariant": "480b" + }, + { + "providerId": "qiniu", + "modelId": "qwen3-max" + }, + { + "providerId": "qiniu", + "modelId": "qwen3-max-preview" + }, + { + 
"providerId": "qiniu", + "modelId": "qwen3-next-a3b", + "apiModelId": "qwen3-next-80b-a3b-thinking", + "modelVariant": "80b-thinking" + }, + { + "providerId": "qiniu", + "modelId": "qwen3-next-a3b-instruct", + "apiModelId": "qwen3-next-80b-a3b-instruct", + "modelVariant": "80b" + }, + { + "providerId": "qiniu", + "modelId": "qwen3-vl-a3b", + "apiModelId": "qwen3-vl-30b-a3b-thinking", + "modelVariant": "30b-thinking" + }, + { + "providerId": "sophnet", + "modelId": "deepseek-r1-0528", + "apiModelId": "DeepSeek-R1-0528" + }, + { + "providerId": "sophnet", + "modelId": "deepseek-v3", + "apiModelId": "DeepSeek-v3" + }, + { + "providerId": "tokenflux", + "modelId": "claude-3-5-haiku", + "apiModelId": "anthropic/claude-3.5-haiku", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.7999999999999999 + }, + "output": { + "currency": "USD", + "perMillionTokens": 4 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.08 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "claude-3-5-sonnet", + "apiModelId": "anthropic/claude-3.5-sonnet", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 6 + }, + "output": { + "currency": "USD", + "perMillionTokens": 30 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "claude-3-7-sonnet", + "apiModelId": "anthropic/claude-3.7-sonnet" + }, + { + "providerId": "tokenflux", + "modelId": "claude-3-haiku", + "apiModelId": "anthropic/claude-3-haiku" + }, + { + "providerId": "tokenflux", + "modelId": "claude-haiku-4-5", + "apiModelId": "anthropic/claude-haiku-4.5", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1 + }, + "output": { + "currency": "USD", + "perMillionTokens": 5 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.09999999999999999 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "claude-opus-4", + "apiModelId": "anthropic/claude-opus-4" + }, + { + "providerId": "tokenflux", + "modelId": "claude-opus-4-1", + 
"apiModelId": "anthropic/claude-opus-4.1" + }, + { + "providerId": "tokenflux", + "modelId": "claude-opus-4-5", + "apiModelId": "anthropic/claude-opus-4.5", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 25 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.5 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "claude-opus-4-6", + "apiModelId": "anthropic/claude-opus-4.6", + "limits": { + "contextWindow": 1000000 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 25 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.5 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "claude-sonnet-4", + "apiModelId": "anthropic/claude-sonnet-4", + "limits": { + "contextWindow": 1000000 + } + }, + { + "providerId": "tokenflux", + "modelId": "claude-sonnet-4-5", + "apiModelId": "anthropic/claude-sonnet-4.5", + "limits": { + "contextWindow": 1000000 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 15 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.3 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "claude-sonnet-4-6", + "apiModelId": "anthropic/claude-sonnet-4.6", + "limits": { + "contextWindow": 1000000 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 15 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.3 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "deepseek-chat", + "apiModelId": "deepseek/deepseek-chat", + "limits": { + "contextWindow": 163840 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.2 + }, + "cacheRead": { + "currency": 
"USD", + "perMillionTokens": 0.15 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "deepseek-chat-v3-0324", + "apiModelId": "deepseek/deepseek-chat-v3-0324", + "limits": { + "contextWindow": 163840 + } + }, + { + "providerId": "tokenflux", + "modelId": "deepseek-chat-v3-1", + "apiModelId": "deepseek/deepseek-chat-v3.1", + "limits": { + "contextWindow": 32768 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.15 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.75 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "deepseek-r1", + "apiModelId": "deepseek/deepseek-r1", + "limits": { + "contextWindow": 64000 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.7 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2.5 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "deepseek-r1-0528", + "apiModelId": "deepseek/deepseek-r1-0528", + "limits": { + "contextWindow": 163840 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.39999999999999997 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.75 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.19999999999999998 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "deepseek-v3-1-terminus", + "apiModelId": "deepseek/deepseek-v3.1-terminus", + "limits": { + "contextWindow": 163840 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.21 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.7899999999999999 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.1300000002 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "deepseek-v3-2", + "apiModelId": "deepseek/deepseek-v3.2", + "limits": { + "contextWindow": 163840 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.26 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.38 + }, + "cacheRead": { + "currency": "USD", + 
"perMillionTokens": 0.13 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "deepseek-v3-2-exp", + "apiModelId": "deepseek/deepseek-v3.2-exp", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.27 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.41 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "deepseek-v3-2-speciale", + "apiModelId": "deepseek/deepseek-v3.2-speciale" + }, + { + "providerId": "tokenflux", + "modelId": "gemini-2-0-flash-001", + "apiModelId": "google/gemini-2.0-flash-001", + "limits": { + "contextWindow": 1048576 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.09999999999999999 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.39999999999999997 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.024999999999999998 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "gemini-2-0-flash-lite-001", + "apiModelId": "google/gemini-2.0-flash-lite-001", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.075 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.3 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "gemini-2-5-flash", + "apiModelId": "google/gemini-2.5-flash", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2.5 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.03 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "gemini-2-5-flash-image", + "apiModelId": "google/gemini-2.5-flash-image", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2.5 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.03 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "gemini-2-5-flash-lite", + "apiModelId": "google/gemini-2.5-flash-lite", + "pricing": { + "input": { + "currency": "USD", + 
"perMillionTokens": 0.09999999999999999 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.39999999999999997 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.01 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "gemini-2-5-flash-lite-preview-09-2025", + "apiModelId": "google/gemini-2.5-flash-lite-preview-09-2025", + "limits": { + "contextWindow": 1048576 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.09999999999999999 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.39999999999999997 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.01 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "gemini-2-5-flash-preview-09-2025", + "apiModelId": "google/gemini-2.5-flash-preview-09-2025", + "limits": { + "contextWindow": 1048576 + } + }, + { + "providerId": "tokenflux", + "modelId": "gemini-2-5-pro", + "apiModelId": "google/gemini-2.5-pro", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.25 + }, + "output": { + "currency": "USD", + "perMillionTokens": 10 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.125 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "gemini-2-5-pro-preview", + "apiModelId": "google/gemini-2.5-pro-preview" + }, + { + "providerId": "tokenflux", + "modelId": "gemini-2-5-pro-preview-05-06", + "apiModelId": "google/gemini-2.5-pro-preview-05-06", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.25 + }, + "output": { + "currency": "USD", + "perMillionTokens": 10 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.125 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "gemini-3-flash-preview", + "apiModelId": "google/gemini-3-flash-preview", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 3 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 
0.049999999999999996 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "gemini-3-pro-image-preview", + "apiModelId": "google/gemini-3-pro-image-preview", + "limits": { + "contextWindow": 65536 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 12 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.19999999999999998 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "gemini-3-pro-preview", + "apiModelId": "google/gemini-3-pro-preview", + "limits": { + "contextWindow": 1048576 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 12 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.19999999999999998 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "gemma-2-it", + "apiModelId": "google/gemma-2-27b-it", + "modelVariant": "27b", + "limits": { + "contextWindow": 8192 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.65 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.65 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "gemma-2-it", + "apiModelId": "google/gemma-2-9b-it", + "modelVariant": "9b", + "limits": { + "contextWindow": 8192 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.03 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.09 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "gemma-3-it", + "apiModelId": "google/gemma-3-12b-it", + "modelVariant": "12b", + "limits": { + "contextWindow": 131072 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.03 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.09999999999999999 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.015 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "gemma-3-it", + "apiModelId": 
"google/gemma-3-27b-it", + "modelVariant": "27b", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.04 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.15 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.02 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "gemma-3-it", + "apiModelId": "google/gemma-3-4b-it", + "modelVariant": "4b", + "limits": { + "contextWindow": 96000 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.01703012 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.0681536 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "gemma-3n-e4b-it", + "apiModelId": "google/gemma-3n-e4b-it", + "limits": { + "contextWindow": 32768 + } + }, + { + "providerId": "tokenflux", + "modelId": "glm-4", + "apiModelId": "z-ai/glm-4-32b", + "modelVariant": "32b" + }, + { + "providerId": "tokenflux", + "modelId": "glm-4-5", + "apiModelId": "z-ai/glm-4.5", + "limits": { + "contextWindow": 131072 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.35 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.55 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.175 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "glm-4-5-air", + "apiModelId": "z-ai/glm-4.5-air", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.13 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.85 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.024999999999999998 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "glm-4-5v", + "apiModelId": "z-ai/glm-4.5v", + "limits": { + "contextWindow": 65536 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.6 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.7999999999999998 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.11 + } + } + }, + { + "providerId": "tokenflux", + 
"modelId": "glm-4-6", + "apiModelId": "z-ai/glm-4.6", + "limits": { + "contextWindow": 202752 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.33999999999999997 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.7 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.16999999999999998 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "glm-4-6v", + "apiModelId": "z-ai/glm-4.6v", + "limits": { + "contextWindow": 131072 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.8999999999999999 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "glm-4-7", + "apiModelId": "z-ai/glm-4.7", + "limits": { + "contextWindow": 202752 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.38 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.7 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.19 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "glm-4-7-flash", + "apiModelId": "z-ai/glm-4.7-flash", + "limits": { + "contextWindow": 202752 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.06 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.39999999999999997 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.0100000002 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "glm-5", + "apiModelId": "z-ai/glm-5", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2.5500000000000003 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "gpt-3-5-turbo", + "apiModelId": "openai/gpt-3.5-turbo", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.5 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "gpt-3-5-turbo-0613", + 
"apiModelId": "openai/gpt-3.5-turbo-0613", + "limits": { + "contextWindow": 4095 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "gpt-3-5-turbo-16k", + "apiModelId": "openai/gpt-3.5-turbo-16k" + }, + { + "providerId": "tokenflux", + "modelId": "gpt-3-5-turbo-instruct", + "apiModelId": "openai/gpt-3.5-turbo-instruct", + "limits": { + "contextWindow": 4095 + } + }, + { + "providerId": "tokenflux", + "modelId": "gpt-4", + "apiModelId": "openai/gpt-4", + "limits": { + "contextWindow": 8191 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 30 + }, + "output": { + "currency": "USD", + "perMillionTokens": 60 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "gpt-4-0314", + "apiModelId": "openai/gpt-4-0314" + }, + { + "providerId": "tokenflux", + "modelId": "gpt-4-1", + "apiModelId": "openai/gpt-4.1", + "limits": { + "contextWindow": 1047576 + } + }, + { + "providerId": "tokenflux", + "modelId": "gpt-4-1-mini", + "apiModelId": "openai/gpt-4.1-mini", + "limits": { + "contextWindow": 1047576 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.39999999999999997 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.5999999999999999 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.09999999999999999 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "gpt-4-1-nano", + "apiModelId": "openai/gpt-4.1-nano", + "limits": { + "contextWindow": 1047576 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.09999999999999999 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.39999999999999997 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.024999999999999998 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "gpt-4-1106-preview", + "apiModelId": "openai/gpt-4-1106-preview" + }, + { 
+ "providerId": "tokenflux", + "modelId": "gpt-4-turbo", + "apiModelId": "openai/gpt-4-turbo" + }, + { + "providerId": "tokenflux", + "modelId": "gpt-4-turbo-preview", + "apiModelId": "openai/gpt-4-turbo-preview" + }, + { + "providerId": "tokenflux", + "modelId": "gpt-4o", + "apiModelId": "openai/gpt-4o" + }, + { + "providerId": "tokenflux", + "modelId": "gpt-4o-2024-05-13", + "apiModelId": "openai/gpt-4o-2024-05-13", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 15 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "gpt-4o-2024-08-06", + "apiModelId": "openai/gpt-4o-2024-08-06" + }, + { + "providerId": "tokenflux", + "modelId": "gpt-4o-2024-11-20", + "apiModelId": "openai/gpt-4o-2024-11-20" + }, + { + "providerId": "tokenflux", + "modelId": "gpt-4o-audio-preview", + "apiModelId": "openai/gpt-4o-audio-preview" + }, + { + "providerId": "tokenflux", + "modelId": "gpt-4o-mini", + "apiModelId": "openai/gpt-4o-mini", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.15 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.6 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.075 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "gpt-4o-mini-2024-07-18", + "apiModelId": "openai/gpt-4o-mini-2024-07-18" + }, + { + "providerId": "tokenflux", + "modelId": "gpt-4o-mini-search-preview", + "apiModelId": "openai/gpt-4o-mini-search-preview", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.15 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.6 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "gpt-4o-search-preview", + "apiModelId": "openai/gpt-4o-search-preview", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2.5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 10 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "gpt-5", + 
"apiModelId": "openai/gpt-5", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.25 + }, + "output": { + "currency": "USD", + "perMillionTokens": 10 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.125 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "gpt-5-1", + "apiModelId": "openai/gpt-5.1", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.25 + }, + "output": { + "currency": "USD", + "perMillionTokens": 10 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.125 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "gpt-5-1-chat", + "apiModelId": "openai/gpt-5.1-chat" + }, + { + "providerId": "tokenflux", + "modelId": "gpt-5-1-codex", + "apiModelId": "openai/gpt-5.1-codex", + "limits": { + "contextWindow": 400000 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.25 + }, + "output": { + "currency": "USD", + "perMillionTokens": 10 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.125 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "gpt-5-1-codex-max", + "apiModelId": "openai/gpt-5.1-codex-max", + "limits": { + "contextWindow": 400000 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.25 + }, + "output": { + "currency": "USD", + "perMillionTokens": 10 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.125 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "gpt-5-1-codex-mini", + "apiModelId": "openai/gpt-5.1-codex-mini", + "limits": { + "contextWindow": 400000 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.25 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.024999999999999998 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "gpt-5-2", + "apiModelId": "openai/gpt-5.2" + }, + { + "providerId": "tokenflux", + "modelId": "gpt-5-2-chat", + "apiModelId": 
"openai/gpt-5.2-chat", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.75 + }, + "output": { + "currency": "USD", + "perMillionTokens": 14 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.175 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "gpt-5-2-codex", + "apiModelId": "openai/gpt-5.2-codex", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.75 + }, + "output": { + "currency": "USD", + "perMillionTokens": 14 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.175 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "gpt-5-2-pro", + "apiModelId": "openai/gpt-5.2-pro", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 21 + }, + "output": { + "currency": "USD", + "perMillionTokens": 168 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "gpt-5-chat", + "apiModelId": "openai/gpt-5-chat", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.25 + }, + "output": { + "currency": "USD", + "perMillionTokens": 10 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.125 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "gpt-5-codex", + "apiModelId": "openai/gpt-5-codex", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.25 + }, + "output": { + "currency": "USD", + "perMillionTokens": 10 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.125 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "gpt-5-image", + "apiModelId": "openai/gpt-5-image", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 10 + }, + "output": { + "currency": "USD", + "perMillionTokens": 10 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 1.25 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "gpt-5-image-mini", + "apiModelId": "openai/gpt-5-image-mini" + }, + { + "providerId": "tokenflux", + "modelId": "gpt-5-mini", + "apiModelId": 
"openai/gpt-5-mini", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.25 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.024999999999999998 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "gpt-5-nano", + "apiModelId": "openai/gpt-5-nano", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.049999999999999996 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.39999999999999997 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.005 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "gpt-5-pro", + "apiModelId": "openai/gpt-5-pro", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 15 + }, + "output": { + "currency": "USD", + "perMillionTokens": 120 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "gpt-audio", + "apiModelId": "openai/gpt-audio" + }, + { + "providerId": "tokenflux", + "modelId": "gpt-audio-mini", + "apiModelId": "openai/gpt-audio-mini" + }, + { + "providerId": "tokenflux", + "modelId": "gpt-oss", + "apiModelId": "openai/gpt-oss-120b", + "modelVariant": "120b", + "limits": { + "contextWindow": 131072 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.039 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.19 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "gpt-oss", + "apiModelId": "openai/gpt-oss-20b", + "modelVariant": "20b", + "limits": { + "contextWindow": 131072 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.03 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.14 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "gpt-oss-safeguard", + "apiModelId": "openai/gpt-oss-safeguard-20b", + "modelVariant": "20b", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.075 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.3 
+ }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.037 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "grok-3", + "apiModelId": "x-ai/grok-3" + }, + { + "providerId": "tokenflux", + "modelId": "grok-3-beta", + "apiModelId": "x-ai/grok-3-beta" + }, + { + "providerId": "tokenflux", + "modelId": "grok-3-mini", + "apiModelId": "x-ai/grok-3-mini" + }, + { + "providerId": "tokenflux", + "modelId": "grok-3-mini-beta", + "apiModelId": "x-ai/grok-3-mini-beta" + }, + { + "providerId": "tokenflux", + "modelId": "grok-4", + "apiModelId": "x-ai/grok-4" + }, + { + "providerId": "tokenflux", + "modelId": "grok-4-1-fast-reasoning", + "apiModelId": "x-ai/grok-4.1-fast", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.19999999999999998 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.5 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.049999999999999996 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "grok-4-fast-reasoning", + "apiModelId": "x-ai/grok-4-fast", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.19999999999999998 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.5 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.049999999999999996 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "grok-code-fast-1", + "apiModelId": "x-ai/grok-code-fast-1", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.19999999999999998 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.5 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.02 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "kimi-k2", + "apiModelId": "moonshotai/kimi-k2", + "limits": { + "contextWindow": 131072 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.5 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2.4 + } + } + }, + { + "providerId": "tokenflux", + "modelId": 
"kimi-k2", + "apiModelId": "moonshotai/kimi-k2-thinking", + "modelVariant": "thinking", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.39999999999999997 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.75 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.19999999999999998 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "kimi-k2-0905", + "apiModelId": "moonshotai/kimi-k2-0905", + "limits": { + "contextWindow": 131072 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.39999999999999997 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.15 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "kimi-k2-5", + "apiModelId": "moonshotai/kimi-k2.5", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.22999999999999998 + }, + "output": { + "currency": "USD", + "perMillionTokens": 3 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "minimax-01", + "apiModelId": "minimax/minimax-01", + "limits": { + "contextWindow": 1000192 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.19999999999999998 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.1 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "minimax-m1", + "apiModelId": "minimax/minimax-m1", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.39999999999999997 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2.2 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "minimax-m2", + "apiModelId": "minimax/minimax-m2", + "limits": { + "contextWindow": 196608 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.255 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.03 + } + } + }, + { + "providerId": "tokenflux", + "modelId": 
"minimax-m2-1", + "apiModelId": "minimax/minimax-m2.1", + "limits": { + "contextWindow": 196608 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.27 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.95 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.0299999997 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "minimax-m2-5", + "apiModelId": "minimax/minimax-m2.5", + "limits": { + "contextWindow": 196608 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.1 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.15 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "minimax-m2-her", + "apiModelId": "minimax/minimax-m2-her" + }, + { + "providerId": "tokenflux", + "modelId": "morph-v3-fast", + "apiModelId": "morph/morph-v3-fast", + "limits": { + "contextWindow": 81920 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.7999999999999999 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.2 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "morph-v3-large", + "apiModelId": "morph/morph-v3-large", + "limits": { + "contextWindow": 262144 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.8999999999999999 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.9 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "o1", + "apiModelId": "openai/o1" + }, + { + "providerId": "tokenflux", + "modelId": "o1-pro", + "apiModelId": "openai/o1-pro", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 150 + }, + "output": { + "currency": "USD", + "perMillionTokens": 600 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "o3", + "apiModelId": "openai/o3", + "limits": { + "contextWindow": 200000 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 2 + }, + "output": { + 
"currency": "USD", + "perMillionTokens": 8 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.5 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "o3-deep-research", + "apiModelId": "openai/o3-deep-research" + }, + { + "providerId": "tokenflux", + "modelId": "o3-mini", + "apiModelId": "openai/o3-mini", + "limits": { + "contextWindow": 200000 + } + }, + { + "providerId": "tokenflux", + "modelId": "o3-mini", + "apiModelId": "openai/o3-mini-high", + "modelVariant": "high", + "limits": { + "contextWindow": 200000 + } + }, + { + "providerId": "tokenflux", + "modelId": "o3-pro", + "apiModelId": "openai/o3-pro", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 20 + }, + "output": { + "currency": "USD", + "perMillionTokens": 80 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "o4-mini", + "apiModelId": "openai/o4-mini", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.1 + }, + "output": { + "currency": "USD", + "perMillionTokens": 4.4 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.275 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "o4-mini", + "apiModelId": "openai/o4-mini-high", + "modelVariant": "high", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.1 + }, + "output": { + "currency": "USD", + "perMillionTokens": 4.4 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.275 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "o4-mini-deep-research", + "apiModelId": "openai/o4-mini-deep-research" + }, + { + "providerId": "tokenflux", + "modelId": "qwen-2-5-coder-instruct", + "apiModelId": "qwen/qwen-2.5-coder-32b-instruct", + "modelVariant": "32b" + }, + { + "providerId": "tokenflux", + "modelId": "qwen-2-5-instruct", + "apiModelId": "qwen/qwen-2.5-72b-instruct", + "modelVariant": "72b", + "limits": { + "contextWindow": 32768 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.12 + }, + 
"output": { + "currency": "USD", + "perMillionTokens": 0.39 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "qwen-2-5-instruct", + "apiModelId": "qwen/qwen-2.5-7b-instruct", + "modelVariant": "7b", + "limits": { + "contextWindow": 32768 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.04 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.09999999999999999 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "qwen-2-5-vl-instruct", + "apiModelId": "qwen/qwen-2.5-vl-7b-instruct", + "modelVariant": "7b", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.19999999999999998 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.19999999999999998 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "qwen-max", + "apiModelId": "qwen/qwen-max", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.5999999999999999 + }, + "output": { + "currency": "USD", + "perMillionTokens": 6.3999999999999995 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.32 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "qwen-plus", + "apiModelId": "qwen/qwen-plus", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.39999999999999997 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.2 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.08 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "qwen-plus-2025-07-28", + "apiModelId": "qwen/qwen-plus-2025-07-28", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.39999999999999997 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.2 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "qwen-turbo", + "apiModelId": "qwen/qwen-turbo", + "limits": { + "contextWindow": 131072 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.049999999999999996 + }, + "output": { + "currency": "USD", + 
"perMillionTokens": 0.19999999999999998 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.01 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "qwen-vl-max", + "apiModelId": "qwen/qwen-vl-max", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.7999999999999999 + }, + "output": { + "currency": "USD", + "perMillionTokens": 3.1999999999999997 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "qwen-vl-plus", + "apiModelId": "qwen/qwen-vl-plus", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.21 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.63 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.041999999999999996 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "qwen2-5-coder-instruct", + "apiModelId": "qwen/qwen2.5-coder-7b-instruct", + "modelVariant": "7b", + "limits": { + "contextWindow": 32768 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.03 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.09 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "qwen2-5-vl-instruct", + "apiModelId": "qwen/qwen2.5-vl-32b-instruct", + "modelVariant": "32b", + "limits": { + "contextWindow": 16384 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.049999999999999996 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.22 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.024999999999999998 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "qwen2-5-vl-instruct", + "apiModelId": "qwen/qwen2.5-vl-72b-instruct", + "modelVariant": "72b", + "limits": { + "contextWindow": 32768 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.15 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.6 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.075 + } + } + }, + { + "providerId": "tokenflux", + "modelId": 
"qwen3", + "apiModelId": "qwen/qwen3-14b", + "modelVariant": "14b", + "limits": { + "contextWindow": 40960 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.049999999999999996 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.22 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.024999999999999998 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "qwen3", + "apiModelId": "qwen/qwen3-32b", + "modelVariant": "32b", + "limits": { + "contextWindow": 40960 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.08 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.24 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.04 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "qwen3", + "apiModelId": "qwen/qwen3-4b", + "modelVariant": "4b", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.0715 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.273 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "qwen3", + "apiModelId": "qwen/qwen3-8b", + "modelVariant": "8b", + "limits": { + "contextWindow": 32000 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.049999999999999996 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.39999999999999997 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.049999999999999996 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "qwen3-5-a17b", + "apiModelId": "qwen/qwen3.5-397b-a17b", + "modelVariant": "397b", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.15 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.15 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "qwen3-5-plus-02-15", + "apiModelId": "qwen/qwen3.5-plus-02-15", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 
0.39999999999999997 + }, + "output": { + "currency": "USD", + "perMillionTokens": 2.4 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "qwen3-a22b", + "apiModelId": "qwen/qwen3-235b-a22b", + "modelVariant": "235b", + "limits": { + "contextWindow": 40960 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.3 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.2 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.15 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "qwen3-a22b-2507", + "apiModelId": "qwen/qwen3-235b-a22b-2507", + "modelVariant": "235b", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.071 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.09999999999999999 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "qwen3-a22b-thinking-2507", + "apiModelId": "qwen/qwen3-235b-a22b-thinking-2507", + "modelVariant": "235b", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "qwen3-a3b", + "apiModelId": "qwen/qwen3-30b-a3b", + "modelVariant": "30b", + "limits": { + "contextWindow": 40960 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.06 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.22 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.03 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "qwen3-a3b-instruct-2507", + "apiModelId": "qwen/qwen3-30b-a3b-instruct-2507", + "modelVariant": "30b", + "limits": { + "contextWindow": 262144 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.08 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.33 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.04 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "qwen3-a3b-thinking-2507", + 
"apiModelId": "qwen/qwen3-30b-a3b-thinking-2507", + "modelVariant": "30b", + "limits": { + "contextWindow": 32768 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.051 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.33999999999999997 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "qwen3-coder", + "apiModelId": "qwen/qwen3-coder", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.22 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.022 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "qwen3-coder-a3b-instruct", + "apiModelId": "qwen/qwen3-coder-30b-a3b-instruct", + "modelVariant": "30b", + "limits": { + "contextWindow": 160000 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.07 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.27 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "qwen3-coder-flash", + "apiModelId": "qwen/qwen3-coder-flash" + }, + { + "providerId": "tokenflux", + "modelId": "qwen3-coder-next", + "apiModelId": "qwen/qwen3-coder-next", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.12 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.75 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.06 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "qwen3-coder-plus", + "apiModelId": "qwen/qwen3-coder-plus", + "limits": { + "contextWindow": 1000000 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1 + }, + "output": { + "currency": "USD", + "perMillionTokens": 5 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.19999999999999998 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "qwen3-max", + "apiModelId": "qwen/qwen3-max" + }, + { + "providerId": "tokenflux", + "modelId": "qwen3-max", + "apiModelId": "qwen/qwen3-max-thinking", + 
"modelVariant": "thinking", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 1.2 + }, + "output": { + "currency": "USD", + "perMillionTokens": 6 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "qwen3-next-a3b", + "apiModelId": "qwen/qwen3-next-80b-a3b-thinking", + "modelVariant": "80b-thinking", + "limits": { + "contextWindow": 128000 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.15 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.2 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "qwen3-next-a3b-instruct", + "apiModelId": "qwen/qwen3-next-80b-a3b-instruct", + "modelVariant": "80b", + "limits": { + "contextWindow": 262144 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.09 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.1 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "qwen3-vl", + "apiModelId": "qwen/qwen3-vl-8b-thinking", + "modelVariant": "8b-thinking", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.117 + }, + "output": { + "currency": "USD", + "perMillionTokens": 1.365 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "qwen3-vl-a22b", + "apiModelId": "qwen/qwen3-vl-235b-a22b-thinking", + "modelVariant": "235b-thinking", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "qwen3-vl-a22b-instruct", + "apiModelId": "qwen/qwen3-vl-235b-a22b-instruct", + "modelVariant": "235b", + "limits": { + "contextWindow": 262144 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.19999999999999998 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.88 + }, + "cacheRead": { + "currency": "USD", + "perMillionTokens": 0.11 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "qwen3-vl-a3b", + "apiModelId": 
"qwen/qwen3-vl-30b-a3b-thinking", + "modelVariant": "30b-thinking", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "qwen3-vl-a3b-instruct", + "apiModelId": "qwen/qwen3-vl-30b-a3b-instruct", + "modelVariant": "30b", + "limits": { + "contextWindow": 131072 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.13 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.52 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "qwen3-vl-instruct", + "apiModelId": "qwen/qwen3-vl-32b-instruct", + "modelVariant": "32b", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.10400000000000001 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.41600000000000004 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "qwen3-vl-instruct", + "apiModelId": "qwen/qwen3-vl-8b-instruct", + "modelVariant": "8b", + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.08 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.5 + } + } + }, + { + "providerId": "tokenflux", + "modelId": "qwq", + "apiModelId": "qwen/qwq-32b", + "modelVariant": "32b", + "limits": { + "contextWindow": 32768 + }, + "pricing": { + "input": { + "currency": "USD", + "perMillionTokens": 0.15 + }, + "output": { + "currency": "USD", + "perMillionTokens": 0.39999999999999997 + } + } + } + ] +} diff --git a/packages/provider-registry/data/providers.json b/packages/provider-registry/data/providers.json new file mode 100644 index 00000000000..7b0df9964ca --- /dev/null +++ b/packages/provider-registry/data/providers.json @@ -0,0 +1,391 @@ +{ + "version": "2026.03.09", + "providers": [ + { + "id": "cherryin", + "name": "CherryIN", + "description": "CherryIN - AI model provider", + "defaultChatEndpoint": "openai-chat-completions" + }, + { + "id": "silicon", + "name": "Silicon", 
+ "description": "Silicon - AI model provider", + "defaultChatEndpoint": "openai-chat-completions" + }, + { + "id": "aihubmix", + "name": "AiHubMix", + "description": "AiHubMix - AI model provider", + "defaultChatEndpoint": "openai-chat-completions" + }, + { + "id": "ovms", + "name": "OpenVINO Model Server", + "description": "OpenVINO Model Server - AI model provider" + }, + { + "id": "ocoolai", + "name": "ocoolAI", + "description": "ocoolAI - AI model provider", + "defaultChatEndpoint": "openai-chat-completions" + }, + { + "id": "zhipu", + "name": "ZhiPu", + "description": "ZhiPu - AI model provider", + "defaultChatEndpoint": "openai-chat-completions" + }, + { + "id": "deepseek", + "name": "deepseek", + "description": "deepseek - AI model provider", + "defaultChatEndpoint": "openai-chat-completions", + "apiFeatures": { + "arrayContent": false + } + }, + { + "id": "alayanew", + "name": "AlayaNew", + "description": "AlayaNew - AI model provider", + "defaultChatEndpoint": "openai-chat-completions" + }, + { + "id": "dmxapi", + "name": "DMXAPI", + "description": "DMXAPI - AI model provider", + "defaultChatEndpoint": "openai-chat-completions" + }, + { + "id": "aionly", + "name": "AIOnly", + "description": "AIOnly - AI model provider", + "defaultChatEndpoint": "openai-chat-completions" + }, + { + "id": "burncloud", + "name": "BurnCloud", + "description": "BurnCloud - AI model provider", + "defaultChatEndpoint": "openai-chat-completions" + }, + { + "id": "tokenflux", + "name": "TokenFlux", + "description": "TokenFlux - AI model provider", + "defaultChatEndpoint": "openai-chat-completions" + }, + { + "id": "302ai", + "name": "302.AI", + "description": "302.AI - AI model provider", + "defaultChatEndpoint": "openai-chat-completions" + }, + { + "id": "cephalon", + "name": "Cephalon", + "description": "Cephalon - AI model provider", + "defaultChatEndpoint": "openai-chat-completions", + "apiFeatures": { + "arrayContent": false + } + }, + { + "id": "lanyun", + "name": "LANYUN", 
+ "description": "LANYUN - AI model provider", + "defaultChatEndpoint": "openai-chat-completions" + }, + { + "id": "ph8", + "name": "PH8", + "description": "PH8 - AI model provider", + "defaultChatEndpoint": "openai-chat-completions" + }, + { + "id": "sophnet", + "name": "SophNet", + "description": "SophNet - AI model provider", + "defaultChatEndpoint": "openai-chat-completions" + }, + { + "id": "ppio", + "name": "PPIO", + "description": "PPIO - AI model provider", + "defaultChatEndpoint": "openai-chat-completions" + }, + { + "id": "qiniu", + "name": "Qiniu", + "description": "Qiniu - AI model provider", + "defaultChatEndpoint": "openai-chat-completions", + "apiFeatures": { + "developerRole": false + } + }, + { + "id": "openrouter", + "name": "OpenRouter", + "description": "OpenRouter - AI model provider", + "defaultChatEndpoint": "openai-chat-completions" + }, + { + "id": "ollama", + "name": "Ollama", + "description": "Ollama - AI model provider" + }, + { + "id": "new-api", + "name": "New API", + "description": "New API - AI model provider" + }, + { + "id": "lmstudio", + "name": "LM Studio", + "description": "LM Studio - AI model provider" + }, + { + "id": "anthropic", + "name": "Anthropic", + "description": "Anthropic - AI model provider", + "defaultChatEndpoint": "anthropic-messages" + }, + { + "id": "openai", + "name": "OpenAI", + "description": "OpenAI - AI model provider", + "defaultChatEndpoint": "openai-responses", + "apiFeatures": { + "serviceTier": true + } + }, + { + "id": "azure-openai", + "name": "Azure OpenAI", + "description": "Azure OpenAI - AI model provider" + }, + { + "id": "gemini", + "name": "Gemini", + "description": "Gemini - AI model provider", + "defaultChatEndpoint": "google-generate-content" + }, + { + "id": "vertexai", + "name": "VertexAI", + "description": "VertexAI - AI model provider" + }, + { + "id": "github", + "name": "Github Models", + "description": "Github Models - AI model provider", + "defaultChatEndpoint": 
"openai-chat-completions" + }, + { + "id": "copilot", + "name": "Github Copilot", + "description": "Github Copilot - AI model provider", + "defaultChatEndpoint": "openai-chat-completions" + }, + { + "id": "yi", + "name": "Yi", + "description": "Yi - AI model provider", + "defaultChatEndpoint": "openai-chat-completions" + }, + { + "id": "moonshot", + "name": "Moonshot AI", + "description": "Moonshot AI - AI model provider", + "defaultChatEndpoint": "openai-chat-completions" + }, + { + "id": "baichuan", + "name": "BAICHUAN AI", + "description": "BAICHUAN AI - AI model provider", + "defaultChatEndpoint": "openai-chat-completions", + "apiFeatures": { + "arrayContent": false + } + }, + { + "id": "dashscope", + "name": "Bailian", + "description": "Bailian - AI model provider", + "defaultChatEndpoint": "openai-chat-completions" + }, + { + "id": "stepfun", + "name": "StepFun", + "description": "StepFun - AI model provider", + "defaultChatEndpoint": "openai-chat-completions" + }, + { + "id": "doubao", + "name": "doubao", + "description": "doubao - AI model provider", + "defaultChatEndpoint": "openai-chat-completions" + }, + { + "id": "infini", + "name": "Infini", + "description": "Infini - AI model provider", + "defaultChatEndpoint": "openai-chat-completions" + }, + { + "id": "minimax", + "name": "MiniMax", + "description": "MiniMax - AI model provider", + "defaultChatEndpoint": "openai-chat-completions", + "apiFeatures": { + "arrayContent": false + } + }, + { + "id": "groq", + "name": "Groq", + "description": "Groq - AI model provider", + "defaultChatEndpoint": "openai-chat-completions" + }, + { + "id": "together", + "name": "Together", + "description": "Together - AI model provider", + "defaultChatEndpoint": "openai-chat-completions" + }, + { + "id": "fireworks", + "name": "Fireworks", + "description": "Fireworks - AI model provider", + "defaultChatEndpoint": "openai-chat-completions" + }, + { + "id": "nvidia", + "name": "nvidia", + "description": "nvidia - AI model 
provider", + "defaultChatEndpoint": "openai-chat-completions" + }, + { + "id": "grok", + "name": "Grok", + "description": "Grok - AI model provider", + "defaultChatEndpoint": "openai-chat-completions" + }, + { + "id": "hyperbolic", + "name": "Hyperbolic", + "description": "Hyperbolic - AI model provider", + "defaultChatEndpoint": "openai-chat-completions" + }, + { + "id": "mistral", + "name": "Mistral", + "description": "Mistral - AI model provider", + "defaultChatEndpoint": "openai-chat-completions", + "apiFeatures": { + "streamOptions": false + } + }, + { + "id": "jina", + "name": "Jina", + "description": "Jina - AI model provider", + "defaultChatEndpoint": "openai-chat-completions" + }, + { + "id": "perplexity", + "name": "Perplexity", + "description": "Perplexity - AI model provider", + "defaultChatEndpoint": "openai-chat-completions" + }, + { + "id": "modelscope", + "name": "ModelScope", + "description": "ModelScope - AI model provider", + "defaultChatEndpoint": "openai-chat-completions" + }, + { + "id": "xirang", + "name": "Xirang", + "description": "Xirang - AI model provider", + "defaultChatEndpoint": "openai-chat-completions", + "apiFeatures": { + "arrayContent": false + } + }, + { + "id": "hunyuan", + "name": "hunyuan", + "description": "hunyuan - AI model provider", + "defaultChatEndpoint": "openai-chat-completions" + }, + { + "id": "tencent-cloud-ti", + "name": "Tencent Cloud TI", + "description": "Tencent Cloud TI - AI model provider", + "defaultChatEndpoint": "openai-chat-completions" + }, + { + "id": "baidu-cloud", + "name": "Baidu Cloud", + "description": "Baidu Cloud - AI model provider", + "defaultChatEndpoint": "openai-chat-completions" + }, + { + "id": "gpustack", + "name": "GPUStack", + "description": "GPUStack - AI model provider" + }, + { + "id": "voyageai", + "name": "VoyageAI", + "description": "VoyageAI - AI model provider", + "defaultChatEndpoint": "openai-chat-completions" + }, + { + "id": "aws-bedrock", + "name": "AWS Bedrock", + 
"description": "AWS Bedrock - AI model provider" + }, + { + "id": "poe", + "name": "Poe", + "description": "Poe - AI model provider", + "defaultChatEndpoint": "openai-chat-completions", + "apiFeatures": { + "arrayContent": false, + "developerRole": false + } + }, + { + "id": "longcat", + "name": "LongCat", + "description": "LongCat - AI model provider", + "defaultChatEndpoint": "openai-chat-completions" + }, + { + "id": "huggingface", + "name": "Hugging Face", + "description": "Hugging Face - AI model provider", + "defaultChatEndpoint": "openai-responses" + }, + { + "id": "gateway", + "name": "Vercel AI Gateway", + "description": "Vercel AI Gateway - AI model provider", + "defaultChatEndpoint": "openai-chat-completions" + }, + { + "id": "cerebras", + "name": "Cerebras AI", + "description": "Cerebras AI - AI model provider", + "defaultChatEndpoint": "openai-chat-completions" + }, + { + "id": "mimo", + "name": "Xiaomi MiMo", + "description": "Xiaomi MiMo - AI model provider", + "defaultChatEndpoint": "openai-chat-completions" + } + ] +} diff --git a/packages/provider-registry/package.json b/packages/provider-registry/package.json new file mode 100644 index 00000000000..88bf5438575 --- /dev/null +++ b/packages/provider-registry/package.json @@ -0,0 +1,67 @@ +{ + "name": "@cherrystudio/provider-registry", + "version": "0.0.1-alpha.1", + "description": "Provider and Model Registry", + "main": "dist/index.js", + "module": "dist/index.mjs", + "types": "dist/index.d.ts", + "packageManager": "pnpm@10.27.0", + "scripts": { + "build": "tsdown", + "dev": "tsc -w", + "clean": "rm -rf dist", + "test": "vitest run", + "test:watch": "vitest", + "import:modelsdev": "tsx scripts/import-modelsdev.ts", + "import:aihubmix": "tsx scripts/import-aihubmix.ts", + "import:openrouter": "tsx scripts/import-openrouter.ts", + "import:all": "pnpm import:modelsdev && pnpm import:aihubmix && pnpm import:openrouter", + "sync:all": "tsx scripts/generate-providers.ts", + "generate:provider-models": 
"tsx scripts/generate-provider-models.ts", + "populate:reasoning": "tsx scripts/populate-reasoning-data.ts", + "pipeline": "pnpm import:all && pnpm sync:all && pnpm generate:provider-models && pnpm populate:reasoning" + }, + "author": "Cherry Studio", + "license": "MIT", + "files": [ + "dist/**/*" + ], + "repository": { + "type": "git", + "url": "git+https://github.com/CherryHQ/cherry-studio.git" + }, + "bugs": { + "url": "https://github.com/CherryHQ/cherry-studio/issues" + }, + "homepage": "https://github.com/CherryHQ/cherry-studio#readme", + "exports": { + ".": { + "types": "./dist/index.d.ts", + "react-native": "./dist/index.js", + "import": "./dist/index.mjs", + "require": "./dist/index.js", + "default": "./dist/index.js" + }, + "./node": { + "types": "./src/registry-reader.ts", + "import": "./dist/registry-reader.mjs", + "require": "./dist/registry-reader.js", + "default": "./dist/registry-reader.js" + } + }, + "devDependencies": { + "@types/json-schema": "^7.0.15", + "@types/node": "^24.10.2", + "dotenv": "^17.2.3", + "tsdown": "^0.16.6", + "typescript": "^5.9.3", + "vitest": "^4.0.13", + "zod": "^4.1.12" + }, + "peerDependencies": {}, + "dependencies": { + "class-variance-authority": "^0.7.1", + "json-schema": "^0.4.0", + "lucide-react": "^0.563.0" + } +} diff --git a/packages/provider-registry/src/index.ts b/packages/provider-registry/src/index.ts new file mode 100644 index 00000000000..07fd782b604 --- /dev/null +++ b/packages/provider-registry/src/index.ts @@ -0,0 +1,43 @@ +/** + * Cherry Studio Registry + * Main entry point for the model and provider registry system + */ + +// Enums (canonical source of truth) +export { + AnthropicReasoningEffort, + Currency, + ENDPOINT_TYPE, + EndpointType, + MODALITY, + Modality, + MODEL_CAPABILITY, + ModelCapability, + objectValues, + OpenAIReasoningEffort, + ReasoningEffort +} from './schemas/enums' + +// Schema-inferred types (replaces proto types) +export type { + ModelConfig, + ModelPricing, + ModelConfig as 
ProtoModelConfig, + ModelPricing as ProtoModelPricing, + ReasoningSupport as ProtoReasoningSupport, + ReasoningSupport +} from './schemas/model' +export type { + ProviderConfig as ProtoProviderConfig, + ProviderReasoningFormat as ProtoProviderReasoningFormat, + ProviderConfig, + ProviderReasoningFormat, + RegistryEndpointConfig +} from './schemas/provider' +export type { + ProviderModelOverride as ProtoProviderModelOverride, + ProviderModelOverride +} from './schemas/provider-models' + +// Model ID normalization utilities +export { normalizeModelId } from './utils/importers/base/base-transformer' diff --git a/packages/provider-registry/src/registry-reader.ts b/packages/provider-registry/src/registry-reader.ts new file mode 100644 index 00000000000..c8f07b00539 --- /dev/null +++ b/packages/provider-registry/src/registry-reader.ts @@ -0,0 +1,32 @@ +/** + * Read-only registry reader for JSON files. + * + * Reads JSON registry data and validates against Zod schemas. + */ + +import { readFileSync } from 'node:fs' + +import type { ModelConfig } from './schemas/model' +import { ModelListSchema } from './schemas/model' +import type { ProviderConfig } from './schemas/provider' +import { ProviderListSchema } from './schemas/provider' +import type { ProviderModelOverride } from './schemas/provider-models' +import { ProviderModelListSchema } from './schemas/provider-models' + +export function readModelRegistry(jsonPath: string): { version: string; models: ModelConfig[] } { + const data = JSON.parse(readFileSync(jsonPath, 'utf-8')) + const registry = ModelListSchema.parse(data) + return { version: registry.version, models: registry.models } +} + +export function readProviderRegistry(jsonPath: string): { version: string; providers: ProviderConfig[] } { + const data = JSON.parse(readFileSync(jsonPath, 'utf-8')) + const registry = ProviderListSchema.parse(data) + return { version: registry.version, providers: registry.providers } +} + +export function 
readProviderModelRegistry(jsonPath: string): { version: string; overrides: ProviderModelOverride[] } { + const data = JSON.parse(readFileSync(jsonPath, 'utf-8')) + const registry = ProviderModelListSchema.parse(data) + return { version: registry.version, overrides: registry.overrides } +} diff --git a/packages/provider-registry/src/schemas/common.ts b/packages/provider-registry/src/schemas/common.ts new file mode 100644 index 00000000000..459a2e39625 --- /dev/null +++ b/packages/provider-registry/src/schemas/common.ts @@ -0,0 +1,60 @@ +/** + * Common type definitions for the registry system + * Shared across model, provider, and override schemas + */ + +import * as z from 'zod' + +import { Currency, objectValues } from './enums' + +// Common string types for reuse +export const ModelIdSchema = z.string() +export const ProviderIdSchema = z.string() + +/** Version string (e.g., "2026-03-09" or "2026.03.09") */ +export const VersionSchema = z.string().regex(/^\d{4}[-./]\d{2}[-./]\d{2}$/, { + message: 'Version must be a date-like string (e.g., YYYY-MM-DD or YYYY.MM.DD)' +}) + +/** ISO 8601 datetime timestamp */ +export const ISOTimestampSchema = z.iso.datetime() + +// Range helper schemas +export const NumericRangeSchema = z + .object({ + min: z.number(), + max: z.number() + }) + .refine((r) => r.min <= r.max, { + message: 'min must be less than or equal to max' + }) + +export const StringRangeSchema = z.object({ + min: z.string(), + max: z.string() +}) + +// Supported currencies for pricing +export const ZodCurrencySchema = z.enum(objectValues(Currency)).optional() + +// Price per token schema +// Default currency is USD if not specified +// Allow null for perMillionTokens to handle incomplete pricing data from APIs +export const PricePerTokenSchema = z.object({ + perMillionTokens: z.number().nonnegative().nullable(), + currency: ZodCurrencySchema +}) + +// Generic metadata schema +export const MetadataSchema = z.record(z.string(), z.unknown()).optional() + +// Type 
exports +export type ModelId = z.infer<typeof ModelIdSchema> +export type ProviderId = z.infer<typeof ProviderIdSchema> +export type Version = z.infer<typeof VersionSchema> +export type ISOTimestamp = z.infer<typeof ISOTimestampSchema> +export type NumericRange = z.infer<typeof NumericRangeSchema> +export type StringRange = z.infer<typeof StringRangeSchema> +export type ZodCurrency = z.infer<typeof ZodCurrencySchema> +export type PricePerToken = z.infer<typeof PricePerTokenSchema> +export type Metadata = z.infer<typeof MetadataSchema> diff --git a/packages/provider-registry/src/schemas/enums.ts b/packages/provider-registry/src/schemas/enums.ts new file mode 100644 index 00000000000..5a453f63c4e --- /dev/null +++ b/packages/provider-registry/src/schemas/enums.ts @@ -0,0 +1,132 @@ +/** + * Canonical enum definitions for the registry system. + * + * These are the SINGLE SOURCE OF TRUTH for all enum types. + * Uses `as const` objects with kebab-case string values for debuggability. + * + * - registry/schemas/ uses these via z.enum() + * - shared/data/types/ re-exports these directly + */ + +// ───────────────────────────────────────────────────────────────────────────── +// EndpointType +// ───────────────────────────────────────────────────────────────────────────── + +export const EndpointType = { + OPENAI_CHAT_COMPLETIONS: 'openai-chat-completions', + OPENAI_TEXT_COMPLETIONS: 'openai-text-completions', + ANTHROPIC_MESSAGES: 'anthropic-messages', + OPENAI_RESPONSES: 'openai-responses', + GOOGLE_GENERATE_CONTENT: 'google-generate-content', + OLLAMA_CHAT: 'ollama-chat', + OLLAMA_GENERATE: 'ollama-generate', + OPENAI_EMBEDDINGS: 'openai-embeddings', + JINA_RERANK: 'jina-rerank', + OPENAI_IMAGE_GENERATION: 'openai-image-generation', + OPENAI_IMAGE_EDIT: 'openai-image-edit', + OPENAI_AUDIO_TRANSCRIPTION: 'openai-audio-transcription', + OPENAI_AUDIO_TRANSLATION: 'openai-audio-translation', + OPENAI_TEXT_TO_SPEECH: 'openai-text-to-speech', + OPENAI_VIDEO_GENERATION: 'openai-video-generation' +} as const +export type EndpointType = (typeof EndpointType)[keyof typeof EndpointType] + +// ───────────────────────────────────────────────────────────────────────────── +// ModelCapability +// 
───────────────────────────────────────────────────────────────────────────── + +export const ModelCapability = { + FUNCTION_CALL: 'function-call', + REASONING: 'reasoning', + IMAGE_RECOGNITION: 'image-recognition', + IMAGE_GENERATION: 'image-generation', + AUDIO_RECOGNITION: 'audio-recognition', + AUDIO_GENERATION: 'audio-generation', + EMBEDDING: 'embedding', + RERANK: 'rerank', + AUDIO_TRANSCRIPT: 'audio-transcript', + VIDEO_RECOGNITION: 'video-recognition', + VIDEO_GENERATION: 'video-generation', + STRUCTURED_OUTPUT: 'structured-output', + FILE_INPUT: 'file-input', + WEB_SEARCH: 'web-search', + CODE_EXECUTION: 'code-execution', + FILE_SEARCH: 'file-search', + COMPUTER_USE: 'computer-use' +} as const +export type ModelCapability = (typeof ModelCapability)[keyof typeof ModelCapability] + +// ───────────────────────────────────────────────────────────────────────────── +// Modality +// ───────────────────────────────────────────────────────────────────────────── + +export const Modality = { + TEXT: 'text', + IMAGE: 'image', + AUDIO: 'audio', + VIDEO: 'video', + VECTOR: 'vector' +} as const +export type Modality = (typeof Modality)[keyof typeof Modality] + +// ───────────────────────────────────────────────────────────────────────────── +// Currency +// ───────────────────────────────────────────────────────────────────────────── + +// Uses uppercase ISO 4217 codes (not kebab-case) — intentional exception +export const Currency = { + USD: 'USD', + CNY: 'CNY' +} as const +export type Currency = (typeof Currency)[keyof typeof Currency] + +// ───────────────────────────────────────────────────────────────────────────── +// ReasoningEffort +// ───────────────────────────────────────────────────────────────────────────── + +export const ReasoningEffort = { + NONE: 'none', + MINIMAL: 'minimal', + LOW: 'low', + MEDIUM: 'medium', + HIGH: 'high', + MAX: 'max', + AUTO: 'auto' +} as const +export type ReasoningEffort = (typeof ReasoningEffort)[keyof typeof ReasoningEffort] + 
+// ───────────────────────────────────────────────────────────────────────────── +// Provider-specific reasoning effort enums +// ───────────────────────────────────────────────────────────────────────────── + +export const OpenAIReasoningEffort = { + LOW: 'low', + MEDIUM: 'medium', + HIGH: 'high' +} as const +export type OpenAIReasoningEffort = (typeof OpenAIReasoningEffort)[keyof typeof OpenAIReasoningEffort] + +export const AnthropicReasoningEffort = { + LOW: 'low', + MEDIUM: 'medium', + HIGH: 'high', + MAX: 'max' +} as const +export type AnthropicReasoningEffort = (typeof AnthropicReasoningEffort)[keyof typeof AnthropicReasoningEffort] + +// ───────────────────────────────────────────────────────────────────────────── +// Backward-compatible aliases +// ───────────────────────────────────────────────────────────────────────────── + +export const ENDPOINT_TYPE = EndpointType +export const MODEL_CAPABILITY = ModelCapability +export const MODALITY = Modality + +// ───────────────────────────────────────────────────────────────────────────── +// Utility +// ───────────────────────────────────────────────────────────────────────────── + +/** Extract the value tuple from a const object for use with z.enum(). 
*/ +export function objectValues<T extends Record<string, string>>(obj: T): [T[keyof T], ...T[keyof T][]] { + return Object.values(obj) as [T[keyof T], ...T[keyof T][]] +} diff --git a/packages/provider-registry/src/schemas/index.ts b/packages/provider-registry/src/schemas/index.ts new file mode 100644 index 00000000000..cbee2e2a335 --- /dev/null +++ b/packages/provider-registry/src/schemas/index.ts @@ -0,0 +1,19 @@ +/** + * Unified export of all registry schemas and types + * This file provides a single entry point for all schema definitions + */ + +// Export canonical const-object definitions and utilities +export * from './enums' + +// Export all schemas from common types +export * from './common' + +// Export model schemas +export * from './model' + +// Export provider schemas +export * from './provider' + +// Export provider-model mapping schemas +export * from './provider-models' diff --git a/packages/provider-registry/src/schemas/model.ts b/packages/provider-registry/src/schemas/model.ts new file mode 100644 index 00000000000..0978235e37d --- /dev/null +++ b/packages/provider-registry/src/schemas/model.ts @@ -0,0 +1,179 @@ +/** + * Model configuration schema definitions + * Defines the structure for model metadata, capabilities, and configurations + */ + +import * as z from 'zod' + +import { + MetadataSchema, + ModelIdSchema, + NumericRangeSchema, + PricePerTokenSchema, + VersionSchema, + ZodCurrencySchema +} from './common' +import { Modality, ModelCapability, objectValues, ReasoningEffort } from './enums' + +export const ModalitySchema = z.enum(objectValues(Modality)) +export type ModalityType = z.infer<typeof ModalitySchema> + +export const ModelCapabilityTypeSchema = z.enum(objectValues(ModelCapability)) +export type ModelCapabilityType = z.infer<typeof ModelCapabilityTypeSchema> + +// Thinking token limits schema (shared across reasoning types) +export const ThinkingTokenLimitsSchema = z.object({ + min: z.number().nonnegative().optional(), + max: z.number().positive().optional(), + default: z.number().nonnegative().optional() +}) + +/** 
Reasoning effort levels shared across providers */ +export const ReasoningEffortSchema = z.enum(objectValues(ReasoningEffort)) + +// Common reasoning fields shared across all reasoning type variants +// Exported for shared/runtime types to reuse +export const CommonReasoningFieldsSchema = { + thinkingTokenLimits: ThinkingTokenLimitsSchema.optional(), + supportedEfforts: z.array(ReasoningEffortSchema).optional(), + interleaved: z.boolean().optional() +} + +/** + * Reasoning support schema — describes model-level reasoning capabilities. + * + * This only captures WHAT the model supports (effort levels, token limits). + * HOW to invoke reasoning is defined by the provider's reasoning format + * (see provider.ts ProviderReasoningFormatSchema). + */ +export const ReasoningSupportSchema = z.object({ + ...CommonReasoningFieldsSchema +}) + +// Parameter support configuration +export const ParameterSupportSchema = z.object({ + temperature: z + .object({ + supported: z.boolean(), + range: NumericRangeSchema.optional() + }) + .optional(), + + topP: z + .object({ + supported: z.boolean(), + range: NumericRangeSchema.optional() + }) + .optional(), + + topK: z + .object({ + supported: z.boolean(), + range: NumericRangeSchema.optional() + }) + .optional(), + + frequencyPenalty: z.boolean().optional(), + presencePenalty: z.boolean().optional(), + maxTokens: z.boolean().optional(), + stopSequences: z.boolean().optional(), + systemMessage: z.boolean().optional() +}) + +// Model pricing configuration +export const ModelPricingSchema = z.object({ + input: PricePerTokenSchema, + output: PricePerTokenSchema, + + cacheRead: PricePerTokenSchema.optional(), + cacheWrite: PricePerTokenSchema.optional(), + + perImage: z + .object({ + price: z.number(), + currency: ZodCurrencySchema, + unit: z.enum(['image', 'pixel']).optional() + }) + .optional(), + + perMinute: z + .object({ + price: z.number(), + currency: ZodCurrencySchema + }) + .optional() +}) + +// Model configuration schema +export 
const ModelConfigSchema = z.object({ + // Basic information + id: ModelIdSchema, + name: z.string().optional(), + description: z.string().optional(), + + // Capabilities + capabilities: z + .array(ModelCapabilityTypeSchema) + .refine((arr) => new Set(arr).size === arr.length, { + message: 'Capabilities must be unique' + }) + .optional(), + + // Modalities + inputModalities: z + .array(ModalitySchema) + .refine((arr) => new Set(arr).size === arr.length, { + message: 'Input modalities must be unique' + }) + .optional(), + outputModalities: z + .array(ModalitySchema) + .refine((arr) => new Set(arr).size === arr.length, { + message: 'Output modalities must be unique' + }) + .optional(), + + // Limits + contextWindow: z.number().optional(), + maxOutputTokens: z.number().optional(), + maxInputTokens: z.number().optional(), + + // Pricing + pricing: ModelPricingSchema.optional(), + + // Reasoning support (model capabilities only, no provider-specific params) + reasoning: ReasoningSupportSchema.optional(), + + // Parameter support + parameterSupport: ParameterSupportSchema.optional(), + + // Model family (e.g., "GPT-4", "Claude 3") + family: z.string().optional(), + + // Original creator of the model (e.g., "anthropic", "google", "openai") + // This is the original publisher/creator, not the aggregator that hosts the model + ownedBy: z.string().optional(), + + // Whether the model has open weights (from models.dev) + openWeights: z.boolean().optional(), + + // Date version variants (same capabilities, different snapshots) + // Example: gpt-4-turbo's variants: ["gpt-4-turbo-2024-04-09", "gpt-4-turbo-2024-01-25"] + alias: z.array(ModelIdSchema).optional(), + + // Additional metadata + metadata: MetadataSchema +}) + +// Model list container schema for JSON files +export const ModelListSchema = z.object({ + version: VersionSchema, + models: z.array(ModelConfigSchema) +}) + +export type ThinkingTokenLimits = z.infer<typeof ThinkingTokenLimitsSchema> +export type ReasoningSupport = z.infer<typeof ReasoningSupportSchema> +export type 
ParameterSupport = z.infer<typeof ParameterSupportSchema> +export type ModelPricing = z.infer<typeof ModelPricingSchema> +export type ModelConfig = z.infer<typeof ModelConfigSchema> +export type ModelList = z.infer<typeof ModelListSchema> diff --git a/packages/provider-registry/src/schemas/provider-models.ts b/packages/provider-registry/src/schemas/provider-models.ts new file mode 100644 index 00000000000..30467532f7e --- /dev/null +++ b/packages/provider-registry/src/schemas/provider-models.ts @@ -0,0 +1,87 @@ +/** + * Provider-Model mapping schema definitions + * Defines how providers can override specific model configurations + * + * This file was renamed from override.ts for clearer semantics + */ + +import * as z from 'zod' + +import { ModelIdSchema, ProviderIdSchema, VersionSchema } from './common' +import { + ModalitySchema, + ModelCapabilityTypeSchema, + ModelPricingSchema, + ParameterSupportSchema, + ReasoningSupportSchema +} from './model' +import { EndpointTypeSchema } from './provider' + +export const CapabilityOverrideSchema = z.object({ + add: z.array(ModelCapabilityTypeSchema).optional(), // Add capabilities + remove: z.array(ModelCapabilityTypeSchema).optional(), // Remove capabilities + force: z.array(ModelCapabilityTypeSchema).optional() // Force set capabilities (ignore base config) +}) + +// ═══════════════════════════════════════════════════════════════════════════════ +// Provider-Model Override Schema +// ═══════════════════════════════════════════════════════════════════════════════ + +export const ProviderModelOverrideSchema = z.object({ + // Identification + providerId: ProviderIdSchema, + modelId: ModelIdSchema, // Canonical/normalized ID (references models.json) + + // API Model ID - The actual ID used when calling the provider's API + // This preserves the original provider-specific ID format + // Examples: + // - OpenRouter: "anthropic/claude-3-5-sonnet" + // - AIHubMix: "claude-3-5-sonnet" + // - Vertex AI: "global.anthropic.claude-3-5-sonnet-v1:0" + // If not set, modelId is used for API calls + apiModelId: z.string().optional(), + + // 
Variant identification (for same model with different variants) + // Used to distinguish variants like :free, :thinking, -search + // providerId + modelId + modelVariant forms the unique identifier + modelVariant: z.string().optional(), // 'free', 'thinking', 'nitro', 'search', etc. + + // Override configuration + capabilities: CapabilityOverrideSchema.optional(), + limits: z + .object({ + contextWindow: z.number().optional(), + maxOutputTokens: z.number().optional(), + maxInputTokens: z.number().optional() + }) + .optional(), + pricing: ModelPricingSchema.partial().optional(), + reasoning: ReasoningSupportSchema.optional(), + parameterSupport: ParameterSupportSchema.partial().optional(), + + // Endpoint type overrides (when model uses different endpoints than provider default) + endpointTypes: z.array(EndpointTypeSchema).optional(), + + // Modality overrides (when provider supports different modalities than base model) + inputModalities: z.array(ModalitySchema).optional(), + outputModalities: z.array(ModalitySchema).optional(), + + // Status control + disabled: z.boolean().optional(), + replaceWith: ModelIdSchema.optional(), + + // Metadata + reason: z.string().optional(), + priority: z.number().default(0) +}) + +// Container schema for JSON files +export const ProviderModelListSchema = z.object({ + version: VersionSchema, + overrides: z.array(ProviderModelOverrideSchema) +}) + +// Type exports +export type CapabilityOverride = z.infer<typeof CapabilityOverrideSchema> +export type ProviderModelOverride = z.infer<typeof ProviderModelOverrideSchema> +export type ProviderModelList = z.infer<typeof ProviderModelListSchema> diff --git a/packages/provider-registry/src/schemas/provider.ts b/packages/provider-registry/src/schemas/provider.ts new file mode 100644 index 00000000000..1036ec2e307 --- /dev/null +++ b/packages/provider-registry/src/schemas/provider.ts @@ -0,0 +1,238 @@ +/** + * Provider configuration schema definitions + * Defines the structure for provider connections and API configurations + */ + +import * as z from 'zod' + +import { MetadataSchema, 
ProviderIdSchema, VersionSchema } from './common' +import { EndpointType, objectValues, ReasoningEffort } from './enums' +import { CommonReasoningFieldsSchema } from './model' + +export const EndpointTypeSchema = z.enum(objectValues(EndpointType)) + +// ═══════════════════════════════════════════════════════════════════════════════ +// API Features +// ═══════════════════════════════════════════════════════════════════════════════ + +/** API feature flags controlling request construction at the SDK level */ +export const ApiFeaturesSchema = z.object({ + // --- Request format flags --- + + /** Whether the provider supports array-formatted content in messages */ + arrayContent: z.boolean().optional(), + /** Whether the provider supports stream_options for usage data */ + streamOptions: z.boolean().optional(), + + // --- Provider-specific parameter flags --- + + /** Whether the provider supports the 'developer' role (OpenAI-specific) */ + developerRole: z.boolean().optional(), + /** Whether the provider supports service tier selection (OpenAI/Groq-specific) */ + serviceTier: z.boolean().optional(), + /** Whether the provider supports verbosity settings (Gemini-specific) */ + verbosity: z.boolean().optional(), + /** Whether the provider supports enable_thinking parameter */ + enableThinking: z.boolean().optional() +}) + +// ═══════════════════════════════════════════════════════════════════════════════ +// Provider Reasoning Format +// +// Describes HOW a provider's API expects reasoning parameters to be formatted. +// This is a provider-level concern — model-level reasoning capabilities +// (effort levels, token limits) are in model.ts ReasoningSupportSchema. 
+// ═══════════════════════════════════════════════════════════════════════════════ + +const ReasoningEffortSchema = z.enum(objectValues(ReasoningEffort)) + +/** Provider reasoning format — discriminated union by format type */ +export const ProviderReasoningFormatSchema = z.discriminatedUnion('type', [ + z.object({ + type: z.literal('openai-chat'), + params: z + .object({ + reasoningEffort: ReasoningEffortSchema.optional() + }) + .optional() + }), + z.object({ + type: z.literal('openai-responses'), + params: z + .object({ + reasoning: z.object({ + effort: ReasoningEffortSchema.optional(), + summary: z.enum(['auto', 'concise', 'detailed']).optional() + }) + }) + .optional() + }), + z.object({ + type: z.literal('anthropic'), + params: z + .object({ + type: z.union([z.literal('enabled'), z.literal('disabled'), z.literal('adaptive')]), + budgetTokens: z.number().optional(), + effort: ReasoningEffortSchema.optional() + }) + .optional() + }), + z.object({ + type: z.literal('gemini'), + params: z + .union([ + z + .object({ + thinkingConfig: z.object({ + includeThoughts: z.boolean().optional(), + thinkingBudget: z.number().optional() + }) + }) + .optional(), + z + .object({ + thinkingLevel: z.enum(['minimal', 'low', 'medium', 'high']).optional() + }) + .optional() + ]) + .optional() + }), + z.object({ + type: z.literal('openrouter'), + params: z + .object({ + reasoning: z + .object({ + effort: z + .union([ + z.literal('none'), + z.literal('minimal'), + z.literal('low'), + z.literal('medium'), + z.literal('high') + ]) + .optional(), + maxTokens: z.number().optional(), + exclude: z.boolean().optional() + }) + .refine( + (v) => v.effort == null || v.maxTokens == null, + 'Only one of effort or maxTokens can be specified, not both' + ) + }) + .optional() + }), + z.object({ + type: z.literal('enable-thinking'), + params: z + .object({ + enableThinking: z.boolean(), + thinkingBudget: z.number().optional() + }) + .optional(), + ...CommonReasoningFieldsSchema + }), + z.object({ + 
type: z.literal('thinking-type'), + params: z + .object({ + thinking: z.object({ + type: z.union([z.literal('enabled'), z.literal('disabled'), z.literal('auto')]) + }) + }) + .optional() + }), + z.object({ + type: z.literal('dashscope'), + params: z + .object({ + enableThinking: z.boolean(), + incrementalOutput: z.boolean().optional() + }) + .optional() + }), + z.object({ + type: z.literal('self-hosted'), + params: z + .object({ + chatTemplateKwargs: z.object({ + enableThinking: z.boolean().optional(), + thinking: z.boolean().optional() + }) + }) + .optional() + }) +]) + +// ═══════════════════════════════════════════════════════════════════════════════ +// Provider Config +// ═══════════════════════════════════════════════════════════════════════════════ + +export const ProviderWebsiteSchema = z.object({ + website: z.object({ + official: z.url().optional(), + docs: z.url().optional(), + apiKey: z.url().optional(), + models: z.url().optional() + }) +}) + +/** Per-endpoint-type configuration in registry */ +export const RegistryEndpointConfigSchema = z.object({ + /** Base URL for this endpoint type's API */ + baseUrl: z.url().optional(), + /** URLs for fetching available models via this endpoint type */ + modelsApiUrls: z + .object({ + /** Default models listing endpoint */ + default: z.url().optional(), + /** Embedding models listing endpoint (if separate from default) */ + embedding: z.url().optional(), + /** Reranker models listing endpoint (if separate from default) */ + reranker: z.url().optional() + }) + .optional(), + /** How this endpoint type expects reasoning parameters to be formatted */ + reasoningFormat: ProviderReasoningFormatSchema.optional() +}) + +export const ProviderConfigSchema = z + .object({ + /** Unique provider identifier */ + id: ProviderIdSchema, + /** Display name */ + name: z.string(), + /** Provider description */ + description: z.string().optional(), + /** Per-endpoint-type configuration */ + endpointConfigs: 
z.record(EndpointTypeSchema, RegistryEndpointConfigSchema).optional(), + /** Default endpoint type for chat requests (must be a key of endpointConfigs when endpointConfigs is provided) */ + defaultChatEndpoint: EndpointTypeSchema.optional(), + /** API feature flags controlling request construction */ + apiFeatures: ApiFeaturesSchema.optional(), + /** Additional metadata; must contain a `website` object because of the intersection with ProviderWebsiteSchema */ + metadata: MetadataSchema.and(ProviderWebsiteSchema) + }) + .refine( + (data) => { + if (data.defaultChatEndpoint && data.endpointConfigs) { + return data.defaultChatEndpoint in data.endpointConfigs + } + return true + }, + { + message: 'defaultChatEndpoint must exist as a key in endpointConfigs' + } + ) + +export const ProviderListSchema = z.object({ + version: VersionSchema, + providers: z.array(ProviderConfigSchema) +}) + +export { ENDPOINT_TYPE } from './enums' +export type ApiFeatures = z.infer<typeof ApiFeaturesSchema> +export type ProviderReasoningFormat = z.infer<typeof ProviderReasoningFormatSchema> +export type RegistryEndpointConfig = z.infer<typeof RegistryEndpointConfigSchema> +export type ProviderConfig = z.infer<typeof ProviderConfigSchema> +export type ProviderList = z.infer<typeof ProviderListSchema> diff --git a/packages/provider-registry/src/utils/importers/base/base-transformer.ts b/packages/provider-registry/src/utils/importers/base/base-transformer.ts new file mode 100644 index 00000000000..f906f421f98 --- /dev/null +++ b/packages/provider-registry/src/utils/importers/base/base-transformer.ts @@ -0,0 +1,857 @@ +/** + * Base transformer interface and OpenAI-compatible base class + * Provides structure for transforming provider API responses to internal ModelConfig + */ + +import type { ModelCapabilityType, ModelConfig } from '../../../schemas' +import { MODALITY, type Modality, MODEL_CAPABILITY } from '../../../schemas/enums' + +/** + * Generic transformer interface + */ +export interface ITransformer<TInput = any> { + /** + * Transform API model to internal ModelConfig + */ + transform(apiModel: TInput): ModelConfig + + /** + * Optional: Validate API response structure + */ + validate?(response: any): boolean + + /** + * Optional: Extract models array 
from response + */ + extractModels?(response: any): TInput[] +} + +/** + * Known model ID patterns to original publisher mapping + * Used by all transformers to determine the original model creator + */ +export const MODEL_TO_PUBLISHER: [RegExp, string][] = [ + // Anthropic Claude models + [/^claude/, 'anthropic'], + // OpenAI models (including text-embedding-ada, text-embedding-3-*) + [/^(gpt-|o1|o3|o4|chatgpt|dall-e|whisper|tts-|sora|text-embedding-ada|text-embedding-3|babbage|davinci)/, 'openai'], + // Google models (including text-embedding-004, text-embedding-005) + [/^(gemini|palm|gemma|veo|imagen|learnlm|text-embedding-00|text-multilingual-embedding-00|nano-banana)/, 'google'], + // Alibaba/Qwen models (including text-embedding-v*) + [/^(qwen|qvq|qwq|wan|text-embedding-v|gte)/, 'alibaba'], + // Meta models + [/^llama/, 'meta'], + // Mistral models + [/^(voxtral|devstral|mistral|mixtral|codestral|ministral|pixtral|magistral)/, 'mistral'], + // DeepSeek models + [/^deepseek/, 'deepseek'], + // Cohere models + [/^(command|embed-|rerank-)/, 'cohere'], + // xAI Grok models + [/^grok/, 'xai'], + // Microsoft Phi models + [/^phi-/, 'microsoft'], + // 01.ai Yi models + [/^yi-/, '01ai'], + // Zhipu GLM models + [/^(glm|cogview|cogvideo)/, 'zhipu'], + // Stability AI models + [/^(stable-|sd3|sdxl)/, 'stability'], + // Perplexity models + [/^(sonar|pplx-)/, 'perplexity'], + // Amazon models + [/^nova-/, 'amazon'], + // Baidu ERNIE models + [/^ernie/, 'baidu'], + // Moonshot/Kimi models + [/^(moonshot|kimi)/, 'moonshot'], + // 360 models + [/^360/, '360ai'], + // ByteDance Doubao models + [/^(doubao|seed|ui-tars)/, 'bytedance'], + // MiniMax models + [/^(abab|minimax)/, 'minimax'], + // Baichuan models + [/^baichuan/, 'baichuan'], + // Nvidia models + [/^(nvidia|nemotron)/, 'nvidia'], + // AI21 models + [/^jamba/, 'ai21'], + // Inflection models + [/^inflection/, 'inflection'], + // Voyage models + [/^voyage/, 'voyage'], + // Jina models + [/^jina/, 'jina'], + // BGE 
models (BAAI) + [/^bge/, 'baai'], + // StreamLake modelsp + [/^kat/, 'streamlake'], + // allenai models + [/^(olmo|molmo)/, 'ai2'], + [/^(flux)/, 'bfl'], + [/^(lfm)/, 'liquidai'], + [/^(longcat)/, 'meituan'], + [/^(trinity|spotlight|virtuoso|coder-large)/, 'arceeai'], + [/^(solar)/, 'upstageai'], + [/^(step)/, 'stepfun'], + [/^(ling|ring)/, 'bailing'], + [/^cogito/, 'cogito'], + [/^rnj/, 'essentialai'], + [/^dolphin/, 'dolphinai'], + [/^ideogram/, 'ideogram'], + [/^hunyuan/, 'tencent'], + [/^morph/, 'morph'], + [/^mercury/, 'inception'], + [/^(hermes|deephermes)/, 'nousresearch'], + [/^recraft/, 'recraft'], + [/^runway/, 'runway'], + [/^eleven/, 'elevenlabs'], + [/^relace/, 'relace'], + [/^riverflow/, 'sourceful'], + [/^sensenova/, 'sensenova'], + [/^intern/, 'intern'], + [/^kling/, 'kling'], + [/^vidu/, 'vidu'], + [/^suno/, 'suno'], + [/^kolors/, 'kolors'], + [/^megrez/, 'infini'], + [/^aion/, 'aion'] +] + +// ═══════════════════════════════════════════════════════════════════════════════ +// Capability Detection Patterns (match + exclude) +// Each entry: [matchRegex, excludeRegex | null, capability] +// Based on renderer-layer detection logic in src/renderer/src/config/models/ +// ═══════════════════════════════════════════════════════════════════════════════ + +/** Reasoning model detection — based on renderer reasoning.ts REASONING_REGEX + model checks */ +const REASONING_MATCH = + /^(?!.*\bnon-reasoning\b)(o\d+(?:-[\w-]+)?$|.*\b(?:reasoning|reasoner|thinking|think)\b.*|.*-r\d+.*|.*\bqwq\b.*|.*\bqvq\b.*|.*\bhunyuan-t1\b.*|.*\bglm-zero-preview\b.*|.*\bgrok-(?:3-mini|4|4-fast|4-1)(?:-[\w-]+)?\b.*|.*\bclaude-(?:3-7|sonnet-4|opus-4|haiku-4)\b.*|.*\bgemini-(?:2-5|3-)(?!.*image).*|.*\bdoubao-(?:seed-1-[68]|1-5-thinking|seed-code)\b.*|.*\bdeepseek-(?:v3|chat)\b.*|.*\bbaichuan-m[23]\b.*|.*\bminimax-m[12]\b.*|.*\bstep-[r3]\b.*|.*\bmagistral\b.*|.*\bmimo-v2\b.*|.*\bsonar-deep-research\b.*)/i +const REASONING_EXCLUDE = 
/\b(embed|rerank|dall-e|stable-diffusion|whisper|tts-|sdxl|flux|cogview|imagen)\b/i + +/** Function calling detection — based on renderer tooluse.ts FUNCTION_CALLING_MODELS */ +const FUNCTION_CALL_MATCH = + /\b(?:gpt-4o|gpt-4-|gpt-4[.-][15]|gpt-5|o[134](?:-[\w-]+)?|claude|qwen[23]?(?:-[\w-]+)?|hunyuan|deepseek|glm-4|gemini-(?:2|3|flash|pro)|grok-[34]|doubao-seed|kimi-k2|minimax-m2|mimo-v2|mistral-large|llama-4)/i +const FUNCTION_CALL_EXCLUDE = + /\b(?:o1-mini|o1-preview|gemini-1[.-]|imagen|aqa|qwen-mt|gpt-5-chat|glm-4[.-]5v|deepseek-v3[.-]2-speciale|embed|rerank|dall-e|stable-diffusion|whisper|tts-|sdxl|flux|cogview)\b/i + +/** Vision/image recognition detection — based on renderer vision.ts visionAllowedModels */ +const VISION_MATCH = + /(-vision|-vl\b|-visual|vision-|vl-|4v|\bllava\b|\bminicpm\b|\bpixtral\b|\binternvl|\bgpt-4o\b|\bgpt-4-(?!32k|base)\b|\bgpt-4[.-][15]\b|\bgpt-5\b|\bo[134](?:-[\w-]+)?$|\bclaude-(?:3|haiku-4|sonnet-4|opus-4)\b|\bgemini-(?:1-5|2|3-(?:flash|pro))\b|\bgemma-?3\b|\bqwen(?:2|2[.-]5|3)-vl\b|\bqwen(?:2[.-]5|3)-omni\b|\bgrok-(?:4|vision)\b|\bdoubao-seed-1-[68]\b|\bkimi-(?:latest|vl|thinking)\b|\bllama-4\b)/i +const VISION_EXCLUDE = + /\b(?:gpt-4-\d+-preview|gpt-4-turbo-preview|gpt-4-32k|gpt-4o-image|gpt-image|o1-mini|o3-mini|o1-preview|embed|rerank|dall-e|stable-diffusion|sd3|sdxl|flux|cogview|imagen|midjourney|ideogram|sora|runway|pika|kling|veo|vidu|wan|whisper|tts-)\b/i + +/** Web search detection — models with built-in or API-supported web search */ +const WEB_SEARCH_MATCH = + /(-search\b|-online\b|searchgpt|\bsonar\b|\bgpt-4o\b|\bgpt-4[.-]1\b|\bgpt-4[.-]5\b|\bgpt-5\b|\bo[34](?:-[\w-]+)?$|\bclaude-(?:3-[57]-sonnet|3-5-haiku|sonnet-4|opus-4|haiku-4)\b|\bgemini-(?:2(?!.*image-preview).*|3-(?:flash|pro))\b|\bgrok-)/i +const WEB_SEARCH_EXCLUDE = + /\b(?:gpt-4o-image|gpt-4[.-]1-nano|embed|rerank|dall-e|stable-diffusion|whisper|tts-|sdxl|flux|cogview|imagen)\b/i + +/** File/document input detection — only models whose name definitively 
 * indicates file/doc processing
 * Most FILE_INPUT capability comes from:
 * 1. models.dev `attachment` field (modelsdev/transformer.ts)
 * 2. Provider-level overrides (generate-provider-models.ts) for OpenAI/Anthropic/Google
 * This regex is intentionally narrow — only models with document-specific naming.
 * Matches "qwen-long" / "qwen-doc" at a word boundary, or any ID containing "-ocr" / "_ocr"
 * followed by a word boundary. */
const FILE_INPUT_MATCH = /\b(?:qwen-(?:long|doc)\b|[-_]ocr\b)/i

/** Computer use detection — models with API-supported computer/desktop interaction
 * Anthropic: claude-sonnet-4, claude-opus-4, claude-3-7-sonnet, claude-3-5-sonnet (beta)
 * OpenAI: computer-use-preview (CUA via Responses API)
 * The match regex additionally accepts `claude-haiku-4`, and any ID containing the
 * word-start token `computer-use`. */
const COMPUTER_USE_MATCH = /\b(?:claude-(?:sonnet-4|opus-4|3-[57]-sonnet|haiku-4)|computer-use)/i
// Non-chat model families that must never be tagged with computer use, even if the match regex hits.
const COMPUTER_USE_EXCLUDE = /\b(?:embed|rerank|tts-|dall-e|stable-diffusion|sdxl|flux|cogview|imagen|whisper)\b/i

/**
 * Model ID patterns that indicate specific capabilities
 * Format: [matchRegex, excludeRegex | null, capability]
 * Used to infer capabilities from model naming conventions
 *
 * A capability is assigned when `matchRegex` matches and `excludeRegex` (if any) does not.
 * Entries are independent: one model ID can collect several capabilities.
 * The inline literals below carry no `i` flag; `inferCapabilitiesFromModelId` lowercases the ID
 * before testing, so they effectively match case-insensitively there.
 * The inline literals are also unanchored substring matches (e.g. `image`, `video`, `wan`),
 * so they can match inside longer words — NOTE(review): confirm this breadth is intended.
 */
export const CAPABILITY_PATTERNS: [RegExp, RegExp | null, ModelCapabilityType][] = [
  // Reasoning/thinking models
  [REASONING_MATCH, REASONING_EXCLUDE, MODEL_CAPABILITY.REASONING],
  // Function calling
  [FUNCTION_CALL_MATCH, FUNCTION_CALL_EXCLUDE, MODEL_CAPABILITY.FUNCTION_CALL],
  // Embedding models
  [/(embed|embedding|bge-|e5-|gte-)/, null, MODEL_CAPABILITY.EMBEDDING],
  // Reranker models
  [/(rerank|reranker)/, null, MODEL_CAPABILITY.RERANK],
  // Vision/multimodal models
  [VISION_MATCH, VISION_EXCLUDE, MODEL_CAPABILITY.IMAGE_RECOGNITION],
  // File/document input (PDF, etc.) — narrow regex, most detection via models.dev + provider overrides
  [FILE_INPUT_MATCH, null, MODEL_CAPABILITY.FILE_INPUT],
  // Image generation models
  [/(dall-e|stable-diffusion|sd3|sdxl|flux|image|imagen|midjourney|ideogram)/, null, MODEL_CAPABILITY.IMAGE_GENERATION],
  // Video generation models
  [/(sora|runway|pika|kling|veo|luma|gen-3|video|vidu|wan)/, null, MODEL_CAPABILITY.VIDEO_GENERATION],
  // Audio transcription models
  [/(whisper)/, null, MODEL_CAPABILITY.AUDIO_TRANSCRIPT],
  // TTS models
  [/(tts-)/, null, MODEL_CAPABILITY.AUDIO_GENERATION],
  // Web search models
  [WEB_SEARCH_MATCH, WEB_SEARCH_EXCLUDE, MODEL_CAPABILITY.WEB_SEARCH],
  // Computer use / desktop interaction
  [COMPUTER_USE_MATCH, COMPUTER_USE_EXCLUDE, MODEL_CAPABILITY.COMPUTER_USE]
]

/**
 * Known official model aliases (from provider documentation)
 * Format: normalized model ID -> alias array
 * Only include officially documented aliases, not auto-generated ones
 *
 * Keys are looked up verbatim by `getOfficialAliases`, so they must already be in normalized
 * form (lowercase, hyphen version separators).
 * NOTE(review): the type arguments of `Record` are missing in this text (presumably
 * `Record<string, string[]>`) — looks like an extraction artifact; confirm against the repository.
 */
export const OFFICIAL_ALIASES: Record = {
  // Anthropic Claude 4.5 models
  'claude-sonnet-4-5-20250929': ['claude-sonnet-4-5'],
  'claude-haiku-4-5-20251001': ['claude-haiku-4-5'],
  'claude-opus-4-5-20251101': ['claude-opus-4-5'],
  // Anthropic Claude 4 models
  'claude-sonnet-4-20250514': ['claude-sonnet-4', 'claude-sonnet-4-0'],
  'claude-opus-4-20250514': ['claude-opus-4', 'claude-opus-4-0'],
  // Anthropic Claude 3.7 models
  'claude-3-7-sonnet-20250219': ['claude-3-7-sonnet', 'claude-3-7-sonnet-latest'],
  // Anthropic Claude 3.5 models
  'claude-3-5-sonnet-20241022': ['claude-3-5-sonnet', 'claude-3-5-sonnet-latest'],
  'claude-3-5-sonnet-20240620': ['claude-3-5-sonnet-v1'],
  'claude-3-5-haiku-20241022': ['claude-3-5-haiku', 'claude-3-5-haiku-latest']
}

/**
 * Common aggregator prefixes that should be stripped from model IDs
 * These are routing/deployment prefixes, not actual model name parts
 * Note: More specific prefixes must come before shorter ones (e.g., 'zai-org-' before 'zai-')
 */
// The list is scanned in order by stripAggregatorPrefixes and only the first matching prefix is
// removed, which is why longer prefixes are listed before their shorter forms.
// NOTE(review): 'mm-' is also listed in PREFIX_EXPANSIONS below; the two lists disagree about
// whether that prefix is stripped or expanded — keep them in sync.
export const COMMON_AGGREGATOR_PREFIXES = [
  // AIHubMix routing prefixes
  'aihubmix-',
  'aihub-',
  'ahm-',
  // Cloud provider routing
  'alicloud-',
  'azure-',
  'baidu-',
  'cbs-',
  'cc-',
  'sf-',
  's-',
  'bai-',
  'mm-',
  'web-',
  // Platform aggregators
  'deepinfra-',
  'groq-',
  'nvidia-',
  'sophnet-',
  // Legacy prefixes
  'zai-org-', // Must be before zai-
  'zai-',
  'lucidquery-',
  'lucidnova-',
  'lucid-',
  'siliconflow-',
  'chutes-',
  'huoshan-',
  'meta-',
  'cohere-',
  'coding-',
  'dmxapi-',
  'perplexity-',
  'ai21-',
  'openai-',
  // Underscore-based prefixes
  'dmxapi_',
  'aistudio_'
]

/**
 * Known abbreviated prefixes and their canonical expansions
 * These are brand abbreviations that providers use in model IDs
 * Applied after aggregator prefix stripping to restore canonical names
 *
 * Each entry is [abbreviation, canonical prefix]; expandKnownPrefixes applies the first entry
 * whose abbreviation the ID starts with.
 */
export const PREFIX_EXPANSIONS: [string, string][] = [
  ['mm-', 'minimax-'] // MiniMax shorthand: mm-m2-1 → minimax-m2-1
]

/**
 * Common colon-based variant suffixes (OpenRouter style)
 * Compared against everything from the last ':' of the ID onwards, so each entry includes
 * the leading colon.
 */
export const COLON_VARIANT_SUFFIXES = [':free', ':nitro', ':extended', ':beta', ':preview', ':thinking', ':exacto']

/**
 * Common hyphen-based variant suffixes
 * These are provider-specific deployment variants that should be stripped
 *
 * Checked in list order with `endsWith`; the first suffix that matches (and is not protected by
 * a compound prefix such as "non-") is the one removed, and only one suffix is removed per call.
 */
export const HYPHEN_VARIANT_SUFFIXES = [
  '-free',
  '-search',
  '-online',
  '-think',
  '-reasoning',
  '-classic',
  '-low',
  '-high',
  '-minimal',
  '-medium',
  '-nothink',
  '-no-think',
  '-ssvip',
  '-thinking',
  '-nothinking',
  '-aliyun',
  '-huoshan',
  '-tee', // Trusted Execution Environment variant
  '-cc', // Cloud compute variant
  '-fw', // Firewall/provider-specific variant
  '-di', // Provider-specific variant
  '-t', // Test/provider-specific variant
  '-reverse' // Reverse proxy variant
]

/**
 * Common parentheses-based variant suffixes
 * Matched with `endsWith`; a single space before the parenthesis is also removed when stripping.
 */
export const PAREN_VARIANT_SUFFIXES = ['(free)', '(beta)', '(preview)', '(thinking)']

/**
 * Compound prefixes that protect
a hyphen-based suffix from being stripped. + * e.g., "non-" before "-reasoning" means "non-reasoning" is part of the model name, + * not a variant suffix. + */ +const PROTECTED_COMPOUND_PREFIXES = ['non', 'no', 'pre', 'anti', 'post'] + +/** + * Normalize version separators in model IDs + * Converts comma, dot, and 'p' separators to hyphen (-) + * Existing hyphens are left unchanged to avoid date pattern issues + * + * IMPORTANT: Call this AFTER stripping parameter sizes to avoid + * converting decimal parameter sizes like 1.5b to 1-5b + * + * Examples: + * - gpt-3,5-turbo → gpt-3-5-turbo + * - gpt-3p5-turbo → gpt-3-5-turbo + * - claude-3.5-sonnet → claude-3-5-sonnet (dot to hyphen) + * - claude-3-5-sonnet → claude-3-5-sonnet (unchanged) + * - deepseek-r1-0728 → deepseek-r1-0728 (unchanged, date pattern) + */ +export function normalizeVersionSeparators(modelId: string): string { + // Normalize comma, dot, and 'p' separators between digits to hyphen + // Uses lookahead so the trailing digit isn't consumed, allowing overlapping matches + // e.g., "4.0.1" → "4-0-1" (without lookahead, "4.0.1" → "4-0.1" because "0" is consumed) + return modelId.replace(/(\d)[,.p](?=\d)/g, '$1-') +} + +/** + * Parameter size pattern: matches Xb or X.Xb where X is a number + * Must be preceded by a hyphen and optionally followed by hyphen+suffix or end of string + * Examples: -72b, -7b, -1.5b, -72b-instruct + */ +const PARAMETER_SIZE_PATTERN = /-(\d+(?:\.\d+)?b)(?=-|$)/i + +/** + * Extract parameter size suffix from model ID + * Returns the parameter size (e.g., "72b", "7b", "1.5b") or undefined if not found + * + * Examples: + * - qwen-2.5-72b → "72b" + * - llama-3.1-70b-instruct → "70b" + * - qwen-2.5-1.5b → "1.5b" + * - gpt-4o → undefined + */ +export function extractParameterSize(modelId: string): string | undefined { + const match = modelId.match(PARAMETER_SIZE_PATTERN) + return match ? 
match[1].toLowerCase() : undefined +} + +/** + * Strip parameter size suffix from model ID + * Removes the parameter size but keeps other suffixes like -instruct, -chat + * + * Examples: + * - qwen-2.5-72b → qwen-2.5 + * - llama-3.1-70b-instruct → llama-3.1-instruct + * - gpt-4o → gpt-4o (unchanged) + */ +export function stripParameterSize(modelId: string): string { + return modelId.replace(PARAMETER_SIZE_PATTERN, '') +} + +/** + * Infer publisher from a normalized model ID using MODEL_TO_PUBLISHER patterns + */ +export function inferPublisherFromModelId(normalizedModelId: string): string | undefined { + const lowerId = normalizedModelId.toLowerCase() + for (const [pattern, publisher] of MODEL_TO_PUBLISHER) { + if (pattern.test(lowerId)) { + return publisher + } + } + return undefined +} + +/** + * Infer capabilities from model ID using CAPABILITY_PATTERNS + * Each pattern has an optional exclude regex to prevent false positives + */ +export function inferCapabilitiesFromModelId(modelId: string): ModelCapabilityType[] { + const caps: ModelCapabilityType[] = [] + const lowerId = modelId.toLowerCase() + + for (const [match, exclude, capability] of CAPABILITY_PATTERNS) { + if (match.test(lowerId) && (!exclude || !exclude.test(lowerId))) { + caps.push(capability) + } + } + + return caps +} + +/** + * Get official aliases for a normalized model ID + */ +export function getOfficialAliases(normalizedModelId: string): string[] | undefined { + return OFFICIAL_ALIASES[normalizedModelId] +} + +/** + * Map raw modality strings to internal Modality type + * Handles common variations in modality naming + */ +export function mapModalityString(modality: string): Modality | undefined { + const normalized = modality.toLowerCase().trim() + + switch (normalized) { + case 'text': + return MODALITY.TEXT + case 'image': + return MODALITY.IMAGE + case 'audio': + return MODALITY.AUDIO + case 'video': + return MODALITY.VIDEO + case 'embedding': + case 'embeddings': + return MODALITY.VECTOR 
+ default: + return undefined + } +} + +/** + * Map an array of modality strings to internal Modality array + * Defaults to ['TEXT'] if no valid modalities found + */ +export function mapModalities(modalityList: string[]): Modality[] { + const modalities = new Set() + + for (const m of modalityList) { + const mapped = mapModalityString(m) + if (mapped) { + modalities.add(mapped) + } + } + + const result = Array.from(modalities) + return result.length > 0 ? result : [MODALITY.TEXT] +} + +/** + * Normalize a model ID to its canonical form: + * 1. Strip provider prefix (e.g., "anthropic/claude-3" -> "claude-3") + * 2. Convert to lowercase + * 3. Strip aggregator prefixes (zai-xxx -> xxx) + * 4. Strip variant suffixes (:free, -free, etc.) + * 5. Strip parameter size suffix (72b, 7b, 1.5b) - BEFORE version normalization + * 6. Normalize version separators (3.5, 3,5, 3p5 → 3-5) + * + * This is the single source of truth for model ID normalization. + * All parsers and scripts should use this function. 
+ */ +export function normalizeModelId(modelId: string): string { + const parts = modelId.split('/') + let baseName = parts[parts.length - 1].toLowerCase() + baseName = stripAggregatorPrefixes(baseName) + baseName = expandKnownPrefixes(baseName) + baseName = stripVariantSuffixes(baseName) + baseName = stripParameterSize(baseName) + baseName = normalizeVersionSeparators(baseName) + return baseName +} + +/** + * Strip aggregator prefixes from a model ID + */ +export function stripAggregatorPrefixes(modelId: string, additionalPrefixes: string[] = []): string { + const allPrefixes = [...additionalPrefixes, ...COMMON_AGGREGATOR_PREFIXES] + let result = modelId + + for (const prefix of allPrefixes) { + if (result.startsWith(prefix)) { + result = result.slice(prefix.length) + break // Only remove one prefix + } + } + + return result +} + +/** + * Expand known abbreviated prefixes to their canonical form + * e.g., mm-m2-1 → minimax-m2-1 + */ +export function expandKnownPrefixes(modelId: string): string { + for (const [abbrev, canonical] of PREFIX_EXPANSIONS) { + if (modelId.startsWith(abbrev)) { + return canonical + modelId.slice(abbrev.length) + } + } + return modelId +} + +/** + * Strip variant suffixes from a model ID + * Handles colon-based (:free), hyphen-based (-free), and parentheses-based ((free)) suffixes + */ +export function stripVariantSuffixes( + modelId: string, + options: { + colonSuffixes?: string[] + hyphenSuffixes?: string[] + parenSuffixes?: string[] + officialModelsWithSuffix?: Set + } = {} +): string { + const colonSuffixes = options.colonSuffixes ?? COLON_VARIANT_SUFFIXES + const hyphenSuffixes = options.hyphenSuffixes ?? HYPHEN_VARIANT_SUFFIXES + const parenSuffixes = options.parenSuffixes ?? PAREN_VARIANT_SUFFIXES + const officialModels = options.officialModelsWithSuffix ?? 
new Set() + + // Don't strip if it's an official model + if (officialModels.has(modelId)) { + return modelId + } + + // Strip colon-based suffixes + const colonIdx = modelId.lastIndexOf(':') + if (colonIdx > 0) { + const suffix = modelId.slice(colonIdx) + if (colonSuffixes.includes(suffix)) { + return modelId.slice(0, colonIdx) + } + } + + // Strip hyphen-based suffixes + // Protect compound modifiers like "non-reasoning", "non-thinking" from being stripped + for (const suffix of hyphenSuffixes) { + if (modelId.endsWith(suffix)) { + const remaining = modelId.slice(0, -suffix.length) + if (PROTECTED_COMPOUND_PREFIXES.some((p) => remaining.endsWith(p))) { + continue + } + return remaining + } + } + + // Strip parentheses-based suffixes (with optional space before) + for (const suffix of parenSuffixes) { + if (modelId.endsWith(suffix)) { + let result = modelId.slice(0, -suffix.length) + // Also strip trailing space if present + if (result.endsWith(' ')) { + result = result.slice(0, -1) + } + return result + } + } + + return modelId +} + +/** + * Extract variant suffix from a model ID + * Returns the suffix without the leading character (: or - or parentheses) + */ +export function extractVariantSuffix( + modelId: string, + options: { + colonSuffixes?: string[] + hyphenSuffixes?: string[] + parenSuffixes?: string[] + officialModelsWithSuffix?: Set + } = {} +): string | undefined { + const colonSuffixes = options.colonSuffixes ?? COLON_VARIANT_SUFFIXES + const hyphenSuffixes = options.hyphenSuffixes ?? HYPHEN_VARIANT_SUFFIXES + const parenSuffixes = options.parenSuffixes ?? PAREN_VARIANT_SUFFIXES + const officialModels = options.officialModelsWithSuffix ?? 
new Set() + + const lowerModelId = modelId.toLowerCase() + + // Don't extract variant for official models + if (officialModels.has(lowerModelId)) { + return undefined + } + + // Check colon-based suffixes + const colonIdx = lowerModelId.lastIndexOf(':') + if (colonIdx > 0) { + const suffix = lowerModelId.slice(colonIdx) + if (colonSuffixes.includes(suffix)) { + return suffix.slice(1) // Remove leading ':' + } + } + + // Check hyphen-based suffixes + for (const suffix of hyphenSuffixes) { + if (lowerModelId.endsWith(suffix)) { + const remaining = lowerModelId.slice(0, -suffix.length) + if (PROTECTED_COMPOUND_PREFIXES.some((p) => remaining.endsWith(p))) { + continue + } + return suffix.slice(1) // Remove leading '-' + } + } + + // Check parentheses-based suffixes (with optional space before) + for (const suffix of parenSuffixes) { + if (lowerModelId.endsWith(suffix) || lowerModelId.endsWith(' ' + suffix)) { + // Return content without parentheses: "(free)" -> "free" + return suffix.slice(1, -1) + } + } + + return undefined +} + +/** + * Base class for OpenAI-compatible transformers + * Handles common patterns like extracting { data: [...] } responses + */ +export class OpenAICompatibleTransformer implements ITransformer { + /** + * Default implementation extracts from { data: [...] 
} or direct array + */ + extractModels(response: any): any[] { + if (Array.isArray(response.data)) { + return response.data + } + if (Array.isArray(response)) { + return response + } + throw new Error('Invalid API response structure: expected { data: [] } or []') + } + + /** + * Default transformation for OpenAI-compatible model responses + * Minimal transformation - most fields are optional + */ + transform(apiModel: any): ModelConfig { + // Normalize model ID to lowercase + const modelId = (apiModel.id || apiModel.model || '').toLowerCase() + + if (!modelId) { + throw new Error('Model ID is required') + } + + return { + id: modelId, + name: apiModel.name || modelId, + description: apiModel.description, + + capabilities: this.inferCapabilities(apiModel), + inputModalities: [MODALITY.TEXT], // Default to text + outputModalities: [MODALITY.TEXT], // Default to text + + contextWindow: apiModel.context_length || apiModel.context_window || undefined, + maxOutputTokens: apiModel.max_tokens || apiModel.max_output_tokens || undefined, + + pricing: this.extractPricing(apiModel), + + metadata: { + source: 'api', + owned_by: apiModel.owned_by, + tags: apiModel.tags || [], + created: apiModel.created, + updated: apiModel.updated + } + } + } + + /** + * Infer basic capabilities from model data + */ + protected inferCapabilities(apiModel: any): ModelCapabilityType[] | undefined { + const capabilities: ModelCapabilityType[] = [] + + // Check for common capability indicators + if (apiModel.supports_tools || apiModel.function_calling) { + capabilities.push(MODEL_CAPABILITY.FUNCTION_CALL) + } + if (apiModel.supports_vision || apiModel.vision) { + capabilities.push(MODEL_CAPABILITY.IMAGE_RECOGNITION) + } + if (apiModel.supports_json_output || apiModel.response_format) { + capabilities.push(MODEL_CAPABILITY.STRUCTURED_OUTPUT) + } + + return capabilities.length > 0 ? 
capabilities : undefined + } + + /** + * Extract pricing if available + */ + protected extractPricing(apiModel: any): ModelConfig['pricing'] { + if (!apiModel.pricing) return undefined + + const pricing = apiModel.pricing + + // Handle per-token pricing (convert to per-million) + if (pricing.prompt !== undefined && pricing.completion !== undefined) { + const inputCost = parseFloat(pricing.prompt) + const outputCost = parseFloat(pricing.completion) + + if (inputCost < 0 || outputCost < 0) return undefined + + return { + input: { perMillionTokens: inputCost * 1_000_000 }, + output: { perMillionTokens: outputCost * 1_000_000 } + } + } + + // Handle direct per-million pricing + if ( + pricing.input?.perMillionTokens != null && + pricing.output?.perMillionTokens != null && + !isNaN(pricing.input.perMillionTokens) && + !isNaN(pricing.output.perMillionTokens) + ) { + return { + input: { perMillionTokens: pricing.input.perMillionTokens }, + output: { perMillionTokens: pricing.output.perMillionTokens } + } + } + + return undefined + } +} + +/** + * Abstract base class for registry transformers + * Provides common functionality for normalizing model IDs, inferring publishers, etc. 
+ */ +export abstract class BaseCatalogTransformer implements ITransformer { + /** + * Additional aggregator prefixes specific to this transformer + * Override in subclasses to add provider-specific prefixes + */ + protected readonly aggregatorPrefixes: string[] = [] + + /** + * Colon-based variant suffixes to strip + * Override in subclasses to customize + */ + protected readonly colonVariantSuffixes: string[] = COLON_VARIANT_SUFFIXES + + /** + * Hyphen-based variant suffixes to strip + * Override in subclasses to customize + */ + protected readonly hyphenVariantSuffixes: string[] = HYPHEN_VARIANT_SUFFIXES + + /** + * Official models that have suffix-like endings but should NOT be stripped + * Override in subclasses to customize + */ + protected readonly officialModelsWithSuffix: Set = new Set() + + /** + * Transform API model to internal ModelConfig + * Must be implemented by subclasses + */ + abstract transform(apiModel: TInput): ModelConfig + + /** + * Normalize a model ID by: + * 1. Removing provider prefix (e.g., "anthropic/claude-3" -> "claude-3") + * 2. Removing aggregator prefixes + * 3. Stripping variant suffixes + * 4. Stripping parameter size suffix (72b, 7b, 1.5b) - BEFORE version normalization + * 5. Normalizing version separators (3.5, 3,5, 3p5 → 3-5) + * 6. 
Converting to lowercase + */ + protected normalizeModelId(modelId: string): string { + // Split by '/' and take the last part + const parts = modelId.split('/') + let baseName = parts[parts.length - 1].toLowerCase() + + // Remove aggregator prefixes + baseName = stripAggregatorPrefixes(baseName, this.aggregatorPrefixes) + + // Expand known abbreviated prefixes (e.g., mm- → minimax-) + baseName = expandKnownPrefixes(baseName) + + // Strip variant suffixes + baseName = stripVariantSuffixes(baseName, { + colonSuffixes: this.colonVariantSuffixes, + hyphenSuffixes: this.hyphenVariantSuffixes, + officialModelsWithSuffix: this.officialModelsWithSuffix + }) + + // Strip parameter size suffix BEFORE version normalization + // This preserves decimal parameter sizes like 1.5b + baseName = stripParameterSize(baseName) + + // Normalize version separators (e.g., claude-3.5 → claude-3-5) + baseName = normalizeVersionSeparators(baseName) + + return baseName + } + + /** + * Extract parameter size from model ID + * Returns the size (e.g., "72b") or undefined + */ + protected getParameterSize(modelId: string): string | undefined { + // Normalize version first, then extract parameter size + const normalized = normalizeVersionSeparators(modelId.toLowerCase()) + return extractParameterSize(normalized) + } + + /** + * Infer the original model publisher from model ID + */ + protected inferPublisher(modelId: string): string | undefined { + return inferPublisherFromModelId(modelId) + } + + /** + * Get variant suffix from model ID if present + */ + protected getModelVariant(modelId: string): string | undefined { + return extractVariantSuffix(modelId, { + colonSuffixes: this.colonVariantSuffixes, + hyphenSuffixes: this.hyphenVariantSuffixes, + officialModelsWithSuffix: this.officialModelsWithSuffix + }) + } + + /** + * Get official aliases for a model ID + */ + protected getAlias(modelId: string): string[] | undefined { + const normalizedId = this.normalizeModelId(modelId) + return 
getOfficialAliases(normalizedId) + } + + /** + * Infer capabilities from model ID patterns + */ + protected inferCapabilitiesFromId(modelId: string): ModelCapabilityType[] { + return inferCapabilitiesFromModelId(modelId) + } + + /** + * Map modality strings to internal format + */ + protected mapModalities(modalityList: string[]): Modality[] { + return mapModalities(modalityList) + } +} diff --git a/packages/shared/data/api/schemas/index.ts b/packages/shared/data/api/schemas/index.ts index 9c8fa746fa6..c3bcadce11b 100644 --- a/packages/shared/data/api/schemas/index.ts +++ b/packages/shared/data/api/schemas/index.ts @@ -26,6 +26,8 @@ import type { KnowledgeSchemas } from './knowledges' import type { MCPServerSchemas } from './mcpServers' import type { MessageSchemas } from './messages' import type { MiniappSchemas } from './miniapps' +import type { ModelSchemas } from './models' +import type { ProviderSchemas } from './providers' import type { TestSchemas } from './test' import type { TopicSchemas } from './topics' import type { TranslateSchemas } from './translate' @@ -47,6 +49,8 @@ export type ApiSchemas = AssertValidSchemas< TestSchemas & TopicSchemas & MessageSchemas & + ModelSchemas & + ProviderSchemas & TranslateSchemas & FileProcessingSchemas & MCPServerSchemas & diff --git a/packages/shared/data/api/schemas/models.ts b/packages/shared/data/api/schemas/models.ts new file mode 100644 index 00000000000..75318ede465 --- /dev/null +++ b/packages/shared/data/api/schemas/models.ts @@ -0,0 +1,141 @@ +/** + * Model API Schema definitions + * + * Contains all model-related endpoints for CRUD operations. 
 * DTO types are derived from Zod schemas in ../../types/model
 */

import * as z from 'zod'

import {
  type Model,
  ParameterSupportDbSchema,
  RuntimeModelPricingSchema,
  RuntimeReasoningSchema
} from '../../types/model'

// NOTE(review): every `z.infer` below appears without its type argument (presumably
// `z.infer<typeof XxxSchema>` for the schema declared just above it) — this looks like an
// extraction artifact; confirm against the repository.

/** Query parameters for listing models; every filter is optional */
const ListModelsQuerySchema = z.object({
  /** Filter by provider ID */
  providerId: z.string().optional(),
  /** Filter by capability (ModelCapability string value) */
  capability: z.string().optional(),
  /** Filter by enabled status */
  enabled: z.boolean().optional()
})
export type ListModelsQuery = z.infer

/** DTO for creating a new model; only providerId and modelId are required */
const CreateModelDtoSchema = z.object({
  /** Provider ID */
  providerId: z.string(),
  /** Model ID (used in API calls) */
  modelId: z.string(),
  /** Associated preset model ID */
  presetModelId: z.string().optional(),
  /** Display name */
  name: z.string().optional(),
  /** Description */
  description: z.string().optional(),
  /** UI grouping */
  group: z.string().optional(),
  /**
   * Capabilities (ModelCapability values; validated here only as strings).
   * NOTE(review): the previous wording said "numeric enum values", which does not match the
   * string schema — confirm the wire format.
   */
  capabilities: z.array(z.string()).optional(),
  /** Input modalities (Modality values; validated here only as strings — see note on capabilities) */
  inputModalities: z.array(z.string()).optional(),
  /** Output modalities (Modality values; validated here only as strings — see note on capabilities) */
  outputModalities: z.array(z.string()).optional(),
  /** Endpoint types (validated here only as strings) */
  endpointTypes: z.array(z.string()).optional(),
  /** Context window size */
  contextWindow: z.number().optional(),
  /** Maximum output tokens */
  maxOutputTokens: z.number().optional(),
  /** Streaming support */
  supportsStreaming: z.boolean().optional(),
  /** Reasoning configuration */
  reasoning: RuntimeReasoningSchema.optional(),
  /** Parameter support (DB form) */
  parameterSupport: ParameterSupportDbSchema.optional(),
  /** Pricing configuration */
  pricing: RuntimeModelPricingSchema.optional()
})
export type CreateModelDto = z.infer

/**
 * DTO for updating an existing model — CreateModelDto minus identity fields, all optional, plus status fields.
 * The identity fields (providerId, modelId, presetModelId) are omitted, so they cannot be
 * changed through an update.
 */
const UpdateModelDtoSchema = CreateModelDtoSchema.omit({
  providerId: true,
  modelId: true,
  presetModelId: true
})
  .partial()
  .extend({
    isEnabled: z.boolean().optional(),
    isHidden: z.boolean().optional(),
    sortOrder: z.number().optional(),
    notes: z.string().optional()
  })
export type UpdateModelDto = z.infer

/** DTO for enriching raw model entries against registry presets */
const EnrichModelsDtoSchema = z.object({
  /** Raw model entries from SDK; only modelId is required per entry */
  models: z.array(
    z.object({
      modelId: z.string(),
      name: z.string().optional(),
      group: z.string().optional(),
      description: z.string().optional(),
      endpointTypes: z.array(z.string()).optional()
    })
  )
})
export type EnrichModelsDto = z.infer

/**
 * Model API Schema definitions
 *
 * Maps each route to its supported HTTP methods and their params/query/body/response types.
 */
export interface ModelSchemas {
  /**
   * Models collection endpoint
   * @example GET /models?providerId=openai&capability=REASONING
   * @example POST /models { "providerId": "openai", "modelId": "gpt-5" }
   */
  '/models': {
    /** List models with optional filters */
    GET: {
      query: ListModelsQuery
      response: Model[]
    }
    /** Create a new model */
    POST: {
      body: CreateModelDto
      response: Model
    }
  }

  /**
   * Individual model endpoint (keyed by providerId + modelId)
   * @example GET /models/openai/gpt-5
   * @example PATCH /models/openai/gpt-5 { "isEnabled": false }
   * @example DELETE /models/openai/gpt-5
   */
  '/models/:providerId/:modelId': {
    /** Get a model by provider ID and model ID */
    GET: {
      params: { providerId: string; modelId: string }
      response: Model
    }
    /** Update a model */
    PATCH: {
      params: { providerId: string; modelId: string }
      body: UpdateModelDto
      response: Model
    }
    /** Delete a model */
    DELETE: {
      params: { providerId: string; modelId: string }
      response: void
    }
  }
}
diff --git a/packages/shared/data/api/schemas/providers.ts b/packages/shared/data/api/schemas/providers.ts new file
/**
 * Shared editable fields between Create and Update DTOs.
 *
 * Every field is optional here; `CreateProviderDto` narrows `name` to required.
 */
interface ProviderMutableFields {
  /** Display name */
  name?: string
  /** Per-endpoint-type configuration (baseUrl, reasoningFormatType, modelsApiUrls) */
  endpointConfigs?: Partial<Record<EndpointType, EndpointConfig>>
  /** Default text generation endpoint (an EndpointType value) */
  defaultChatEndpoint?: EndpointType
  /** API keys */
  apiKeys?: ApiKeyEntry[]
  /** Authentication configuration */
  authConfig?: AuthConfig
  /** API feature support */
  apiFeatures?: ApiFeatures
  /** Provider-specific settings (any subset of ProviderSettings) */
  providerSettings?: Partial<ProviderSettings>
}
ProviderSchemas { + /** + * Providers collection endpoint + * @example GET /providers?enabled=true + * @example POST /providers { "providerId": "openai-main", "name": "OpenAI" } + */ + '/providers': { + /** List providers with optional filters */ + GET: { + query: ListProvidersQuery + response: Provider[] + } + /** Create a new provider */ + POST: { + body: CreateProviderDto + response: Provider + } + } + + /** + * Individual provider endpoint + * @example GET /providers/openai-main + * @example PATCH /providers/openai-main { "isEnabled": false } + * @example DELETE /providers/openai-main + */ + '/providers/:providerId': { + /** Get a provider by ID */ + GET: { + params: { providerId: string } + response: Provider + } + /** Update a provider */ + PATCH: { + params: { providerId: string } + body: UpdateProviderDto + response: Provider + } + /** Delete a provider */ + DELETE: { + params: { providerId: string } + response: void + } + } + + /** + * Get a rotated API key for a provider (round-robin across enabled keys) + * @example GET /providers/openai-main/rotated-key + */ + '/providers/:providerId/rotated-key': { + GET: { + params: { providerId: string } + response: { apiKey: string } + } + } + + /** + * Get all enabled API key values for a provider (for health check etc.) 
+ * @example GET /providers/openai-main/api-keys + * @example POST /providers/openai-main/api-keys { "key": "sk-xxx", "label": "From URL import" } + */ + '/providers/:providerId/api-keys': { + GET: { + params: { providerId: string } + response: { keys: ApiKeyEntry[] } + } + /** Add an API key to a provider */ + POST: { + params: { providerId: string } + body: { key: string; label?: string } + response: Provider + } + } + + /** + * Registry models for a provider + * GET: Get all registry preset models (read-only, no DB writes) + * POST: Enrich raw SDK model entries against registry presets + * @example GET /providers/openai/registry-models + * @example POST /providers/openai/registry-models { "models": [{ "modelId": "gpt-4o" }] } + */ + '/providers/:providerId/registry-models': { + GET: { + params: { providerId: string } + response: Model[] + } + /** Enrich raw model entries with registry capabilities, pricing, etc. */ + POST: { + params: { providerId: string } + body: EnrichModelsDto + response: Model[] + } + } + + /** + * Get full auth config for a provider (includes sensitive credentials). + * SECURITY NOTE: Runtime Provider intentionally strips authConfig (only exposes authType). + * This endpoint is for settings pages only — never call in chat hot path. + * Acceptable in Electron (same-process IPC, no network exposure). 
+ * @example GET /providers/vertexai/auth-config + */ + '/providers/:providerId/auth-config': { + GET: { + params: { providerId: string } + response: AuthConfig | null + } + } + + /** + * Delete a specific API key by ID + * @example DELETE /providers/openai/api-keys/abc-123 + */ + '/providers/:providerId/api-keys/:keyId': { + DELETE: { + params: { providerId: string; keyId: string } + response: Provider + } + } +} diff --git a/packages/shared/data/types/model.ts b/packages/shared/data/types/model.ts new file mode 100644 index 00000000000..86496bf6e2b --- /dev/null +++ b/packages/shared/data/types/model.ts @@ -0,0 +1,275 @@ +/** + * Model - Merged runtime model type + * + * This is the "final state" after merging from all data sources. + * Consumers don't need to know the source - they just use the merged config. + * + * Data source priority: + * 1. user_model (user customization) + * 2. provider-models.json (catalog provider-level override) + * 3. models.json (catalog base definition) + */ + +import { + Currency, + ENDPOINT_TYPE, + EndpointType, + MODALITY, + Modality, + MODEL_CAPABILITY, + ModelCapability, + objectValues, + ReasoningEffort +} from '@cherrystudio/provider-registry' +import * as z from 'zod' + +// Re-export const objects and types for consumers +export { Currency, ENDPOINT_TYPE, EndpointType, MODALITY, Modality, MODEL_CAPABILITY, ModelCapability, ReasoningEffort } + +// ═══════════════════════════════════════════════════════════════════════════════ +// Zod schemas (formerly in provider-registry/schemas, now owned by shared) +// ═══════════════════════════════════════════════════════════════════════════════ + +/** Price per token schema */ +export const PricePerTokenSchema = z.object({ + perMillionTokens: z.number().nonnegative().nullable(), + currency: z.enum(objectValues(Currency)).default(Currency.USD).optional() +}) + +/** Thinking token limits */ +export const ThinkingTokenLimitsSchema = z.object({ + min: z.number().nonnegative().optional(), + 
/** Separator used in UniqueModelId */
export const UNIQUE_MODEL_ID_SEPARATOR = '::'

/** UniqueModelId type: "providerId::modelId" */
export type UniqueModelId = `${string}${typeof UNIQUE_MODEL_ID_SEPARATOR}${string}`

/**
 * Create a UniqueModelId from provider and model IDs.
 *
 * The result is guaranteed to round-trip through {@link parseUniqueModelId}:
 * the first occurrence of the separator in the returned string is always the
 * one inserted here. `modelId` may itself contain the separator.
 *
 * @param providerId - Provider identifier; must not contain the separator nor
 *   end with a fragment of it (e.g. a trailing ":").
 * @param modelId - Model identifier, used verbatim.
 * @returns The joined identifier `providerId::modelId`.
 * @throws Error if providerId contains the separator
 * @throws Error if providerId ends with a partial separator, which would make
 *   the split point ambiguous
 */
export function createUniqueModelId(providerId: string, modelId: string): UniqueModelId {
  if (providerId.includes(UNIQUE_MODEL_ID_SEPARATOR)) {
    throw new Error(`providerId cannot contain "${UNIQUE_MODEL_ID_SEPARATOR}": ${providerId}`)
  }
  // "a:" + "::" yields "a:::", whose first "::" starts at index 1 instead of 2,
  // so parsing would return providerId "a" and a modelId with a stray ":".
  const prefix = `${providerId}${UNIQUE_MODEL_ID_SEPARATOR}`
  if (prefix.indexOf(UNIQUE_MODEL_ID_SEPARATOR) !== providerId.length) {
    throw new Error(`providerId cannot end with part of "${UNIQUE_MODEL_ID_SEPARATOR}": ${providerId}`)
  }
  return `${providerId}${UNIQUE_MODEL_ID_SEPARATOR}${modelId}`
}

/**
 * Parse a UniqueModelId into its components.
 *
 * Splits on the FIRST separator, so everything after it (including further
 * separators) belongs to `modelId`.
 *
 * @param uniqueId - Identifier in "providerId::modelId" form.
 * @returns The provider and model parts.
 * @throws Error if the format is invalid (no separator present)
 */
export function parseUniqueModelId(uniqueId: UniqueModelId): {
  providerId: string
  modelId: string
} {
  const idx = uniqueId.indexOf(UNIQUE_MODEL_ID_SEPARATOR)
  if (idx === -1) {
    throw new Error(`Invalid UniqueModelId format: ${uniqueId}`)
  }
  return {
    providerId: uniqueId.slice(0, idx),
    modelId: uniqueId.slice(idx + UNIQUE_MODEL_ID_SEPARATOR.length)
  }
}

/**
 * Check if a string is a valid UniqueModelId.
 *
 * This is a shape check only: it is true whenever the separator occurs
 * anywhere in `value`, and does not require either side to be non-empty.
 */
export function isUniqueModelId(value: string): value is UniqueModelId {
  return value.includes(UNIQUE_MODEL_ID_SEPARATOR)
}
/**
 * Runtime schema for a merged model.
 *
 * `id` and `replaceWith` are validated as plain strings but typed as
 * `UniqueModelId` so consumers get the "providerId::modelId" template type.
 */
export const ModelSchema = z.object({
  /** Unique identifier: "providerId::modelId" */
  id: z.string() as z.ZodType<UniqueModelId>,
  /** Provider ID */
  providerId: z.string(),
  /** API Model ID - The actual ID used when calling the provider's API */
  apiModelId: z.string().optional(),

  // Display Information
  /** Display name */
  name: z.string(),
  /** Description */
  description: z.string().optional(),
  /** UI grouping */
  group: z.string().optional(),
  /** Model family */
  family: z.string().optional(),
  /** Organization that owns the model */
  ownedBy: z.string().optional(),

  // Capabilities
  /** Final capability list after all merges */
  capabilities: z.array(z.enum(objectValues(ModelCapability))),
  /** Supported input modalities */
  inputModalities: z.array(z.enum(objectValues(Modality))).optional(),
  /** Supported output modalities */
  outputModalities: z.array(z.enum(objectValues(Modality))).optional(),

  // Configuration
  /** Context window size */
  contextWindow: z.number().optional(),
  /** Maximum output tokens */
  maxOutputTokens: z.number().optional(),
  /** Maximum input tokens */
  maxInputTokens: z.number().optional(),
  /** Supported endpoint types */
  endpointTypes: z.array(z.enum(objectValues(EndpointType))).optional(),
  /** Whether streaming is supported */
  supportsStreaming: z.boolean(),
  /** Reasoning configuration */
  reasoning: RuntimeReasoningSchema.optional(),
  /** Parameter support */
  parameterSupport: RuntimeParameterSupportSchema.optional(),
  /** Pricing configuration */
  pricing: RuntimeModelPricingSchema.optional(),

  // Status
  /** Whether this model is available for use */
  isEnabled: z.boolean(),
  /** Whether this model is hidden from lists */
  isHidden: z.boolean(),
  /** Replacement model if this one is deprecated */
  replaceWith: (z.string() as z.ZodType<UniqueModelId>).optional()
})

/** Merged runtime model type, derived from {@link ModelSchema}. */
export type Model = z.infer<typeof ModelSchema>
/** Service tier values accepted for OpenAI; null/undefined mean "not set". */
export type OpenAIServiceTier = 'auto' | 'default' | 'flex' | 'priority' | null | undefined
/** Service tier values accepted for Groq; null/undefined mean "not set". */
export type GroqServiceTier = 'auto' | 'on_demand' | 'flex' | undefined | null
/** Any service tier recognised by either provider family. */
export type ServiceTier = OpenAIServiceTier | GroqServiceTier

/** Lookup table of OpenAI tier names (key and value are identical). */
export const OpenAIServiceTiers = {
  auto: 'auto',
  default: 'default',
  flex: 'flex',
  priority: 'priority'
} as const

/** Lookup table of Groq tier names (key and value are identical). */
export const GroqServiceTiers = {
  auto: 'auto',
  on_demand: 'on_demand',
  flex: 'flex'
} as const

/** True when `tier` is an own key of `table` (inherited keys such as "toString" do not count). */
function hasOwnTier(table: Readonly<Record<string, string>>, tier: string): boolean {
  return Object.prototype.hasOwnProperty.call(table, tier)
}

/**
 * Type guard for OpenAI service tiers.
 * An unset tier (null or undefined) is accepted as valid.
 */
export function isOpenAIServiceTier(tier: string | null | undefined): tier is OpenAIServiceTier {
  if (tier == null) {
    return true
  }
  return hasOwnTier(OpenAIServiceTiers, tier)
}

/**
 * Type guard for Groq service tiers.
 * An unset tier (null or undefined) is accepted as valid.
 */
export function isGroqServiceTier(tier: string | undefined | null): tier is GroqServiceTier {
  if (tier == null) {
    return true
  }
  return hasOwnTier(GroqServiceTiers, tier)
}

/** Type guard that accepts a tier valid for either Groq or OpenAI. */
export function isServiceTier(tier: string | null | undefined): tier is ServiceTier {
  if (isGroqServiceTier(tier)) {
    return true
  }
  return isOpenAIServiceTier(tier)
}
/**
 * Provider-level settings. Every field is optional; the groups below are
 * labelled by the provider family they were introduced for.
 */
export const ProviderSettingsSchema = z.object({
  // OpenAI / Groq
  serviceTier: z.string().optional(),
  verbosity: z.string().optional(),

  // Azure-specific
  apiVersion: z.string().optional(),

  // Anthropic
  cacheControl: z
    .object({
      enabled: z.boolean(),
      tokenThreshold: z.number().optional(),
      cacheSystemMessage: z.boolean().optional(),
      cacheLastNMessages: z.number().optional()
    })
    .optional(),

  // Ollama / LMStudio / GPUStack
  keepAliveTime: z.number().optional(),

  // Common
  rateLimit: z.number().optional(),
  timeout: z.number().optional(),
  extraHeaders: z.record(z.string(), z.string()).optional(),

  // User notes
  notes: z.string().optional(),

  // GitHub Copilot auth state (stored here because v2 Provider has no isAuthed column)
  isAuthed: z.boolean().optional(),
  oauthUsername: z.string().optional(),
  oauthAvatar: z.string().optional()
})

/** Static type of provider settings, derived from {@link ProviderSettingsSchema}. */
export type ProviderSettings = z.infer<typeof ProviderSettingsSchema>
/**
 * Runtime schema for a merged provider.
 *
 * Only non-sensitive data is represented: API keys use `RuntimeApiKeySchema`
 * (the entry schema with `key` omitted) and authentication is reduced to
 * `authType`.
 */
export const ProviderSchema = z.object({
  /** Provider ID */
  id: z.string(),
  /** Associated preset provider ID (if any) */
  presetProviderId: z.string().optional(),
  /** Display name */
  name: z.string(),
  /** Description */
  description: z.string().optional(),
  /**
   * Per-endpoint-type configuration (baseUrl, reasoningFormatType, modelsApiUrls).
   * The cast narrows the inferred type to a partial record: not every endpoint
   * type needs an entry.
   */
  endpointConfigs: z.record(EndpointTypeSchema, EndpointConfigSchema).optional() as z.ZodOptional<
    z.ZodType<Partial<Record<EndpointType, EndpointConfig>>>
  >,
  /** Default text generation endpoint type */
  defaultChatEndpoint: EndpointTypeSchema.optional(),
  /** API Keys (without actual key values) */
  apiKeys: z.array(RuntimeApiKeySchema),
  /** Authentication type (no sensitive data) */
  authType: AuthTypeSchema,
  /** Merged API feature support */
  apiFeatures: RuntimeApiFeaturesSchema,
  /** Provider settings */
  settings: ProviderSettingsSchema,
  /** Website links (official, apiKey, docs, models) */
  websites: ProviderWebsitesSchema.optional(),
  /** Whether this provider is enabled */
  isEnabled: z.boolean()
})

/** Merged runtime provider type, derived from {@link ProviderSchema}. */
export type Provider = z.infer<typeof ProviderSchema>
/**
 * Produce a new capability list by applying override operations to `base`.
 *
 * Rules, in order:
 *  - no override: a shallow copy of `base` is returned;
 *  - non-empty `force`: a copy of `force` replaces everything (add/remove are ignored);
 *  - non-empty `add`: entries are appended and the combined list is de-duplicated,
 *    keeping first-seen order;
 *  - non-empty `remove`: listed entries are filtered out of the result.
 *
 * The input arrays are never mutated.
 *
 * @param base - Base capability list
 * @param override - Override operations (add/remove/force)
 * @returns Merged capability list
 */
export function applyCapabilityOverride(
  base: ModelCapability[],
  override: { add?: ModelCapability[]; remove?: ModelCapability[]; force?: ModelCapability[] } | null | undefined
): ModelCapability[] {
  if (override == null) {
    return base.slice()
  }

  const { force, add, remove } = override

  // An empty `force` list is treated as "not forced" and falls through.
  if ((force?.length ?? 0) > 0) {
    return force!.slice()
  }

  // De-duplication only happens when something is actually added.
  const merged = (add?.length ?? 0) > 0 ? [...new Set([...base, ...add!])] : base.slice()

  if ((remove?.length ?? 0) === 0) {
    return merged
  }

  const excluded = new Set(remove)
  return merged.filter((capability) => !excluded.has(capability))
}
presetModel - Preset model from catalog (or null) + * @param providerId - Provider ID for the result + * @returns Merged Model + */ +export function mergeModelConfig( + userModel: UserModelRow | null, + catalogOverride: ProtoProviderModelOverride | null, + presetModel: ProtoModelConfig | null, + providerId: string, + reasoningFormatTypes?: Partial> | null, + defaultChatEndpoint?: EndpointType +): Model { + // Case 1: Fully custom user model (no preset association) + if (userModel && !userModel.presetModelId) { + return { + id: createUniqueModelId(providerId, userModel.modelId), + providerId, + name: userModel.name ?? userModel.modelId, + description: userModel.description ?? undefined, + group: userModel.group ?? undefined, + capabilities: (userModel.capabilities ?? []) as ModelCapability[], + inputModalities: (userModel.inputModalities ?? undefined) as Modality[] | undefined, + outputModalities: (userModel.outputModalities ?? undefined) as Modality[] | undefined, + contextWindow: userModel.contextWindow ?? undefined, + maxOutputTokens: userModel.maxOutputTokens ?? undefined, + endpointTypes: (userModel.endpointTypes ?? undefined) as EndpointType[] | undefined, + supportsStreaming: userModel.supportsStreaming ?? true, + reasoning: userModel.reasoning as RuntimeReasoning | undefined, + isEnabled: userModel.isEnabled ?? true, + isHidden: userModel.isHidden ?? false + } + } + + // Case 2: Preset model (may have catalog override and user override) + if (!presetModel) { + throw new Error('Preset model not found for merge') + } + + const modelId = presetModel.id + + // Start from preset + let capabilities: ModelCapability[] = [...(presetModel.capabilities ?? [])] + let inputModalities: Modality[] | undefined = presetModel.inputModalities?.length + ? [...presetModel.inputModalities] + : undefined + let outputModalities: Modality[] | undefined = presetModel.outputModalities?.length + ? 
[...presetModel.outputModalities] + : undefined + let endpointTypes: EndpointType[] | undefined = undefined + let name = presetModel.name ?? presetModel.id + let description = presetModel.description + let contextWindow = presetModel.contextWindow + let maxOutputTokens = presetModel.maxOutputTokens + let maxInputTokens = presetModel.maxInputTokens + let reasoning: RuntimeReasoning | undefined + let pricing: RuntimeModelPricing | undefined + let replaceWith: string | undefined + + // Extract pricing + if (presetModel.pricing) { + pricing = { + input: { + perMillionTokens: presetModel.pricing.input?.perMillionTokens ?? null, + currency: presetModel.pricing.input?.currency + }, + output: { + perMillionTokens: presetModel.pricing.output?.perMillionTokens ?? null, + currency: presetModel.pricing.output?.currency + }, + cacheRead: presetModel.pricing.cacheRead + ? { + perMillionTokens: presetModel.pricing.cacheRead.perMillionTokens ?? null, + currency: presetModel.pricing.cacheRead.currency + } + : undefined, + cacheWrite: presetModel.pricing.cacheWrite + ? { + perMillionTokens: presetModel.pricing.cacheWrite.perMillionTokens ?? 
null, + currency: presetModel.pricing.cacheWrite.currency + } + : undefined + } + } + + // Apply catalog override + if (catalogOverride) { + if (catalogOverride.capabilities) { + capabilities = applyCapabilityOverride(capabilities, catalogOverride.capabilities) + } + if (catalogOverride.limits?.contextWindow != null) { + contextWindow = catalogOverride.limits.contextWindow + } + if (catalogOverride.limits?.maxOutputTokens != null) { + maxOutputTokens = catalogOverride.limits.maxOutputTokens + } + if (catalogOverride.limits?.maxInputTokens != null) { + maxInputTokens = catalogOverride.limits.maxInputTokens + } + if (catalogOverride.endpointTypes?.length) { + endpointTypes = [...catalogOverride.endpointTypes] + } + if (catalogOverride.inputModalities?.length) { + inputModalities = [...catalogOverride.inputModalities] + } + if (catalogOverride.outputModalities?.length) { + outputModalities = [...catalogOverride.outputModalities] + } + if (catalogOverride.replaceWith) { + replaceWith = catalogOverride.replaceWith + } + } + + // Apply user override + if (userModel) { + if (userModel.capabilities) { + capabilities = [...userModel.capabilities] as ModelCapability[] + } + if (userModel.endpointTypes) { + endpointTypes = [...userModel.endpointTypes] as EndpointType[] + } + if (userModel.inputModalities) { + inputModalities = [...userModel.inputModalities] as Modality[] + } + if (userModel.outputModalities) { + outputModalities = [...userModel.outputModalities] as Modality[] + } + if (userModel.name) { + name = userModel.name + } + if (userModel.description) { + description = userModel.description + } + if (userModel.contextWindow != null) { + contextWindow = userModel.contextWindow + } + if (userModel.maxOutputTokens != null) { + maxOutputTokens = userModel.maxOutputTokens + } + } + + const reasoningFormatType = resolveReasoningFormatType(endpointTypes, defaultChatEndpoint, reasoningFormatTypes) + + // Extract reasoning config from proto ReasoningSupport + provider's 
reasoning format type + if (presetModel.reasoning) { + reasoning = extractRuntimeReasoning(presetModel.reasoning, reasoningFormatType) + } + + if (catalogOverride?.reasoning) { + const overrideReasoning = extractRuntimeReasoning(catalogOverride.reasoning, reasoningFormatType) + reasoning = { + ...overrideReasoning, + thinkingTokenLimits: overrideReasoning.thinkingTokenLimits ?? reasoning?.thinkingTokenLimits, + interleaved: overrideReasoning.interleaved ?? reasoning?.interleaved + } + } + + if (userModel) { + if (userModel.reasoning) { + reasoning = userModel.reasoning as RuntimeReasoning + } + } + + return { + id: createUniqueModelId(providerId, modelId), + providerId, + // Use api_model_id from catalog override if available, otherwise fall back to model id + apiModelId: catalogOverride?.apiModelId, + name, + description, + group: userModel?.group ?? undefined, + family: presetModel.family, + ownedBy: presetModel.ownedBy, + capabilities, + inputModalities, + outputModalities, + contextWindow, + maxOutputTokens, + maxInputTokens, + endpointTypes, + supportsStreaming: userModel?.supportsStreaming ?? true, + reasoning, + pricing, + isEnabled: userModel?.isEnabled ?? !(catalogOverride?.disabled ?? false), + isHidden: userModel?.isHidden ?? false, + replaceWith: replaceWith ? 
// ═══════════════════════════════════════════════════════════════════════════════
// Provider Merge Utilities
// ═══════════════════════════════════════════════════════════════════════════════

/**
 * Merge provider configurations into a runtime Provider.
 *
 * Priority: userProvider > presetProvider > built-in defaults.
 * API key values are never copied into the result; only each key's id, label and
 * enabled flag are exposed.
 *
 * @param userProvider - User provider from SQLite (or null)
 * @param presetProvider - Preset provider from catalog (or null)
 * @returns Merged Provider
 * @throws Error when no provider id can be resolved from either source
 *   (in particular when both arguments are null)
 */
export function mergeProviderConfig(
  userProvider: UserProviderRow | null,
  presetProvider: ProtoProviderConfig | null
): Provider {
  // Resolving the id doubles as the "at least one source" check, which removes the
  // need for a non-null assertion on presetProvider.
  const providerId = userProvider?.providerId ?? presetProvider?.id
  if (providerId == null) {
    throw new Error('At least one of userProvider or presetProvider must be provided')
  }

  // Merge endpointConfigs — build from preset then overlay user config
  const presetEndpointConfigs = buildPresetEndpointConfigs(presetProvider)
  const endpointConfigs = mergeEndpointConfigs(presetEndpointConfigs, userProvider?.endpointConfigs)

  // Merge API features: defaults first, then preset, then user (last one wins per field)
  const apiFeatures: RuntimeApiFeatures = {
    ...DEFAULT_API_FEATURES,
    ...presetProvider?.apiFeatures,
    ...userProvider?.apiFeatures
  }

  // Merge settings: only the user row contributes on top of the defaults
  const settings: ProviderSettings = {
    ...DEFAULT_PROVIDER_SETTINGS,
    ...userProvider?.providerSettings
  }

  // Process API keys (strip actual key values for security)
  const apiKeys =
    userProvider?.apiKeys?.map((k) => ({
      id: k.id,
      label: k.label,
      isEnabled: k.isEnabled
    })) ?? []

  // Determine auth type; 'api-key' is used whenever the user row does not specify one
  let authType: Provider['authType'] = 'api-key'
  if (userProvider?.authConfig?.type) {
    authType = userProvider.authConfig.type as Provider['authType']
  }

  return {
    id: providerId,
    presetProviderId: userProvider?.presetProviderId ?? undefined,
    name: userProvider?.name ?? presetProvider?.name ?? providerId,
    description: presetProvider?.description,
    // An empty map is reported as undefined rather than {}
    endpointConfigs: Object.keys(endpointConfigs).length > 0 ? endpointConfigs : undefined,
    defaultChatEndpoint: userProvider?.defaultChatEndpoint ?? presetProvider?.defaultChatEndpoint,
    apiKeys,
    authType,
    apiFeatures,
    settings,
    isEnabled: userProvider?.isEnabled ?? true
  }
}

// ═══════════════════════════════════════════════════════════════════════════════
// Helper Functions
// ═══════════════════════════════════════════════════════════════════════════════

/**
 * Endpoint types that can carry chat reasoning, in the order they are preferred
 * when several are available.
 */
const CHAT_REASONING_ENDPOINT_PRIORITY: EndpointType[] = [
  EndpointType.OPENAI_RESPONSES,
  EndpointType.OPENAI_CHAT_COMPLETIONS,
  EndpointType.ANTHROPIC_MESSAGES,
  EndpointType.GOOGLE_GENERATE_CONTENT,
  EndpointType.OLLAMA_CHAT,
  EndpointType.OLLAMA_GENERATE,
  EndpointType.OPENAI_TEXT_COMPLETIONS
]

/**
 * Default effort levels per reasoning format type (when not specified in catalog).
 *
 * NOTE(review): the key type is declared as `string` because lookups may use an empty
 * string when no format type is known — narrow it to ReasoningFormatType if every
 * lookup key is guaranteed to be one.
 */
const DEFAULT_EFFORTS: Partial<Record<string, ReasoningEffortType[]>> = {
  'openai-chat': [
    ReasoningEffort.NONE,
    ReasoningEffort.MINIMAL,
    ReasoningEffort.LOW,
    ReasoningEffort.MEDIUM,
    ReasoningEffort.HIGH
  ],
  'openai-responses': [
    ReasoningEffort.NONE,
    ReasoningEffort.MINIMAL,
    ReasoningEffort.LOW,
    ReasoningEffort.MEDIUM,
    ReasoningEffort.HIGH
  ],
  anthropic: [],
  gemini: [ReasoningEffort.LOW, ReasoningEffort.MEDIUM, ReasoningEffort.HIGH],
  'enable-thinking': [ReasoningEffort.NONE, ReasoningEffort.LOW, ReasoningEffort.MEDIUM, ReasoningEffort.HIGH],
  'thinking-type': [ReasoningEffort.NONE, ReasoningEffort.AUTO]
}

/** True when the endpoint type is listed in CHAT_REASONING_ENDPOINT_PRIORITY. */
function isChatReasoningEndpointType(endpointType: EndpointType): boolean {
  return CHAT_REASONING_ENDPOINT_PRIORITY.includes(endpointType)
}
/**
 * Build runtime endpointConfigs from preset provider's registry data.
 * Maps registry reasoningFormat (discriminated union) to runtime reasoningFormatType string.
 *
 * Endpoints that contribute no baseUrl, modelsApiUrls or reasoning format are omitted,
 * so the result never contains empty config objects.
 *
 * @param presetProvider - Preset provider from catalog (or null)
 * @returns Map of endpoint type to runtime config; `{}` when there is nothing to map
 */
function buildPresetEndpointConfigs(
  presetProvider: ProtoProviderConfig | null
): Partial<Record<EndpointType, EndpointConfig>> {
  if (!presetProvider?.endpointConfigs) return {}

  const configs: Partial<Record<EndpointType, EndpointConfig>> = {}

  for (const [key, regConfig] of Object.entries(presetProvider.endpointConfigs)) {
    // Tolerate sparse registry data instead of crashing on a null entry
    if (!regConfig) continue

    const config: EndpointConfig = {}

    if (regConfig.baseUrl) config.baseUrl = regConfig.baseUrl
    if (regConfig.modelsApiUrls) config.modelsApiUrls = regConfig.modelsApiUrls
    if (regConfig.reasoningFormat?.type) config.reasoningFormatType = regConfig.reasoningFormat.type

    if (Object.keys(config).length > 0) {
      configs[key as EndpointType] = config
    }
  }

  return configs
}

/**
 * Deep-merge two endpointConfigs. User config takes priority per field within each endpoint.
 *
 * A user field whose value is `undefined` counts as "not set" and leaves the preset
 * value in place; any other value (including null) overrides the preset.
 *
 * @param preset - Endpoint configs derived from the preset provider
 * @param user - Endpoint configs stored on the user provider
 * @returns A new map holding one merged config per endpoint present in either input
 */
function mergeEndpointConfigs(
  preset: Partial<Record<EndpointType, EndpointConfig>> | null | undefined,
  user: Partial<Record<EndpointType, EndpointConfig>> | null | undefined
): Partial<Record<EndpointType, EndpointConfig>> {
  const result: Partial<Record<EndpointType, EndpointConfig>> = {}

  const allKeys = new Set([...Object.keys(preset ?? {}), ...Object.keys(user ?? {})])

  for (const key of allKeys) {
    const endpointType = key as EndpointType
    const merged: EndpointConfig = { ...preset?.[endpointType] }

    // A plain object spread would copy explicit `undefined` values and wipe preset fields,
    // so user fields are applied one by one and undefined ones are skipped.
    for (const [field, value] of Object.entries(user?.[endpointType] ?? {})) {
      if (value !== undefined) {
        Object.assign(merged, { [field]: value })
      }
    }

    result[endpointType] = merged
  }

  return result
}

/**
 * Extract reasoningFormatTypes map from endpointConfigs (for backward-compatible access)
 *
 * @param endpointConfigs - Merged endpoint configs (or null/undefined)
 * @returns Map of endpoint type to reasoning format type, or undefined when no endpoint
 *   declares a reasoning format type
 */
export function extractReasoningFormatTypes(
  endpointConfigs: Partial<Record<EndpointType, EndpointConfig>> | null | undefined
): Partial<Record<EndpointType, ReasoningFormatType>> | undefined {
  if (!endpointConfigs) return undefined

  const result: Partial<Record<EndpointType, ReasoningFormatType>> = {}
  for (const [key, config] of Object.entries(endpointConfigs)) {
    if (config?.reasoningFormatType) {
      result[key as EndpointType] = config.reasoningFormatType
    }
  }

  return Object.keys(result).length > 0 ? result : undefined
}
/**
 * Pick the endpoint type whose reasoning format applies to a model.
 *
 * Only endpoint types accepted by isChatReasoningEndpointType are considered.
 * Resolution order:
 *   1. the single eligible endpoint, when the model lists exactly one;
 *   2. the provider's default chat endpoint, when it is eligible and the model either
 *      lists no eligible endpoint or lists the default itself;
 *   3. the first eligible endpoint in CHAT_REASONING_ENDPOINT_PRIORITY order.
 *
 * @param endpointTypes - Endpoint types supported by the model
 * @param defaultChatEndpoint - The provider's default chat endpoint, if any
 * @returns The selected endpoint type, or undefined when none qualifies
 */
function resolveReasoningEndpointType(
  endpointTypes: EndpointType[] | undefined,
  defaultChatEndpoint: EndpointType | undefined
): EndpointType | undefined {
  const candidates = (endpointTypes ?? []).filter(isChatReasoningEndpointType)

  if (candidates.length === 1) {
    return candidates[0]
  }

  const defaultIsUsable =
    defaultChatEndpoint !== undefined &&
    isChatReasoningEndpointType(defaultChatEndpoint) &&
    (candidates.length === 0 || candidates.includes(defaultChatEndpoint))
  if (defaultIsUsable) {
    return defaultChatEndpoint
  }

  return CHAT_REASONING_ENDPOINT_PRIORITY.find((endpointType) => candidates.includes(endpointType))
}

/**
 * Resolve the reasoning format type for a model by first choosing the relevant endpoint
 * and then looking that endpoint up in the provider's reasoningFormatTypes map.
 *
 * @param endpointTypes - Endpoint types supported by the model
 * @param defaultChatEndpoint - The provider's default chat endpoint, if any
 * @param reasoningFormatTypes - Map of endpoint type to reasoning format type
 * @returns The format type, or undefined when no endpoint is selected, no map is given,
 *   or the map has no entry for the selected endpoint
 */
function resolveReasoningFormatType(
  endpointTypes: EndpointType[] | undefined,
  defaultChatEndpoint: EndpointType | undefined,
  reasoningFormatTypes: Partial<Record<EndpointType, ReasoningFormatType>> | null | undefined
): ReasoningFormatType | undefined {
  const endpointType = resolveReasoningEndpointType(endpointTypes, defaultChatEndpoint)
  if (endpointType === undefined || !reasoningFormatTypes) {
    return undefined
  }

  return reasoningFormatTypes[endpointType]
}

/**
 * Convert proto ReasoningSupport to runtime RuntimeReasoning
 * The `type` comes from the provider's reasoningFormat, not from the model.
 *
 * @param reasoning - Reasoning support declared for the model
 * @param reasoningFormatType - Format type resolved from the provider; an empty string is
 *   used as `type` when it is undefined
 * @returns Runtime reasoning config whose supportedEfforts array is always a fresh copy
 */
function extractRuntimeReasoning(
  reasoning: ProtoReasoningSupport,
  reasoningFormatType: ReasoningFormatType | undefined
): RuntimeReasoning {
  const type = reasoningFormatType ?? ''

  // Get supported efforts, with fallback based on provider format type.
  // Both sources are copied: handing out the DEFAULT_EFFORTS array itself would let a
  // caller that mutates the result silently change the defaults for every later model.
  const declaredEfforts = reasoning.supportedEfforts ?? []
  const supportedEfforts: ReasoningEffortType[] =
    declaredEfforts.length > 0 ? [...declaredEfforts] : [...(DEFAULT_EFFORTS[type] ?? [])]

  return {
    type,
    supportedEfforts,
    thinkingTokenLimits: reasoning.thinkingTokenLimits,
    interleaved: reasoning.interleaved
  }
}
^0.16.6 + version: 0.16.8(@typescript/native-preview@7.0.0-dev.20260204.1)(typescript@5.9.3) + typescript: + specifier: ^5.9.3 + version: 5.9.3 + vitest: + specifier: ^4.0.13 + version: 4.1.2(@opentelemetry/api@1.9.0)(@types/node@24.10.4)(@vitest/ui@3.2.4(vitest@3.2.4))(jsdom@26.1.0)(msw@2.12.7(@types/node@24.10.4)(typescript@5.9.3))(vite@8.0.4(@types/node@24.10.4)(esbuild@0.25.12)(jiti@2.6.1)(tsx@4.21.0)(yaml@2.8.2)) + zod: + specifier: ^4.1.12 + version: 4.3.6 packages/ui: dependencies: @@ -4064,6 +4104,9 @@ packages: '@oxc-project/types@0.95.0': resolution: {integrity: sha512-vACy7vhpMPhjEJhULNxrdR0D943TkA/MigMpJCHmBHvMXxRStRi/dPtTlfQ3uDwWSzRpT8z+7ImjZVf8JWBocQ==} + '@oxc-project/types@0.99.0': + resolution: {integrity: sha512-LLDEhXB7g1m5J+woRSgfKsFPS3LhR9xRhTeIoEBm5WrkwMxn6eZ0Ld0c0K5eHB57ChZX6I3uSmmLjZ8pcjlRcw==} + '@oxlint-tsgolint/darwin-arm64@0.17.4': resolution: {integrity: sha512-XEA7vl/T1+wiVnMq2MR6u5OYr2pwKHiAPgklxpK8tPrjQ1ci/amNmwI8ECn6TPXSCsC8SJsSN5xvzXm5H3dTfw==} cpu: [arm64] @@ -4891,6 +4934,12 @@ packages: cpu: [arm64] os: [android] + '@rolldown/binding-android-arm64@1.0.0-beta.52': + resolution: {integrity: sha512-MBGIgysimZPqTDcLXI+i9VveijkP5C3EAncEogXhqfax6YXj1Tr2LY3DVuEOMIjWfMPMhtQSPup4fSTAmgjqIw==} + engines: {node: ^20.19.0 || >=22.12.0} + cpu: [arm64] + os: [android] + '@rolldown/binding-android-arm64@1.0.0-beta.53': resolution: {integrity: sha512-Ok9V8o7o6YfSdTTYA/uHH30r3YtOxLD6G3wih/U9DO0ucBBFq8WPt/DslU53OgfteLRHITZny9N/qCUxMf9kjQ==} engines: {node: ^20.19.0 || >=22.12.0} @@ -4915,6 +4964,12 @@ packages: cpu: [arm64] os: [darwin] + '@rolldown/binding-darwin-arm64@1.0.0-beta.52': + resolution: {integrity: sha512-MmKeoLnKu1d9j6r19K8B+prJnIZ7u+zQ+zGQ3YHXGnr41rzE3eqQLovlkvoZnRoxDGPA4ps0pGiwXy6YE3lJyg==} + engines: {node: ^20.19.0 || >=22.12.0} + cpu: [arm64] + os: [darwin] + '@rolldown/binding-darwin-arm64@1.0.0-beta.53': resolution: {integrity: sha512-yIsKqMz0CtRnVa6x3Pa+mzTihr4Ty+Z6HfPbZ7RVbk1Uxnco4+CUn7Qbm/5SBol1JD/7nvY8rphAgyAi7Lj6Vg==} 
engines: {node: ^20.19.0 || >=22.12.0} @@ -4939,6 +4994,12 @@ packages: cpu: [x64] os: [darwin] + '@rolldown/binding-darwin-x64@1.0.0-beta.52': + resolution: {integrity: sha512-qpHedvQBmIjT8zdnjN3nWPR2qjQyJttbXniCEKKdHeAbZG9HyNPBUzQF7AZZGwmS9coQKL+hWg9FhWzh2dZ2IA==} + engines: {node: ^20.19.0 || >=22.12.0} + cpu: [x64] + os: [darwin] + '@rolldown/binding-darwin-x64@1.0.0-beta.53': resolution: {integrity: sha512-GTXe+mxsCGUnJOFMhfGWmefP7Q9TpYUseHvhAhr21nCTgdS8jPsvirb0tJwM3lN0/u/cg7bpFNa16fQrjKrCjQ==} engines: {node: ^20.19.0 || >=22.12.0} @@ -4963,6 +5024,12 @@ packages: cpu: [x64] os: [freebsd] + '@rolldown/binding-freebsd-x64@1.0.0-beta.52': + resolution: {integrity: sha512-dDp7WbPapj/NVW0LSiH/CLwMhmLwwKb3R7mh2kWX+QW85X1DGVnIEyKh9PmNJjB/+suG1dJygdtdNPVXK1hylg==} + engines: {node: ^20.19.0 || >=22.12.0} + cpu: [x64] + os: [freebsd] + '@rolldown/binding-freebsd-x64@1.0.0-beta.53': resolution: {integrity: sha512-9Tmp7bBvKqyDkMcL4e089pH3RsjD3SUungjmqWtyhNOxoQMh0fSmINTyYV8KXtE+JkxYMPWvnEt+/mfpVCkk8w==} engines: {node: ^20.19.0 || >=22.12.0} @@ -4987,6 +5054,12 @@ packages: cpu: [arm] os: [linux] + '@rolldown/binding-linux-arm-gnueabihf@1.0.0-beta.52': + resolution: {integrity: sha512-9e4l6vy5qNSliDPqNfR6CkBOAx6PH7iDV4OJiEJzajajGrVy8gc/IKKJUsoE52G8ud8MX6r3PMl97NfwgOzB7g==} + engines: {node: ^20.19.0 || >=22.12.0} + cpu: [arm] + os: [linux] + '@rolldown/binding-linux-arm-gnueabihf@1.0.0-beta.53': resolution: {integrity: sha512-a1y5fiB0iovuzdbjUxa7+Zcvgv+mTmlGGC4XydVIsyl48eoxgaYkA3l9079hyTyhECsPq+mbr0gVQsFU11OJAQ==} engines: {node: ^20.19.0 || >=22.12.0} @@ -5011,6 +5084,12 @@ packages: cpu: [arm64] os: [linux] + '@rolldown/binding-linux-arm64-gnu@1.0.0-beta.52': + resolution: {integrity: sha512-V48oDR84feRU2KRuzpALp594Uqlx27+zFsT6+BgTcXOtu7dWy350J1G28ydoCwKB+oxwsRPx2e7aeQnmd3YJbQ==} + engines: {node: ^20.19.0 || >=22.12.0} + cpu: [arm64] + os: [linux] + '@rolldown/binding-linux-arm64-gnu@1.0.0-beta.53': resolution: {integrity: 
sha512-bpIGX+ov9PhJYV+wHNXl9rzq4F0QvILiURn0y0oepbQx+7stmQsKA0DhPGwmhfvF856wq+gbM8L92SAa/CBcLg==} engines: {node: ^20.19.0 || >=22.12.0} @@ -5035,6 +5114,12 @@ packages: cpu: [arm64] os: [linux] + '@rolldown/binding-linux-arm64-musl@1.0.0-beta.52': + resolution: {integrity: sha512-ENLmSQCWqSA/+YN45V2FqTIemg7QspaiTjlm327eUAMeOLdqmSOVVyrQexJGNTQ5M8sDYCgVAig2Kk01Ggmqaw==} + engines: {node: ^20.19.0 || >=22.12.0} + cpu: [arm64] + os: [linux] + '@rolldown/binding-linux-arm64-musl@1.0.0-beta.53': resolution: {integrity: sha512-bGe5EBB8FVjHBR1mOLOPEFg1Lp3//7geqWkU5NIhxe+yH0W8FVrQ6WRYOap4SUTKdklD/dC4qPLREkMMQ855FA==} engines: {node: ^20.19.0 || >=22.12.0} @@ -5071,6 +5156,12 @@ packages: cpu: [x64] os: [linux] + '@rolldown/binding-linux-x64-gnu@1.0.0-beta.52': + resolution: {integrity: sha512-klahlb2EIFltSUubn/VLjuc3qxp1E7th8ukayPfdkcKvvYcQ5rJztgx8JsJSuAKVzKtNTqUGOhy4On71BuyV8g==} + engines: {node: ^20.19.0 || >=22.12.0} + cpu: [x64] + os: [linux] + '@rolldown/binding-linux-x64-gnu@1.0.0-beta.53': resolution: {integrity: sha512-qL+63WKVQs1CMvFedlPt0U9PiEKJOAL/bsHMKUDS6Vp2Q+YAv/QLPu8rcvkfIMvQ0FPU2WL0aX4eWwF6e/GAnA==} engines: {node: ^20.19.0 || >=22.12.0} @@ -5095,6 +5186,12 @@ packages: cpu: [x64] os: [linux] + '@rolldown/binding-linux-x64-musl@1.0.0-beta.52': + resolution: {integrity: sha512-UuA+JqQIgqtkgGN2c/AQ5wi8M6mJHrahz/wciENPTeI6zEIbbLGoth5XN+sQe2pJDejEVofN9aOAp0kaazwnVg==} + engines: {node: ^20.19.0 || >=22.12.0} + cpu: [x64] + os: [linux] + '@rolldown/binding-linux-x64-musl@1.0.0-beta.53': resolution: {integrity: sha512-VGl9JIGjoJh3H8Mb+7xnVqODajBmrdOOb9lxWXdcmxyI+zjB2sux69br0hZJDTyLJfvBoYm439zPACYbCjGRmw==} engines: {node: ^20.19.0 || >=22.12.0} @@ -5119,6 +5216,12 @@ packages: cpu: [arm64] os: [openharmony] + '@rolldown/binding-openharmony-arm64@1.0.0-beta.52': + resolution: {integrity: sha512-1BNQW8u4ro8bsN1+tgKENJiqmvc+WfuaUhXzMImOVSMw28pkBKdfZtX2qJPADV3terx+vNJtlsgSGeb3+W6Jiw==} + engines: {node: ^20.19.0 || >=22.12.0} + cpu: [arm64] + os: [openharmony] + 
'@rolldown/binding-openharmony-arm64@1.0.0-beta.53': resolution: {integrity: sha512-B4iIserJXuSnNzA5xBLFUIjTfhNy7d9sq4FUMQY3GhQWGVhS2RWWzzDnkSU6MUt7/aHUrep0CdQfXUJI9D3W7A==} engines: {node: ^20.19.0 || >=22.12.0} @@ -5142,6 +5245,11 @@ packages: engines: {node: '>=14.0.0'} cpu: [wasm32] + '@rolldown/binding-wasm32-wasi@1.0.0-beta.52': + resolution: {integrity: sha512-K/p7clhCqJOQpXGykrFaBX2Dp9AUVIDHGc+PtFGBwg7V+mvBTv/tsm3LC3aUmH02H2y3gz4y+nUTQ0MLpofEEg==} + engines: {node: '>=14.0.0'} + cpu: [wasm32] + '@rolldown/binding-wasm32-wasi@1.0.0-beta.53': resolution: {integrity: sha512-BUjAEgpABEJXilGq/BPh7jeU3WAJ5o15c1ZEgHaDWSz3LB881LQZnbNJHmUiM4d1JQWMYYyR1Y490IBHi2FPJg==} engines: {node: '>=14.0.0'} @@ -5163,6 +5271,12 @@ packages: cpu: [arm64] os: [win32] + '@rolldown/binding-win32-arm64-msvc@1.0.0-beta.52': + resolution: {integrity: sha512-a4EkXBtnYYsKipjS7QOhEBM4bU5IlR9N1hU+JcVEVeuTiaslIyhWVKsvf7K2YkQHyVAJ+7/A9BtrGqORFcTgng==} + engines: {node: ^20.19.0 || >=22.12.0} + cpu: [arm64] + os: [win32] + '@rolldown/binding-win32-arm64-msvc@1.0.0-beta.53': resolution: {integrity: sha512-s27uU7tpCWSjHBnxyVXHt3rMrQdJq5MHNv3BzsewCIroIw3DJFjMH1dzCPPMUFxnh1r52Nf9IJ/eWp6LDoyGcw==} engines: {node: ^20.19.0 || >=22.12.0} @@ -5187,12 +5301,24 @@ packages: cpu: [ia32] os: [win32] + '@rolldown/binding-win32-ia32-msvc@1.0.0-beta.52': + resolution: {integrity: sha512-5ZXcYyd4GxPA6QfbGrNcQjmjbuLGvfz6728pZMsQvGHI+06LT06M6TPtXvFvLgXtexc+OqvFe1yAIXJU1gob/w==} + engines: {node: ^20.19.0 || >=22.12.0} + cpu: [ia32] + os: [win32] + '@rolldown/binding-win32-x64-msvc@1.0.0-beta.45': resolution: {integrity: sha512-wiU40G1nQo9rtfvF9jLbl79lUgjfaD/LTyUEw2Wg/gdF5OhjzpKMVugZQngO+RNdwYaNj+Fs+kWBWfp4VXPMHA==} engines: {node: ^20.19.0 || >=22.12.0} cpu: [x64] os: [win32] + '@rolldown/binding-win32-x64-msvc@1.0.0-beta.52': + resolution: {integrity: sha512-tzpnRQXJrSzb8Z9sm97UD3cY0toKOImx+xRKsDLX4zHaAlRXWh7jbaKBePJXEN7gNw7Nm03PBNwphdtA8KSUYQ==} + engines: {node: ^20.19.0 || >=22.12.0} + cpu: [x64] + os: 
[win32] + '@rolldown/binding-win32-x64-msvc@1.0.0-beta.53': resolution: {integrity: sha512-cjWL/USPJ1g0en2htb4ssMjIycc36RvdQAx1WlXnS6DpULswiUTVXPDesTifSKYSyvx24E0YqQkEm0K/M2Z/AA==} engines: {node: ^20.19.0 || >=22.12.0} @@ -5217,6 +5343,9 @@ packages: '@rolldown/pluginutils@1.0.0-beta.45': resolution: {integrity: sha512-Le9ulGCrD8ggInzWw/k2J8QcbPz7eGIOWqfJ2L+1R0Opm7n6J37s2hiDWlh6LJN0Lk9L5sUzMvRHKW7UxBZsQA==} + '@rolldown/pluginutils@1.0.0-beta.52': + resolution: {integrity: sha512-/L0htLJZbaZFL1g9OHOblTxbCYIGefErJjtYOwgl9ZqNx27P3L0SDfjhhHIss32gu5NWgnxuT2a2Hnnv6QGHKA==} + '@rolldown/pluginutils@1.0.0-beta.53': resolution: {integrity: sha512-vENRlFU4YbrwVqNDZ7fLvy+JR1CRkyr01jhSiDpE1u6py3OMzQfztQU2jxykW3ALNxO4kSlqIDeYyD0Y9RcQeQ==} @@ -6974,6 +7103,9 @@ packages: '@vitest/expect@3.2.4': resolution: {integrity: sha512-Io0yyORnB6sikFlt8QW5K7slY4OjqNX9jmJQ02QDda8lyM6B5oNgVWoSoKPac8/kgnCUzuHQKrSLtu/uOqqrig==} + '@vitest/expect@4.1.2': + resolution: {integrity: sha512-gbu+7B0YgUJ2nkdsRJrFFW6X7NTP44WlhiclHniUhxADQJH5Szt9mZ9hWnJPJ8YwOK5zUOSSlSvyzRf0u1DSBQ==} + '@vitest/mocker@3.2.4': resolution: {integrity: sha512-46ryTE9RZO/rfDd7pEqFl7etuyzekzEhUbTW3BvmeO/BcCMEgq59BKhek3dXDWgAj4oMK6OZi+vRr1wPW6qjEQ==} peerDependencies: @@ -6985,18 +7117,41 @@ packages: vite: optional: true + '@vitest/mocker@4.1.2': + resolution: {integrity: sha512-Ize4iQtEALHDttPRCmN+FKqOl2vxTiNUhzobQFFt/BM1lRUTG7zRCLOykG/6Vo4E4hnUdfVLo5/eqKPukcWW7Q==} + peerDependencies: + msw: ^2.4.9 + vite: ^6.0.0 || ^7.0.0 || ^8.0.0 + peerDependenciesMeta: + msw: + optional: true + vite: + optional: true + '@vitest/pretty-format@3.2.4': resolution: {integrity: sha512-IVNZik8IVRJRTr9fxlitMKeJeXFFFN0JaB9PHPGQ8NKQbGpfjlTx9zO4RefN8gp7eqjNy8nyK3NZmBzOPeIxtA==} + '@vitest/pretty-format@4.1.2': + resolution: {integrity: sha512-dwQga8aejqeuB+TvXCMzSQemvV9hNEtDDpgUKDzOmNQayl2OG241PSWeJwKRH3CiC+sESrmoFd49rfnq7T4RnA==} + '@vitest/runner@3.2.4': resolution: {integrity: 
sha512-oukfKT9Mk41LreEW09vt45f8wx7DordoWUZMYdY/cyAk7w5TWkTRCNZYF7sX7n2wB7jyGAl74OxgwhPgKaqDMQ==} + '@vitest/runner@4.1.2': + resolution: {integrity: sha512-Gr+FQan34CdiYAwpGJmQG8PgkyFVmARK8/xSijia3eTFgVfpcpztWLuP6FttGNfPLJhaZVP/euvujeNYar36OQ==} + '@vitest/snapshot@3.2.4': resolution: {integrity: sha512-dEYtS7qQP2CjU27QBC5oUOxLE/v5eLkGqPE0ZKEIDGMs4vKWe7IjgLOeauHsR0D5YuuycGRO5oSRXnwnmA78fQ==} + '@vitest/snapshot@4.1.2': + resolution: {integrity: sha512-g7yfUmxYS4mNxk31qbOYsSt2F4m1E02LFqO53Xpzg3zKMhLAPZAjjfyl9e6z7HrW6LvUdTwAQR3HHfLjpko16A==} + '@vitest/spy@3.2.4': resolution: {integrity: sha512-vAfasCOe6AIK70iP5UD11Ac4siNUNJ9i/9PZ3NKx07sG6sUxeag1LWdNrMWeKKYBLlzuK+Gn65Yd5nyL6ds+nw==} + '@vitest/spy@4.1.2': + resolution: {integrity: sha512-DU4fBnbVCJGNBwVA6xSToNXrkZNSiw59H8tcuUspVMsBDBST4nfvsPsEHDHGtWRRnqBERBQu7TrTKskmjqTXKA==} + '@vitest/ui@3.2.4': resolution: {integrity: sha512-hGISOaP18plkzbWEcP/QvtRW1xDXF2+96HbEX6byqQhAUbiS5oH6/9JwW+QsQCIYON2bI6QZBF+2PvOmrRZ9wA==} peerDependencies: @@ -7005,6 +7160,9 @@ packages: '@vitest/utils@3.2.4': resolution: {integrity: sha512-fB2V0JFrQSMsCo9HiSq3Ezpdv4iYaXRG1Sx8edX3MwxfyNn83mKiGzOcH+Fkxt4MHxr3y42fQi1oeAInqgX2QA==} + '@vitest/utils@4.1.2': + resolution: {integrity: sha512-xw2/TiX82lQHA06cgbqRKFb5lCAy3axQ4H4SoUFhUsg+wztiet+co86IAMDtF6Vm1hc7J6j09oh/rgDn+JdKIQ==} + '@vitest/web-worker@3.2.4': resolution: {integrity: sha512-JXK3lMyZHDrJ/BrJmxSZxe3RYT9oy2juxN4kpdrQ8NL8iibz352lXbcrnqG4WuSoBDwhjgghgvmIpsTv9Be7eA==} peerDependencies: @@ -7587,6 +7745,10 @@ packages: resolution: {integrity: sha512-4zNhdJD/iOjSH0A05ea+Ke6MU5mmpQcbQsSOkgdaUMJ9zTlDTD/GYlwohmIE2u0gaxHYiVHEn1Fw9mZ/ktJWgw==} engines: {node: '>=18'} + chai@6.2.2: + resolution: {integrity: sha512-NUPRluOfOiTKBKvWPtSD4PhFvWCqOi0BGStNWs57X9js7XGTprSmFoz5F0tWhR4WPjNeR9jXqdC7/UpSJTnlRg==} + engines: {node: '>=18'} + chalk@1.1.3: resolution: {integrity: sha512-U3lRVLMSlsCfjqYPbLyVv11M9CPW4I728d6TCKMAOJueEeB9/8o+eSsMnxPJD+Q+K909sdESg7C+tIkoH6on1A==} engines: {node: '>=0.10.0'} @@ 
-7657,6 +7819,10 @@ packages: resolution: {integrity: sha512-Qgzu8kfBvo+cA4962jnP1KkS6Dop5NS6g7R5LFYJr4b8Ub94PPQXUksCw9PvXoeXPRRddRNC5C1JQUR2SMGtnA==} engines: {node: '>= 14.16.0'} + chokidar@5.0.0: + resolution: {integrity: sha512-TQMmc3w+5AxjpL8iIiwebF73dRDF4fBIieAqGn9RGCWaEVwQ6Fb2cGe31Yns0RRIzii5goJ1Y7xbMwo1TxMplw==} + engines: {node: '>= 20.19.0'} + chownr@1.1.4: resolution: {integrity: sha512-jJ0bqzaylmJtVnNgzTeSOs8DPavpbYgEr/b0YL8/2GO3xJEhInFmhKMUnEJQjZumK7KXGFhUy89PrsJWlakBVg==} @@ -8467,6 +8633,10 @@ packages: resolution: {integrity: sha512-uBq4egWHTcTt33a72vpSG0z3HnPuIl6NqYcTrKEg2azoEyl2hpW0zqlxysq2pK9HlDIHyHyakeYaYnSAwd8bow==} engines: {node: '>=12'} + dotenv@17.4.1: + resolution: {integrity: sha512-k8DaKGP6r1G30Lx8V4+pCsLzKr8vLmV2paqEj1Y55GdAgJuIqpRp5FfajGF8KtwMxCz9qJc6wUIJnm053d/WCw==} + engines: {node: '>=12'} + dotenv@8.6.0: resolution: {integrity: sha512-IrPdXQsk2BbzvCBGBOTmmSH5SodmqZNt4ERAZDmW4CT+tL8VtvinqywuANaFu4bOMWki16nqf0e4oC0QIaDr/g==} engines: {node: '>=10'} @@ -8567,6 +8737,12 @@ packages: sqlite3: optional: true + drizzle-zod@0.8.3: + resolution: {integrity: sha512-66yVOuvGhKJnTdiqj1/Xaaz9/qzOdRJADpDa68enqS6g3t0kpNkwNYjUuaeXgZfO/UWuIM9HIhSlJ6C5ZraMww==} + peerDependencies: + drizzle-orm: '>=0.36.0' + zod: ^3.25.0 || ^4.0.0 + dts-resolver@2.1.3: resolution: {integrity: sha512-bihc7jPC90VrosXNzK0LTE2cuLP6jr0Ro8jk+kMugHReJVLIpHz/xadeq3MhuwyO4TD4OA3L1Q8pBBFRc08Tsw==} engines: {node: '>=20.19.0'} @@ -8747,6 +8923,9 @@ packages: es-module-lexer@1.7.0: resolution: {integrity: sha512-jEQoCwk8hyb2AZziIOLhDqpm5+2ww5uIE6lkO/6jcOCusfk6LhMHpXXfBLXTZ7Ydyt0j4VoUQv6uGNYbdW+kBA==} + es-module-lexer@2.0.0: + resolution: {integrity: sha512-5POEcUuZybH7IdmGsD8wlf0AI55wMecM9rVBTI/qEAy2c1kTOm3DjFYjrBdI2K3BaJjJYfYFeRtM0t9ssnRuxw==} + es-object-atoms@1.1.1: resolution: {integrity: sha512-FGgH2h8zKNim9ljj7dankFPcICIK9Cp5bm+c2gQSYePhpaG5+esrLODihIorn+Pe6FGJzWhXQotPv73jTaldXA==} engines: {node: '>= 0.4'} @@ -10633,6 +10812,11 @@ packages: peerDependencies: react: 
^16.5.1 || ^17.0.0 || ^18.0.0 || ^19.0.0 + lucide-react@0.563.0: + resolution: {integrity: sha512-8dXPB2GI4dI8jV4MgUDGBeLdGk8ekfqVZ0BdLcrRzocGgG75ltNEmWS+gE7uokKF/0oSUuczNDT+g9hFJ23FkA==} + peerDependencies: + react: ^16.5.1 || ^17.0.0 || ^18.0.0 || ^19.0.0 + luxon@3.7.2: resolution: {integrity: sha512-vtEhXh/gNjI9Yg1u4jX/0YVPMvxzHuGgCm6tC5kZyb08yjGWGnqAjGJvcXbqQR2P3MyMEFnRbpcdFS6PBcLqew==} engines: {node: '>=12'} @@ -12399,6 +12583,10 @@ packages: resolution: {integrity: sha512-GDhwkLfywWL2s6vEjyhri+eXmfH6j1L7JE27WhqLeYzoh/A3DBaYGEj2H/HFZCn/kMfim73FXxEJTw06WtxQwg==} engines: {node: '>= 14.18.0'} + readdirp@5.0.0: + resolution: {integrity: sha512-9u/XQ1pvrQtYyMpZe7DXKv2p5CNvyVwzUB6uhLAnQwHMSgKMBR62lc7AHljaeteeHXn11XTAaLLUVZYVZyuRBQ==} + engines: {node: '>= 20.19.0'} + readline2@1.0.1: resolution: {integrity: sha512-8/td4MmwUB6PkZUbV25uKz7dfrmjYWxsW8DVfibWdlHRk/l/DfHKn4pU+dfcoGLFgWOdyGCzINRQD7jn+Bv+/g==} @@ -12648,6 +12836,25 @@ packages: vue-tsc: optional: true + rolldown-plugin-dts@0.18.4: + resolution: {integrity: sha512-7UpdiICFd/BhdjKtDPeakCFRk6pbkTGFe0Z6u01egt4c8aoO+JoPGF1Smc+JRuCH2s5j5hBdteBi0e10G0xQdQ==} + engines: {node: '>=20.19.0'} + peerDependencies: + '@ts-macro/tsc': ^0.3.6 + '@typescript/native-preview': '>=7.0.0-dev.20250601.1' + rolldown: ^1.0.0-beta.51 + typescript: ^5.0.0 + vue-tsc: ~3.1.0 + peerDependenciesMeta: + '@ts-macro/tsc': + optional: true + '@typescript/native-preview': + optional: true + typescript: + optional: true + vue-tsc: + optional: true + rolldown-plugin-dts@0.22.1: resolution: {integrity: sha512-5E0AiM5RSQhU6cjtkDFWH6laW4IrMu0j1Mo8x04Xo1ALHmaRMs9/7zej7P3RrryVHW/DdZAp85MA7Be55p0iUw==} engines: {node: '>=20.19.0'} @@ -12713,6 +12920,11 @@ packages: engines: {node: ^20.19.0 || >=22.12.0} hasBin: true + rolldown@1.0.0-beta.52: + resolution: {integrity: sha512-Hbnpljue+JhMJrlOjQ1ixp9me7sUec7OjFvS+A1Qm8k8Xyxmw3ZhxFu7LlSXW1s9AX3POE9W9o2oqCEeR5uDmg==} + engines: {node: ^20.19.0 || >=22.12.0} + hasBin: true + rolldown@1.0.0-beta.53: 
resolution: {integrity: sha512-Qd9c2p0XKZdgT5AYd+KgAMggJ8ZmCs3JnS9PTMWkyUfteKlfmKtxJbWTHkVakxwXs1Ub7jrRYVeFeF7N0sQxyw==} engines: {node: ^20.19.0 || >=22.12.0} @@ -13055,6 +13267,9 @@ packages: std-env@3.10.0: resolution: {integrity: sha512-5GS12FdOZNliM5mAOxFRg7Ir0pWz8MdpYm6AY6VPkGpbA7ZzmbzNcBJQ0GPvvyWgcY7QAhCgf9Uy89I03faLkg==} + std-env@4.0.0: + resolution: {integrity: sha512-zUMPtQ/HBY3/50VbpkupYHbRroTRZJPRLvreamgErJVys0ceuzMkD44J/QjqhHjOzK42GQ3QZIeFG1OYfOtKqQ==} + storybook@10.3.4: resolution: {integrity: sha512-866YXZy9k59tLPl9SN3KZZOFeBC/swxkuBVtW8iQjJIzfCrvk7zXQd8RSQ4ignmCdArVvY4lGMCAT4yNaZSt1g==} hasBin: true @@ -13390,6 +13605,10 @@ packages: resolution: {integrity: sha512-op4nsTR47R6p0vMUUoYl/a+ljLFVtlfaXkLQmqfLR1qHma1h/ysYk4hEXZ880bf2CYgTskvTa/e196Vd5dDQXw==} engines: {node: '>=14.0.0'} + tinyrainbow@3.1.0: + resolution: {integrity: sha512-Bf+ILmBgretUrdJxzXM0SgXLZ3XfiaUuOj/IKQHuTXip+05Xn+uyEYdVg0kYDipTBcLrCVyUzAPz7QmArb0mmw==} + engines: {node: '>=14.0.0'} + tinyspy@4.0.4: resolution: {integrity: sha512-azl+t0z7pw/z958Gy9svOTuzqIk6xq+NSheJzn5MMWtWTFywIacg2wUlzKFGtt3cthx0r2SxMK0yzJOR0IES7Q==} engines: {node: '>=14.0.0'} @@ -13541,6 +13760,31 @@ packages: unrun: optional: true + tsdown@0.16.8: + resolution: {integrity: sha512-6ANw9mgU9kk7SvTBKvpDu/DVJeAFECiLUSeL5M7f5Nm5H97E7ybxmXT4PQ23FySYn32y6OzjoAH/lsWCbGzfLA==} + engines: {node: '>=20.19.0'} + hasBin: true + peerDependencies: + '@arethetypeswrong/core': ^0.18.1 + '@vitejs/devtools': ^0.0.0-alpha.18 + publint: ^0.3.0 + typescript: ^5.0.0 + unplugin-lightningcss: ^0.4.0 + unplugin-unused: ^0.5.0 + peerDependenciesMeta: + '@arethetypeswrong/core': + optional: true + '@vitejs/devtools': + optional: true + publint: + optional: true + typescript: + optional: true + unplugin-lightningcss: + optional: true + unplugin-unused: + optional: true + tsdown@0.20.3: resolution: {integrity: sha512-qWOUXSbe4jN8JZEgrkc/uhJpC8VN2QpNu3eZkBWwNuTEjc/Ik1kcc54ycfcQ5QPRHeu9OQXaLfCI3o7pEJgB2w==} engines: {node: '>=20.19.0'} @@ 
-13642,8 +13886,8 @@ packages: engines: {node: '>=14.17'} hasBin: true - typescript@5.9.2: - resolution: {integrity: sha512-CWBzXQrc/qOkhidw1OzBTQuYRbfyxDXJMVJ1XNwUHGROVmuaeiEm3OslpZ1RV96d7SKKjZKrSJu3+t/xlw3R9A==} + typescript@5.9.3: + resolution: {integrity: sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==} engines: {node: '>=14.17'} hasBin: true @@ -14001,6 +14245,41 @@ packages: jsdom: optional: true + vitest@4.1.2: + resolution: {integrity: sha512-xjR1dMTVHlFLh98JE3i/f/WePqJsah4A0FK9cc8Ehp9Udk0AZk6ccpIZhh1qJ/yxVWRZ+Q54ocnD8TXmkhspGg==} + engines: {node: ^20.0.0 || ^22.0.0 || >=24.0.0} + hasBin: true + peerDependencies: + '@edge-runtime/vm': '*' + '@opentelemetry/api': ^1.9.0 + '@types/node': ^20.0.0 || ^22.0.0 || >=24.0.0 + '@vitest/browser-playwright': 4.1.2 + '@vitest/browser-preview': 4.1.2 + '@vitest/browser-webdriverio': 4.1.2 + '@vitest/ui': 4.1.2 + happy-dom: '*' + jsdom: '*' + vite: ^6.0.0 || ^7.0.0 || ^8.0.0 + peerDependenciesMeta: + '@edge-runtime/vm': + optional: true + '@opentelemetry/api': + optional: true + '@types/node': + optional: true + '@vitest/browser-playwright': + optional: true + '@vitest/browser-preview': + optional: true + '@vitest/browser-webdriverio': + optional: true + '@vitest/ui': + optional: true + happy-dom: + optional: true + jsdom: + optional: true + void-elements@3.1.0: resolution: {integrity: sha512-Dhxzh5HZuiHQhbvTW9AMetFfBHDMYpo23Uo9btPXgdYP+3T5S+p+jgNy7spra+veYhBP2dCSgxR/i2Y02h5/6w==} engines: {node: '>=0.10.0'} @@ -15787,6 +16066,11 @@ snapshots: ws: 8.20.0 zod: 4.3.4 + '@cherrystudio/openai@6.15.0(ws@8.20.0)(zod@4.3.6)': + optionalDependencies: + ws: 8.20.0 + zod: 4.3.6 + '@chevrotain/cst-dts-gen@11.1.2': dependencies: '@chevrotain/gast': 11.1.2 @@ -16431,7 +16715,7 @@ snapshots: '@eslint-react/eff': 1.53.1 '@typescript-eslint/utils': 8.51.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.8.3) ts-pattern: 5.9.0 - zod: 4.3.4 + zod: 4.3.6 transitivePeerDependencies: - eslint - 
supports-color @@ -16443,7 +16727,7 @@ snapshots: '@eslint-react/kit': 1.53.1(eslint@9.39.2(jiti@2.6.1))(typescript@5.8.3) '@typescript-eslint/utils': 8.51.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.8.3) ts-pattern: 5.9.0 - zod: 4.3.4 + zod: 4.3.6 transitivePeerDependencies: - eslint - supports-color @@ -16780,7 +17064,7 @@ snapshots: openapi-types: 12.1.3 uuid: 10.0.0 yaml: 2.8.2 - zod: 4.3.4 + zod: 4.3.6 optionalDependencies: cheerio: 1.1.2 langsmith: 0.4.4(@opentelemetry/api@1.9.0)(@opentelemetry/sdk-trace-base@2.2.0(@opentelemetry/api@1.9.0))(openai@6.15.0(ws@8.20.0)(zod@4.3.4)) @@ -16894,8 +17178,8 @@ snapshots: dependencies: '@langchain/core': 1.0.2(patch_hash=8dc787a82cebafe8b23c8826f25f29aca64fc8b43a0a1878e0010782e4da96ed)(@opentelemetry/api@1.9.0)(@opentelemetry/sdk-trace-base@2.2.0(@opentelemetry/api@1.9.0))(openai@6.15.0(ws@8.20.0)(zod@4.3.4)) js-tiktoken: 1.0.21 - openai: '@cherrystudio/openai@6.15.0(ws@8.20.0)(zod@4.3.4)' - zod: 4.3.4 + openai: '@cherrystudio/openai@6.15.0(ws@8.20.0)(zod@4.3.6)' + zod: 4.3.6 transitivePeerDependencies: - ws @@ -17507,6 +17791,8 @@ snapshots: '@oxc-project/types@0.95.0': {} + '@oxc-project/types@0.99.0': {} + '@oxlint-tsgolint/darwin-arm64@0.17.4': optional: true @@ -18285,6 +18571,9 @@ snapshots: '@rolldown/binding-android-arm64@1.0.0-beta.45': optional: true + '@rolldown/binding-android-arm64@1.0.0-beta.52': + optional: true + '@rolldown/binding-android-arm64@1.0.0-beta.53': optional: true @@ -18297,6 +18586,9 @@ snapshots: '@rolldown/binding-darwin-arm64@1.0.0-beta.45': optional: true + '@rolldown/binding-darwin-arm64@1.0.0-beta.52': + optional: true + '@rolldown/binding-darwin-arm64@1.0.0-beta.53': optional: true @@ -18309,6 +18601,9 @@ snapshots: '@rolldown/binding-darwin-x64@1.0.0-beta.45': optional: true + '@rolldown/binding-darwin-x64@1.0.0-beta.52': + optional: true + '@rolldown/binding-darwin-x64@1.0.0-beta.53': optional: true @@ -18321,6 +18616,9 @@ snapshots: '@rolldown/binding-freebsd-x64@1.0.0-beta.45': 
optional: true + '@rolldown/binding-freebsd-x64@1.0.0-beta.52': + optional: true + '@rolldown/binding-freebsd-x64@1.0.0-beta.53': optional: true @@ -18333,6 +18631,9 @@ snapshots: '@rolldown/binding-linux-arm-gnueabihf@1.0.0-beta.45': optional: true + '@rolldown/binding-linux-arm-gnueabihf@1.0.0-beta.52': + optional: true + '@rolldown/binding-linux-arm-gnueabihf@1.0.0-beta.53': optional: true @@ -18345,6 +18646,9 @@ snapshots: '@rolldown/binding-linux-arm64-gnu@1.0.0-beta.45': optional: true + '@rolldown/binding-linux-arm64-gnu@1.0.0-beta.52': + optional: true + '@rolldown/binding-linux-arm64-gnu@1.0.0-beta.53': optional: true @@ -18357,6 +18661,9 @@ snapshots: '@rolldown/binding-linux-arm64-musl@1.0.0-beta.45': optional: true + '@rolldown/binding-linux-arm64-musl@1.0.0-beta.52': + optional: true + '@rolldown/binding-linux-arm64-musl@1.0.0-beta.53': optional: true @@ -18375,6 +18682,9 @@ snapshots: '@rolldown/binding-linux-x64-gnu@1.0.0-beta.45': optional: true + '@rolldown/binding-linux-x64-gnu@1.0.0-beta.52': + optional: true + '@rolldown/binding-linux-x64-gnu@1.0.0-beta.53': optional: true @@ -18387,6 +18697,9 @@ snapshots: '@rolldown/binding-linux-x64-musl@1.0.0-beta.45': optional: true + '@rolldown/binding-linux-x64-musl@1.0.0-beta.52': + optional: true + '@rolldown/binding-linux-x64-musl@1.0.0-beta.53': optional: true @@ -18399,6 +18712,9 @@ snapshots: '@rolldown/binding-openharmony-arm64@1.0.0-beta.45': optional: true + '@rolldown/binding-openharmony-arm64@1.0.0-beta.52': + optional: true + '@rolldown/binding-openharmony-arm64@1.0.0-beta.53': optional: true @@ -18413,6 +18729,11 @@ snapshots: '@napi-rs/wasm-runtime': 1.1.1 optional: true + '@rolldown/binding-wasm32-wasi@1.0.0-beta.52': + dependencies: + '@napi-rs/wasm-runtime': 1.1.1 + optional: true + '@rolldown/binding-wasm32-wasi@1.0.0-beta.53': dependencies: '@napi-rs/wasm-runtime': 1.1.1 @@ -18431,6 +18752,9 @@ snapshots: '@rolldown/binding-win32-arm64-msvc@1.0.0-beta.45': optional: true + 
'@rolldown/binding-win32-arm64-msvc@1.0.0-beta.52': + optional: true + '@rolldown/binding-win32-arm64-msvc@1.0.0-beta.53': optional: true @@ -18443,9 +18767,15 @@ snapshots: '@rolldown/binding-win32-ia32-msvc@1.0.0-beta.45': optional: true + '@rolldown/binding-win32-ia32-msvc@1.0.0-beta.52': + optional: true + '@rolldown/binding-win32-x64-msvc@1.0.0-beta.45': optional: true + '@rolldown/binding-win32-x64-msvc@1.0.0-beta.52': + optional: true + '@rolldown/binding-win32-x64-msvc@1.0.0-beta.53': optional: true @@ -18459,6 +18789,8 @@ snapshots: '@rolldown/pluginutils@1.0.0-beta.45': {} + '@rolldown/pluginutils@1.0.0-beta.52': {} + '@rolldown/pluginutils@1.0.0-beta.53': {} '@rolldown/pluginutils@1.0.0-rc.12': {} @@ -20107,19 +20439,19 @@ snapshots: transitivePeerDependencies: - supports-color - '@typescript-eslint/eslint-plugin@8.51.0(@typescript-eslint/parser@8.51.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.2))(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.2)': + '@typescript-eslint/eslint-plugin@8.51.0(@typescript-eslint/parser@8.51.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3))(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3)': dependencies: '@eslint-community/regexpp': 4.12.2 - '@typescript-eslint/parser': 8.51.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.2) + '@typescript-eslint/parser': 8.51.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3) '@typescript-eslint/scope-manager': 8.51.0 - '@typescript-eslint/type-utils': 8.51.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.2) - '@typescript-eslint/utils': 8.51.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.2) + '@typescript-eslint/type-utils': 8.51.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3) + '@typescript-eslint/utils': 8.51.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3) '@typescript-eslint/visitor-keys': 8.51.0 eslint: 9.39.2(jiti@2.6.1) ignore: 7.0.5 natural-compare: 1.4.0 - ts-api-utils: 2.4.0(typescript@5.9.2) - typescript: 5.9.2 + ts-api-utils: 2.4.0(typescript@5.9.3) + typescript: 5.9.3 transitivePeerDependencies: - 
supports-color optional: true @@ -20136,15 +20468,15 @@ snapshots: transitivePeerDependencies: - supports-color - '@typescript-eslint/parser@8.51.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.2)': + '@typescript-eslint/parser@8.51.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3)': dependencies: '@typescript-eslint/scope-manager': 8.51.0 '@typescript-eslint/types': 8.51.0 - '@typescript-eslint/typescript-estree': 8.51.0(typescript@5.9.2) + '@typescript-eslint/typescript-estree': 8.51.0(typescript@5.9.3) '@typescript-eslint/visitor-keys': 8.51.0 debug: 4.4.3 eslint: 9.39.2(jiti@2.6.1) - typescript: 5.9.2 + typescript: 5.9.3 transitivePeerDependencies: - supports-color optional: true @@ -20158,12 +20490,12 @@ snapshots: transitivePeerDependencies: - supports-color - '@typescript-eslint/project-service@8.51.0(typescript@5.9.2)': + '@typescript-eslint/project-service@8.51.0(typescript@5.9.3)': dependencies: - '@typescript-eslint/tsconfig-utils': 8.51.0(typescript@5.9.2) + '@typescript-eslint/tsconfig-utils': 8.51.0(typescript@5.9.3) '@typescript-eslint/types': 8.51.0 debug: 4.4.3 - typescript: 5.9.2 + typescript: 5.9.3 transitivePeerDependencies: - supports-color optional: true @@ -20177,9 +20509,9 @@ snapshots: dependencies: typescript: 5.8.3 - '@typescript-eslint/tsconfig-utils@8.51.0(typescript@5.9.2)': + '@typescript-eslint/tsconfig-utils@8.51.0(typescript@5.9.3)': dependencies: - typescript: 5.9.2 + typescript: 5.9.3 optional: true '@typescript-eslint/type-utils@8.51.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.8.3)': @@ -20194,15 +20526,15 @@ snapshots: transitivePeerDependencies: - supports-color - '@typescript-eslint/type-utils@8.51.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.2)': + '@typescript-eslint/type-utils@8.51.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3)': dependencies: '@typescript-eslint/types': 8.51.0 - '@typescript-eslint/typescript-estree': 8.51.0(typescript@5.9.2) - '@typescript-eslint/utils': 8.51.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.2) + 
'@typescript-eslint/typescript-estree': 8.51.0(typescript@5.9.3) + '@typescript-eslint/utils': 8.51.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3) debug: 4.4.3 eslint: 9.39.2(jiti@2.6.1) - ts-api-utils: 2.4.0(typescript@5.9.2) - typescript: 5.9.2 + ts-api-utils: 2.4.0(typescript@5.9.3) + typescript: 5.9.3 transitivePeerDependencies: - supports-color optional: true @@ -20224,18 +20556,18 @@ snapshots: transitivePeerDependencies: - supports-color - '@typescript-eslint/typescript-estree@8.51.0(typescript@5.9.2)': + '@typescript-eslint/typescript-estree@8.51.0(typescript@5.9.3)': dependencies: - '@typescript-eslint/project-service': 8.51.0(typescript@5.9.2) - '@typescript-eslint/tsconfig-utils': 8.51.0(typescript@5.9.2) + '@typescript-eslint/project-service': 8.51.0(typescript@5.9.3) + '@typescript-eslint/tsconfig-utils': 8.51.0(typescript@5.9.3) '@typescript-eslint/types': 8.51.0 '@typescript-eslint/visitor-keys': 8.51.0 debug: 4.4.3 minimatch: 9.0.6 semver: 7.7.1 tinyglobby: 0.2.15 - ts-api-utils: 2.4.0(typescript@5.9.2) - typescript: 5.9.2 + ts-api-utils: 2.4.0(typescript@5.9.3) + typescript: 5.9.3 transitivePeerDependencies: - supports-color optional: true @@ -20251,14 +20583,14 @@ snapshots: transitivePeerDependencies: - supports-color - '@typescript-eslint/utils@8.51.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.2)': + '@typescript-eslint/utils@8.51.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3)': dependencies: '@eslint-community/eslint-utils': 4.9.1(eslint@9.39.2(jiti@2.6.1)) '@typescript-eslint/scope-manager': 8.51.0 '@typescript-eslint/types': 8.51.0 - '@typescript-eslint/typescript-estree': 8.51.0(typescript@5.9.2) + '@typescript-eslint/typescript-estree': 8.51.0(typescript@5.9.3) eslint: 9.39.2(jiti@2.6.1) - typescript: 5.9.2 + typescript: 5.9.3 transitivePeerDependencies: - supports-color optional: true @@ -20762,6 +21094,15 @@ snapshots: chai: 5.3.3 tinyrainbow: 2.0.0 + '@vitest/expect@4.1.2': + dependencies: + '@standard-schema/spec': 1.1.0 + '@types/chai': 
5.2.3 + '@vitest/spy': 4.1.2 + '@vitest/utils': 4.1.2 + chai: 6.2.2 + tinyrainbow: 3.1.0 + '@vitest/mocker@3.2.4(msw@2.12.7(@types/node@24.10.4)(typescript@5.8.3))(rolldown-vite@7.3.0(@types/node@24.10.4)(esbuild@0.25.12)(jiti@2.6.1)(tsx@4.21.0)(yaml@2.8.2))': dependencies: '@vitest/spy': 3.2.4 @@ -20771,26 +21112,53 @@ snapshots: msw: 2.12.7(@types/node@24.10.4)(typescript@5.8.3) vite: rolldown-vite@7.3.0(@types/node@24.10.4)(esbuild@0.25.12)(jiti@2.6.1)(tsx@4.21.0)(yaml@2.8.2) + '@vitest/mocker@4.1.2(msw@2.12.7(@types/node@24.10.4)(typescript@5.9.3))(vite@8.0.4(@types/node@24.10.4)(esbuild@0.25.12)(jiti@2.6.1)(tsx@4.21.0)(yaml@2.8.2))': + dependencies: + '@vitest/spy': 4.1.2 + estree-walker: 3.0.3 + magic-string: 0.30.21 + optionalDependencies: + msw: 2.12.7(@types/node@24.10.4)(typescript@5.9.3) + vite: 8.0.4(@types/node@24.10.4)(esbuild@0.25.12)(jiti@2.6.1)(tsx@4.21.0)(yaml@2.8.2) + '@vitest/pretty-format@3.2.4': dependencies: tinyrainbow: 2.0.0 + '@vitest/pretty-format@4.1.2': + dependencies: + tinyrainbow: 3.1.0 + '@vitest/runner@3.2.4': dependencies: '@vitest/utils': 3.2.4 pathe: 2.0.3 strip-literal: 3.1.0 + '@vitest/runner@4.1.2': + dependencies: + '@vitest/utils': 4.1.2 + pathe: 2.0.3 + '@vitest/snapshot@3.2.4': dependencies: '@vitest/pretty-format': 3.2.4 magic-string: 0.30.21 pathe: 2.0.3 + '@vitest/snapshot@4.1.2': + dependencies: + '@vitest/pretty-format': 4.1.2 + '@vitest/utils': 4.1.2 + magic-string: 0.30.21 + pathe: 2.0.3 + '@vitest/spy@3.2.4': dependencies: tinyspy: 4.0.4 + '@vitest/spy@4.1.2': {} + '@vitest/ui@3.2.4(vitest@3.2.4)': dependencies: '@vitest/utils': 3.2.4 @@ -20808,6 +21176,12 @@ snapshots: loupe: 3.2.1 tinyrainbow: 2.0.0 + '@vitest/utils@4.1.2': + dependencies: + '@vitest/pretty-format': 4.1.2 + convert-source-map: 2.0.0 + tinyrainbow: 3.1.0 + '@vitest/web-worker@3.2.4(vitest@3.2.4)': dependencies: debug: 4.4.3 @@ -21583,6 +21957,8 @@ snapshots: loupe: 3.2.1 pathval: 2.0.1 + chai@6.2.2: {} + chalk@1.1.3: dependencies: ansi-styles: 
2.2.1 @@ -21676,6 +22052,10 @@ snapshots: dependencies: readdirp: 4.1.2 + chokidar@5.0.0: + dependencies: + readdirp: 5.0.0 + chownr@1.1.4: {} chownr@2.0.0: {} @@ -22524,6 +22904,8 @@ snapshots: dotenv@16.6.1: {} + dotenv@17.4.1: {} + dotenv@8.6.0: {} drizzle-kit@0.31.8: @@ -22540,6 +22922,11 @@ snapshots: '@libsql/client': 0.14.0 '@opentelemetry/api': 1.9.0 + drizzle-zod@0.8.3(drizzle-orm@0.44.7(@libsql/client@0.14.0)(@opentelemetry/api@1.9.0))(zod@4.3.4): + dependencies: + drizzle-orm: 0.44.7(@libsql/client@0.14.0)(@opentelemetry/api@1.9.0) + zod: 4.3.4 + dts-resolver@2.1.3: {} duck@0.1.12: @@ -22770,6 +23157,8 @@ snapshots: es-module-lexer@1.7.0: {} + es-module-lexer@2.0.0: {} + es-object-atoms@1.1.1: dependencies: es-errors: 1.3.0 @@ -23000,11 +23389,11 @@ snapshots: optionalDependencies: '@typescript-eslint/eslint-plugin': 8.51.0(@typescript-eslint/parser@8.51.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.8.3))(eslint@9.39.2(jiti@2.6.1))(typescript@5.8.3) - eslint-plugin-unused-imports@4.3.0(@typescript-eslint/eslint-plugin@8.51.0(@typescript-eslint/parser@8.51.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.2))(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.2))(eslint@9.39.2(jiti@2.6.1)): + eslint-plugin-unused-imports@4.3.0(@typescript-eslint/eslint-plugin@8.51.0(@typescript-eslint/parser@8.51.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3))(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3))(eslint@9.39.2(jiti@2.6.1)): dependencies: eslint: 9.39.2(jiti@2.6.1) optionalDependencies: - '@typescript-eslint/eslint-plugin': 8.51.0(@typescript-eslint/parser@8.51.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.2))(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.2) + '@typescript-eslint/eslint-plugin': 8.51.0(@typescript-eslint/parser@8.51.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3))(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3) eslint-scope@8.4.0: dependencies: @@ -24910,6 +25299,10 @@ snapshots: dependencies: react: 19.2.3 + lucide-react@0.563.0(react@19.2.3): + dependencies: + react: 19.2.3 
+ luxon@3.7.2: {} lz-string@1.5.0: {} @@ -25758,6 +26151,32 @@ snapshots: transitivePeerDependencies: - '@types/node' + msw@2.12.7(@types/node@24.10.4)(typescript@5.9.3): + dependencies: + '@inquirer/confirm': 5.1.21(@types/node@24.10.4) + '@mswjs/interceptors': 0.40.0 + '@open-draft/deferred-promise': 2.2.0 + '@types/statuses': 2.0.6 + cookie: 1.1.1 + graphql: 16.12.0 + headers-polyfill: 4.0.3 + is-node-process: 1.2.0 + outvariant: 1.4.3 + path-to-regexp: 6.3.0 + picocolors: 1.1.1 + rettime: 0.7.0 + statuses: 2.0.2 + strict-event-emitter: 0.5.1 + tough-cookie: 6.0.0 + type-fest: 5.4.2 + until-async: 3.0.2 + yargs: 17.7.2 + optionalDependencies: + typescript: 5.9.3 + transitivePeerDependencies: + - '@types/node' + optional: true + multicast-dns@7.2.5: dependencies: dns-packet: 5.6.1 @@ -27215,6 +27634,8 @@ snapshots: readdirp@4.1.2: {} + readdirp@5.0.0: {} + readline2@1.0.1: dependencies: code-point-at: 1.1.0 @@ -27538,6 +27959,24 @@ snapshots: transitivePeerDependencies: - oxc-resolver + rolldown-plugin-dts@0.18.4(@typescript/native-preview@7.0.0-dev.20260204.1)(rolldown@1.0.0-beta.52)(typescript@5.9.3): + dependencies: + '@babel/generator': 7.28.5 + '@babel/parser': 7.28.5 + '@babel/types': 7.28.5 + ast-kit: 2.2.0 + birpc: 4.0.0 + dts-resolver: 2.1.3 + get-tsconfig: 4.13.6 + magic-string: 0.30.21 + obug: 2.1.1 + rolldown: 1.0.0-beta.52 + optionalDependencies: + '@typescript/native-preview': 7.0.0-dev.20260204.1 + typescript: 5.9.3 + transitivePeerDependencies: + - oxc-resolver + rolldown-plugin-dts@0.22.1(@typescript/native-preview@7.0.0-dev.20260204.1)(rolldown@1.0.0-rc.3)(typescript@5.8.3): dependencies: '@babel/generator': 8.0.0-rc.1 @@ -27556,7 +27995,7 @@ snapshots: transitivePeerDependencies: - oxc-resolver - rolldown-plugin-dts@0.22.1(@typescript/native-preview@7.0.0-dev.20260204.1)(rolldown@1.0.0-rc.3)(typescript@5.9.2): + rolldown-plugin-dts@0.22.1(@typescript/native-preview@7.0.0-dev.20260204.1)(rolldown@1.0.0-rc.3)(typescript@5.9.3): dependencies: 
'@babel/generator': 8.0.0-rc.1 '@babel/helper-validator-identifier': 8.0.0-rc.1 @@ -27570,7 +28009,7 @@ snapshots: rolldown: 1.0.0-rc.3 optionalDependencies: '@typescript/native-preview': 7.0.0-dev.20260204.1 - typescript: 5.9.2 + typescript: 5.9.3 transitivePeerDependencies: - oxc-resolver @@ -27611,6 +28050,26 @@ snapshots: '@rolldown/binding-win32-ia32-msvc': 1.0.0-beta.45 '@rolldown/binding-win32-x64-msvc': 1.0.0-beta.45 + rolldown@1.0.0-beta.52: + dependencies: + '@oxc-project/types': 0.99.0 + '@rolldown/pluginutils': 1.0.0-beta.52 + optionalDependencies: + '@rolldown/binding-android-arm64': 1.0.0-beta.52 + '@rolldown/binding-darwin-arm64': 1.0.0-beta.52 + '@rolldown/binding-darwin-x64': 1.0.0-beta.52 + '@rolldown/binding-freebsd-x64': 1.0.0-beta.52 + '@rolldown/binding-linux-arm-gnueabihf': 1.0.0-beta.52 + '@rolldown/binding-linux-arm64-gnu': 1.0.0-beta.52 + '@rolldown/binding-linux-arm64-musl': 1.0.0-beta.52 + '@rolldown/binding-linux-x64-gnu': 1.0.0-beta.52 + '@rolldown/binding-linux-x64-musl': 1.0.0-beta.52 + '@rolldown/binding-openharmony-arm64': 1.0.0-beta.52 + '@rolldown/binding-wasm32-wasi': 1.0.0-beta.52 + '@rolldown/binding-win32-arm64-msvc': 1.0.0-beta.52 + '@rolldown/binding-win32-ia32-msvc': 1.0.0-beta.52 + '@rolldown/binding-win32-x64-msvc': 1.0.0-beta.52 + rolldown@1.0.0-beta.53: dependencies: '@oxc-project/types': 0.101.0 @@ -28075,6 +28534,8 @@ snapshots: std-env@3.10.0: {} + std-env@4.0.0: {} + storybook@10.3.4(@testing-library/dom@10.4.1)(prettier@3.8.1)(react-dom@19.2.3(react@19.2.3))(react@19.2.3): dependencies: '@storybook/global': 5.0.0 @@ -28481,6 +28942,8 @@ snapshots: tinyrainbow@2.0.0: {} + tinyrainbow@3.1.0: {} + tinyspy@4.0.4: {} tldts-core@6.1.86: {} @@ -28578,9 +29041,9 @@ snapshots: dependencies: typescript: 5.8.3 - ts-api-utils@2.4.0(typescript@5.9.2): + ts-api-utils@2.4.0(typescript@5.9.3): dependencies: - typescript: 5.9.2 + typescript: 5.9.3 optional: true ts-declaration-location@1.0.7(typescript@5.8.3): @@ -28629,6 
+29092,32 @@ snapshots: - supports-color - vue-tsc + tsdown@0.16.8(@typescript/native-preview@7.0.0-dev.20260204.1)(typescript@5.9.3): + dependencies: + ansis: 4.2.0 + cac: 6.7.14 + chokidar: 5.0.0 + diff: 8.0.3 + empathic: 2.0.0 + hookable: 5.5.3 + obug: 2.1.1 + rolldown: 1.0.0-beta.52 + rolldown-plugin-dts: 0.18.4(@typescript/native-preview@7.0.0-dev.20260204.1)(rolldown@1.0.0-beta.52)(typescript@5.9.3) + semver: 7.7.3 + tinyexec: 1.0.2 + tinyglobby: 0.2.15 + tree-kill: 1.2.2 + unconfig-core: 7.5.0 + unrun: 0.2.27 + optionalDependencies: + typescript: 5.9.3 + transitivePeerDependencies: + - '@ts-macro/tsc' + - '@typescript/native-preview' + - oxc-resolver + - synckit + - vue-tsc + tsdown@0.20.3(@typescript/native-preview@7.0.0-dev.20260204.1)(typescript@5.8.3): dependencies: ansis: 4.2.0 @@ -28656,7 +29145,7 @@ snapshots: - synckit - vue-tsc - tsdown@0.20.3(@typescript/native-preview@7.0.0-dev.20260204.1)(typescript@5.9.2): + tsdown@0.20.3(@typescript/native-preview@7.0.0-dev.20260204.1)(typescript@5.9.3): dependencies: ansis: 4.2.0 cac: 6.7.14 @@ -28667,7 +29156,7 @@ snapshots: obug: 2.1.1 picomatch: 4.0.3 rolldown: 1.0.0-rc.3 - rolldown-plugin-dts: 0.22.1(@typescript/native-preview@7.0.0-dev.20260204.1)(rolldown@1.0.0-rc.3)(typescript@5.9.2) + rolldown-plugin-dts: 0.22.1(@typescript/native-preview@7.0.0-dev.20260204.1)(rolldown@1.0.0-rc.3)(typescript@5.9.3) semver: 7.7.3 tinyexec: 1.0.2 tinyglobby: 0.2.15 @@ -28675,7 +29164,7 @@ snapshots: unconfig-core: 7.4.2 unrun: 0.2.27 optionalDependencies: - typescript: 5.9.2 + typescript: 5.9.3 transitivePeerDependencies: - '@ts-macro/tsc' - '@typescript/native-preview' @@ -28754,8 +29243,7 @@ snapshots: typescript@5.8.3: {} - typescript@5.9.2: - optional: true + typescript@5.9.3: {} ua-parser-js@1.0.41: {} @@ -29129,6 +29617,36 @@ snapshots: - tsx - yaml + 
vitest@4.1.2(@opentelemetry/api@1.9.0)(@types/node@24.10.4)(@vitest/ui@3.2.4(vitest@3.2.4))(jsdom@26.1.0)(msw@2.12.7(@types/node@24.10.4)(typescript@5.9.3))(vite@8.0.4(@types/node@24.10.4)(esbuild@0.25.12)(jiti@2.6.1)(tsx@4.21.0)(yaml@2.8.2)): + dependencies: + '@vitest/expect': 4.1.2 + '@vitest/mocker': 4.1.2(msw@2.12.7(@types/node@24.10.4)(typescript@5.9.3))(vite@8.0.4(@types/node@24.10.4)(esbuild@0.25.12)(jiti@2.6.1)(tsx@4.21.0)(yaml@2.8.2)) + '@vitest/pretty-format': 4.1.2 + '@vitest/runner': 4.1.2 + '@vitest/snapshot': 4.1.2 + '@vitest/spy': 4.1.2 + '@vitest/utils': 4.1.2 + es-module-lexer: 2.0.0 + expect-type: 1.3.0 + magic-string: 0.30.21 + obug: 2.1.1 + pathe: 2.0.3 + picomatch: 4.0.4 + std-env: 4.0.0 + tinybench: 2.9.0 + tinyexec: 1.0.2 + tinyglobby: 0.2.15 + tinyrainbow: 3.1.0 + vite: 8.0.4(@types/node@24.10.4)(esbuild@0.25.12)(jiti@2.6.1)(tsx@4.21.0)(yaml@2.8.2) + why-is-node-running: 2.3.0 + optionalDependencies: + '@opentelemetry/api': 1.9.0 + '@types/node': 24.10.4 + '@vitest/ui': 3.2.4(vitest@3.2.4) + jsdom: 26.1.0 + transitivePeerDependencies: + - msw + void-elements@3.1.0: {} voyage-ai-provider@3.0.0(zod@4.3.4): diff --git a/src/main/core/application/serviceRegistry.ts b/src/main/core/application/serviceRegistry.ts index 9134b3e70d0..5246a97db9c 100644 --- a/src/main/core/application/serviceRegistry.ts +++ b/src/main/core/application/serviceRegistry.ts @@ -2,6 +2,7 @@ import { CacheService } from '@data/CacheService' import { DataApiService } from '@data/DataApiService' import { DbService } from '@data/db/DbService' import { PreferenceService } from '@data/PreferenceService' +import { ProviderRegistryService } from '@data/services/ProviderRegistryService' import { AgentBootstrapService } from '@main/services/AgentBootstrapService' import { AnalyticsService } from '@main/services/AnalyticsService' import { ApiServerService } from '@main/services/ApiServerService' @@ -76,6 +77,7 @@ export const services = { MCPService, OpenClawService, SearchService, 
+ ProviderRegistryService, AgentBootstrapService, ApiServerService, AppUpdaterService diff --git a/src/main/data/api/handlers/index.ts b/src/main/data/api/handlers/index.ts index 83116a47450..454df708974 100644 --- a/src/main/data/api/handlers/index.ts +++ b/src/main/data/api/handlers/index.ts @@ -8,6 +8,8 @@ * - test.ts - Test API handlers * - topics.ts - Topic API handlers * - messages.ts - Message API handlers + * - models.ts - Model API handlers + * - providers.ts - Provider API handlers * - translate.ts - Translate API handlers */ @@ -18,6 +20,8 @@ import { knowledgeHandlers } from './knowledges' import { mcpServerHandlers } from './mcpServers' import { messageHandlers } from './messages' import { miniappHandlers } from './miniapps' +import { modelHandlers } from './models' +import { providerHandlers } from './providers' import { testHandlers } from './test' import { topicHandlers } from './topics' import { translateHandlers } from './translate' @@ -34,6 +38,8 @@ export const apiHandlers: ApiImplementation = { ...testHandlers, ...topicHandlers, ...messageHandlers, + ...modelHandlers, + ...providerHandlers, ...knowledgeHandlers, ...translateHandlers, ...mcpServerHandlers, diff --git a/src/main/data/api/handlers/models.ts b/src/main/data/api/handlers/models.ts new file mode 100644 index 00000000000..a547437d59a --- /dev/null +++ b/src/main/data/api/handlers/models.ts @@ -0,0 +1,50 @@ +/** + * Model API Handlers + * + * Implements all model-related API endpoints including: + * - Model CRUD operations + * - Listing with filters + */ + +import { modelService } from '@data/services/ModelService' +import type { ApiHandler, ApiMethods } from '@shared/data/api/apiTypes' +import type { ModelSchemas } from '@shared/data/api/schemas/models' + +/** + * Handler type for a specific model endpoint + */ +type ModelHandler> = ApiHandler + +/** + * Model API handlers implementation + */ +export const modelHandlers: { + [Path in keyof ModelSchemas]: { + [Method in keyof 
ModelSchemas[Path]]: ModelHandler> + } +} = { + '/models': { + GET: async ({ query }) => { + return await modelService.list(query ?? {}) + }, + + POST: async ({ body }) => { + return await modelService.create(body) + } + }, + + '/models/:providerId/:modelId': { + GET: async ({ params }) => { + return await modelService.getByKey(params.providerId, params.modelId) + }, + + PATCH: async ({ params, body }) => { + return await modelService.update(params.providerId, params.modelId, body) + }, + + DELETE: async ({ params }) => { + await modelService.delete(params.providerId, params.modelId) + return undefined + } + } +} diff --git a/src/main/data/api/handlers/providers.ts b/src/main/data/api/handlers/providers.ts new file mode 100644 index 00000000000..3fff96d5239 --- /dev/null +++ b/src/main/data/api/handlers/providers.ts @@ -0,0 +1,108 @@ +/** + * Provider API Handlers + * + * Implements all provider-related API endpoints including: + * - Provider CRUD operations + * - Listing with filters + * + * Runtime validation uses the ORM-derived Zod schema (userProviderInsertSchema) + * so the DB table definition is the single source of truth. + */ + +import { userProviderInsertSchema } from '@data/db/schemas/userProvider' +import { providerService } from '@data/services/ProviderService' +import { application } from '@main/core/application' +import type { ApiHandler, ApiMethods } from '@shared/data/api/apiTypes' +import type { CreateProviderDto, UpdateProviderDto } from '@shared/data/api/schemas/providers' +import type { ProviderSchemas } from '@shared/data/api/schemas/providers' + +/** + * Handler type for a specific provider endpoint + */ +type ProviderHandler> = ApiHandler + +/** + * Provider API handlers implementation + */ +export const providerHandlers: { + [Path in keyof ProviderSchemas]: { + [Method in keyof ProviderSchemas[Path]]: ProviderHandler> + } +} = { + '/providers': { + GET: async ({ query }) => { + return await providerService.list(query ?? 
{}) + }, + + POST: async ({ body }) => { + const parsed = userProviderInsertSchema.safeParse(body) + if (!parsed.success) { + throw new Error(`Invalid provider data: ${parsed.error.message}`) + } + return await providerService.create(parsed.data as CreateProviderDto) + } + }, + + '/providers/:providerId': { + GET: async ({ params }) => { + return await providerService.getByProviderId(params.providerId) + }, + + PATCH: async ({ params, body }) => { + const parsed = userProviderInsertSchema.partial().safeParse(body) + if (!parsed.success) { + throw new Error(`Invalid provider update data: ${parsed.error.message}`) + } + return await providerService.update(params.providerId, parsed.data as UpdateProviderDto) + }, + + DELETE: async ({ params }) => { + await providerService.delete(params.providerId) + return undefined + } + }, + + '/providers/:providerId/rotated-key': { + GET: async ({ params }) => { + const apiKey = await providerService.getRotatedApiKey(params.providerId) + return { apiKey } + } + }, + + '/providers/:providerId/api-keys': { + GET: async ({ params }) => { + const keys = await providerService.getEnabledApiKeys(params.providerId) + return { keys } + }, + + POST: async ({ params, body }) => { + const { key, label } = body as { key: string; label?: string } + if (!key || typeof key !== 'string') { + throw new Error('API key value is required') + } + return await providerService.addApiKey(params.providerId, key, label) + } + }, + + '/providers/:providerId/registry-models': { + GET: async ({ params }) => { + return application.get('ProviderRegistryService').getRegistryModelsByProvider(params.providerId) + }, + + POST: async ({ params, body }) => { + return await application.get('ProviderRegistryService').resolveModels(params.providerId, body.models) + } + }, + + '/providers/:providerId/auth-config': { + GET: async ({ params }) => { + return providerService.getAuthConfig(params.providerId) + } + }, + + '/providers/:providerId/api-keys/:keyId': { + DELETE: 
async ({ params }) => { + return providerService.deleteApiKey(params.providerId, params.keyId) + } + } +} diff --git a/src/main/data/db/schemas/userModel.ts b/src/main/data/db/schemas/userModel.ts new file mode 100644 index 00000000000..62d4a36e187 --- /dev/null +++ b/src/main/data/db/schemas/userModel.ts @@ -0,0 +1,179 @@ +/** + * User Model table schema + * + * Stores all user models with fully resolved configurations. + * Capabilities and settings are resolved once at add-time (from registry), + * so no runtime merge is needed. + * + * - presetModelId: traceability marker (which preset this came from, if any) + * - Composite primary key: (providerId, modelId) + * + * Type definitions are sourced from @shared/data/types/model + */ +import type { + EndpointType, + Modality, + ModelCapability, + ParameterSupport, + ReasoningConfig, + RuntimeModelPricing +} from '@shared/data/types/model' +import { ParameterSupportDbSchema, ReasoningConfigSchema, RuntimeModelPricingSchema } from '@shared/data/types/model' +import { index, integer, primaryKey, sqliteTable, text } from 'drizzle-orm/sqlite-core' +import { createSchemaFactory } from 'drizzle-zod' +import * as z from 'zod' + +const { createInsertSchema, createSelectSchema } = createSchemaFactory({ zodInstance: z }) + +import { createUpdateTimestamps } from './_columnHelpers' + +// ═══════════════════════════════════════════════════════════════════════════════ +// Registry Enrichable Fields +// ═══════════════════════════════════════════════════════════════════════════════ + +/** + * Fields that can be auto-populated by registry enrichment. + * Used by `userOverrides` to track which fields the user has explicitly modified, + * so that registry updates don't overwrite user customizations. + * + * The `isRegistryEnrichableField` guard ensures runtime safety. 
+ */ +export const REGISTRY_ENRICHABLE_FIELDS = [ + 'name', + 'description', + 'capabilities', + 'inputModalities', + 'outputModalities', + 'endpointTypes', + 'contextWindow', + 'maxOutputTokens', + 'supportsStreaming', + 'reasoning', + 'parameters', + 'pricing' +] as const + +export type RegistryEnrichableField = (typeof REGISTRY_ENRICHABLE_FIELDS)[number] + +const REGISTRY_ENRICHABLE_SET: ReadonlySet = new Set(REGISTRY_ENRICHABLE_FIELDS) + +/** Check if a field name is a registry-enrichable field */ +export function isRegistryEnrichableField(field: string): field is RegistryEnrichableField { + return REGISTRY_ENRICHABLE_SET.has(field) +} + +// ═══════════════════════════════════════════════════════════════════════════════ +// Table Definition +// ═══════════════════════════════════════════════════════════════════════════════ + +export const userModelTable = sqliteTable( + 'user_model', + { + /** User Provider ID */ + providerId: text().notNull(), + + /** Model ID (composite key part) */ + modelId: text().notNull(), + + /** Associated preset model ID (for traceability) */ + presetModelId: text(), + + /** Display name (override or complete) */ + name: text(), + + /** Description */ + description: text(), + + /** UI grouping */ + group: text(), + + /** Complete capability list (resolved at add time) */ + capabilities: text({ mode: 'json' }).$type(), + + /** Supported input modalities (e.g., TEXT, VISION, AUDIO, VIDEO) */ + inputModalities: text({ mode: 'json' }).$type(), + + /** Supported output modalities (e.g., TEXT, VISION, AUDIO, VIDEO, VECTOR) */ + outputModalities: text({ mode: 'json' }).$type(), + + /** Endpoint types (optional, override Provider default) */ + endpointTypes: text({ mode: 'json' }).$type(), + + /** Custom endpoint URL (optional, complete override) */ + customEndpointUrl: text(), + + /** Context window size */ + contextWindow: integer(), + + /** Maximum output tokens */ + maxOutputTokens: integer(), + + /** Streaming support */ + 
supportsStreaming: integer({ mode: 'boolean' }), + + /** Reasoning configuration */ + reasoning: text({ mode: 'json' }).$type(), + + /** Parameter support */ + parameters: text({ mode: 'json' }).$type(), + + /** Pricing configuration */ + pricing: text({ mode: 'json' }).$type(), + + /** Whether this model is enabled */ + isEnabled: integer({ mode: 'boolean' }).default(true), + + /** Whether this model is hidden from lists */ + isHidden: integer({ mode: 'boolean' }).default(false), + + /** Whether this model has been deprecated by the provider (no longer in API model list) */ + isDeprecated: integer({ mode: 'boolean' }).default(false), + + /** Sort order in UI */ + sortOrder: integer().default(0), + + /** User notes */ + notes: text(), + + /** + * List of field names the user has explicitly modified. + * Registry enrichment skips these fields to preserve user customizations. + */ + userOverrides: text({ mode: 'json' }).$type(), + + ...createUpdateTimestamps + }, + (t) => [ + primaryKey({ columns: [t.providerId, t.modelId] }), + index('user_model_preset_idx').on(t.presetModelId), + index('user_model_provider_enabled_idx').on(t.providerId, t.isEnabled), + index('user_model_provider_sort_idx').on(t.providerId, t.sortOrder) + ] +) + +// Export table type +export type UserModel = typeof userModelTable.$inferSelect +export type NewUserModel = typeof userModelTable.$inferInsert + +const jsonColumnOverrides = { + capabilities: () => z.array(z.string()).nullable() as z.ZodNullable>>, + inputModalities: () => z.array(z.string()).nullable() as z.ZodNullable>>, + outputModalities: () => z.array(z.string()).nullable() as z.ZodNullable>>, + endpointTypes: () => z.array(z.string()).nullable() as z.ZodNullable>>, + reasoning: () => ReasoningConfigSchema.nullable(), + parameters: () => ParameterSupportDbSchema.nullable(), + pricing: () => RuntimeModelPricingSchema.nullable(), + userOverrides: () => z.array(z.string()).nullable() +} + +export const userModelInsertSchema = 
/**
 * User Provider table schema
 *
 * Core principle: One Provider instance = One apiHost (1:1 relationship)
 * One apiHost can have multiple API Keys (1:N relationship)
 *
 * Relationship with preset providers:
 * - presetProviderId links to catalog preset provider for inherited config
 * - If presetProviderId is null, this is a fully custom provider
 */

import type { EndpointType } from '@shared/data/types/model'
import {
  type ApiFeatures,
  ApiFeaturesSchema,
  type ApiKeyEntry,
  ApiKeyEntrySchema,
  type AuthConfig,
  AuthConfigSchema,
  type EndpointConfig,
  EndpointConfigSchema,
  type ProviderSettings,
  ProviderSettingsSchema,
  type ProviderWebsites,
  ProviderWebsitesSchema
} from '@shared/data/types/provider'
import { index, integer, sqliteTable, text } from 'drizzle-orm/sqlite-core'
import { createSchemaFactory } from 'drizzle-zod'
import * as z from 'zod'

import { createUpdateTimestamps } from './_columnHelpers'

// Bind drizzle-zod to the project's zod instance so the generated schemas
// are built with the same zod the override schemas below come from.
const { createInsertSchema, createSelectSchema } = createSchemaFactory({ zodInstance: z })

/**
 * `user_provider` table: one row per provider instance configured by the user.
 *
 * JSON columns are stored as text and typed with `$type<…>()`; their runtime
 * validation lives in `jsonColumnOverrides` below.
 */
export const userProviderTable = sqliteTable(
  'user_provider',
  {
    providerId: text().primaryKey(),

    /** Associated preset provider ID (optional)
     * Links to catalog provider for inherited API format and defaults
     * If null, this is a fully custom provider requiring manual endpoint config
     */
    presetProviderId: text(),

    name: text().notNull(),

    /** Per-endpoint-type configuration (baseUrl, reasoningFormatType, modelsApiUrls) */
    endpointConfigs: text('endpoint_configs', { mode: 'json' }).$type<Partial<Record<EndpointType, EndpointConfig>>>(),

    /** Default text generation endpoint (when supporting multiple) */
    defaultChatEndpoint: text().$type<EndpointType>(),

    /** API Keys array */
    apiKeys: text({ mode: 'json' }).$type<ApiKeyEntry[]>().default([]),

    /** Unified auth configuration for different auth methods */
    authConfig: text({ mode: 'json' }).$type<AuthConfig>(),

    /** API feature support (null = use preset default) */
    apiFeatures: text('api_features', { mode: 'json' }).$type<ApiFeatures>(),

    /** Provider-specific settings as JSON */
    providerSettings: text({ mode: 'json' }).$type<ProviderSettings>(),

    /** Website links (official, apiKey, docs, models) */
    websites: text({ mode: 'json' }).$type<ProviderWebsites>(),

    /** Whether this provider is enabled */
    isEnabled: integer({ mode: 'boolean' }).default(true),

    /** Sort order in UI */
    sortOrder: integer().default(0),

    ...createUpdateTimestamps
  },
  (t) => [
    index('user_provider_preset_idx').on(t.presetProviderId),
    index('user_provider_enabled_sort_idx').on(t.isEnabled, t.sortOrder)
  ]
)

// Export table type
export type UserProvider = typeof userProviderTable.$inferSelect
export type NewUserProvider = typeof userProviderTable.$inferInsert

/**
 * Zod schemas substituted for the JSON text columns when generating the
 * insert/select schemas. Every JSON column is nullable at the schema level.
 */
const jsonColumnOverrides = {
  endpointConfigs: () => z.record(z.string(), EndpointConfigSchema).nullable(),
  apiKeys: () => z.array(ApiKeyEntrySchema).nullable(),
  authConfig: () => AuthConfigSchema.nullable(),
  apiFeatures: () => ApiFeaturesSchema.nullable(),
  providerSettings: () => ProviderSettingsSchema.nullable(),
  websites: () => ProviderWebsitesSchema.nullable()
}

export const userProviderInsertSchema = createInsertSchema(userProviderTable, jsonColumnOverrides)
export const userProviderSelectSchema = createSelectSchema(userProviderTable, jsonColumnOverrides)
/**
 * Migrates legacy Redux llm providers/models into v2 user tables.
 */

import { userModelTable } from '@data/db/schemas/userModel'
import { userProviderTable } from '@data/db/schemas/userProvider'
import { loggerService } from '@logger'
import type { ExecuteResult, PrepareResult, ValidateResult } from '@shared/data/migration/v2/types'
import type { Provider as LegacyProvider } from '@types'
import { sql } from 'drizzle-orm'

import type { MigrationContext } from '../core/MigrationContext'
import { BaseMigrator } from './BaseMigrator'
import { type OldLlmSettings, transformModel, transformProvider } from './mappings/ProviderModelMappings'

const logger = loggerService.withContext('ProviderModelMigrator')

/** Maximum number of model rows sent in one INSERT statement. */
const BATCH_SIZE = 100

/** The parts of the legacy Redux `llm` slice this migrator reads. */
interface LlmState {
  providers?: LegacyProvider[]
  settings?: OldLlmSettings
}

/**
 * Returns the provider's models with duplicate ids collapsed (the last
 * occurrence of an id supplies the value). Shared by `prepare` (counting) and
 * `execute` (inserting) so the expected and inserted row counts always agree.
 */
function dedupeModelsById(provider: LegacyProvider): NonNullable<LegacyProvider['models']> {
  return Array.from(new Map((provider.models ?? []).map((model) => [model.id, model])).values())
}

/** Normalizes an unknown thrown value to a message string. */
function errorMessage(error: unknown): string {
  return error instanceof Error ? error.message : String(error)
}

export class ProviderModelMigrator extends BaseMigrator {
  readonly id = 'provider_model'
  readonly name = 'Provider Model'
  readonly description = 'Migrate provider and model configuration from Redux to SQLite'
  readonly order = 1.75

  private providers: LegacyProvider[] = []
  private settings: OldLlmSettings = {}
  private totalModelCount = 0

  /** Drops everything captured by a previous `prepare` run. */
  override reset(): void {
    this.providers = []
    this.settings = {}
    this.totalModelCount = 0
  }

  /**
   * Reads `llm.providers` / `llm.settings` from the Redux snapshot and records
   * how many providers and (de-duplicated) models are expected to be written.
   *
   * @returns success with `itemCount` = number of providers; a missing or
   *   non-array `llm.providers` is reported as success with a warning.
   */
  async prepare(ctx: MigrationContext): Promise<PrepareResult> {
    try {
      // NOTE(review): the type argument was lost in extraction; `LlmState` is
      // the only declared shape for this slice — confirm against ReduxStateReader.
      const llmState = ctx.sources.reduxState.getCategory<LlmState>('llm')

      if (!llmState?.providers || !Array.isArray(llmState.providers)) {
        logger.warn('No llm.providers found in Redux state')
        return {
          success: true,
          itemCount: 0,
          warnings: ['No provider data found - skipping provider/model migration']
        }
      }

      this.providers = llmState.providers
      this.settings = llmState.settings ?? {}
      this.totalModelCount = this.providers.reduce(
        (count, provider) => count + dedupeModelsById(provider).length,
        0
      )

      logger.info('Preparation completed', {
        providerCount: this.providers.length,
        modelCount: this.totalModelCount
      })

      return {
        success: true,
        itemCount: this.providers.length
      }
    } catch (error) {
      logger.error('Preparation failed', error as Error)
      return {
        success: false,
        itemCount: 0,
        warnings: [errorMessage(error)]
      }
    }
  }

  /**
   * Inserts every prepared provider and its models inside one transaction.
   * Models are written in chunks of `BATCH_SIZE`; `sortOrder` is the model's
   * index within its provider's de-duplicated list.
   *
   * @returns `processedCount` = providers written. On failure the transaction
   *   is rolled back, so the reported count is 0.
   */
  async execute(ctx: MigrationContext): Promise<ExecuteResult> {
    if (this.providers.length === 0) {
      return { success: true, processedCount: 0 }
    }

    let processedProviders = 0
    let processedModels = 0

    try {
      await ctx.db.transaction(async (tx) => {
        for (let providerIndex = 0; providerIndex < this.providers.length; providerIndex++) {
          const provider = this.providers[providerIndex]
          await tx.insert(userProviderTable).values(transformProvider(provider, this.settings, providerIndex))
          processedProviders++

          const uniqueModels = dedupeModelsById(provider)

          for (let modelIndex = 0; modelIndex < uniqueModels.length; modelIndex += BATCH_SIZE) {
            const batch = uniqueModels
              .slice(modelIndex, modelIndex + BATCH_SIZE)
              .map((model, batchIndex) => transformModel(model, provider.id, modelIndex + batchIndex))

            if (batch.length > 0) {
              await tx.insert(userModelTable).values(batch)
              processedModels += batch.length
            }
          }

          this.reportProgress(
            Math.round(((providerIndex + 1) / this.providers.length) * 100),
            `Migrated ${processedProviders}/${this.providers.length} providers and ${processedModels} models`
          )
        }
      })

      logger.info('Execute completed', {
        processedProviders,
        processedModels
      })

      return {
        success: true,
        processedCount: processedProviders
      }
    } catch (error) {
      logger.error('Execute failed', error as Error)
      return {
        success: false,
        // The throwing transaction callback rolls everything back, so nothing
        // counted in `processedProviders` is actually persisted.
        processedCount: 0,
        error: errorMessage(error)
      }
    }
  }

  /**
   * Compares row counts in `user_provider` / `user_model` with the prepared
   * totals and spot-checks up to five provider rows for migrated API keys.
   */
  async validate(ctx: MigrationContext): Promise<ValidateResult> {
    try {
      const errors: { key: string; message: string }[] = []

      const providerResult = await ctx.db.select({ count: sql<number>`count(*)` }).from(userProviderTable).get()
      const modelResult = await ctx.db.select({ count: sql<number>`count(*)` }).from(userModelTable).get()
      const targetProviderCount = providerResult?.count ?? 0
      const targetModelCount = modelResult?.count ?? 0

      if (targetProviderCount !== this.providers.length) {
        errors.push({
          key: 'provider_count_mismatch',
          message: `Expected ${this.providers.length} providers but found ${targetProviderCount}`
        })
      }

      if (targetModelCount !== this.totalModelCount) {
        errors.push({
          key: 'model_count_mismatch',
          message: `Expected ${this.totalModelCount} models but found ${targetModelCount}`
        })
      }

      const sampleProviders = await ctx.db.select().from(userProviderTable).limit(5).all()
      for (const provider of sampleProviders) {
        const sourceProvider = this.providers.find((item) => item.id === provider.providerId)
        if (sourceProvider?.apiKey && (!provider.apiKeys || provider.apiKeys.length === 0)) {
          errors.push({
            key: `missing_api_key_${provider.providerId}`,
            message: `Provider ${provider.providerId} should include migrated API keys`
          })
        }
      }

      return {
        success: errors.length === 0,
        errors,
        stats: {
          sourceCount: this.providers.length,
          targetCount: targetProviderCount,
          skippedCount: 0
        }
      }
    } catch (error) {
      logger.error('Validation failed', error as Error)
      return {
        success: false,
        errors: [{ key: 'validation', message: errorMessage(error) }],
        stats: {
          sourceCount: this.providers.length,
          targetCount: 0,
          skippedCount: 0
        }
      }
    }
  }
}
/**
 * Provider/model migration transforms for Redux llm -> SQLite user tables.
 */

import {
  ENDPOINT_TYPE,
  type EndpointType,
  MODEL_CAPABILITY,
  type ModelCapability,
  normalizeModelId
} from '@cherrystudio/provider-registry'
import type { NewUserModel } from '@data/db/schemas/userModel'
import type { NewUserProvider } from '@data/db/schemas/userProvider'
import type { RuntimeModelPricing } from '@shared/data/types/model'
import type {
  ApiFeatures,
  ApiKeyEntry,
  AuthConfig,
  EndpointConfig,
  ProviderSettings,
  ReasoningFormatType
} from '@shared/data/types/provider'
import type { Model as LegacyModel, ModelType, Provider as LegacyProvider } from '@types'
import { v4 as uuidv4 } from 'uuid'

/** Legacy llm.settings structure used by a few providers. */
export interface OldLlmSettings {
  ollama?: { keepAliveTime?: number }
  lmstudio?: { keepAliveTime?: number }
  gpustack?: { keepAliveTime?: number }
  vertexai?: {
    serviceAccount?: {
      privateKey?: string
      clientEmail?: string
    }
    projectId?: string
    location?: string
  }
  awsBedrock?: {
    authType?: string
    accessKeyId?: string
    secretAccessKey?: string
    apiKey?: string
    region?: string
  }
  cherryIn?: {
    accessToken?: string
    refreshToken?: string
  }
}

/** Legacy capability type → registry capability. `text` maps to nothing and is dropped. */
const CAPABILITY_MAP: Partial<Record<ModelType, ModelCapability>> = {
  text: undefined,
  vision: MODEL_CAPABILITY.IMAGE_RECOGNITION,
  reasoning: MODEL_CAPABILITY.REASONING,
  function_calling: MODEL_CAPABILITY.FUNCTION_CALL,
  embedding: MODEL_CAPABILITY.EMBEDDING,
  web_search: MODEL_CAPABILITY.WEB_SEARCH,
  rerank: MODEL_CAPABILITY.RERANK
}

/** Legacy string endpoint/provider-type keys → EndpointType */
const ENDPOINT_MAP: Partial<Record<string, EndpointType>> = {
  openai: ENDPOINT_TYPE.OPENAI_CHAT_COMPLETIONS,
  'openai-response': ENDPOINT_TYPE.OPENAI_RESPONSES,
  anthropic: ENDPOINT_TYPE.ANTHROPIC_MESSAGES,
  gemini: ENDPOINT_TYPE.GOOGLE_GENERATE_CONTENT,
  'image-generation': ENDPOINT_TYPE.OPENAI_IMAGE_GENERATION,
  'jina-rerank': ENDPOINT_TYPE.JINA_RERANK,
  'new-api': ENDPOINT_TYPE.OPENAI_CHAT_COMPLETIONS,
  gateway: ENDPOINT_TYPE.OPENAI_CHAT_COMPLETIONS,
  ollama: ENDPOINT_TYPE.OLLAMA_CHAT
}

/** Legacy provider type → reasoning format attached to the provider's default endpoint. */
const REASONING_FORMAT_MAP: Partial<Record<string, ReasoningFormatType>> = {
  openai: 'openai-chat',
  'openai-response': 'openai-responses',
  anthropic: 'anthropic',
  gemini: 'gemini',
  'new-api': 'openai-chat',
  gateway: 'openai-chat',
  ollama: 'openai-chat'
}

/** Provider ids that get `presetProviderId` set to their own id during migration. */
const SYSTEM_PROVIDER_IDS = new Set<string>([
  'cherryin',
  'silicon',
  'aihubmix',
  'ocoolai',
  'deepseek',
  'ppio',
  'alayanew',
  'qiniu',
  'dmxapi',
  'burncloud',
  'tokenflux',
  '302ai',
  'cephalon',
  'lanyun',
  'ph8',
  'openrouter',
  'ollama',
  'ovms',
  'new-api',
  'lmstudio',
  'anthropic',
  'openai',
  'azure-openai',
  'gemini',
  'vertexai',
  'github',
  'copilot',
  'zhipu',
  'yi',
  'moonshot',
  'baichuan',
  'dashscope',
  'stepfun',
  'doubao',
  'infini',
  'minimax',
  'groq',
  'together',
  'fireworks',
  'nvidia',
  'grok',
  'hyperbolic',
  'mistral',
  'jina',
  'perplexity',
  'modelscope',
  'xirang',
  'hunyuan',
  'tencent-cloud-ti',
  'baidu-cloud',
  'gpustack',
  'voyageai',
  'aws-bedrock',
  'poe',
  'aionly',
  'longcat',
  'huggingface',
  'sophnet',
  'gateway',
  'cerebras',
  'mimo',
  'gitee-ai',
  'minimax-global',
  'zai'
])

/** Provider id → key of the `llm.settings` entry that holds its `keepAliveTime`. */
const KEEP_ALIVE_SETTINGS_KEY: Partial<Record<string, 'ollama' | 'lmstudio' | 'gpustack'>> = {
  ollama: 'ollama',
  lmstudio: 'lmstudio',
  gpustack: 'gpustack'
}

/**
 * Converts one legacy Redux provider into a `user_provider` insert row.
 *
 * @param legacy - provider as stored in the legacy Redux `llm.providers`
 * @param settings - legacy `llm.settings`, consulted for auth and keep-alive data
 * @param sortOrder - value written to the row's `sortOrder`
 */
export function transformProvider(
  legacy: LegacyProvider,
  settings: OldLlmSettings,
  sortOrder: number
): NewUserProvider {
  const endpointType = ENDPOINT_MAP[legacy.type]

  return {
    providerId: legacy.id,
    presetProviderId: SYSTEM_PROVIDER_IDS.has(legacy.id) ? legacy.id : null,
    name: legacy.name,
    endpointConfigs: buildEndpointConfigs(legacy, endpointType),
    defaultChatEndpoint: endpointType ?? null,
    apiKeys: buildApiKeys(legacy.apiKey),
    authConfig: buildAuthConfig(legacy, settings),
    apiFeatures: buildApiFeatures(legacy),
    providerSettings: buildProviderSettings(legacy, settings),
    isEnabled: legacy.enabled ?? true,
    sortOrder
  }
}

/**
 * Builds the per-endpoint config map: `apiHost` goes to the provider's mapped
 * endpoint, `anthropicApiHost` to the Anthropic messages endpoint, and the
 * reasoning format (if the provider type has one) to the mapped endpoint.
 * Returns null when nothing was set.
 */
function buildEndpointConfigs(
  legacy: LegacyProvider,
  endpointType: EndpointType | undefined
): NewUserProvider['endpointConfigs'] {
  const configs: Partial<Record<EndpointType, EndpointConfig>> = {}

  if (legacy.apiHost && endpointType !== undefined) {
    configs[endpointType] = { ...configs[endpointType], baseUrl: legacy.apiHost }
  }

  if (legacy.anthropicApiHost) {
    const ep = ENDPOINT_TYPE.ANTHROPIC_MESSAGES
    configs[ep] = { ...configs[ep], baseUrl: legacy.anthropicApiHost }
  }

  // Assign reasoning format type to the default endpoint
  const reasoningFormatType = REASONING_FORMAT_MAP[legacy.type]
  if (endpointType !== undefined && reasoningFormatType) {
    configs[endpointType] = { ...configs[endpointType], reasoningFormatType }
  }

  return Object.keys(configs).length > 0 ? configs : null
}

/**
 * Splits the legacy comma-separated key string into enabled key entries,
 * trimming whitespace and dropping empty segments. Each entry gets a fresh UUID.
 */
function buildApiKeys(apiKey: string): ApiKeyEntry[] {
  if (!apiKey) {
    return []
  }

  return apiKey
    .split(',')
    .map((key) => key.trim())
    .filter(Boolean)
    .map((key) => ({
      id: uuidv4(),
      key,
      isEnabled: true
    }))
}

/**
 * Picks the auth configuration for a provider. Checks run in order: Vertex
 * (GCP IAM), AWS Bedrock, Azure OpenAI, CherryIN OAuth tokens, generic OAuth;
 * anything else falls back to plain API-key auth.
 */
function buildAuthConfig(legacy: LegacyProvider, settings: OldLlmSettings): AuthConfig | null {
  if (legacy.isVertex && settings.vertexai) {
    const vertex = settings.vertexai
    return {
      type: 'iam-gcp',
      project: vertex.projectId ?? '',
      location: vertex.location ?? '',
      credentials: vertex.serviceAccount
        ? {
            privateKey: vertex.serviceAccount.privateKey,
            clientEmail: vertex.serviceAccount.clientEmail
          }
        : undefined
    }
  }

  if (legacy.id === 'aws-bedrock' && settings.awsBedrock) {
    const aws = settings.awsBedrock
    return {
      type: 'iam-aws',
      region: aws.region ?? '',
      accessKeyId: aws.accessKeyId,
      secretAccessKey: aws.secretAccessKey
    }
  }

  if (legacy.id === 'azure-openai' && legacy.apiVersion) {
    return {
      type: 'iam-azure',
      apiVersion: legacy.apiVersion
    }
  }

  if (
    legacy.id === 'cherryin' &&
    settings.cherryIn &&
    (settings.cherryIn.accessToken || settings.cherryIn.refreshToken)
  ) {
    return {
      type: 'oauth',
      clientId: '',
      accessToken: settings.cherryIn.accessToken,
      refreshToken: settings.cherryIn.refreshToken
    }
  }

  if (legacy.authType === 'oauth') {
    return {
      type: 'oauth',
      clientId: ''
    }
  }

  return {
    type: 'api-key'
  }
}

/**
 * Translates the legacy "is (not) supported" flags into positive feature
 * flags. `apiOptions` values take precedence over the older top-level flags.
 * Returns null when no flag was present at all.
 */
function buildApiFeatures(legacy: LegacyProvider): ApiFeatures | null {
  const apiOptions = legacy.apiOptions
  const features: ApiFeatures = {}
  let hasValue = false

  const notArrayContent = apiOptions?.isNotSupportArrayContent ?? legacy.isNotSupportArrayContent
  if (notArrayContent != null) {
    features.arrayContent = !notArrayContent
    hasValue = true
  }

  const notStreamOptions = apiOptions?.isNotSupportStreamOptions ?? legacy.isNotSupportStreamOptions
  if (notStreamOptions != null) {
    features.streamOptions = !notStreamOptions
    hasValue = true
  }

  const supportsDeveloperRole =
    apiOptions?.isSupportDeveloperRole ??
    (legacy.isNotSupportDeveloperRole != null ? !legacy.isNotSupportDeveloperRole : undefined)
  if (supportsDeveloperRole != null) {
    features.developerRole = supportsDeveloperRole
    hasValue = true
  }

  const supportsServiceTier =
    apiOptions?.isSupportServiceTier ??
    (legacy.isNotSupportServiceTier != null ? !legacy.isNotSupportServiceTier : undefined)
  if (supportsServiceTier != null) {
    features.serviceTier = supportsServiceTier
    hasValue = true
  }

  if (apiOptions?.isNotSupportEnableThinking != null) {
    features.enableThinking = !apiOptions.isNotSupportEnableThinking
    hasValue = true
  }

  if (apiOptions?.isNotSupportVerbosity != null) {
    features.verbosity = !apiOptions.isNotSupportVerbosity
    hasValue = true
  }

  return hasValue ? features : null
}

/**
 * Collects provider-level settings (keep-alive, service tier, verbosity, rate
 * limit, extra headers, notes, Anthropic cache control). Returns null when
 * none of them is set.
 */
function buildProviderSettings(legacy: LegacyProvider, llmSettings: OldLlmSettings): ProviderSettings | null {
  const settings: ProviderSettings = {}
  let hasValue = false

  const keepAliveSource = KEEP_ALIVE_SETTINGS_KEY[legacy.id]
  if (keepAliveSource) {
    const keepAliveTime = llmSettings[keepAliveSource]?.keepAliveTime
    if (keepAliveTime != null) {
      settings.keepAliveTime = keepAliveTime
      hasValue = true
    }
  }

  if (legacy.serviceTier) {
    settings.serviceTier = legacy.serviceTier
    hasValue = true
  }

  if (legacy.verbosity) {
    settings.verbosity = legacy.verbosity
    hasValue = true
  }

  if (legacy.rateLimit != null) {
    settings.rateLimit = legacy.rateLimit
    hasValue = true
  }

  if (legacy.extra_headers && Object.keys(legacy.extra_headers).length > 0) {
    settings.extraHeaders = legacy.extra_headers
    hasValue = true
  }

  if (legacy.notes) {
    settings.notes = legacy.notes
    hasValue = true
  }

  if (legacy.anthropicCacheControl) {
    settings.cacheControl = {
      enabled: true,
      tokenThreshold: legacy.anthropicCacheControl.tokenThreshold,
      cacheSystemMessage: legacy.anthropicCacheControl.cacheSystemMessage,
      cacheLastNMessages: legacy.anthropicCacheControl.cacheLastNMessages
    }
    hasValue = true
  }

  return hasValue ? settings : null
}

/**
 * Converts one legacy model into a `user_model` insert row.
 *
 * `capabilities` is recorded in `userOverrides` when any legacy capability
 * carries an `isUserSelected` value, i.e. the user touched it explicitly.
 *
 * @param legacy - model as stored under the legacy provider
 * @param providerId - id of the owning provider row
 * @param sortOrder - value written to the row's `sortOrder`
 */
export function transformModel(legacy: LegacyModel, providerId: string, sortOrder: number): NewUserModel {
  const hasCustomizedCapabilities =
    legacy.capabilities?.some((capability) => capability.isUserSelected !== undefined) ?? false

  return {
    providerId,
    modelId: legacy.id,
    presetModelId: normalizeModelId(legacy.id),
    name: legacy.name ?? null,
    description: legacy.description ?? null,
    group: legacy.group ?? null,
    capabilities: mapCapabilities(legacy.capabilities),
    inputModalities: null,
    outputModalities: null,
    endpointTypes: mapEndpointTypes(legacy.endpoint_type, legacy.supported_endpoint_types),
    contextWindow: null,
    maxOutputTokens: null,
    supportsStreaming: legacy.supported_text_delta ?? null,
    reasoning: null,
    parameters: null,
    pricing: mapPricing(legacy.pricing),
    isEnabled: true,
    isHidden: false,
    sortOrder,
    userOverrides: hasCustomizedCapabilities ? ['capabilities'] : null
  }
}

/** Maps legacy capabilities through `CAPABILITY_MAP`, de-duplicated; null when none map. */
function mapCapabilities(capabilities?: LegacyModel['capabilities']): ModelCapability[] | null {
  if (!capabilities || capabilities.length === 0) {
    return null
  }

  const mapped = capabilities
    .map((capability) => CAPABILITY_MAP[capability.type])
    .filter((capability): capability is ModelCapability => capability !== undefined)

  return mapped.length > 0 ? Array.from(new Set(mapped)) : null
}

/**
 * Maps legacy endpoint identifiers through `ENDPOINT_MAP`, de-duplicated.
 * `supported_endpoint_types` is used when it has entries; otherwise the single
 * `endpoint_type` is used. Returns null when nothing maps.
 */
function mapEndpointTypes(
  endpointType?: LegacyModel['endpoint_type'],
  supportedEndpointTypes?: LegacyModel['supported_endpoint_types']
): EndpointType[] | null {
  // An empty supported list carries no information, so it must not mask a
  // concrete `endpoint_type` (a plain `??` would keep the empty array).
  const sourceTypes =
    supportedEndpointTypes && supportedEndpointTypes.length > 0
      ? supportedEndpointTypes
      : endpointType
        ? [endpointType]
        : []
  if (sourceTypes.length === 0) {
    return null
  }

  const mapped = sourceTypes
    .map((type) => (type ? ENDPOINT_MAP[type] : undefined))
    .filter((type): type is EndpointType => type !== undefined)

  return mapped.length > 0 ? Array.from(new Set(mapped)) : null
}

/** Converts legacy per-million-token prices into the runtime pricing shape; null when absent. */
function mapPricing(pricing?: LegacyModel['pricing']): RuntimeModelPricing | null {
  if (!pricing) {
    return null
  }

  return {
    input: { perMillionTokens: pricing.input_per_million_tokens },
    output: { perMillionTokens: pricing.output_per_million_tokens }
  }
}
/**
 * Model Service - handles model CRUD operations
 *
 * Provides business logic for:
 * - Model CRUD operations
 * - Row to Model conversion
 * - Registry import support
 */

import type { NewUserModel, UserModel } from '@data/db/schemas/userModel'
import { isRegistryEnrichableField, userModelTable } from '@data/db/schemas/userModel'
import { loggerService } from '@logger'
import { application } from '@main/core/application'
import { DataApiErrorFactory } from '@shared/data/api'
import type { CreateModelDto, ListModelsQuery, UpdateModelDto } from '@shared/data/api/schemas/models'
import type {
  EndpointType,
  Modality,
  Model,
  ModelCapability,
  RuntimeParameterSupport,
  RuntimeReasoning
} from '@shared/data/types/model'
import { createUniqueModelId } from '@shared/data/types/model'
import { mergeModelConfig } from '@shared/data/utils/modelMerger'
import { and, eq, inArray, type SQL } from 'drizzle-orm'

const logger = loggerService.withContext('DataApi:ModelService')

/**
 * Convert database row to Model entity
 *
 * Since user_model stores fully resolved data (merged at add-time),
 * this is a direct field mapping with no runtime merge needed.
 * Null columns become `undefined`, except for the defaults applied here:
 * name → modelId, capabilities → [], supportsStreaming → true,
 * isEnabled → true, isHidden → false.
 */
function rowToRuntimeModel(row: UserModel): Model {
  return {
    id: createUniqueModelId(row.providerId, row.modelId),
    providerId: row.providerId,
    apiModelId: row.modelId,
    name: row.name ?? row.modelId,
    description: row.description ?? undefined,
    group: row.group ?? undefined,
    capabilities: row.capabilities ?? [],
    inputModalities: row.inputModalities ?? undefined,
    outputModalities: row.outputModalities ?? undefined,
    contextWindow: row.contextWindow ?? undefined,
    maxOutputTokens: row.maxOutputTokens ?? undefined,
    endpointTypes: row.endpointTypes ?? undefined,
    supportsStreaming: row.supportsStreaming ?? true,
    reasoning: (row.reasoning ?? undefined) as RuntimeReasoning | undefined,
    parameterSupport: (row.parameters ?? undefined) as RuntimeParameterSupport | undefined,
    pricing: row.pricing ?? undefined,
    isEnabled: row.isEnabled ?? true,
    isHidden: row.isHidden ?? false
  }
}

export class ModelService {
  private static instance: ModelService

  private constructor() {}

  /** Returns the process-wide instance, creating it on first use. */
  public static getInstance(): ModelService {
    if (!ModelService.instance) {
      ModelService.instance = new ModelService()
    }
    return ModelService.instance
  }

  /**
   * List models with optional filters
   *
   * `providerId` and `enabled` are applied in SQL; `capability` is applied in
   * memory afterwards. Results are ordered by `sortOrder`.
   */
  async list(query: ListModelsQuery): Promise<Model[]> {
    const db = application.get('DbService').getDb()

    const conditions: SQL[] = []

    if (query.providerId) {
      conditions.push(eq(userModelTable.providerId, query.providerId))
    }

    if (query.enabled !== undefined) {
      conditions.push(eq(userModelTable.isEnabled, query.enabled))
    }

    const rows = await db
      .select()
      .from(userModelTable)
      .where(conditions.length > 0 ? and(...conditions) : undefined)
      .orderBy(userModelTable.sortOrder)

    let models = rows.map(rowToRuntimeModel)

    // Post-filter by capability (JSON array column, can't filter in SQL easily)
    if (query.capability !== undefined) {
      const cap = query.capability as ModelCapability
      models = models.filter((m) => m.capabilities.includes(cap))
    }

    return models
  }

  /**
   * Get a model by composite key (providerId + modelId)
   *
   * @throws a not-found DataApi error when no such row exists
   */
  async getByKey(providerId: string, modelId: string): Promise<Model> {
    const db = application.get('DbService').getDb()

    const [row] = await db
      .select()
      .from(userModelTable)
      .where(and(eq(userModelTable.providerId, providerId), eq(userModelTable.modelId, modelId)))
      .limit(1)

    if (!row) {
      throw DataApiErrorFactory.notFound('Model', `${providerId}/${modelId}`)
    }

    return rowToRuntimeModel(row)
  }

  /**
   * Create a new model
   *
   * Automatically enriches from registry preset data when a match is found.
   * DTO values take priority over registry (user > registryOverride > preset).
   */
  async create(dto: CreateModelDto): Promise<Model> {
    const db = application.get('DbService').getDb()

    // Look up registry data for auto-enrichment
    const { presetModel, registryOverride, reasoningFormatTypes, defaultChatEndpoint } = await application
      .get('ProviderRegistryService')
      .lookupModel(dto.providerId, dto.modelId)

    let values: NewUserModel

    if (presetModel) {
      // Registry match found — merge DTO with preset data
      const userRow = {
        providerId: dto.providerId,
        modelId: dto.modelId,
        presetModelId: presetModel.id,
        name: dto.name ?? null,
        description: dto.description ?? null,
        group: dto.group ?? null,
        capabilities: (dto.capabilities as ModelCapability[]) ?? null,
        inputModalities: (dto.inputModalities as Modality[]) ?? null,
        outputModalities: (dto.outputModalities as Modality[]) ?? null,
        endpointTypes: (dto.endpointTypes as EndpointType[]) ?? null,
        contextWindow: dto.contextWindow ?? null,
        maxOutputTokens: dto.maxOutputTokens ?? null,
        supportsStreaming: dto.supportsStreaming ?? null,
        reasoning: dto.reasoning ?? null
      }

      const merged = mergeModelConfig(
        userRow,
        registryOverride,
        presetModel,
        dto.providerId,
        reasoningFormatTypes,
        defaultChatEndpoint
      )

      values = {
        providerId: dto.providerId,
        modelId: dto.modelId,
        presetModelId: presetModel.id,
        name: merged.name,
        description: merged.description ?? null,
        group: merged.group ?? null,
        capabilities: merged.capabilities,
        inputModalities: merged.inputModalities ?? null,
        outputModalities: merged.outputModalities ?? null,
        endpointTypes: merged.endpointTypes ?? null,
        contextWindow: merged.contextWindow ?? null,
        maxOutputTokens: merged.maxOutputTokens ?? null,
        supportsStreaming: merged.supportsStreaming,
        reasoning: merged.reasoning ?? null,
        // `userRow` does not carry these two fields into the merge, so apply
        // the documented "DTO wins" precedence here instead of dropping them.
        parameters: dto.parameterSupport ?? merged.parameterSupport ?? null,
        pricing: dto.pricing ?? merged.pricing ?? null
      }

      logger.info('Created model with registry enrichment', {
        providerId: dto.providerId,
        modelId: dto.modelId,
        presetModelId: presetModel.id
      })
    } else {
      // No registry match — store as custom model
      values = {
        providerId: dto.providerId,
        modelId: dto.modelId,
        presetModelId: dto.presetModelId ?? null,
        name: dto.name ?? null,
        description: dto.description ?? null,
        group: dto.group ?? null,
        capabilities: (dto.capabilities as ModelCapability[]) ?? null,
        inputModalities: (dto.inputModalities as Modality[]) ?? null,
        outputModalities: (dto.outputModalities as Modality[]) ?? null,
        endpointTypes: (dto.endpointTypes as EndpointType[]) ?? null,
        contextWindow: dto.contextWindow ?? null,
        maxOutputTokens: dto.maxOutputTokens ?? null,
        supportsStreaming: dto.supportsStreaming ?? null,
        reasoning: dto.reasoning ?? null,
        parameters: dto.parameterSupport ?? null,
        pricing: dto.pricing ?? null
      }

      logger.info('Created custom model (no registry match)', {
        providerId: dto.providerId,
        modelId: dto.modelId
      })
    }

    const [row] = await db.insert(userModelTable).values(values).returning()

    return rowToRuntimeModel(row)
  }

  /**
   * Update an existing model
   *
   * Only DTO fields that are not `undefined` are written. Registry-enrichable
   * fields that were supplied are added to the row's `userOverrides`.
   * A DTO with nothing to write returns the stored model unchanged.
   *
   * @throws a not-found DataApi error when no such row exists
   */
  async update(providerId: string, modelId: string, dto: UpdateModelDto): Promise<Model> {
    const db = application.get('DbService').getDb()

    // Fetch existing row (also verifies existence)
    const [existing] = await db
      .select()
      .from(userModelTable)
      .where(and(eq(userModelTable.providerId, providerId), eq(userModelTable.modelId, modelId)))
      .limit(1)

    if (!existing) {
      throw DataApiErrorFactory.notFound('Model', `${providerId}/${modelId}`)
    }

    // Build update object
    const updates: Partial<NewUserModel> = {}

    if (dto.name !== undefined) updates.name = dto.name
    if (dto.description !== undefined) updates.description = dto.description
    if (dto.group !== undefined) updates.group = dto.group
    if (dto.capabilities !== undefined) updates.capabilities = dto.capabilities as ModelCapability[]
    if (dto.endpointTypes !== undefined) updates.endpointTypes = dto.endpointTypes as EndpointType[]
    if (dto.supportsStreaming !== undefined) updates.supportsStreaming = dto.supportsStreaming
    if (dto.contextWindow !== undefined) updates.contextWindow = dto.contextWindow
    if (dto.maxOutputTokens !== undefined) updates.maxOutputTokens = dto.maxOutputTokens
    if (dto.reasoning !== undefined) updates.reasoning = dto.reasoning
    if (dto.pricing !== undefined) updates.pricing = dto.pricing
    if (dto.isEnabled !== undefined) updates.isEnabled = dto.isEnabled
    if (dto.isHidden !== undefined) updates.isHidden = dto.isHidden
    if (dto.sortOrder !== undefined) updates.sortOrder = dto.sortOrder
    if (dto.notes !== undefined) updates.notes = dto.notes

    // Track which registry-enrichable fields the user explicitly changed.
    // A key that is present but `undefined` was not changed, so skip it.
    const changedEnrichableFields = Object.entries(dto)
      .filter(([, value]) => value !== undefined)
      .map(([field]) => field)
      .filter(isRegistryEnrichableField)
    if (changedEnrichableFields.length > 0) {
      const existingOverrides = existing.userOverrides ?? []
      updates.userOverrides = [...new Set([...existingOverrides, ...changedEnrichableFields])]
    }

    // Drizzle rejects an UPDATE with an empty SET clause; nothing to write
    // means the stored row is already the answer.
    if (Object.keys(updates).length === 0) {
      return rowToRuntimeModel(existing)
    }

    const [row] = await db
      .update(userModelTable)
      .set(updates)
      .where(and(eq(userModelTable.providerId, providerId), eq(userModelTable.modelId, modelId)))
      .returning()

    logger.info('Updated model', { providerId, modelId, changes: Object.keys(dto) })

    return rowToRuntimeModel(row)
  }

  /**
   * Delete a model
   *
   * @throws a not-found DataApi error when no such row exists
   */
  async delete(providerId: string, modelId: string): Promise<void> {
    const db = application.get('DbService').getDb()

    // Verify model exists
    await this.getByKey(providerId, modelId)

    await db
      .delete(userModelTable)
      .where(and(eq(userModelTable.providerId, providerId), eq(userModelTable.modelId, modelId)))

    logger.info('Deleted model', { providerId, modelId })
  }

  /**
   * Batch upsert models for a provider (used by RegistryService).
   * Inserts new models, updates existing ones.
   * Respects `userOverrides`: fields the user has explicitly modified are not overwritten.
   *
   * Each model is upserted with its own statement; `presetModelId` is always
   * refreshed on conflict.
   */
  async batchUpsert(models: NewUserModel[]): Promise<void> {
    if (models.length === 0) return

    const db = application.get('DbService').getDb()

    // Pre-fetch existing userOverrides for all affected models
    const providerIds = [...new Set(models.map((m) => m.providerId))]
    const existingRows = await db
      .select({
        providerId: userModelTable.providerId,
        modelId: userModelTable.modelId,
        userOverrides: userModelTable.userOverrides
      })
      .from(userModelTable)
      .where(inArray(userModelTable.providerId, providerIds))

    const overridesMap = new Map<string, Set<string>>()
    for (const row of existingRows) {
      if (row.userOverrides && row.userOverrides.length > 0) {
        overridesMap.set(`${row.providerId}:${row.modelId}`, new Set(row.userOverrides))
      }
    }

    for (const model of models) {
      const userOverrides = overridesMap.get(`${model.providerId}:${model.modelId}`)

      // Build the update set, skipping user-overridden fields
      const set: Partial<NewUserModel> = {
        presetModelId: model.presetModelId
      }
      const enrichableFields = {
        name: model.name,
        description: model.description,
        group: model.group,
        capabilities: model.capabilities,
        inputModalities: model.inputModalities,
        outputModalities: model.outputModalities,
        endpointTypes: model.endpointTypes,
        contextWindow: model.contextWindow,
        maxOutputTokens: model.maxOutputTokens,
        supportsStreaming: model.supportsStreaming,
        reasoning: model.reasoning,
        parameters: model.parameters,
        pricing: model.pricing
      }

      for (const [field, value] of Object.entries(enrichableFields)) {
        if (!userOverrides?.has(field)) {
          ;(set as Record<string, unknown>)[field] = value
        }
      }

      await db
        .insert(userModelTable)
        .values(model)
        .onConflictDoUpdate({
          target: [userModelTable.providerId, userModelTable.modelId],
          set
        })
    }

    logger.info('Batch upserted models', { count: models.length, providerId: models[0]?.providerId })
  }
}

export const modelService = ModelService.getInstance()
a/src/main/data/services/ProviderRegistryService.ts b/src/main/data/services/ProviderRegistryService.ts new file mode 100644 index 00000000000..9834a7b71e4 --- /dev/null +++ b/src/main/data/services/ProviderRegistryService.ts @@ -0,0 +1,547 @@ +/** + * Registry Service - imports registry data into SQLite + * + * Responsible for: + * - Reading registry JSON files (models.json, provider-models.json, providers.json) + * - Merging configurations using mergeModelConfig/mergeProviderConfig + * - Writing resolved data to user_model / user_provider tables + * + * Managed by the lifecycle system. Seeds preset data during onInit. + */ + +import { join } from 'node:path' + +import type { + ProtoModelConfig, + ProtoProviderConfig, + ProtoProviderModelOverride, + RegistryEndpointConfig +} from '@cherrystudio/provider-registry' +import { EndpointType } from '@cherrystudio/provider-registry' +import { + readModelRegistry, + readProviderModelRegistry, + readProviderRegistry +} from '@cherrystudio/provider-registry/node' +import type { NewUserModel } from '@data/db/schemas/userModel' +import { userModelTable } from '@data/db/schemas/userModel' +import type { NewUserProvider } from '@data/db/schemas/userProvider' +import { userProviderTable } from '@data/db/schemas/userProvider' +import { loggerService } from '@logger' +import { isDev } from '@main/constant' +import { application } from '@main/core/application' +import { BaseService, DependsOn, Injectable, ServicePhase } from '@main/core/lifecycle' +import { Phase } from '@main/core/lifecycle' +import type { Model } from '@shared/data/types/model' +import type { EndpointConfig, ReasoningFormatType } from '@shared/data/types/provider' +import { extractReasoningFormatTypes, mergeModelConfig } from '@shared/data/utils/modelMerger' +import { eq, isNotNull } from 'drizzle-orm' + +import { modelService } from './ModelService' +import { providerService } from './ProviderService' + +const logger = 
loggerService.withContext('DataApi:ProviderRegistryService') + +/** + * Convert registry endpointConfigs (with reasoningFormat discriminated union) + * to runtime endpointConfigs (with reasoningFormatType string). + */ +function buildRuntimeEndpointConfigs( + registryConfigs: Record | undefined +): Partial> | null { + if (!registryConfigs || Object.keys(registryConfigs).length === 0) return null + + const configs: Partial> = {} + + for (const [k, regConfig] of Object.entries(registryConfigs)) { + const ep = k as EndpointType + const config: EndpointConfig = {} + + if (regConfig.baseUrl) config.baseUrl = regConfig.baseUrl + if (regConfig.modelsApiUrls) config.modelsApiUrls = regConfig.modelsApiUrls + if (regConfig.reasoningFormat?.type) config.reasoningFormatType = regConfig.reasoningFormat.type + + if (Object.keys(config).length > 0) configs[ep] = config + } + + return Object.keys(configs).length > 0 ? configs : null +} + +@Injectable('ProviderRegistryService') +@ServicePhase(Phase.BeforeReady) +@DependsOn(['DbService']) +export class ProviderRegistryService extends BaseService { + private registryModels: ProtoModelConfig[] | null = null + private registryProviderModels: ProtoProviderModelOverride[] | null = null + private registryProviders: ProtoProviderConfig[] | null = null + + protected async onInit(): Promise { + await this.initializeAllPresetProviders() + } + + protected onDestroy(): void { + this.clearCache() + } + + private getRegistryDataPath(): string { + if (isDev) { + return join(__dirname, '..', '..', 'packages', 'provider-registry', 'data') + } + return join(process.resourcesPath, 'packages', 'provider-registry', 'data') + } + + private loadRegistryModels(): ProtoModelConfig[] { + if (this.registryModels) return this.registryModels + + try { + const dataPath = this.getRegistryDataPath() + const data = readModelRegistry(join(dataPath, 'models.json')) + const models = data.models ?? 
[] + this.registryModels = models + logger.info('Loaded registry models', { count: models.length }) + return models + } catch (error) { + logger.warn('Failed to load registry models.json', { error }) + return [] + } + } + + private loadProviderModels(): ProtoProviderModelOverride[] { + if (this.registryProviderModels) return this.registryProviderModels + + try { + const dataPath = this.getRegistryDataPath() + const data = readProviderModelRegistry(join(dataPath, 'provider-models.json')) + const overrides = data.overrides ?? [] + this.registryProviderModels = overrides + logger.info('Loaded registry provider-models', { count: overrides.length }) + return overrides + } catch (error) { + logger.warn('Failed to load registry provider-models.json', { error }) + return [] + } + } + + private loadRegistryProviders(): ProtoProviderConfig[] { + if (this.registryProviders) return this.registryProviders + + try { + const dataPath = this.getRegistryDataPath() + const data = readProviderRegistry(join(dataPath, 'providers.json')) + const providers = data.providers ?? [] + this.registryProviders = providers + return providers + } catch (error) { + logger.warn('Failed to load registry providers.json', { error }) + return [] + } + } + + private getRegistryReasoningConfig(providerId: string): { + defaultChatEndpoint?: EndpointType + reasoningFormatTypes?: Partial> + } { + const providers = this.loadRegistryProviders() + const provider = providers.find((p) => p.id === providerId) + const endpointConfigs = provider ? 
buildRuntimeEndpointConfigs(provider.endpointConfigs) : null + + return { + defaultChatEndpoint: provider?.defaultChatEndpoint, + reasoningFormatTypes: extractReasoningFormatTypes(endpointConfigs) + } + } + + private async getEffectiveReasoningConfig(providerId: string): Promise<{ + defaultChatEndpoint?: EndpointType + reasoningFormatTypes?: Partial> + }> { + const db = application.get('DbService').getDb() + const registryConfig = this.getRegistryReasoningConfig(providerId) + const [provider] = await db + .select({ + defaultChatEndpoint: userProviderTable.defaultChatEndpoint, + endpointConfigs: userProviderTable.endpointConfigs + }) + .from(userProviderTable) + .where(eq(userProviderTable.providerId, providerId)) + .limit(1) + + if (provider) { + const defaultChatEndpoint = provider.defaultChatEndpoint ?? registryConfig.defaultChatEndpoint + const reasoningFormatTypes = + extractReasoningFormatTypes(provider.endpointConfigs) ?? registryConfig.reasoningFormatTypes + + return { defaultChatEndpoint, reasoningFormatTypes } + } + + return registryConfig + } + + async initializeProvider(providerId: string): Promise { + const registryModels = this.loadRegistryModels() + const providerModels = this.loadProviderModels() + const { defaultChatEndpoint, reasoningFormatTypes } = await this.getEffectiveReasoningConfig(providerId) + + const overrides = providerModels.filter((pm) => pm.providerId === providerId) + + if (overrides.length === 0) { + logger.info('No registry overrides found for provider', { providerId }) + return [] + } + + const modelMap = new Map() + for (const model of registryModels) { + modelMap.set(model.id, model) + } + + const mergedModels: Model[] = [] + const dbRows: NewUserModel[] = [] + + for (const override of overrides) { + const baseModel = modelMap.get(override.modelId) ?? 
null + + if (!baseModel) { + logger.warn('Base model not found for override', { providerId, modelId: override.modelId }) + continue + } + + const merged = mergeModelConfig(null, override, baseModel, providerId, reasoningFormatTypes, defaultChatEndpoint) + mergedModels.push(merged) + + dbRows.push({ + providerId, + modelId: baseModel.id, + presetModelId: baseModel.id, + name: merged.name, + description: merged.description ?? null, + group: merged.group ?? null, + capabilities: merged.capabilities, + inputModalities: merged.inputModalities ?? null, + outputModalities: merged.outputModalities ?? null, + endpointTypes: merged.endpointTypes ?? null, + contextWindow: merged.contextWindow ?? null, + maxOutputTokens: merged.maxOutputTokens ?? null, + supportsStreaming: merged.supportsStreaming, + reasoning: merged.reasoning ?? null, + parameters: merged.parameterSupport ?? null, + isEnabled: merged.isEnabled, + isHidden: merged.isHidden + }) + } + + await modelService.batchUpsert(dbRows) + + logger.info('Initialized provider models from registry', { providerId, count: mergedModels.length }) + + return mergedModels + } + + getRegistryModelsByProvider(providerId: string): Model[] { + const registryModels = this.loadRegistryModels() + const providerModels = this.loadProviderModels() + const { defaultChatEndpoint, reasoningFormatTypes } = this.getRegistryReasoningConfig(providerId) + + const overrides = providerModels.filter((pm) => pm.providerId === providerId) + if (overrides.length === 0) return [] + + const modelMap = new Map() + for (const model of registryModels) { + modelMap.set(model.id, model) + } + + const mergedModels: Model[] = [] + for (const override of overrides) { + const baseModel = modelMap.get(override.modelId) ?? 
null + if (!baseModel) continue + mergedModels.push( + mergeModelConfig(null, override, baseModel, providerId, reasoningFormatTypes, defaultChatEndpoint) + ) + } + + return mergedModels + } + + async initializePresetProviders(): Promise { + const dataPath = this.getRegistryDataPath() + let rawProviders: ReturnType['providers'] = [] + + try { + const data = readProviderRegistry(join(dataPath, 'providers.json')) + rawProviders = data.providers + } catch (error) { + logger.warn('Failed to load providers.json for provider import', { error }) + return + } + + const dbRows: NewUserProvider[] = rawProviders.map((p) => { + const registryWebsite = p.metadata?.website + const websites = + registryWebsite && + (registryWebsite.official || registryWebsite.docs || registryWebsite.apiKey || registryWebsite.models) + ? { + official: registryWebsite.official || undefined, + docs: registryWebsite.docs || undefined, + apiKey: registryWebsite.apiKey || undefined, + models: registryWebsite.models || undefined + } + : null + + const apiFeatures = p.apiFeatures + ? { + arrayContent: p.apiFeatures.arrayContent, + streamOptions: p.apiFeatures.streamOptions, + developerRole: p.apiFeatures.developerRole, + serviceTier: p.apiFeatures.serviceTier, + verbosity: p.apiFeatures.verbosity, + enableThinking: p.apiFeatures.enableThinking + } + : null + + const endpointConfigs = buildRuntimeEndpointConfigs(p.endpointConfigs) + + return { + providerId: p.id, + presetProviderId: p.id, + name: p.name, + endpointConfigs, + defaultChatEndpoint: p.defaultChatEndpoint ?? 
null, + apiFeatures, + websites + } + }) + + dbRows.push({ + providerId: 'cherryai', + name: 'CherryAI', + endpointConfigs: { + [EndpointType.OPENAI_CHAT_COMPLETIONS]: { + baseUrl: 'https://api.cherry-ai.com' + } + }, + defaultChatEndpoint: EndpointType.OPENAI_CHAT_COMPLETIONS + }) + + await providerService.batchUpsert(dbRows) + + logger.info('Initialized preset providers from registry', { count: dbRows.length }) + } + + private async initializeAllPresetProviders(): Promise { + await this.initializePresetProviders() + await this.enrichExistingModels() + + logger.info('Initialized all preset providers and enriched existing models') + } + + async enrichExistingModels(): Promise { + const registryModels = this.loadRegistryModels() + const providerModels = this.loadProviderModels() + + if (registryModels.length === 0) { + logger.warn('No registry models loaded, skipping model enrichment') + return + } + + const modelMap = new Map() + for (const m of registryModels) { + modelMap.set(m.id, m) + } + + const overridesByProvider = new Map>() + for (const pm of providerModels) { + let providerMap = overridesByProvider.get(pm.providerId) + if (!providerMap) { + providerMap = new Map() + overridesByProvider.set(pm.providerId, providerMap) + } + providerMap.set(pm.modelId, pm) + } + + const db = application.get('DbService').getDb() + const userModels = await db.select().from(userModelTable).where(isNotNull(userModelTable.presetModelId)) + + if (userModels.length === 0) { + logger.info('No user models with presetModelId found, skipping enrichment') + return + } + + const updateRows: NewUserModel[] = [] + let skippedCount = 0 + const providerRows = await db + .select({ + providerId: userProviderTable.providerId, + defaultChatEndpoint: userProviderTable.defaultChatEndpoint, + endpointConfigs: userProviderTable.endpointConfigs + }) + .from(userProviderTable) + const providerConfigMap = new Map(providerRows.map((row) => [row.providerId, row])) + + for (const row of userModels) { + 
const presetModelId = row.presetModelId! + const presetModel = modelMap.get(presetModelId) + + if (!presetModel) { + skippedCount++ + continue + } + + const providerOverrides = overridesByProvider.get(row.providerId) + const registryOverride = providerOverrides?.get(presetModelId) ?? null + const providerConfig = providerConfigMap.get(row.providerId) + const registryReasoningConfig = this.getRegistryReasoningConfig(row.providerId) + const defaultChatEndpoint = providerConfig?.defaultChatEndpoint ?? registryReasoningConfig.defaultChatEndpoint + const reasoningFormatTypes = + extractReasoningFormatTypes(providerConfig?.endpointConfigs) ?? registryReasoningConfig.reasoningFormatTypes + + const merged = mergeModelConfig( + { + providerId: row.providerId, + modelId: row.modelId, + presetModelId, + name: row.name, + description: row.description, + group: row.group, + capabilities: row.capabilities, + inputModalities: row.inputModalities, + outputModalities: row.outputModalities, + endpointTypes: row.endpointTypes, + contextWindow: row.contextWindow, + maxOutputTokens: row.maxOutputTokens, + supportsStreaming: row.supportsStreaming, + reasoning: row.reasoning, + isEnabled: row.isEnabled, + isHidden: row.isHidden + }, + registryOverride, + presetModel, + row.providerId, + reasoningFormatTypes, + defaultChatEndpoint + ) + + updateRows.push({ + providerId: row.providerId, + modelId: row.modelId, + presetModelId, + name: merged.name, + description: merged.description ?? null, + group: merged.group ?? null, + capabilities: merged.capabilities, + inputModalities: merged.inputModalities ?? null, + outputModalities: merged.outputModalities ?? null, + endpointTypes: merged.endpointTypes ?? null, + contextWindow: merged.contextWindow ?? null, + maxOutputTokens: merged.maxOutputTokens ?? null, + supportsStreaming: merged.supportsStreaming, + reasoning: merged.reasoning ?? null, + pricing: merged.pricing ?? 
null, + isEnabled: merged.isEnabled, + isHidden: merged.isHidden + }) + } + + if (updateRows.length > 0) { + await modelService.batchUpsert(updateRows) + } + + logger.info('Model enrichment completed', { + total: userModels.length, + enriched: updateRows.length, + skipped: skippedCount, + registrySize: registryModels.length + }) + } + + async lookupModel( + providerId: string, + modelId: string + ): Promise<{ + presetModel: ProtoModelConfig | null + registryOverride: ProtoProviderModelOverride | null + defaultChatEndpoint?: EndpointType + reasoningFormatTypes?: Partial> + }> { + const registryModels = this.loadRegistryModels() + const providerModels = this.loadProviderModels() + + const presetModel = registryModels.find((m) => m.id === modelId) ?? null + const registryOverride = providerModels.find((pm) => pm.providerId === providerId && pm.modelId === modelId) ?? null + const reasoningConfig = await this.getEffectiveReasoningConfig(providerId) + + return { presetModel, registryOverride, ...reasoningConfig } + } + + async resolveModels( + providerId: string, + rawModels: Array<{ + modelId: string + name?: string + group?: string + description?: string + endpointTypes?: string[] + }> + ): Promise { + const registryModels = this.loadRegistryModels() + const providerModels = this.loadProviderModels() + const { defaultChatEndpoint, reasoningFormatTypes } = await this.getEffectiveReasoningConfig(providerId) + + const modelMap = new Map() + for (const m of registryModels) { + modelMap.set(m.id, m) + } + const overrideMap = new Map() + for (const pm of providerModels) { + if (pm.providerId === providerId) { + overrideMap.set(pm.modelId, pm) + } + } + + const results: Model[] = [] + const seen = new Set() + + for (const raw of rawModels) { + if (!raw.modelId || seen.has(raw.modelId)) continue + seen.add(raw.modelId) + + const presetModel = modelMap.get(raw.modelId) ?? null + const registryOverride = overrideMap.get(raw.modelId) ?? 
null + + const userRow = { + providerId, + modelId: raw.modelId, + presetModelId: presetModel ? presetModel.id : null, + name: raw.name ?? null, + group: raw.group ?? null, + description: raw.description ?? null, + endpointTypes: raw.endpointTypes ?? null + } + + try { + if (presetModel) { + results.push( + mergeModelConfig( + userRow, + registryOverride, + presetModel, + providerId, + reasoningFormatTypes, + defaultChatEndpoint + ) + ) + } else { + results.push(mergeModelConfig({ ...userRow, presetModelId: null }, null, null, providerId)) + } + } catch (error) { + logger.warn('Failed to resolve model', { providerId, modelId: raw.modelId, error }) + } + } + + return results + } + + clearCache(): void { + this.registryModels = null + this.registryProviderModels = null + this.registryProviders = null + } +} diff --git a/src/main/data/services/ProviderService.ts b/src/main/data/services/ProviderService.ts new file mode 100644 index 00000000000..1aaf9bfafff --- /dev/null +++ b/src/main/data/services/ProviderService.ts @@ -0,0 +1,362 @@ +/** + * Provider Service - handles provider CRUD operations + * + * Provides business logic for: + * - Provider CRUD operations + * - Row to Provider conversion + */ + +import type { NewUserProvider, UserProvider } from '@data/db/schemas/userProvider' +import { userProviderTable } from '@data/db/schemas/userProvider' +import { loggerService } from '@logger' +import { application } from '@main/core/application' +import { DataApiErrorFactory } from '@shared/data/api' +import type { CreateProviderDto, ListProvidersQuery, UpdateProviderDto } from '@shared/data/api/schemas/providers' +import type { + ApiKeyEntry, + AuthConfig, + AuthType, + Provider, + ProviderSettings, + RuntimeApiFeatures +} from '@shared/data/types/provider' +import { DEFAULT_API_FEATURES, DEFAULT_PROVIDER_SETTINGS } from '@shared/data/types/provider' +import { eq } from 'drizzle-orm' + +const logger = loggerService.withContext('DataApi:ProviderService') + +/** + * 
Convert database row to Provider entity
 *
 * - API key entries are returned without their `key` field, so secrets never
 *   leave this service through a `Provider` object.
 * - `authType` falls back to `'api-key'` when the row has no auth config type.
 * - `apiFeatures` / `settings` are the defaults overlaid with the stored values.
 * - `isEnabled` defaults to `true` when the column is null.
 *
 * @param row - A `user_provider` row as selected from the database.
 * @returns The runtime `Provider` representation of that row.
 */
function rowToRuntimeProvider(row: UserProvider): Provider {
  // Process API keys (strip actual key values for security)
  // oxlint-disable-next-line no-unused-vars
  const apiKeys = (row.apiKeys ?? []).map(({ key: _key, ...rest }) => rest)

  // Determine auth type
  let authType: AuthType = 'api-key'
  if (row.authConfig?.type) {
    authType = row.authConfig.type
  }

  // Merge API features (stored values win over defaults; spreading null is a no-op)
  const apiFeatures: RuntimeApiFeatures = {
    ...DEFAULT_API_FEATURES,
    ...row.apiFeatures
  }

  // Merge settings (stored values win over defaults)
  const settings: ProviderSettings = {
    ...DEFAULT_PROVIDER_SETTINGS,
    ...(row.providerSettings as Partial<ProviderSettings> | null)
  }

  return {
    id: row.providerId,
    presetProviderId: row.presetProviderId ?? undefined,
    name: row.name,
    endpointConfigs: row.endpointConfigs ?? undefined,
    defaultChatEndpoint: row.defaultChatEndpoint ?? undefined,
    apiKeys,
    authType,
    apiFeatures,
    settings,
    websites: row.websites ?? undefined,
    isEnabled: row.isEnabled ?? true
  }
}

/**
 * Singleton service wrapping CRUD access to the `user_provider` table.
 * Obtain the shared instance through `ProviderService.getInstance()`.
 */
export class ProviderService {
  private static instance: ProviderService

  private constructor() {}

  /**
   * Return the process-wide instance, creating it on first use.
   */
  public static getInstance(): ProviderService {
    if (!ProviderService.instance) {
      ProviderService.instance = new ProviderService()
    }
    return ProviderService.instance
  }

  /**
   * List providers with optional filters
   *
   * @param query - When `query.enabled` is defined, only rows whose
   *   `isEnabled` column equals it are returned.
   * @returns Providers ordered by `sortOrder`.
   */
  async list(query: ListProvidersQuery): Promise<Provider[]> {
    const db = application.get('DbService').getDb()

    let rows: UserProvider[]

    if (query.enabled !== undefined) {
      rows = await db
        .select()
        .from(userProviderTable)
        .where(eq(userProviderTable.isEnabled, query.enabled))
        .orderBy(userProviderTable.sortOrder)
    } else {
      rows = await db.select().from(userProviderTable).orderBy(userProviderTable.sortOrder)
    }

    return rows.map(rowToRuntimeProvider)
  }

  /**
   * Get a provider by its provider ID
   *
   * @throws The error built by `DataApiErrorFactory.notFound` when no row matches.
   */
  async getByProviderId(providerId: string): Promise<Provider> {
    const db = application.get('DbService').getDb()

    const [row] = await db.select().from(userProviderTable).where(eq(userProviderTable.providerId, providerId)).limit(1)

    if (!row) {
      throw DataApiErrorFactory.notFound('Provider', providerId)
    }

    return rowToRuntimeProvider(row)
  }

  /**
   * Create a new provider
   *
   * Optional DTO fields that are absent are stored as `null`
   * (`apiKeys` as an empty array).
   *
   * @returns The inserted row converted to a `Provider`.
   */
  async create(dto: CreateProviderDto): Promise<Provider> {
    const db = application.get('DbService').getDb()

    const values: NewUserProvider = {
      providerId: dto.providerId,
      presetProviderId: dto.presetProviderId ?? null,
      name: dto.name,
      endpointConfigs: dto.endpointConfigs ?? null,
      defaultChatEndpoint: dto.defaultChatEndpoint ?? null,
      apiKeys: dto.apiKeys ?? [],
      authConfig: dto.authConfig ?? null,
      apiFeatures: dto.apiFeatures ?? null,
      providerSettings: dto.providerSettings ?? null
    }

    const [row] = await db.insert(userProviderTable).values(values).returning()

    logger.info('Created provider', { providerId: dto.providerId })

    return rowToRuntimeProvider(row)
  }

  /**
   * Update an existing provider
   *
   * Only DTO fields that are not `undefined` are written. A DTO that carries
   * none of the updatable fields is treated as a no-op and the current
   * provider is returned unchanged.
   *
   * @throws The not-found error from `getByProviderId` when the provider does not exist.
   */
  async update(providerId: string, dto: UpdateProviderDto): Promise<Provider> {
    const db = application.get('DbService').getDb()

    // Verify provider exists (throws otherwise) and keep the current state for the no-op path
    const existing = await this.getByProviderId(providerId)

    // Build update object
    const updates: Partial<NewUserProvider> = {}

    if (dto.name !== undefined) updates.name = dto.name
    if (dto.endpointConfigs !== undefined) updates.endpointConfigs = dto.endpointConfigs
    if (dto.defaultChatEndpoint !== undefined) updates.defaultChatEndpoint = dto.defaultChatEndpoint
    if (dto.apiKeys !== undefined) updates.apiKeys = dto.apiKeys
    if (dto.authConfig !== undefined) updates.authConfig = dto.authConfig
    if (dto.apiFeatures !== undefined) updates.apiFeatures = dto.apiFeatures
    if (dto.providerSettings !== undefined) updates.providerSettings = dto.providerSettings
    if (dto.isEnabled !== undefined) updates.isEnabled = dto.isEnabled
    if (dto.sortOrder !== undefined) updates.sortOrder = dto.sortOrder

    // drizzle's `.set()` throws "No values to set" for an empty object, so
    // short-circuit instead of issuing an UPDATE with nothing to change.
    if (Object.keys(updates).length === 0) {
      return existing
    }

    const [row] = await db
      .update(userProviderTable)
      .set(updates)
      .where(eq(userProviderTable.providerId, providerId))
      .returning()

    logger.info('Updated provider', { providerId, changes: Object.keys(dto) })

    return rowToRuntimeProvider(row)
  }

  /**
   * Batch upsert providers (used by RegistryService for preset providers)
   * Inserts new providers, updates only preset fields on existing ones.
   * Does NOT overwrite user-customized fields (apiKeys, isEnabled, sortOrder, authConfig).
*/
  async batchUpsert(providers: NewUserProvider[]): Promise<void> {
    // Nothing to write: avoid touching the database at all
    if (providers.length === 0) return

    const db = application.get('DbService').getDb()

    // One INSERT ... ON CONFLICT per provider, executed sequentially
    for (const provider of providers) {
      await db
        .insert(userProviderTable)
        .values(provider)
        .onConflictDoUpdate({
          target: [userProviderTable.providerId],
          // Columns refreshed when the provider row already exists; every
          // column not listed here keeps its stored value.
          set: {
            presetProviderId: provider.presetProviderId,
            name: provider.name,
            endpointConfigs: provider.endpointConfigs,
            defaultChatEndpoint: provider.defaultChatEndpoint,
            apiFeatures: provider.apiFeatures,
            providerSettings: provider.providerSettings,
            websites: provider.websites
          }
        })
    }

    logger.info('Batch upserted providers', { count: providers.length })
  }

  /**
   * Get a rotated API key for a provider (round-robin across enabled keys).
   * Returns empty string for providers that don't have keys.
   *
   * The id of the key handed out last is remembered in `CacheService` under
   * `provider:<providerId>:last_used_key_id`; each call returns the enabled
   * key that follows it and stores the new id.
   *
   * @param providerId - Provider whose keys are rotated.
   * @returns The selected key value, or `''` when no key is enabled.
   * @throws The error built by `DataApiErrorFactory.notFound` when the provider row is missing.
   */
  async getRotatedApiKey(providerId: string): Promise<string> {
    const db = application.get('DbService').getDb()

    const [row] = await db.select().from(userProviderTable).where(eq(userProviderTable.providerId, providerId)).limit(1)

    if (!row) {
      throw DataApiErrorFactory.notFound('Provider', providerId)
    }

    const enabledKeys = (row.apiKeys ?? []).filter((k) => k.isEnabled)

    if (enabledKeys.length === 0) {
      return ''
    }

    // A single key needs no rotation state
    if (enabledKeys.length === 1) {
      return enabledKeys[0].key
    }

    // Round-robin using CacheService
    const cache = application.get('CacheService')
    const cacheKey = `provider:${providerId}:last_used_key_id`
    const lastUsedKeyId = cache.get(cacheKey)

    // First request for this provider: start the rotation at the first key
    if (!lastUsedKeyId) {
      cache.set(cacheKey, enabledKeys[0].id)
      return enabledKeys[0].key
    }

    // If the remembered key is no longer among the enabled keys, findIndex
    // yields -1 and (-1 + 1) % length restarts the rotation at index 0.
    const currentIndex = enabledKeys.findIndex((k) => k.id === lastUsedKeyId)
    const nextIndex = (currentIndex + 1) % enabledKeys.length
    const nextKey = enabledKeys[nextIndex]
    cache.set(cacheKey, nextKey.id)

    return nextKey.key
  }

  /**
   * Get all enabled API key values for a provider.
   * Used by health check to test each key individually.
*/
  async getEnabledApiKeys(providerId: string): Promise<ApiKeyEntry[]> {
    const db = application.get('DbService').getDb()

    const [row] = await db.select().from(userProviderTable).where(eq(userProviderTable.providerId, providerId)).limit(1)

    if (!row) {
      throw DataApiErrorFactory.notFound('Provider', providerId)
    }

    // Full stored entries are returned (not stripped of `key`), filtered to enabled ones
    return (row.apiKeys ?? []).filter((k) => k.isEnabled)
  }

  /**
   * Get full auth config (includes sensitive credentials).
   *
   * @returns The stored auth config, or `null` when the provider has none.
   * @throws The error built by `DataApiErrorFactory.notFound` when the provider row is missing.
   */
  async getAuthConfig(providerId: string): Promise<AuthConfig | null> {
    const db = application.get('DbService').getDb()
    const [row] = await db.select().from(userProviderTable).where(eq(userProviderTable.providerId, providerId)).limit(1)

    if (!row) {
      throw DataApiErrorFactory.notFound('Provider', providerId)
    }

    return row.authConfig ?? null
  }

  /**
   * Add an API key to a provider. Skips if the key value already exists.
   * Returns the updated Provider.
   *
   * The new entry gets a fresh `crypto.randomUUID()` id and is enabled.
   * The stored key list is read, extended, and written back as a whole.
   *
   * @param providerId - Provider receiving the key.
   * @param key - Secret key value; compared by exact string equality for the duplicate check.
   * @param label - Optional label stored with the entry.
   * @throws The error built by `DataApiErrorFactory.notFound` when the provider row is missing.
   */
  async addApiKey(providerId: string, key: string, label?: string): Promise<Provider> {
    const db = application.get('DbService').getDb()

    const [row] = await db.select().from(userProviderTable).where(eq(userProviderTable.providerId, providerId)).limit(1)

    if (!row) {
      throw DataApiErrorFactory.notFound('Provider', providerId)
    }

    const existingKeys = row.apiKeys ?? []

    // Skip if key value already exists
    if (existingKeys.some((k) => k.key === key)) {
      logger.info('API key already exists, skipping', { providerId })
      return rowToRuntimeProvider(row)
    }

    const newEntry = {
      id: crypto.randomUUID(),
      key,
      label,
      isEnabled: true
    }

    const updatedKeys = [...existingKeys, newEntry]

    const [updated] = await db
      .update(userProviderTable)
      .set({ apiKeys: updatedKeys })
      .where(eq(userProviderTable.providerId, providerId))
      .returning()

    logger.info('Added API key to provider', { providerId })

    return rowToRuntimeProvider(updated)
  }

  /**
   * Delete an API key by key ID and return updated provider.
+ */ + async deleteApiKey(providerId: string, keyId: string): Promise { + const db = application.get('DbService').getDb() + const [row] = await db.select().from(userProviderTable).where(eq(userProviderTable.providerId, providerId)).limit(1) + + if (!row) { + throw DataApiErrorFactory.notFound('Provider', providerId) + } + + const existingKeys = row.apiKeys ?? [] + const updatedKeys = existingKeys.filter((entry) => entry.id !== keyId) + + if (updatedKeys.length === existingKeys.length) { + throw DataApiErrorFactory.notFound('API key', keyId) + } + + const [updated] = await db + .update(userProviderTable) + .set({ apiKeys: updatedKeys }) + .where(eq(userProviderTable.providerId, providerId)) + .returning() + + logger.info('Deleted API key from provider', { providerId, keyId }) + + return rowToRuntimeProvider(updated) + } + + /** + * Delete a provider + */ + async delete(providerId: string): Promise { + const db = application.get('DbService').getDb() + + // Verify provider exists + await this.getByProviderId(providerId) + + await db.delete(userProviderTable).where(eq(userProviderTable.providerId, providerId)) + + logger.info('Deleted provider', { providerId }) + } +} + +export const providerService = ProviderService.getInstance() diff --git a/src/renderer/src/aiCore/services/listModels.ts b/src/renderer/src/aiCore/services/listModels.ts index 099ef9f0e09..a0a1999375e 100644 --- a/src/renderer/src/aiCore/services/listModels.ts +++ b/src/renderer/src/aiCore/services/listModels.ts @@ -200,7 +200,7 @@ const githubFetcher: ModelFetcher = { abortSignal: signal }).catch(() => ({ data: [] as { id: string; owned_by?: string }[] })) ]) - const catalogModels = catalogResponse.map((m) => + const registryModels = catalogResponse.map((m) => toModel(m.id, provider, { name: m.name || m.id, description: pickPreferredString([m.summary, m.description]), @@ -208,7 +208,7 @@ const githubFetcher: ModelFetcher = { }) ) const v1Models = v1Response.data.map((m) => toModel(m.id, provider, { 
owned_by: m.owned_by })) - return dedup([...catalogModels, ...v1Models], (m) => m.id) + return dedup([...registryModels, ...v1Models], (m) => m.id) } } diff --git a/src/renderer/src/components/Popups/ApiKeyListPopup/list.tsx b/src/renderer/src/components/Popups/ApiKeyListPopup/list.tsx index 0c31f81b4cf..e4e15771e7d 100644 --- a/src/renderer/src/components/Popups/ApiKeyListPopup/list.tsx +++ b/src/renderer/src/components/Popups/ApiKeyListPopup/list.tsx @@ -1,19 +1,21 @@ import { Button, Flex, Tooltip } from '@cherrystudio/ui' +import { useModels } from '@data/hooks/useModels' +import { useProvider, useProviderApiKeys, useProviderMutations } from '@data/hooks/useProviders' import { DeleteIcon } from '@renderer/components/Icons' import { StreamlineGoodHealthAndWellBeing } from '@renderer/components/Icons/SVGIcon' import Scrollbar from '@renderer/components/Scrollbar' import { usePreprocessProvider } from '@renderer/hooks/usePreprocess' -import { useProvider } from '@renderer/hooks/useProvider' import { useWebSearchProvider } from '@renderer/hooks/useWebSearchProviders' import { SettingHelpText } from '@renderer/pages/settings' import { isProviderSupportAuth } from '@renderer/services/ProviderService' import type { PreprocessProviderId, WebSearchProviderId } from '@renderer/types' import type { ApiKeyWithStatus } from '@renderer/types/healthCheck' import { HealthStatus } from '@renderer/types/healthCheck' +import { toV1ProviderShim } from '@renderer/utils/v1ProviderShim' import { Card, List, Popconfirm, Space, Typography } from 'antd' import { Plus } from 'lucide-react' import type { FC } from 'react' -import { useState } from 'react' +import { useCallback, useMemo, useState } from 'react' import { useTranslation } from 'react-i18next' import styled from 'styled-components' @@ -191,9 +193,37 @@ type DocPreprocessApiKeyListProps = SpecificApiKeyListProps & { } export const LlmApiKeyList: FC = ({ providerId, showHealthCheck = true }) => { - const { provider, 
updateProvider } = useProvider(providerId) + // TODO(v2-cleanup): Remove v1 shim after useApiKeys/checkApi migrate to v2 + const { provider } = useProvider(providerId) + const { data: apiKeysData } = useProviderApiKeys(providerId) + const { models } = useModels({ providerId }) + const { updateApiKeys } = useProviderMutations(providerId) + + const joinedApiKey = useMemo(() => apiKeysData?.keys?.map((k) => k.key).join(',') ?? '', [apiKeysData]) + + const v1Provider = useMemo(() => { + if (!provider) return null + return toV1ProviderShim(provider, { models, apiKey: joinedApiKey }) + }, [provider, models, joinedApiKey]) + + const shimUpdateProvider = useCallback( + (updates: Partial<{ apiKey: string }>) => { + if (updates.apiKey === undefined) return + const keys = updates.apiKey + .split(',') + .map((k) => k.trim()) + .filter(Boolean) + const apiKeys = keys.map((key) => ({ id: crypto.randomUUID(), key, isEnabled: true })) + void updateApiKeys(apiKeys) + }, + [updateApiKeys] + ) - return + if (!v1Provider) return null + + return ( + + ) } export const WebSearchApiKeyList: FC = ({ providerId, showHealthCheck = true }) => { diff --git a/src/renderer/src/data/__tests__/accessors.v2.test.ts b/src/renderer/src/data/__tests__/accessors.v2.test.ts new file mode 100644 index 00000000000..ae56b7d18dd --- /dev/null +++ b/src/renderer/src/data/__tests__/accessors.v2.test.ts @@ -0,0 +1,129 @@ +import { DataApiError, ErrorCode } from '@shared/data/api/apiErrors' +import { mockDataApiService } from '@test-mocks/renderer/DataApiService' +import { beforeEach, describe, expect, it, vi } from 'vitest' + +import { + getEnabledProvidersAsync, + getModelAsync, + getModelByUniqueIdAsync, + getProviderByIdAsync, + getProvidersAsync +} from '../accessors.v2' + +const mockProviders = [ + { id: 'openai', name: 'OpenAI', isEnabled: true }, + { id: 'anthropic', name: 'Anthropic', isEnabled: true } +] + +const mockModel = { + id: 'openai::gpt-4o', + providerId: 'openai', + name: 'GPT-4o', + 
capabilities: [], + supportsStreaming: true, + isEnabled: true, + isHidden: false +} + +describe('accessors.v2', () => { + beforeEach(() => { + vi.clearAllMocks() + }) + + describe('getProvidersAsync', () => { + it('should call dataApiService.get with /providers', async () => { + mockDataApiService.get.mockResolvedValueOnce(mockProviders) + + const result = await getProvidersAsync() + + expect(mockDataApiService.get).toHaveBeenCalledWith('/providers') + expect(result).toEqual(mockProviders) + }) + }) + + describe('getEnabledProvidersAsync', () => { + it('should call dataApiService.get with enabled query', async () => { + mockDataApiService.get.mockResolvedValueOnce(mockProviders) + + const result = await getEnabledProvidersAsync() + + expect(mockDataApiService.get).toHaveBeenCalledWith('/providers', { query: { enabled: true } }) + expect(result).toEqual(mockProviders) + }) + }) + + describe('getProviderByIdAsync', () => { + it('should return the provider when found', async () => { + mockDataApiService.get.mockResolvedValueOnce(mockProviders[0]) + + const result = await getProviderByIdAsync('openai') + + expect(mockDataApiService.get).toHaveBeenCalledWith('/providers/openai') + expect(result).toEqual(mockProviders[0]) + }) + + it('should return undefined on NOT_FOUND error', async () => { + mockDataApiService.get.mockRejectedValueOnce(new DataApiError(ErrorCode.NOT_FOUND, 'Provider not found', 404)) + + const result = await getProviderByIdAsync('nonexistent') + expect(result).toBeUndefined() + }) + + it('should rethrow non-404 errors', async () => { + const serverError = new DataApiError(ErrorCode.INTERNAL_SERVER_ERROR, 'Server error', 500) + mockDataApiService.get.mockRejectedValueOnce(serverError) + + await expect(getProviderByIdAsync('openai')).rejects.toThrow(serverError) + }) + + it('should rethrow non-DataApiError errors', async () => { + const networkError = new Error('Network error') + mockDataApiService.get.mockRejectedValueOnce(networkError) + + await 
expect(getProviderByIdAsync('openai')).rejects.toThrow(networkError) + }) + }) + + describe('getModelAsync', () => { + it('should return the model when found', async () => { + mockDataApiService.get.mockResolvedValueOnce(mockModel) + + const result = await getModelAsync('openai', 'gpt-4o') + + expect(mockDataApiService.get).toHaveBeenCalledWith('/models/openai/gpt-4o') + expect(result).toEqual(mockModel) + }) + + it('should return undefined on NOT_FOUND error', async () => { + mockDataApiService.get.mockRejectedValueOnce(new DataApiError(ErrorCode.NOT_FOUND, 'Model not found', 404)) + + const result = await getModelAsync('openai', 'nonexistent') + expect(result).toBeUndefined() + }) + + it('should rethrow non-404 errors', async () => { + const serverError = new DataApiError(ErrorCode.INTERNAL_SERVER_ERROR, 'Server error', 500) + mockDataApiService.get.mockRejectedValueOnce(serverError) + + await expect(getModelAsync('openai', 'gpt-4o')).rejects.toThrow(serverError) + }) + }) + + describe('getModelByUniqueIdAsync', () => { + it('should parse UniqueModelId and call getModelAsync', async () => { + mockDataApiService.get.mockResolvedValueOnce(mockModel) + + const result = await getModelByUniqueIdAsync('openai::gpt-4o' as any) + + expect(mockDataApiService.get).toHaveBeenCalledWith('/models/openai/gpt-4o') + expect(result).toEqual(mockModel) + }) + + it('should return undefined when model not found', async () => { + mockDataApiService.get.mockRejectedValueOnce(new DataApiError(ErrorCode.NOT_FOUND, 'Model not found', 404)) + + const result = await getModelByUniqueIdAsync('openai::nonexistent' as any) + expect(result).toBeUndefined() + }) + }) +}) diff --git a/src/renderer/src/data/accessors.v2.ts b/src/renderer/src/data/accessors.v2.ts new file mode 100644 index 00000000000..777358d5e94 --- /dev/null +++ b/src/renderer/src/data/accessors.v2.ts @@ -0,0 +1,40 @@ +import { dataApiService } from '@data/DataApiService' +import { DataApiError, ErrorCode } from 
/**
 * Promise-based (non-hook) accessors for provider/model data served by the
 * v2 DataApi. Single-entity lookups resolve to `undefined` when the API
 * rejects with NOT_FOUND; every other rejection is rethrown untouched.
 */

/** True only for a `DataApiError` whose code is `ErrorCode.NOT_FOUND`. */
function isNotFoundError(e: unknown): boolean {
  return e instanceof DataApiError && e.code === ErrorCode.NOT_FOUND
}

/**
 * Runs `request` and converts a NOT_FOUND failure into `undefined`.
 * Shared by the single-entity getters so their error handling cannot drift.
 * `request` is invoked inside the `try`, so a synchronous throw is handled
 * the same way as a rejected promise.
 */
async function undefinedIfNotFound<T>(request: () => Promise<T>): Promise<T | undefined> {
  try {
    return await request()
  } catch (e) {
    if (isNotFoundError(e)) return undefined
    throw e
  }
}

/** Fetches the provider list from `/providers`. */
export async function getProvidersAsync(): Promise<Provider[]> {
  return dataApiService.get('/providers' as const)
}

/** Fetches `/providers` with the `enabled: true` query. */
export async function getEnabledProvidersAsync(): Promise<Provider[]> {
  return dataApiService.get('/providers' as const, { query: { enabled: true } })
}

/**
 * Fetches one provider from `/providers/:id`.
 * @returns the provider, or `undefined` when the API reports NOT_FOUND.
 * @throws any other error raised by the request.
 */
export async function getProviderByIdAsync(id: string): Promise<Provider | undefined> {
  return undefinedIfNotFound(() => dataApiService.get(`/providers/${id}` as const))
}

/**
 * Fetches one model from `/models/:providerId/:modelId`.
 * @returns the model, or `undefined` when the API reports NOT_FOUND.
 * @throws any other error raised by the request.
 */
export async function getModelAsync(providerId: string, modelId: string): Promise<Model | undefined> {
  // NOTE(review): the `as any` cast is kept from the original; presumably the
  // typed path union cannot express this two-segment template — confirm.
  return undefinedIfNotFound(() => dataApiService.get(`/models/${providerId}/${modelId}` as any))
}

/** Splits a `UniqueModelId` with `parseUniqueModelId` and delegates to `getModelAsync`. */
export async function getModelByUniqueIdAsync(uniqueModelId: UniqueModelId): Promise<Model | undefined> {
  const { providerId, modelId } = parseUniqueModelId(uniqueModelId)
  return getModelAsync(providerId, modelId)
}
mockModel1: any = { + id: 'openai::gpt-4o', + providerId: 'openai', + modelId: 'gpt-4o', + name: 'GPT-4o', + isEnabled: true +} + +const mockModel2: any = { + id: 'anthropic::claude-3-opus', + providerId: 'anthropic', + modelId: 'claude-3-opus', + name: 'Claude 3 Opus', + isEnabled: true +} + +const mockModelList = [mockModel1, mockModel2] + +describe('useModels', () => { + beforeEach(() => { + vi.clearAllMocks() + }) + + it('should return models array from useQuery', () => { + mockUseQuery.mockImplementation(() => ({ + data: mockModelList, + isLoading: false, + isRefreshing: false, + error: undefined, + refetch: vi.fn(), + mutate: vi.fn() + })) + + const { result } = renderHook(() => useModels()) + + expect(result.current.models).toEqual(mockModelList) + expect(result.current.isLoading).toBe(false) + }) + + it('should return empty array when data is undefined', () => { + mockUseQuery.mockImplementation(() => ({ + data: undefined, + isLoading: true, + isRefreshing: false, + error: undefined, + refetch: vi.fn(), + mutate: vi.fn() + })) + + const { result } = renderHook(() => useModels()) + + expect(result.current.models).toEqual([]) + expect(result.current.isLoading).toBe(true) + }) + + it('should call useQuery with /models path and no query when no args', () => { + renderHook(() => useModels()) + + expect(mockUseQuery).toHaveBeenCalledWith('/models', {}) + }) + + it('should pass providerId as query parameter', () => { + renderHook(() => useModels({ providerId: 'openai' })) + + expect(mockUseQuery).toHaveBeenCalledWith('/models', { query: { providerId: 'openai' } }) + }) + + it('should pass enabled option separately from query params', () => { + renderHook(() => useModels({ enabled: false })) + + expect(mockUseQuery).toHaveBeenCalledWith('/models', { enabled: false }) + }) + + it('should pass both providerId and enabled', () => { + renderHook(() => useModels({ providerId: 'openai', enabled: true })) + + expect(mockUseQuery).toHaveBeenCalledWith('/models', { + query: 
{ providerId: 'openai' }, + enabled: true + }) + }) + + it('should expose refetch from mutate', () => { + const mockMutate = vi.fn() + mockUseQuery.mockImplementation(() => ({ + data: mockModelList, + isLoading: false, + isRefreshing: false, + error: undefined, + refetch: vi.fn(), + mutate: mockMutate + })) + + const { result } = renderHook(() => useModels()) + + expect(result.current.refetch).toBe(mockMutate) + }) +}) + +describe('useModelMutations', () => { + beforeEach(() => { + vi.clearAllMocks() + }) + + it('should set up POST mutation for /models', () => { + renderHook(() => useModelMutations()) + + expect(mockUseMutation).toHaveBeenCalledWith('POST', '/models', { + refresh: ['/models'] + }) + }) + + it('should call createTrigger when createModel is invoked', async () => { + const mockTrigger = vi.fn().mockResolvedValue({ id: 'new-model' }) + mockUseMutation.mockImplementation(() => ({ + trigger: mockTrigger, + isLoading: false, + error: undefined + })) + + const { result } = renderHook(() => useModelMutations()) + + const dto = { providerId: 'openai', modelId: 'gpt-5' } + await act(async () => { + await result.current.createModel(dto) + }) + + expect(mockTrigger).toHaveBeenCalledWith({ body: dto }) + }) + + it('should delete model via dataApiService and invalidate cache', async () => { + const mockInvalidate = vi.fn().mockResolvedValue(undefined) + mockUseInvalidateCache.mockImplementation(() => mockInvalidate) + mockDataApiService.delete.mockResolvedValue({ deleted: true }) + + const { result } = renderHook(() => useModelMutations()) + + await act(async () => { + await result.current.deleteModel('openai', 'gpt-4o') + }) + + expect(mockDataApiService.delete).toHaveBeenCalledWith('/models/openai/gpt-4o') + expect(mockInvalidate).toHaveBeenCalledWith('/models') + }) + + it('should patch model via dataApiService and invalidate cache', async () => { + const mockInvalidate = vi.fn().mockResolvedValue(undefined) + mockUseInvalidateCache.mockImplementation(() => 
mockInvalidate) + mockDataApiService.patch.mockResolvedValue({}) + + const { result } = renderHook(() => useModelMutations()) + + await act(async () => { + await result.current.patchModel('openai', 'gpt-4o', { isEnabled: false }) + }) + + expect(mockDataApiService.patch).toHaveBeenCalledWith('/models/openai/gpt-4o', { + body: { isEnabled: false } + }) + expect(mockInvalidate).toHaveBeenCalledWith('/models') + }) + + it('should encode model ID in path correctly', async () => { + const mockInvalidate = vi.fn().mockResolvedValue(undefined) + mockUseInvalidateCache.mockImplementation(() => mockInvalidate) + mockDataApiService.delete.mockResolvedValue({ deleted: true }) + + const { result } = renderHook(() => useModelMutations()) + + await act(async () => { + await result.current.deleteModel('anthropic', 'claude-3-opus') + }) + + expect(mockDataApiService.delete).toHaveBeenCalledWith('/models/anthropic/claude-3-opus') + }) +}) diff --git a/src/renderer/src/data/hooks/__tests__/useProviders.test.ts b/src/renderer/src/data/hooks/__tests__/useProviders.test.ts new file mode 100644 index 00000000000..1cd348180f5 --- /dev/null +++ b/src/renderer/src/data/hooks/__tests__/useProviders.test.ts @@ -0,0 +1,388 @@ +import { mockDataApiService } from '@test-mocks/renderer/DataApiService' +import { mockUseInvalidateCache, mockUseMutation, mockUseQuery } from '@test-mocks/renderer/useDataApi' +import { act, renderHook } from '@testing-library/react' +import { beforeEach, describe, expect, it, vi } from 'vitest' + +import { + useProvider, + useProviderApiKeys, + useProviderAuthConfig, + useProviderMutations, + useProviderRegistryModels, + useProviders +} from '../useProviders' + +// ─── Mock data ──────────────────────────────────────────────────────── +const mockProvider1: any = { + id: 'openai', + name: 'OpenAI', + isEnabled: true, + sortOrder: 0 +} + +const mockProvider2: any = { + id: 'anthropic', + name: 'Anthropic', + isEnabled: true, + sortOrder: 1 +} + +const mockProviderList 
= [mockProvider1, mockProvider2] + +describe('useProviders', () => { + beforeEach(() => { + vi.clearAllMocks() + }) + + it('should return providers array from useQuery', () => { + mockUseQuery.mockImplementation(() => ({ + data: mockProviderList, + isLoading: false, + isRefreshing: false, + error: undefined, + refetch: vi.fn(), + mutate: vi.fn() + })) + + const { result } = renderHook(() => useProviders()) + + expect(result.current.providers).toEqual(mockProviderList) + expect(result.current.isLoading).toBe(false) + }) + + it('should return empty array when data is undefined', () => { + mockUseQuery.mockImplementation(() => ({ + data: undefined, + isLoading: true, + isRefreshing: false, + error: undefined, + refetch: vi.fn(), + mutate: vi.fn() + })) + + const { result } = renderHook(() => useProviders()) + + expect(result.current.providers).toEqual([]) + expect(result.current.isLoading).toBe(true) + }) + + it('should call useQuery with /providers path', () => { + renderHook(() => useProviders()) + + expect(mockUseQuery).toHaveBeenCalledWith('/providers', undefined) + }) + + it('should pass enabled query option when provided', () => { + renderHook(() => useProviders({ enabled: false })) + + expect(mockUseQuery).toHaveBeenCalledWith('/providers', { query: { enabled: false } }) + }) + + it('should call useMutation for POST /providers', () => { + renderHook(() => useProviders()) + + expect(mockUseMutation).toHaveBeenCalledWith('POST', '/providers', { + refresh: ['/providers'] + }) + }) + + it('should call createTrigger when addProvider is invoked', async () => { + const mockTrigger = vi.fn().mockResolvedValue({ id: 'new-provider' }) + mockUseMutation.mockImplementation(() => ({ + trigger: mockTrigger, + isLoading: false, + error: undefined + })) + + const { result } = renderHook(() => useProviders()) + + const dto = { providerId: 'new-provider', name: 'New Provider' } + await act(async () => { + await result.current.addProvider(dto) + }) + + 
expect(mockTrigger).toHaveBeenCalledWith({ body: dto }) + }) + + it('should expose refetch from mutate', () => { + const mockMutate = vi.fn() + mockUseQuery.mockImplementation(() => ({ + data: mockProviderList, + isLoading: false, + isRefreshing: false, + error: undefined, + refetch: vi.fn(), + mutate: mockMutate + })) + + const { result } = renderHook(() => useProviders()) + + expect(result.current.refetch).toBe(mockMutate) + }) + + it('should perform optimistic reorder and patch each provider', async () => { + const mockMutate = vi.fn().mockResolvedValue(undefined) + mockUseQuery.mockImplementation(() => ({ + data: mockProviderList, + isLoading: false, + isRefreshing: false, + error: undefined, + refetch: vi.fn(), + mutate: mockMutate + })) + mockDataApiService.patch.mockResolvedValue({}) + + const { result } = renderHook(() => useProviders()) + + const reordered = [mockProvider2, mockProvider1] + await act(async () => { + await result.current.reorderProviders(reordered) + }) + + // Optimistic update + expect(mockMutate).toHaveBeenCalledWith(reordered, false) + + // Patch calls with sortOrder + expect(mockDataApiService.patch).toHaveBeenCalledWith('/providers/anthropic', { body: { sortOrder: 0 } }) + expect(mockDataApiService.patch).toHaveBeenCalledWith('/providers/openai', { body: { sortOrder: 1 } }) + + // Revalidate after success + expect(mockMutate).toHaveBeenCalledWith() + }) + + it('should revalidate on reorder failure', async () => { + const mockMutate = vi.fn().mockResolvedValue(undefined) + mockUseQuery.mockImplementation(() => ({ + data: mockProviderList, + isLoading: false, + isRefreshing: false, + error: undefined, + refetch: vi.fn(), + mutate: mockMutate + })) + mockDataApiService.patch.mockRejectedValue(new Error('Network error')) + + const { result } = renderHook(() => useProviders()) + + await act(async () => { + await result.current.reorderProviders([mockProvider2, mockProvider1]) + }) + + // Should still revalidate on error (rollback) + const 
revalidateCalls = mockMutate.mock.calls.filter((call: any[]) => call.length === 0) + expect(revalidateCalls.length).toBeGreaterThanOrEqual(1) + }) +}) + +describe('useProvider', () => { + beforeEach(() => { + vi.clearAllMocks() + }) + + it('should query single provider by ID', () => { + mockUseQuery.mockImplementation((path: string) => ({ + data: path.includes('openai') ? mockProvider1 : undefined, + isLoading: false, + isRefreshing: false, + error: undefined, + refetch: vi.fn(), + mutate: vi.fn() + })) + + const { result } = renderHook(() => useProvider('openai')) + + expect(result.current.provider).toEqual(mockProvider1) + expect(result.current.isLoading).toBe(false) + expect(mockUseQuery).toHaveBeenCalledWith('/providers/openai') + }) + + it('should include mutation functions', () => { + const { result } = renderHook(() => useProvider('openai')) + + expect(result.current.updateProvider).toBeDefined() + expect(result.current.deleteProvider).toBeDefined() + expect(result.current.updateAuthConfig).toBeDefined() + expect(result.current.addApiKey).toBeDefined() + expect(result.current.deleteApiKey).toBeDefined() + }) +}) + +describe('useProviderMutations', () => { + beforeEach(() => { + vi.clearAllMocks() + }) + + it('should set up PATCH and DELETE mutations with correct paths', () => { + renderHook(() => useProviderMutations('openai')) + + const patchCall = mockUseMutation.mock.calls.find((c: any[]) => c[0] === 'PATCH') + const deleteCall = mockUseMutation.mock.calls.find((c: any[]) => c[0] === 'DELETE') + + expect(patchCall).toBeDefined() + expect(patchCall![1]).toBe('/providers/openai') + expect(patchCall![2]).toEqual({ refresh: ['/providers'] }) + + expect(deleteCall).toBeDefined() + expect(deleteCall![1]).toBe('/providers/openai') + expect(deleteCall![2]).toEqual({ refresh: ['/providers'] }) + }) + + it('should call patchTrigger when updateProvider is invoked', async () => { + const mockTrigger = vi.fn().mockResolvedValue({}) + 
mockUseMutation.mockImplementation(() => ({ + trigger: mockTrigger, + isLoading: false, + error: undefined + })) + + const { result } = renderHook(() => useProviderMutations('openai')) + + await act(async () => { + await result.current.updateProvider({ isEnabled: false }) + }) + + expect(mockTrigger).toHaveBeenCalledWith({ body: { isEnabled: false } }) + }) + + it('should call deleteTrigger when deleteProvider is invoked', async () => { + const mockTrigger = vi.fn().mockResolvedValue(undefined) + mockUseMutation.mockImplementation(() => ({ + trigger: mockTrigger, + isLoading: false, + error: undefined + })) + + const { result } = renderHook(() => useProviderMutations('openai')) + + await act(async () => { + await result.current.deleteProvider() + }) + + expect(mockTrigger).toHaveBeenCalled() + }) + + it('should patch authConfig and invalidate auth-config cache', async () => { + const mockTrigger = vi.fn().mockResolvedValue({}) + const mockInvalidate = vi.fn().mockResolvedValue(undefined) + mockUseMutation.mockImplementation(() => ({ + trigger: mockTrigger, + isLoading: false, + error: undefined + })) + mockUseInvalidateCache.mockImplementation(() => mockInvalidate) + + const { result } = renderHook(() => useProviderMutations('openai')) + + const authConfig = { authType: 'api-key' } as any + await act(async () => { + await result.current.updateAuthConfig(authConfig) + }) + + expect(mockTrigger).toHaveBeenCalledWith({ body: { authConfig } }) + expect(mockInvalidate).toHaveBeenCalledWith('/providers/openai/auth-config') + }) + + it('should post API key and invalidate related caches', async () => { + const mockInvalidate = vi.fn().mockResolvedValue(undefined) + mockUseInvalidateCache.mockImplementation(() => mockInvalidate) + mockDataApiService.post.mockResolvedValue({}) + + const { result } = renderHook(() => useProviderMutations('openai')) + + await act(async () => { + await result.current.addApiKey('sk-test-key', 'My Key') + }) + + 
expect(mockDataApiService.post).toHaveBeenCalledWith('/providers/openai/api-keys', { + body: { key: 'sk-test-key', label: 'My Key' } + }) + expect(mockInvalidate).toHaveBeenCalledWith(['/providers/openai', '/providers/openai/api-keys', '/providers']) + }) + + it('should delete API key and invalidate related caches', async () => { + const mockInvalidate = vi.fn().mockResolvedValue(undefined) + mockUseInvalidateCache.mockImplementation(() => mockInvalidate) + mockDataApiService.delete.mockResolvedValue({}) + + const { result } = renderHook(() => useProviderMutations('openai')) + + await act(async () => { + await result.current.deleteApiKey('key-123') + }) + + expect(mockDataApiService.delete).toHaveBeenCalledWith('/providers/openai/api-keys/key-123') + expect(mockInvalidate).toHaveBeenCalledWith(['/providers/openai', '/providers/openai/api-keys', '/providers']) + }) +}) + +describe('useProviderAuthConfig', () => { + beforeEach(() => { + vi.clearAllMocks() + }) + + it('should query auth config for a provider', () => { + const mockAuthConfig = { authType: 'oauth' } as any + mockUseQuery.mockImplementation((path: string) => ({ + data: path.includes('auth-config') ? mockAuthConfig : undefined, + isLoading: false, + isRefreshing: false, + error: undefined, + refetch: vi.fn(), + mutate: vi.fn() + })) + + const { result } = renderHook(() => useProviderAuthConfig('vertexai')) + + expect(result.current.data).toEqual(mockAuthConfig) + expect(result.current.isLoading).toBe(false) + expect(mockUseQuery).toHaveBeenCalledWith('/providers/vertexai/auth-config') + }) +}) + +describe('useProviderApiKeys', () => { + beforeEach(() => { + vi.clearAllMocks() + }) + + it('should query API keys for a provider', () => { + const mockKeys = { keys: [{ id: 'k1', key: 'sk-xxx', isEnabled: true }] } + mockUseQuery.mockImplementation((path: string) => ({ + data: path.includes('api-keys') ? 
mockKeys : undefined, + isLoading: false, + isRefreshing: false, + error: undefined, + refetch: vi.fn(), + mutate: vi.fn() + })) + + const { result } = renderHook(() => useProviderApiKeys('openai')) + + expect(result.current.data).toEqual(mockKeys) + expect(result.current.isLoading).toBe(false) + expect(mockUseQuery).toHaveBeenCalledWith('/providers/openai/api-keys') + }) +}) + +describe('useProviderRegistryModels', () => { + beforeEach(() => { + vi.clearAllMocks() + }) + + it('should query registry models for a provider', () => { + const mockModels = [{ id: 'gpt-4o', name: 'GPT-4o', providerId: 'openai' }] + mockUseQuery.mockImplementation((path: string) => ({ + data: path.includes('registry-models') ? mockModels : undefined, + isLoading: false, + isRefreshing: false, + error: undefined, + refetch: vi.fn(), + mutate: vi.fn() + })) + + const { result } = renderHook(() => useProviderRegistryModels('openai')) + + expect(result.current.data).toEqual(mockModels) + expect(result.current.isLoading).toBe(false) + expect(mockUseQuery).toHaveBeenCalledWith('/providers/openai/registry-models') + }) +}) diff --git a/src/renderer/src/data/hooks/useModels.ts b/src/renderer/src/data/hooks/useModels.ts new file mode 100644 index 00000000000..19bdd65253f --- /dev/null +++ b/src/renderer/src/data/hooks/useModels.ts @@ -0,0 +1,60 @@ +import { dataApiService } from '@data/DataApiService' +import type { ConcreteApiPaths } from '@shared/data/api/apiTypes' +import type { CreateModelDto, UpdateModelDto } from '@shared/data/api/schemas/models' +import type { Model } from '@shared/data/types/model' +import { useCallback, useMemo } from 'react' + +import { useInvalidateCache, useMutation, useQuery } from './useDataApi' + +/** Helper to build `/models/:providerId/:modelId` concrete path (tsgo cannot resolve two-segment template literals) */ +function modelPath(providerId: string, modelId: string): ConcreteApiPaths { + return `/models/${providerId}/${modelId}` as ConcreteApiPaths +} + +const 
const REFRESH_MODELS = ['/models'] as const
const EMPTY_MODELS: Model[] = []

// ─── Layer 1: List ────────────────────────────────────────────────────
/**
 * Reads the model list from `/models`.
 *
 * A truthy `providerId` is sent inside the `query` option. `enabled` is not
 * part of that query: when defined it is passed to `useQuery` as its own
 * top-level option. With no arguments `useQuery` receives an empty options
 * object.
 *
 * @returns `models` (a stable empty array while there is no data),
 *          `isLoading`, and `refetch` (the `mutate` returned by `useQuery`).
 */
export function useModels(query?: { providerId?: string; enabled?: boolean }) {
  const { providerId, enabled, ...extraFilters } = query ?? {}
  const filters = providerId ? { providerId, ...extraFilters } : extraFilters

  // Built key by key so that absent parts never appear in the options object.
  const options: { query?: typeof filters; enabled?: boolean } = {}
  if (Object.keys(filters).length > 0) {
    options.query = filters
  }
  if (enabled !== undefined) {
    options.enabled = enabled
  }

  const result = useQuery('/models', options) as { data: Model[] | undefined; isLoading: boolean; mutate: any }
  const { data, isLoading, mutate: refetch } = result

  // Reuse one shared empty array so consumers do not see a new reference per render.
  const models = useMemo(() => data ?? EMPTY_MODELS, [data])

  return { models, isLoading, refetch }
}

// ─── Layer 2: Mutations ───────────────────────────────────────────────
/**
 * Write operations for models.
 *
 * `createModel` goes through the POST `/models` mutation, which is configured
 * to refresh `/models`. `deleteModel` and `patchModel` call `dataApiService`
 * directly on the path built by `modelPath` and then invalidate `/models`.
 */
export function useModelMutations() {
  const invalidate = useInvalidateCache()

  const creation = useMutation('POST', '/models', {
    refresh: [...REFRESH_MODELS]
  })
  const createTrigger = creation.trigger

  const createModel = useCallback(
    (dto: CreateModelDto) => {
      return createTrigger({ body: dto })
    },
    [createTrigger]
  )

  const deleteModel = useCallback(
    async (providerId: string, modelId: string) => {
      const target = modelPath(providerId, modelId)
      await dataApiService.delete(target)
      await invalidate('/models')
    },
    [invalidate]
  )

  const patchModel = useCallback(
    async (providerId: string, modelId: string, updates: UpdateModelDto) => {
      const target = modelPath(providerId, modelId)
      await dataApiService.patch(target, { body: updates })
      await invalidate('/models')
    },
    [invalidate]
  )

  return { createModel, deleteModel, patchModel }
}
const REFRESH_PROVIDERS = ['/providers'] as const
const EMPTY_PROVIDERS: Provider[] = []

/**
 * Cache paths invalidated after a provider's API keys are added or removed
 * through `dataApiService` directly: the provider itself, its key list, and
 * the provider list. Kept in one place so both operations stay in sync.
 */
function apiKeyCachePaths(providerId: string): string[] {
  return [`/providers/${providerId}`, `/providers/${providerId}/api-keys`, '/providers']
}

// ─── Layer 1: List + Create + Reorder ─────────────────────────────────
/**
 * Provider list plus list-level operations.
 *
 * @param query when given, passed to `useQuery` as `{ query }`.
 * @returns `providers` (stable empty array while there is no data),
 *          `isLoading`, `addProvider`, `reorderProviders`, and `refetch`
 *          (the `mutate` returned by `useQuery`).
 */
export function useProviders(query?: { enabled?: boolean }) {
  const { data, isLoading, mutate } = useQuery('/providers', query ? { query } : undefined)

  const { trigger: createTrigger } = useMutation('POST', '/providers', {
    refresh: [...REFRESH_PROVIDERS]
  })

  const addProvider = useCallback((dto: CreateProviderDto) => createTrigger({ body: dto }), [createTrigger])

  const reorderProviders = useCallback(
    async (reorderedList: Provider[]) => {
      void mutate(reorderedList as any, false) // optimistic
      try {
        // allSettled, not all: wait for every PATCH to finish (even after one
        // has failed) so the revalidation below never runs while sibling
        // requests are still in flight.
        await Promise.allSettled(
          reorderedList.map((p, i) => dataApiService.patch(`/providers/${p.id}` as const, { body: { sortOrder: i } }))
        )
      } catch {
        // Only reachable if issuing a request throws synchronously. Failures
        // are deliberately not rethrown; the revalidation below replaces the
        // optimistic order with whatever the server holds.
      } finally {
        void mutate()
      }
    },
    [mutate]
  )

  const providers = useMemo(() => data ?? EMPTY_PROVIDERS, [data])

  return {
    providers,
    isLoading,
    addProvider,
    reorderProviders,
    refetch: mutate
  }
}

// ─── Layer 2: Single read + write + delete ────────────────────────────
/** Reads `/providers/:providerId` and bundles the result with `useProviderMutations`. */
export function useProvider(providerId: string) {
  const { data, isLoading } = useQuery(`/providers/${providerId}` as const) as {
    data: Provider | undefined
    isLoading: boolean
    [k: string]: any
  }

  const mutations = useProviderMutations(providerId)

  return { provider: data, isLoading, ...mutations }
}

// ─── Layer 3: Pure mutations ──────────────────────────────────────────
/**
 * Write operations for one provider, without subscribing to its data.
 *
 * PATCH/DELETE go through `useMutation` configured to refresh `/providers`;
 * API-key add/delete call `dataApiService` directly and then invalidate the
 * paths from `apiKeyCachePaths`.
 */
export function useProviderMutations(providerId: string) {
  const path = `/providers/${providerId}` as const
  const invalidate = useInvalidateCache()

  const { trigger: patchTrigger } = useMutation('PATCH', path, {
    refresh: [...REFRESH_PROVIDERS]
  })

  const { trigger: deleteTrigger } = useMutation('DELETE', path, {
    refresh: [...REFRESH_PROVIDERS]
  })

  const updateProvider = useCallback((updates: UpdateProviderDto) => patchTrigger({ body: updates }), [patchTrigger])

  const deleteProvider = useCallback(() => deleteTrigger(), [deleteTrigger])

  // Patches `authConfig`, then invalidates the separate auth-config query.
  const updateAuthConfig = useCallback(
    async (authConfig: AuthConfig) => {
      await patchTrigger({ body: { authConfig } })
      await invalidate(`/providers/${providerId}/auth-config`)
    },
    [patchTrigger, invalidate, providerId]
  )

  const addApiKey = useCallback(
    async (key: string, label?: string) => {
      await dataApiService.post(`/providers/${providerId}/api-keys` as const, {
        body: { key, label }
      })
      await invalidate(apiKeyCachePaths(providerId))
    },
    [providerId, invalidate]
  )

  const deleteApiKey = useCallback(
    async (keyId: string) => {
      await dataApiService.delete(`/providers/${providerId}/api-keys/${keyId}` as const)
      await invalidate(apiKeyCachePaths(providerId))
    },
    [providerId, invalidate]
  )

  // Replaces the whole key list through the provider PATCH, then invalidates the key-list query.
  const updateApiKeys = useCallback(
    async (apiKeys: ApiKeyEntry[]) => {
      await patchTrigger({ body: { apiKeys } })
      await invalidate(`/providers/${providerId}/api-keys`)
    },
    [patchTrigger, invalidate, providerId]
  )

  return { updateProvider, deleteProvider, updateAuthConfig, updateApiKeys, addApiKey, deleteApiKey }
}

// ─── Typed query helpers ─────────────────────────────────────────────
/** Query for `/providers/:providerId/auth-config`. */
export function useProviderAuthConfig(providerId: string) {
  return useQuery(`/providers/${providerId}/auth-config` as const) as {
    data: AuthConfig | null | undefined
    isLoading: boolean
    [k: string]: any
  }
}

/** Query for `/providers/:providerId/api-keys`. */
export function useProviderApiKeys(providerId: string) {
  return useQuery(`/providers/${providerId}/api-keys` as const) as {
    data: { keys: ApiKeyEntry[] } | undefined
    isLoading: boolean
    [k: string]: any
  }
}

/** Query for `/providers/:providerId/registry-models`. */
export function useProviderRegistryModels(providerId: string) {
  return useQuery(`/providers/${providerId}/registry-models` as const) as {
    data: Model[] | undefined
    isLoading: boolean
    [k: string]: any
  }
}

// ─── Dynamic ID operations (for context menus, URL schema handlers) ──
/**
 * Patch/delete operations for callers that only know the provider id at call
 * time. Each one calls `dataApiService` and then invalidates `/providers`.
 */
export function useProviderActions() {
  const invalidate = useInvalidateCache()

  const patchProviderById = useCallback(
    async (providerId: string, updates: UpdateProviderDto) => {
      await dataApiService.patch(`/providers/${providerId}` as const, { body: updates })
      await invalidate('/providers')
    },
    [invalidate]
  )

  const deleteProviderById = useCallback(
    async (providerId: string) => {
      await dataApiService.delete(`/providers/${providerId}` as const)
      await invalidate('/providers')
    },
    [invalidate]
  )

  return { patchProviderById, deleteProviderById }
}
a/src/renderer/src/pages/settings/ProviderSettings/ApiOptionsSettings/ApiOptionsSettings.tsx +++ b/src/renderer/src/pages/settings/ProviderSettings/ApiOptionsSettings/ApiOptionsSettings.tsx @@ -1,8 +1,7 @@ import { ColFlex, RowFlex, Switch } from '@cherrystudio/ui' import { InfoTooltip } from '@cherrystudio/ui' -import { useProvider } from '@renderer/hooks/useProvider' -import { type AnthropicCacheControlSettings, type Provider } from '@renderer/types' -import { isSupportAnthropicPromptCacheProvider } from '@renderer/utils/provider' +import { useProvider } from '@renderer/data/hooks/useProviders' +import { isAnthropicProvider, isAzureOpenAIProvider, isOpenAICompatibleProvider } from '@renderer/utils/provider.v2' import { Divider, InputNumber } from 'antd' import { startTransition, useCallback, useMemo } from 'react' import { useTranslation } from 'react-i18next' @@ -23,10 +22,10 @@ const ApiOptionsSettings = ({ providerId }: Props) => { const { t } = useTranslation() const { provider, updateProvider } = useProvider(providerId) - const updateProviderTransition = useCallback( - (updates: Partial) => { + const patchProvider = useCallback( + (updates: Record) => { startTransition(() => { - updateProvider(updates) + void updateProvider(updates) }) }, [updateProvider] @@ -39,58 +38,48 @@ const ApiOptionsSettings = ({ providerId }: Props) => { label: t('settings.provider.api.options.developer_role.label'), tip: t('settings.provider.api.options.developer_role.help'), onChange: (checked: boolean) => { - updateProviderTransition({ - apiOptions: { ...provider.apiOptions, isSupportDeveloperRole: checked } - }) + patchProvider({ apiFeatures: { ...provider?.apiFeatures, developerRole: checked } }) }, - checked: !!provider.apiOptions?.isSupportDeveloperRole + checked: provider?.apiFeatures.developerRole ?? 
false }, { key: 'openai_stream_options', label: t('settings.provider.api.options.stream_options.label'), tip: t('settings.provider.api.options.stream_options.help'), onChange: (checked: boolean) => { - updateProviderTransition({ - apiOptions: { ...provider.apiOptions, isNotSupportStreamOptions: !checked } - }) + patchProvider({ apiFeatures: { ...provider?.apiFeatures, streamOptions: checked } }) }, - checked: !provider.apiOptions?.isNotSupportStreamOptions + checked: provider?.apiFeatures.streamOptions ?? true }, { key: 'openai_service_tier', label: t('settings.provider.api.options.service_tier.label'), tip: t('settings.provider.api.options.service_tier.help'), onChange: (checked: boolean) => { - updateProviderTransition({ - apiOptions: { ...provider.apiOptions, isSupportServiceTier: checked } - }) + patchProvider({ apiFeatures: { ...provider?.apiFeatures, serviceTier: checked } }) }, - checked: !!provider.apiOptions?.isSupportServiceTier + checked: provider?.apiFeatures.serviceTier ?? false }, { key: 'openai_enable_thinking', label: t('settings.provider.api.options.enable_thinking.label'), tip: t('settings.provider.api.options.enable_thinking.help'), onChange: (checked: boolean) => { - updateProviderTransition({ - apiOptions: { ...provider.apiOptions, isNotSupportEnableThinking: !checked } - }) + patchProvider({ apiFeatures: { ...provider?.apiFeatures, enableThinking: checked } }) }, - checked: !provider.apiOptions?.isNotSupportEnableThinking + checked: provider?.apiFeatures.enableThinking ?? true }, { key: 'openai_verbosity', label: t('settings.provider.api.options.verbosity.label'), tip: t('settings.provider.api.options.verbosity.help'), onChange: (checked: boolean) => { - updateProviderTransition({ - apiOptions: { ...provider.apiOptions, isNotSupportVerbosity: !checked } - }) + patchProvider({ apiFeatures: { ...provider?.apiFeatures, verbosity: checked } }) }, - checked: !provider.apiOptions?.isNotSupportVerbosity + checked: provider?.apiFeatures.verbosity ?? 
false } ], - [t, provider, updateProviderTransition] + [t, provider, patchProvider] ) const options = useMemo(() => { @@ -100,40 +89,42 @@ const ApiOptionsSettings = ({ providerId }: Props) => { label: t('settings.provider.api.options.array_content.label'), tip: t('settings.provider.api.options.array_content.help'), onChange: (checked: boolean) => { - updateProviderTransition({ - apiOptions: { ...provider.apiOptions, isNotSupportArrayContent: !checked } - }) + patchProvider({ apiFeatures: { ...provider?.apiFeatures, arrayContent: checked } }) }, - checked: !provider.apiOptions?.isNotSupportArrayContent + checked: provider?.apiFeatures.arrayContent ?? true } ] - if (provider.type === 'openai' || provider.type === 'openai-response' || provider.type === 'azure-openai') { + if (provider && (isOpenAICompatibleProvider(provider) || isAzureOpenAIProvider(provider))) { items.push(...openAIOptions) } return items - }, [openAIOptions, provider.apiOptions, provider.type, t, updateProviderTransition]) + }, [openAIOptions, provider, t, patchProvider]) - const isSupportAnthropicPromptCache = isSupportAnthropicPromptCacheProvider(provider) + const isSupportAnthropicPromptCache = provider ? isAnthropicProvider(provider) : false const cacheSettings = useMemo( () => - provider.anthropicCacheControl ?? { + provider?.settings?.cacheControl ?? 
{ + enabled: false, tokenThreshold: 0, cacheSystemMessage: true, cacheLastNMessages: 0 }, - [provider.anthropicCacheControl] + [provider?.settings?.cacheControl] ) const updateCacheSettings = useCallback( - (updates: Partial) => { - updateProviderTransition({ - anthropicCacheControl: { ...cacheSettings, ...updates } + (updates: Partial) => { + patchProvider({ + providerSettings: { + ...provider?.settings, + cacheControl: { ...cacheSettings, enabled: true, ...updates } + } }) }, - [cacheSettings, updateProviderTransition] + [cacheSettings, provider?.settings, patchProvider] ) return ( @@ -166,7 +157,7 @@ const ApiOptionsSettings = ({ providerId }: Props) => { style={{ width: 100 }} /> - {cacheSettings.tokenThreshold > 0 && ( + {(cacheSettings.tokenThreshold ?? 0) > 0 && ( <> diff --git a/src/renderer/src/pages/settings/ProviderSettings/AwsBedrockSettings.tsx b/src/renderer/src/pages/settings/ProviderSettings/AwsBedrockSettings.tsx index 218979e6dcb..c4216696681 100644 --- a/src/renderer/src/pages/settings/ProviderSettings/AwsBedrockSettings.tsx +++ b/src/renderer/src/pages/settings/ProviderSettings/AwsBedrockSettings.tsx @@ -1,35 +1,62 @@ import { RowFlex } from '@cherrystudio/ui' import { PROVIDER_URLS } from '@renderer/config/providers' -import { useAwsBedrockSettings } from '@renderer/hooks/useAwsBedrock' +import { useProvider, useProviderAuthConfig } from '@renderer/data/hooks/useProviders' import { Alert, Input, Radio } from 'antd' import type { FC } from 'react' -import { useState } from 'react' +import { useEffect, useState } from 'react' import { useTranslation } from 'react-i18next' import { SettingHelpLink, SettingHelpText, SettingHelpTextRow, SettingSubtitle } from '..' 
-const AwsBedrockSettings: FC = () => { +interface Props { + providerId: string +} + +const AwsBedrockSettings: FC = ({ providerId }) => { const { t } = useTranslation() - const { - authType, - accessKeyId, - secretAccessKey, - apiKey, - region, - setAuthType, - setAccessKeyId, - setSecretAccessKey, - setApiKey, - setRegion - } = useAwsBedrockSettings() + const { provider, updateAuthConfig } = useProvider(providerId) + const { data: authConfig } = useProviderAuthConfig(providerId) + + const isIamMode = provider?.authType === 'iam-aws' + const awsConfig = authConfig?.type === 'iam-aws' ? authConfig : null const providerConfig = PROVIDER_URLS['aws-bedrock'] const apiKeyWebsite = providerConfig?.websites?.apiKey - const [localAccessKeyId, setLocalAccessKeyId] = useState(accessKeyId) - const [localSecretAccessKey, setLocalSecretAccessKey] = useState(secretAccessKey) - const [localApiKey, setLocalApiKey] = useState(apiKey) - const [localRegion, setLocalRegion] = useState(region) + const [localAccessKeyId, setLocalAccessKeyId] = useState(awsConfig?.accessKeyId ?? '') + const [localSecretAccessKey, setLocalSecretAccessKey] = useState(awsConfig?.secretAccessKey ?? '') + const [localRegion, setLocalRegion] = useState(awsConfig?.region ?? '') + + useEffect(() => { + if (awsConfig) { + setLocalAccessKeyId(awsConfig.accessKeyId ?? '') + setLocalSecretAccessKey(awsConfig.secretAccessKey ?? '') + setLocalRegion(awsConfig.region ?? 
'') + } + }, [authConfig]) + + const handleAuthTypeChange = async (value: string) => { + if (value === 'iam') { + await updateAuthConfig({ type: 'iam-aws', region: localRegion || 'us-east-1' }) + } else { + await updateAuthConfig({ type: 'api-key' }) + } + } + + const saveIamConfig = async () => { + await updateAuthConfig({ + type: 'iam-aws' as const, + region: localRegion, + accessKeyId: localAccessKeyId, + secretAccessKey: localSecretAccessKey + }) + } + + const saveRegion = async () => { + if (isIamMode) { + await saveIamConfig() + } + } return ( <> @@ -38,7 +65,10 @@ const AwsBedrockSettings: FC = () => { {/* Authentication Type Selector */} {t('settings.provider.aws-bedrock.auth_type')} - setAuthType(e.target.value)} style={{ marginTop: 5 }}> + handleAuthTypeChange(e.target.value)} + style={{ marginTop: 5 }}> {t('settings.provider.aws-bedrock.auth_type_iam')} {t('settings.provider.aws-bedrock.auth_type_api_key')} @@ -47,7 +77,7 @@ const AwsBedrockSettings: FC = () => { {/* IAM Credentials Fields */} - {authType === 'iam' && ( + {isIamMode && ( <> {t('settings.provider.aws-bedrock.access_key_id')} @@ -56,7 +86,7 @@ const AwsBedrockSettings: FC = () => { value={localAccessKeyId} placeholder={t('settings.provider.aws-bedrock.access_key_id')} onChange={(e) => setLocalAccessKeyId(e.target.value)} - onBlur={() => setAccessKeyId(localAccessKeyId)} + onBlur={saveIamConfig} style={{ marginTop: 5 }} /> @@ -70,7 +100,7 @@ const AwsBedrockSettings: FC = () => { value={localSecretAccessKey} placeholder={t('settings.provider.aws-bedrock.secret_access_key')} onChange={(e) => setLocalSecretAccessKey(e.target.value)} - onBlur={() => setSecretAccessKey(localSecretAccessKey)} + onBlur={saveIamConfig} style={{ marginTop: 5 }} spellCheck={false} /> @@ -87,29 +117,12 @@ const AwsBedrockSettings: FC = () => { )} - {authType === 'apiKey' && ( - <> - {t('settings.provider.aws-bedrock.api_key')} - setLocalApiKey(e.target.value)} - onBlur={() => setApiKey(localApiKey)} - style={{ 
marginTop: 5 }} - spellCheck={false} - /> - - {t('settings.provider.aws-bedrock.api_key_help')} - - - )} - {t('settings.provider.aws-bedrock.region')} setLocalRegion(e.target.value)} - onBlur={() => setRegion(localRegion)} + onBlur={saveRegion} style={{ marginTop: 5 }} /> diff --git a/src/renderer/src/pages/settings/ProviderSettings/CherryINOAuth.tsx b/src/renderer/src/pages/settings/ProviderSettings/CherryINOAuth.tsx index ceab42aa107..1af3140a135 100644 --- a/src/renderer/src/pages/settings/ProviderSettings/CherryINOAuth.tsx +++ b/src/renderer/src/pages/settings/ProviderSettings/CherryINOAuth.tsx @@ -1,9 +1,9 @@ import { Cherryin } from '@cherrystudio/ui/icons' import { loggerService } from '@logger' -import { useProvider } from '@renderer/hooks/useProvider' +import { useProvider } from '@renderer/data/hooks/useProviders' import { oauthWithCherryIn } from '@renderer/utils/oauth' +import { hasApiKeys } from '@renderer/utils/provider.v2' import { Button, Skeleton } from 'antd' -import { isEmpty } from 'lodash' import { CreditCard, LogIn, LogOut, RefreshCw } from 'lucide-react' import type { FC } from 'react' import { useCallback, useEffect, useState } from 'react' @@ -34,7 +34,7 @@ interface CherryINOAuthProps { } const CherryINOAuth: FC = ({ providerId }) => { - const { updateProvider, provider } = useProvider(providerId) + const { provider, updateProvider, addApiKey, deleteApiKey } = useProvider(providerId) const { t } = useTranslation() const [isLoggingOut, setIsLoggingOut] = useState(false) @@ -42,9 +42,9 @@ const CherryINOAuth: FC = ({ providerId }) => { const [balanceInfo, setBalanceInfo] = useState(null) const [hasOAuthToken, setHasOAuthToken] = useState(null) - const hasApiKey = !isEmpty(provider.apiKey) + const hasKeys = provider ? 
hasApiKeys(provider) : false // User is considered logged in via OAuth only if they have both API key and OAuth token - const isOAuthLoggedIn = hasApiKey && hasOAuthToken === true + const isOAuthLoggedIn = hasKeys && hasOAuthToken === true const fetchData = useCallback(async () => { setIsLoadingData(true) @@ -83,8 +83,10 @@ const CherryINOAuth: FC = ({ providerId }) => { const handleOAuthLogin = useCallback(async () => { try { await oauthWithCherryIn( - (apiKeys: string) => { - updateProvider({ apiKey: apiKeys, enabled: true }) + async (apiKeys: string) => { + // POST each key to /api-keys endpoint + await addApiKey(apiKeys, 'OAuth') + await updateProvider({ isEnabled: true }) setHasOAuthToken(true) window.toast.success(t('auth.get_key_success')) }, @@ -96,7 +98,7 @@ const CherryINOAuth: FC = ({ providerId }) => { logger.error('OAuth Error:', error as Error) window.toast.error(t('settings.provider.oauth.error')) } - }, [updateProvider, t]) + }, [addApiKey, updateProvider, t]) const handleLogout = useCallback(() => { window.modal.confirm({ @@ -108,14 +110,17 @@ const CherryINOAuth: FC = ({ providerId }) => { try { await window.api.cherryin.logout(CHERRYIN_OAUTH_SERVER) - updateProvider({ apiKey: '' }) + // Delete OAuth key by label + const oauthKey = provider?.apiKeys.find((k) => k.label === 'OAuth') + if (oauthKey) { + await deleteApiKey(oauthKey.id) + } setHasOAuthToken(false) setBalanceInfo(null) window.toast.success(t('settings.provider.oauth.logout_success')) } catch (error) { logger.error('Logout error:', error as Error) // Still clear local state even if server revocation failed - updateProvider({ apiKey: '' }) setHasOAuthToken(false) setBalanceInfo(null) window.toast.warning(t('settings.provider.oauth.logout_warning')) @@ -124,7 +129,7 @@ const CherryINOAuth: FC = ({ providerId }) => { } } }) - }, [updateProvider, t]) + }, [provider?.apiKeys, deleteApiKey, t]) const handleTopup = useCallback(() => { window.open(CHERRYIN_TOPUP_URL, '_blank') @@ -135,7 +140,7 
@@ const CherryINOAuth: FC = ({ providerId }) => { // 2. Has API key + OAuth token → Show logged-in UI // 3. Has API key + No OAuth token (legacy manual key) → Show connect button to upgrade to OAuth const renderContent = () => { - if (!hasApiKey) { + if (!hasKeys) { // Case 1: No API key - show login button return ( - ) - }, [list, t, loadingModels, provider, onRemoveAll, onAddAll, loadModels]) + }, [list, t, loadingModels, provider, existingModelIds, onRemoveAll, onAddAll, loadModels]) return ( = ({ providerId, resolve }) => { /> ) : ( )} diff --git a/src/renderer/src/pages/settings/ProviderSettings/ModelList/ModelList.tsx b/src/renderer/src/pages/settings/ProviderSettings/ModelList/ModelList.tsx index 7365986934b..99386a48ae1 100644 --- a/src/renderer/src/pages/settings/ProviderSettings/ModelList/ModelList.tsx +++ b/src/renderer/src/pages/settings/ProviderSettings/ModelList/ModelList.tsx @@ -1,9 +1,10 @@ import { Button, ColFlex, Flex, RowFlex, Tooltip } from '@cherrystudio/ui' +import { useModelMutations, useModels } from '@data/hooks/useModels' +import { useProvider, useProviderApiKeys } from '@data/hooks/useProviders' import CollapsibleSearchBar from '@renderer/components/CollapsibleSearchBar' import { LoadingIcon, StreamlineGoodHealthAndWellBeing } from '@renderer/components/Icons' import CustomTag from '@renderer/components/Tags/CustomTag' import { PROVIDER_URLS } from '@renderer/config/providers' -import { useProvider } from '@renderer/hooks/useProvider' import { getProviderLabel } from '@renderer/i18n/label' import { SettingHelpLink, SettingHelpText, SettingHelpTextRow, SettingSubtitle } from '@renderer/pages/settings' import EditModelPopup from '@renderer/pages/settings/ProviderSettings/EditModelPopup/EditModelPopup' @@ -11,13 +12,16 @@ import AddModelPopup from '@renderer/pages/settings/ProviderSettings/ModelList/A import DownloadOVMSModelPopup from '@renderer/pages/settings/ProviderSettings/ModelList/DownloadOVMSModelPopup' import ManageModelsPopup 
from '@renderer/pages/settings/ProviderSettings/ModelList/ManageModelsPopup' import NewApiAddModelPopup from '@renderer/pages/settings/ProviderSettings/ModelList/NewApiAddModelPopup' -import type { Model } from '@renderer/types' +import type { Model as V1Model, Provider as V1Provider } from '@renderer/types' import { filterModelsByKeywords } from '@renderer/utils' import { getDuplicateModelNames } from '@renderer/utils/model' -import { isNewApiProvider } from '@renderer/utils/provider' -import { Space, Spin } from 'antd' +import { isNewApiProvider } from '@renderer/utils/provider.v2' +import { toV1ProviderShim } from '@renderer/utils/v1ProviderShim' +import type { Model } from '@shared/data/types/model' +import { parseUniqueModelId } from '@shared/data/types/model' +import { Spin } from 'antd' import { groupBy, isEmpty, sortBy, toPairs } from 'lodash' -import { Plus, RefreshCw } from 'lucide-react' +import { ListCheck, Plus } from 'lucide-react' import React, { memo, startTransition, useCallback, useEffect, useMemo, useState } from 'react' import { useTranslation } from 'react-i18next' @@ -35,7 +39,7 @@ const MODEL_COUNT_THRESHOLD = 10 * 根据搜索文本筛选模型、分组并排序 */ const calculateModelGroups = (models: Model[], searchText: string): ModelGroups => { - const filteredModels = searchText ? filterModelsByKeywords(searchText, models) : models + const filteredModels = searchText ? 
filterModelsByKeywords(searchText, models as any) : models const grouped = groupBy(filteredModels, 'group') return sortBy(toPairs(grouped), [0]).reduce((acc, [key, value]) => { acc[key] = value @@ -48,14 +52,30 @@ const calculateModelGroups = (models: Model[], searchText: string): ModelGroups */ const ModelList: React.FC = ({ providerId }) => { const { t } = useTranslation() - const { provider, models, removeModel } = useProvider(providerId) + const { provider } = useProvider(providerId) + const { models } = useModels({ providerId }) + const { data: apiKeysData } = useProviderApiKeys(providerId) + const joinedApiKey = apiKeysData?.keys?.map((k) => k.key).join(',') ?? '' + const { deleteModel } = useModelMutations() + const duplicateModelNames = useMemo(() => getDuplicateModelNames(models as any), [models]) + + const removeModel = useCallback( + async (model: Model) => { + const { modelId } = parseUniqueModelId(model.id) + await deleteModel(model.providerId, modelId) + }, + [deleteModel] + ) // 稳定的编辑模型回调,避免内联函数导致子组件 memo 失效 - const handleEditModel = useCallback((model: Model) => EditModelPopup.show({ provider, model }), [provider]) + const handleEditModel = useCallback( + (model: Model) => provider && EditModelPopup.show({ provider: provider as any, model: model as any }), + [provider] + ) - const providerConfig = PROVIDER_URLS[provider.id] - const docsWebsite = providerConfig?.websites?.docs - const modelsWebsite = providerConfig?.websites?.models + const providerConfig = provider ? PROVIDER_URLS[provider.id as keyof typeof PROVIDER_URLS] : undefined + const docsWebsite = provider?.websites?.docs ?? providerConfig?.websites?.docs + const modelsWebsite = provider?.websites?.models ?? 
providerConfig?.websites?.models const [searchText, _setSearchText] = useState('') const [displayedModelGroups, setDisplayedModelGroups] = useState(() => { @@ -65,8 +85,25 @@ const ModelList: React.FC = ({ providerId }) => { return calculateModelGroups(models, '') }) - const { isChecking: isHealthChecking, modelStatuses, runHealthCheck } = useHealthCheck(provider, models) - const duplicateModelNames = useMemo(() => getDuplicateModelNames(models), [models]) + const v1ProviderForHealth = useMemo((): V1Provider => { + if (provider) { + return toV1ProviderShim(provider, { models, apiKey: joinedApiKey }) + } + return { + id: '', + name: '', + type: 'openai', + apiKey: '', + apiHost: '', + models: [] + } as V1Provider + }, [provider, models, joinedApiKey]) + // TODO(v2-cleanup): Remove v1 shim after useHealthCheck migrates to v2 + const { + isChecking: isHealthChecking, + modelStatuses, + runHealthCheck + } = useHealthCheck(v1ProviderForHealth, models as unknown as V1Model[]) // 将 modelStatuses 数组转换为 Map,实现 O(1) 查找 const modelStatusMap = useMemo(() => { @@ -94,75 +131,51 @@ const ModelList: React.FC = ({ providerId }) => { }, [displayedModelGroups]) const onManageModel = useCallback(() => { - void ManageModelsPopup.show({ providerId: provider.id }) - }, [provider.id]) + if (provider) void ManageModelsPopup.show({ providerId: provider.id }) + }, [provider]) const onAddModel = useCallback(() => { + if (!provider) return if (isNewApiProvider(provider)) { - void NewApiAddModelPopup.show({ title: t('settings.models.add.add_model'), provider }) + void NewApiAddModelPopup.show({ title: t('settings.models.add.add_model'), provider: provider as any }) } else { - void AddModelPopup.show({ title: t('settings.models.add.add_model'), provider }) + void AddModelPopup.show({ title: t('settings.models.add.add_model'), provider: provider as any }) } }, [provider, t]) const onDownloadModel = useCallback( - () => DownloadOVMSModelPopup.show({ title: t('ovms.download.title'), provider }), + 
() => provider && DownloadOVMSModelPopup.show({ title: t('ovms.download.title'), provider: provider as any }), [provider, t] ) const isLoading = useMemo(() => displayedModelGroups === null, [displayedModelGroups]) - const hasNoModels = useMemo(() => models.length === 0, [models.length]) - - const actionButtons = ( - - - {provider.id !== 'ovms' ? ( - - - - ) : ( - - - - )} - - ) return ( <> - - - - {t('common.models')} - - {modelCount} - - {!hasNoModels && ( - <> - - - - - + + + + {t('common.models')} + {modelCount > 0 && ( + + {modelCount} + )} + + + + + + - {!hasNoModels && actionButtons} - {hasNoModels &&
{actionButtons}
} }> {displayedModelGroups && !isEmpty(displayedModelGroups) && ( @@ -170,12 +183,12 @@ const ModelList: React.FC = ({ providerId }) => { displayedModelGroups[group].forEach((model) => removeModel(model))} /> ))} @@ -188,7 +201,7 @@ const ModelList: React.FC = ({ providerId }) => { {t('settings.provider.docs_check')} {docsWebsite && ( - {getProviderLabel(provider.id) + ' '} + {getProviderLabel(provider?.id ?? '') + ' '} {t('common.docs')} )} @@ -204,6 +217,23 @@ const ModelList: React.FC = ({ providerId }) => {
)} + + + {provider?.id !== 'ovms' ? ( + + ) : ( + + )} + ) } diff --git a/src/renderer/src/pages/settings/ProviderSettings/ModelList/NewApiAddModelPopup.tsx b/src/renderer/src/pages/settings/ProviderSettings/ModelList/NewApiAddModelPopup.tsx index e577c4e8ed1..608e4404525 100644 --- a/src/renderer/src/pages/settings/ProviderSettings/ModelList/NewApiAddModelPopup.tsx +++ b/src/renderer/src/pages/settings/ProviderSettings/ModelList/NewApiAddModelPopup.tsx @@ -1,24 +1,23 @@ import { Flex } from '@cherrystudio/ui' import { Button } from '@cherrystudio/ui' +import { useModelMutations, useModels } from '@data/hooks/useModels' import { TopView } from '@renderer/components/TopView' import { endpointTypeOptions } from '@renderer/config/endpointTypes' -import { isNotSupportTextDeltaModel } from '@renderer/config/models' import { useDynamicLabelWidth } from '@renderer/hooks/useDynamicLabelWidth' -import { useProvider } from '@renderer/hooks/useProvider' -import type { EndpointType, Model, Provider } from '@renderer/types' import { getDefaultGroupName } from '@renderer/utils' -import { isNewApiProvider } from '@renderer/utils/provider' +import { isNewApiProvider } from '@renderer/utils/provider.v2' +import type { Model } from '@shared/data/types/model' +import type { Provider } from '@shared/data/types/provider' import type { FormProps } from 'antd' import { Form, Input, Modal, Select } from 'antd' -import { find } from 'lodash' import { useState } from 'react' import { useTranslation } from 'react-i18next' interface ShowParams { title: string - provider: Provider - model?: Model - endpointType?: EndpointType + provider: Provider | any + model?: Model | any + endpointType?: number | string } interface Props extends ShowParams { @@ -30,13 +29,14 @@ type FieldType = { id: string name?: string group?: string - endpointType?: EndpointType + endpointType?: number | string } const PopupContainer: React.FC = ({ title, provider, resolve, model, endpointType }) => { const [open, 
setOpen] = useState(true) const [form] = Form.useForm() - const { addModel, models } = useProvider(provider.id) + const { models } = useModels({ providerId: provider.id }) + const { createModel } = useModelMutations() const { t } = useTranslation() const onOk = () => { @@ -51,38 +51,38 @@ const PopupContainer: React.FC = ({ title, provider, resolve, model, endp resolve({}) } - const onAddModel = (values: FieldType) => { - const id = values.id.trim() + const onAddModel = async (values: FieldType) => { + const modelId = values.id.trim() - if (find(models, { id })) { + if (models.some((m) => m.id.endsWith(`::${modelId}`))) { window.toast.error(t('error.model.exists')) return } - const model: Model = { - id, - provider: provider.id, - name: values.name ? values.name : id.toUpperCase(), - group: values.group ?? getDefaultGroupName(id), - endpoint_type: isNewApiProvider(provider) ? values.endpointType : undefined - } - - addModel({ ...model, supported_text_delta: !isNotSupportTextDeltaModel(model) }) + await createModel({ + providerId: provider.id, + modelId, + name: values.name ? values.name : modelId.toUpperCase(), + group: values.group ?? getDefaultGroupName(modelId), + endpointTypes: isNewApiProvider(provider) && values.endpointType ? 
[values.endpointType as string] : undefined + }) return true } - const onFinish: FormProps['onFinish'] = (values) => { + const onFinish: FormProps['onFinish'] = async (values) => { const id = values.id.trim().replaceAll(',', ',') if (id.includes(',')) { const ids = id.split(',') - ids.forEach((id) => onAddModel({ id, name: id } as FieldType)) + for (const singleId of ids) { + await onAddModel({ id: singleId, name: singleId } as FieldType) + } resolve({}) return } - if (onAddModel(values)) { + if (await onAddModel(values)) { resolve({}) } } diff --git a/src/renderer/src/pages/settings/ProviderSettings/ModelList/NewApiBatchAddModelPopup.tsx b/src/renderer/src/pages/settings/ProviderSettings/ModelList/NewApiBatchAddModelPopup.tsx index 5cb19c03bf3..e3ed918f5e4 100644 --- a/src/renderer/src/pages/settings/ProviderSettings/ModelList/NewApiBatchAddModelPopup.tsx +++ b/src/renderer/src/pages/settings/ProviderSettings/ModelList/NewApiBatchAddModelPopup.tsx @@ -1,11 +1,10 @@ import { Flex } from '@cherrystudio/ui' import { Button } from '@cherrystudio/ui' +import { useModelMutations } from '@data/hooks/useModels' import { TopView } from '@renderer/components/TopView' import { endpointTypeOptions } from '@renderer/config/endpointTypes' -import { isNotSupportTextDeltaModel } from '@renderer/config/models' import { useDynamicLabelWidth } from '@renderer/hooks/useDynamicLabelWidth' -import { useProvider } from '@renderer/hooks/useProvider' -import type { EndpointType, Model, Provider } from '@renderer/types' +import { parseUniqueModelId } from '@shared/data/types/model' import type { FormProps } from 'antd' import { Form, Modal, Select } from 'antd' import { useState } from 'react' @@ -13,8 +12,8 @@ import { useTranslation } from 'react-i18next' interface ShowParams { title: string - provider: Provider - batchModels: Model[] + provider: any + batchModels: any[] } interface Props extends ShowParams { @@ -24,13 +23,13 @@ interface Props extends ShowParams { type FieldType = { 
provider: string group?: string - endpointType?: EndpointType + endpointType?: number | string } const PopupContainer: React.FC = ({ title, provider, resolve, batchModels }) => { const [open, setOpen] = useState(true) const [form] = Form.useForm() - const { addModel } = useProvider(provider.id) + const { createModel } = useModelMutations() const { t } = useTranslation() const onOk = () => { @@ -45,19 +44,22 @@ const PopupContainer: React.FC = ({ title, provider, resolve, batchModels resolve({}) } - const onAddModel = (values: FieldType) => { - batchModels.forEach((model) => { - addModel({ - ...model, - endpoint_type: values.endpointType, - supported_text_delta: !isNotSupportTextDeltaModel(model) + const onAddModel = async (values: FieldType) => { + for (const model of batchModels) { + const modelId = model.apiModelId ?? parseUniqueModelId(model.id).modelId + await createModel({ + providerId: provider.id, + modelId, + name: model.name, + group: model.group, + endpointTypes: values.endpointType ? 
[values.endpointType as string] : undefined }) - }) + } return true } - const onFinish: FormProps['onFinish'] = (values) => { - if (onAddModel(values)) { + const onFinish: FormProps['onFinish'] = async (values) => { + if (await onAddModel(values)) { resolve({}) } } diff --git a/src/renderer/src/pages/settings/ProviderSettings/ModelList/utils.ts b/src/renderer/src/pages/settings/ProviderSettings/ModelList/utils.ts index 0b484fdedce..eab48225d44 100644 --- a/src/renderer/src/pages/settings/ProviderSettings/ModelList/utils.ts +++ b/src/renderer/src/pages/settings/ProviderSettings/ModelList/utils.ts @@ -1,9 +1,4 @@ -import type { Model, Provider } from '@renderer/types' - -// Check if the model exists in the provider's model list -export const isModelInProvider = (provider: Provider, modelId: string): boolean => { - return provider.models.some((m) => m.id === modelId) -} +import type { Model } from '@renderer/types' export const isValidNewApiModel = (model: Model): boolean => { return !!(model.supported_endpoint_types && model.supported_endpoint_types.length > 0) diff --git a/src/renderer/src/pages/settings/ProviderSettings/ModelNotesPopup.tsx b/src/renderer/src/pages/settings/ProviderSettings/ModelNotesPopup.tsx index dd0417612f1..d2e1e9aaa1d 100644 --- a/src/renderer/src/pages/settings/ProviderSettings/ModelNotesPopup.tsx +++ b/src/renderer/src/pages/settings/ProviderSettings/ModelNotesPopup.tsx @@ -1,7 +1,6 @@ import MarkdownEditor from '@renderer/components/MarkdownEditor' import { TopView } from '@renderer/components/TopView' -import { useProvider } from '@renderer/hooks/useProvider' -import type { Provider } from '@renderer/types' +import { useProvider } from '@renderer/data/hooks/useProviders' import { Modal } from 'antd' import type { FC } from 'react' import { useState } from 'react' @@ -9,24 +8,21 @@ import { useTranslation } from 'react-i18next' import styled from 'styled-components' interface ShowParams { - provider: Provider + providerId: string } 
interface Props extends ShowParams { resolve: (data: any) => void } -const PopupContainer: FC = ({ provider: _provider, resolve }) => { +const PopupContainer: FC = ({ providerId, resolve }) => { const { t } = useTranslation() const [open, setOpen] = useState(true) - const { provider, updateProvider } = useProvider(_provider.id) - const [notes, setNotes] = useState(provider.notes || '') + const { provider, updateProvider } = useProvider(providerId) + const [notes, setNotes] = useState(provider?.settings?.notes || '') - const handleSave = () => { - updateProvider({ - ...provider, - notes - }) + const handleSave = async () => { + await updateProvider({ providerSettings: { ...provider?.settings, notes } }) setOpen(false) } diff --git a/src/renderer/src/pages/settings/ProviderSettings/ProviderList.tsx b/src/renderer/src/pages/settings/ProviderSettings/ProviderList.tsx index c32fbd0c5b0..cb24899cfb8 100644 --- a/src/renderer/src/pages/settings/ProviderSettings/ProviderList.tsx +++ b/src/renderer/src/pages/settings/ProviderSettings/ProviderList.tsx @@ -1,4 +1,5 @@ import { Button } from '@cherrystudio/ui' +import { useProviderActions, useProviders } from '@data/hooks/useProviders' import type { DropResult } from '@hello-pangea/dnd' import { loggerService } from '@logger' import { @@ -8,13 +9,17 @@ import { } from '@renderer/components/DraggableList' import { DeleteIcon, EditIcon } from '@renderer/components/Icons' import { ProviderAvatar } from '@renderer/components/ProviderAvatar' -import { useAllProviders, useProviders } from '@renderer/hooks/useProvider' import { useTimer } from '@renderer/hooks/useTimer' import ImageStorage from '@renderer/services/ImageStorage' -import type { Provider, ProviderType } from '@renderer/types' -import { isSystemProvider } from '@renderer/types' -import { getFancyProviderName, matchKeywordsInModel, matchKeywordsInProvider, uuid } from '@renderer/utils' -import { isAnthropicSupportedProvider } from '@renderer/utils/provider' +import { uuid 
} from '@renderer/utils' +import { + getFancyProviderName, + isAnthropicSupportedProvider, + isSystemProvider, + matchKeywordsInProvider +} from '@renderer/utils/provider.v2' +import { toV1ProviderShim } from '@renderer/utils/v1ProviderShim' +import type { Provider } from '@shared/data/types/provider' import { useNavigate, useSearch } from '@tanstack/react-router' import type { MenuProps } from 'antd' import { Dropdown, Input, Tag } from 'antd' @@ -52,10 +57,10 @@ interface ProviderListProps { const ProviderList: FC = ({ isOnboarding = false }) => { // TODO: Define validateSearch in routes/settings/provider.tsx and replace with Route.useSearch() // for type-safe search params. Currently using untyped useSearch as a stopgap after removing react-router-dom. - const search = useSearch({ strict: false }) + const search = useSearch({ strict: false }) as Record const navigate = useNavigate() - const providers = useAllProviders() - const { updateProviders, addProvider, removeProvider, updateProvider } = useProviders() + const { providers, addProvider, reorderProviders } = useProviders() + const { patchProviderById, deleteProviderById } = useProviderActions() const { setTimeoutTimer } = useTimer() const [selectedProvider, _setSelectedProvider] = useState(providers[0]) const { t } = useTranslation() @@ -135,25 +140,27 @@ const ProviderList: FC = ({ isOnboarding = false }) => { id: string apiKey: string baseUrl: string - type?: ProviderType + type?: string name?: string }) => { const { id } = data - const { updatedProvider, isNew, displayName } = await UrlSchemaInfoPopup.show(data) + const { updatedProvider, isNew, displayName } = await UrlSchemaInfoPopup.show(data as any) void navigate({ to: '/settings/provider', search: { id } }) if (!updatedProvider) { return } + // TODO: UrlSchemaInfoPopup still returns v1 Provider — adapt to v2 API if (isNew) { - addProvider(updatedProvider) + await addProvider({ providerId: updatedProvider.id, name: updatedProvider.name || id }) } else 
{ - updateProvider(updatedProvider) + await patchProviderById(updatedProvider.id, { name: updatedProvider.name }) } - setSelectedProvider(updatedProvider) + const created = providers.find((p) => p.id === id) ?? (updatedProvider as unknown as Provider) + setSelectedProvider(created) window.toast.success(t('settings.models.provider_key_added', { provider: displayName })) } @@ -180,40 +187,26 @@ const ProviderList: FC = ({ isOnboarding = false }) => { }, [search.addProviderData]) const onAddProvider = async () => { - const { name: providerName, type, logo } = await AddProviderPopup.show() + const { name: providerName, logo } = await AddProviderPopup.show() if (!providerName.trim()) { return } - const provider = { - id: uuid(), - name: providerName.trim(), - type, - apiKey: '', - apiHost: '', - models: [], - enabled: true, - isSystem: false - } as Provider - - let updatedLogos = { ...providerLogos } + const providerId = uuid() + if (logo) { try { - await ImageStorage.set(`provider-${provider.id}`, logo) - updatedLogos = { - ...updatedLogos, - [provider.id]: logo - } - setProviderLogos(updatedLogos) + await ImageStorage.set(`provider-${providerId}`, logo) + setProviderLogos((prev) => ({ ...prev, [providerId]: logo })) } catch (error) { logger.error('Failed to save logo', error as Error) window.toast.error(t('message.error.save_provider_logo')) } } - addProvider(provider) - setSelectedProvider(provider) + const newProvider = await addProvider({ providerId, name: providerName.trim() }) + setSelectedProvider(newProvider) } const getDropdownMenus = (provider: Provider): MenuProps['items'] => { @@ -221,7 +214,7 @@ const ProviderList: FC = ({ isOnboarding = false }) => { label: t('settings.provider.notes.title'), key: 'notes', icon: , - onClick: () => ModelNotesPopup.show({ provider }) + onClick: () => ModelNotesPopup.show({ providerId: provider.id }) } const editMenu = { @@ -229,10 +222,11 @@ const ProviderList: FC = ({ isOnboarding = false }) => { key: 'edit', icon: , async 
onClick() { - const { name, type, logoFile, logo } = await AddProviderPopup.show(provider) + // TODO(v2-cleanup): Remove v1 shim after AddProviderPopup migrates to v2 + const { name, logoFile, logo } = await AddProviderPopup.show(toV1ProviderShim(provider)) if (name) { - updateProvider({ ...provider, name, type }) + await patchProviderById(provider.id, { name }) if (provider.id) { if (logo) { try { @@ -290,7 +284,7 @@ const ProviderList: FC = ({ isOnboarding = false }) => { } setSelectedProvider(providers.filter((p) => isSystemProvider(p))[0]) - removeProvider(provider) + await deleteProviderById(provider.id) } }) } @@ -304,10 +298,6 @@ const ProviderList: FC = ({ isOnboarding = false }) => { if (isSystemProvider(provider)) { return [noteMenu] - } else if (provider.isSystem) { - // 这里是处理数据中存在新版本删掉的系统提供商的情况 - // 未来期望能重构一下,不要依赖isSystem字段 - return [noteMenu, deleteMenu] } else { return menus } @@ -325,15 +315,13 @@ const ProviderList: FC = ({ isOnboarding = false }) => { } const keywords = searchText.toLowerCase().split(/\s+/).filter(Boolean) - const isProviderMatch = matchKeywordsInProvider(keywords, provider) - const isModelMatch = provider.models.some((model) => matchKeywordsInModel(keywords, model)) - return isProviderMatch || isModelMatch + return matchKeywordsInProvider(keywords, provider) }) const { onDragEnd: handleReorder, itemKey } = useDraggableReorder({ originalList: providers, filteredList: filteredProviders, - onUpdate: updateProviders, + onUpdate: reorderProviders, itemKey: 'id' }) @@ -427,11 +415,11 @@ const ProviderList: FC = ({ isOnboarding = false }) => { width: 24, height: 24 }} - provider={provider} + provider={provider as any} customLogos={providerLogos} /> {getFancyProviderName(provider)} - {provider.enabled && ( + {provider.isEnabled && ( ON @@ -451,7 +439,9 @@ const ProviderList: FC = ({ isOnboarding = false }) => { - + {selectedProvider && ( + + )} ) } @@ -467,6 +457,7 @@ const ProviderListContainer = styled.div` display: flex; 
flex-direction: column; min-width: calc(var(--settings-width) + 10px); + height: calc(100vh - var(--navbar-height)); padding-bottom: 5px; border-right: 0.5px solid var(--color-border); ` diff --git a/src/renderer/src/pages/settings/ProviderSettings/ProviderOAuth.tsx b/src/renderer/src/pages/settings/ProviderSettings/ProviderOAuth.tsx index 61bc158fee3..a1f4c5c31b5 100644 --- a/src/renderer/src/pages/settings/ProviderSettings/ProviderOAuth.tsx +++ b/src/renderer/src/pages/settings/ProviderSettings/ProviderOAuth.tsx @@ -3,10 +3,10 @@ import { Button } from '@cherrystudio/ui' import { resolveProviderIcon } from '@cherrystudio/ui/icons' import OAuthButton from '@renderer/components/OAuth/OAuthButton' import { PROVIDER_URLS } from '@renderer/config/providers' -import { useProvider } from '@renderer/hooks/useProvider' +import { useProvider } from '@renderer/data/hooks/useProviders' import { getProviderLabel } from '@renderer/i18n/label' import { providerBills, providerCharge } from '@renderer/utils/oauth' -import { isEmpty } from 'lodash' +import { hasApiKeys } from '@renderer/utils/provider.v2' import { CircleDollarSign, ReceiptText } from 'lucide-react' import type { FC } from 'react' import { Trans, useTranslation } from 'react-i18next' @@ -18,12 +18,15 @@ interface Props { const ProviderOAuth: FC = ({ providerId }) => { const { t } = useTranslation() - const { provider, updateProvider } = useProvider(providerId) + const { provider, updateProvider, addApiKey } = useProvider(providerId) - const setApiKey = (newKey: string) => { - updateProvider({ apiKey: newKey, enabled: true }) + const setApiKey = async (newKey: string) => { + await addApiKey(newKey, 'OAuth') + await updateProvider({ isEnabled: true }) } + if (!provider) return null + let providerWebsite = PROVIDER_URLS[provider.id]?.api?.url.replace('https://', '').replace('api.', '') || provider.name if (provider.id === 'ppio') { @@ -35,8 +38,8 @@ const ProviderOAuth: FC = ({ providerId }) => { return ( {Icon ? 
: {provider.name[0]}} - {isEmpty(provider.apiKey) ? ( - + {!hasApiKeys(provider) ? ( + {t('settings.provider.oauth.button', { provider: getProviderLabel(provider.id) })} ) : ( diff --git a/src/renderer/src/pages/settings/ProviderSettings/ProviderSetting.tsx b/src/renderer/src/pages/settings/ProviderSettings/ProviderSetting.tsx index f788d4be957..fa31da76bf4 100644 --- a/src/renderer/src/pages/settings/ProviderSettings/ProviderSetting.tsx +++ b/src/renderer/src/pages/settings/ProviderSettings/ProviderSetting.tsx @@ -1,5 +1,7 @@ import { Button, Flex, RowFlex, Switch, Tooltip, WarnTooltip } from '@cherrystudio/ui' import { HelpTooltip } from '@cherrystudio/ui' +import { useModels } from '@data/hooks/useModels' +import { useProvider, useProviderApiKeys, useProviderMutations } from '@data/hooks/useProviders' import { adaptProvider } from '@renderer/aiCore/provider/providerConfig' import OpenAIAlert from '@renderer/components/Alert/OpenAIAlert' import { showErrorDetailPopup } from '@renderer/components/ErrorDetailModal' @@ -9,7 +11,6 @@ import Selector from '@renderer/components/Selector' import { isRerankModel } from '@renderer/config/models' import { PROVIDER_URLS } from '@renderer/config/providers' import { useTheme } from '@renderer/context/ThemeProvider' -import { useAllProviders, useProvider, useProviders } from '@renderer/hooks/useProvider' import { useTimer } from '@renderer/hooks/useTimer' import AnthropicSettings from '@renderer/pages/settings/ProviderSettings/AnthropicSettings' import { ModelList } from '@renderer/pages/settings/ProviderSettings/ModelList' @@ -17,24 +18,28 @@ import { checkApi } from '@renderer/services/ApiService' import { isProviderSupportAuth } from '@renderer/services/ProviderService' import { useAppDispatch } from '@renderer/store' import { updateWebSearchProvider } from '@renderer/store/websearch' -import type { SystemProviderId } from '@renderer/types' -import { isSystemProvider, isSystemProviderId, SystemProviderIds } from 
'@renderer/types' +import type { Model as V1Model, SystemProviderId } from '@renderer/types' +import { isSystemProviderId, SystemProviderIds } from '@renderer/types' import type { ApiKeyConnectivity } from '@renderer/types/healthCheck' import { HealthStatus } from '@renderer/types/healthCheck' -import { formatApiHost, formatApiKeys, getFancyProviderName, validateApiHost } from '@renderer/utils' +import { formatApiHost, formatApiKeys, validateApiHost } from '@renderer/utils' import { serializeHealthCheckError } from '@renderer/utils/error' import { - isAIGatewayProvider, + getFancyProviderName, isAnthropicProvider, + isAnthropicSupportedProvider, isAzureOpenAIProvider, isGeminiProvider, isNewApiProvider, isOllamaProvider, isOpenAICompatibleProvider, - isOpenAIProvider, - isSupportAnthropicPromptCacheProvider, + isOpenAIResponsesProvider, + isSystemProvider, isVertexProvider -} from '@renderer/utils/provider' +} from '@renderer/utils/provider.v2' +import { toV1ProviderShim } from '@renderer/utils/v1ProviderShim' +import { EndpointType } from '@shared/data/types/model' +import type { Provider } from '@shared/data/types/provider' import { Divider, Input, Select, Space } from 'antd' import Link from 'antd/es/typography/Link' import { debounce, isEmpty } from 'lodash' @@ -68,7 +73,6 @@ import VertexAISettings from './VertexAISettings' interface Props { providerId: string - /** Whether in onboarding mode for new users */ isOnboarding?: boolean } @@ -88,8 +92,7 @@ const ANTHROPIC_COMPATIBLE_PROVIDER_IDS = [ SystemProviderIds.dmxapi, SystemProviderIds.mimo, SystemProviderIds.openrouter, - SystemProviderIds.tokenflux, - SystemProviderIds.ollama + SystemProviderIds.tokenflux ] as const type AnthropicCompatibleProviderId = (typeof ANTHROPIC_COMPATIBLE_PROVIDER_IDS)[number] @@ -101,12 +104,38 @@ const isAnthropicCompatibleProviderId = (id: string): id is AnthropicCompatibleP type HostField = 'apiHost' | 'anthropicApiHost' const ProviderSetting: FC = ({ providerId, isOnboarding 
= false }) => { - const { provider, updateProvider, models } = useProvider(providerId) - const allProviders = useAllProviders() - const { updateProviders } = useProviders() - const [apiHost, setApiHost] = useState(provider.apiHost) - const [anthropicApiHost, setAnthropicHost] = useState(provider.anthropicApiHost) - const [apiVersion, setApiVersion] = useState(provider.apiVersion) + const { provider } = useProvider(providerId) + if (!provider) return null + return +} + +interface ContentProps { + provider: Provider + providerId: string + isOnboarding?: boolean +} + +const ProviderSettingContent: FC = ({ provider, providerId, isOnboarding = false }) => { + const { updateProvider, updateApiKeys } = useProviderMutations(providerId) + const { models } = useModels({ providerId }) + const { data: apiKeysData } = useProviderApiKeys(providerId) + const patchProvider = useCallback( + async (updates: Record) => { + await updateProvider(updates) + }, + [updateProvider] + ) + + // Derive v1-like fields from v2 Provider + const primaryEndpoint = provider.defaultChatEndpoint ?? EndpointType.OPENAI_CHAT_COMPLETIONS + const providerApiHost = provider.endpointConfigs?.[primaryEndpoint]?.baseUrl ?? '' + const providerAnthropicHost = provider.endpointConfigs?.[EndpointType.ANTHROPIC_MESSAGES]?.baseUrl + const providerApiVersion = provider.settings?.apiVersion ?? '' + const providerApiKey = apiKeysData?.keys?.map((k) => k.key).join(',') ?? 
'' + + const [apiHost, setApiHost] = useState(providerApiHost) + const [anthropicApiHost, setAnthropicHost] = useState(providerAnthropicHost) + const [apiVersion, setApiVersion] = useState(providerApiVersion) const [activeHostField, setActiveHostField] = useState('apiHost') const { t, i18n } = useTranslation() const { theme } = useTheme() @@ -122,14 +151,15 @@ const ProviderSetting: FC = ({ providerId, isOnboarding = false }) => { const noAPIKeyInputProviders = ['copilot', 'vertexai'] as const satisfies SystemProviderId[] const hideApiKeyInput = noAPIKeyInputProviders.some((id) => id === provider.id) - const providerConfig = PROVIDER_URLS[provider.id] - const officialWebsite = providerConfig?.websites?.official - const apiKeyWebsite = providerConfig?.websites?.apiKey + // Use v2 provider.websites first, fallback to static config + const providerConfig = PROVIDER_URLS[provider.id as keyof typeof PROVIDER_URLS] + const officialWebsite = provider.websites?.official ?? providerConfig?.websites?.official + const apiKeyWebsite = provider.websites?.apiKey ?? 
providerConfig?.websites?.apiKey const configuredApiHost = providerConfig?.api?.url const fancyProviderName = getFancyProviderName(provider) - const [localApiKey, setLocalApiKey] = useState(provider.apiKey) + const [localApiKey, setLocalApiKey] = useState(providerApiKey) const [apiKeyConnectivity, setApiKeyConnectivity] = useState({ status: HealthStatus.NOT_CHECKED, checking: false @@ -142,92 +172,73 @@ const ProviderSetting: FC = ({ providerId, isOnboarding = false }) => { [dispatch, provider.id] ) - // Store callbacks in ref to avoid recreating debounce function when dependencies change - const callbacks = { updateProvider, updateWebSearchProviderKey, isOnboarding, providerEnabled: provider.enabled } + const callbacks = { updateApiKeys, updateWebSearchProviderKey, isOnboarding, providerEnabled: provider.isEnabled } const callbacksRef = useRef(callbacks) callbacksRef.current = callbacks const debouncedUpdateApiKey = useMemo( () => - debounce((value: string) => { - const { updateProvider, updateWebSearchProviderKey, isOnboarding, providerEnabled } = callbacksRef.current - const formattedKey = formatApiKeys(value) - updateProvider({ apiKey: formattedKey }) - updateWebSearchProviderKey({ apiKey: formattedKey }) + debounce(async (value: string) => { + const { + updateApiKeys: _updateApiKeys, + updateWebSearchProviderKey: _updateWS, + isOnboarding: _onb, + providerEnabled + } = callbacksRef.current + const formatted = formatApiKeys(value) + const keys = formatted.split(',').filter(Boolean) + const apiKeys = keys.map((key) => ({ id: crypto.randomUUID(), key, isEnabled: true })) + await _updateApiKeys(apiKeys) + _updateWS({ apiKey: formatted }) // Auto-enable provider when apiKey is updated in onboarding mode - if (isOnboarding && formattedKey && !providerEnabled) { - updateProvider({ enabled: true }) + if (_onb && formatted && !providerEnabled) { + await patchProvider({ isEnabled: true }) } }, 150), - [] - ) - // Track whether update comes from external source to avoid 
loops - const isExternalUpdateRef = useRef(false) + [patchProvider] + ) - // Sync provider.apiKey to localApiKey and reset connectivity status + // 同步 provider apiKey 到 localApiKey + // 重置连通性检查状态 useEffect(() => { - // Cancel any pending debounce calls to prevent old values from overwriting new ones - debouncedUpdateApiKey.cancel() - isExternalUpdateRef.current = true - setLocalApiKey(provider.apiKey) + setLocalApiKey(providerApiKey) setApiKeyConnectivity({ status: HealthStatus.NOT_CHECKED }) - }, [provider.apiKey, debouncedUpdateApiKey]) + }, [providerApiKey]) - // Sync localApiKey to provider.apiKey (debounced) - // Only trigger on user input, not on external updates + // 同步 localApiKey 到 provider(防抖) useEffect(() => { - if (isExternalUpdateRef.current) { - isExternalUpdateRef.current = false - return - } - if (localApiKey !== provider.apiKey) { - debouncedUpdateApiKey(localApiKey) + if (localApiKey !== providerApiKey) { + void debouncedUpdateApiKey(localApiKey) } - }, [localApiKey, provider.apiKey, debouncedUpdateApiKey]) - // Flush pending updates on unmount to prevent data loss - useEffect(() => { - return () => { - debouncedUpdateApiKey.flush() - } - }, [debouncedUpdateApiKey]) + // 卸载时取消任何待执行的更新 + return () => debouncedUpdateApiKey.cancel() + }, [localApiKey, providerApiKey, debouncedUpdateApiKey]) const isApiKeyConnectable = useMemo(() => { return apiKeyConnectivity.status === 'success' }, [apiKeyConnectivity]) - const moveProviderToTop = useCallback( - (providerId: string) => { - const reorderedProviders = [...allProviders] - const index = reorderedProviders.findIndex((p) => p.id === providerId) - - if (index !== -1) { - const updatedProvider = { ...reorderedProviders[index], enabled: true } - reorderedProviders.splice(index, 1) - reorderedProviders.unshift(updatedProvider) - updateProviders(reorderedProviders) - } - }, - [allProviders, updateProviders] - ) + const moveProviderToTop = useCallback(async () => { + await updateProvider({ sortOrder: 0, 
isEnabled: true }) + }, [updateProvider]) const onUpdateApiHost = () => { if (!validateApiHost(apiHost)) { - setApiHost(provider.apiHost) + setApiHost(providerApiHost) window.toast.error(t('settings.provider.api_host_no_valid')) return } if (isVertexProvider(provider) || apiHost.trim()) { - // For new-api provider, keep apiHost and anthropicApiHost in sync - if (isNewApiProvider(provider)) { - updateProvider({ apiHost, anthropicApiHost: apiHost }) - setAnthropicHost(apiHost) - } else { - updateProvider({ apiHost }) - } + void patchProvider({ + endpointConfigs: { + ...provider.endpointConfigs, + [primaryEndpoint]: { ...provider.endpointConfigs?.[primaryEndpoint], baseUrl: apiHost } + } + }) } else { - setApiHost(provider.apiHost) + setApiHost(providerApiHost) } } @@ -235,19 +246,31 @@ const ProviderSetting: FC = ({ providerId, isOnboarding = false }) => { const trimmedHost = anthropicApiHost?.trim() if (trimmedHost) { - updateProvider({ anthropicApiHost: trimmedHost }) + void patchProvider({ + endpointConfigs: { + ...provider.endpointConfigs, + [EndpointType.ANTHROPIC_MESSAGES]: { + ...provider.endpointConfigs?.[EndpointType.ANTHROPIC_MESSAGES], + baseUrl: trimmedHost + } + } + }) setAnthropicHost(trimmedHost) } else { - updateProvider({ anthropicApiHost: undefined }) + const restConfigs = { ...provider.endpointConfigs } + delete restConfigs[EndpointType.ANTHROPIC_MESSAGES] + void patchProvider({ endpointConfigs: restConfigs }) setAnthropicHost(undefined) } } - const onUpdateApiVersion = () => updateProvider({ apiVersion }) + const onUpdateApiVersion = () => patchProvider({ providerSettings: { ...provider.settings, apiVersion } }) const openApiKeyList = async () => { - if (localApiKey !== provider.apiKey) { - updateProvider({ apiKey: formatApiKeys(localApiKey) }) - await new Promise((resolve) => setTimeout(resolve, 0)) + if (localApiKey !== providerApiKey) { + const formatted = formatApiKeys(localApiKey) + const keys = formatted.split(',').filter(Boolean) + const 
apiKeys = keys.map((key) => ({ id: crypto.randomUUID(), key, isEnabled: true })) + await updateApiKeys(apiKeys) } await ApiKeyListPopup.show({ @@ -259,14 +282,13 @@ const ProviderSetting: FC = ({ providerId, isOnboarding = false }) => { const onCheckApi = async () => { const formattedLocalKey = formatApiKeys(localApiKey) - // 如果存在多个密钥,直接打开管理窗口 if (formattedLocalKey.includes(',')) { await openApiKeyList() return } - const modelsToCheck = models.filter((model) => !isRerankModel(model)) + const modelsToCheck = models.filter((model) => !isRerankModel(model as any)) if (isEmpty(modelsToCheck)) { window.toast.error({ @@ -276,7 +298,13 @@ const ProviderSetting: FC = ({ providerId, isOnboarding = false }) => { return } - const model = await SelectProviderModelPopup.show({ provider }) + // TODO(v2-cleanup): Remove v1 shim after SelectProviderModelPopup migrates to v2 + const v1ProviderForPopup = toV1ProviderShim(provider, { + models, + apiKey: formattedLocalKey, + apiHost + }) + const model = await SelectProviderModelPopup.show({ provider: v1ProviderForPopup }) if (!model) { window.toast.error(i18n.t('message.error.enter.model')) @@ -285,7 +313,13 @@ const ProviderSetting: FC = ({ providerId, isOnboarding = false }) => { try { setApiKeyConnectivity((prev) => ({ ...prev, checking: true, status: HealthStatus.NOT_CHECKED })) - await checkApi({ ...provider, apiHost, apiKey: formattedLocalKey }, model) + // TODO(v2-cleanup): Remove v1 shim after checkApi migrates to v2 + const v1ProviderForCheck = toV1ProviderShim(provider, { + models, + apiKey: formattedLocalKey, + apiHost + }) + await checkApi(v1ProviderForCheck, model as V1Model) window.toast.success({ timeout: 2000, @@ -293,12 +327,6 @@ const ProviderSetting: FC = ({ providerId, isOnboarding = false }) => { }) setApiKeyConnectivity((prev) => ({ ...prev, status: HealthStatus.SUCCESS })) - - // Auto-enable provider when API check succeeds in onboarding mode - if (isOnboarding && !provider.enabled) { - updateProvider({ enabled: 
true }) - } - setTimeoutTimer( 'onCheckApi', () => { @@ -322,15 +350,22 @@ const ProviderSetting: FC = ({ providerId, isOnboarding = false }) => { const onReset = useCallback(() => { setApiHost(configuredApiHost) - updateProvider({ apiHost: configuredApiHost }) - }, [configuredApiHost, updateProvider]) + void patchProvider({ + endpointConfigs: { + ...provider?.endpointConfigs, + [primaryEndpoint]: { ...provider?.endpointConfigs?.[primaryEndpoint], baseUrl: configuredApiHost } + } + }) + }, [configuredApiHost, patchProvider, provider?.endpointConfigs, primaryEndpoint]) const isApiHostResettable = useMemo(() => { return !isEmpty(configuredApiHost) && apiHost !== configuredApiHost }, [configuredApiHost, apiHost]) const hostPreview = () => { - const formattedApiHost = adaptProvider({ provider: { ...provider, apiHost } }).apiHost + // TODO(v2-cleanup): Remove v1 shim after adaptProvider migrates to v2 + const v1ProviderForAdapt = toV1ProviderShim(provider, { apiHost }) + const formattedApiHost = adaptProvider({ provider: v1ProviderForAdapt }).apiHost if (isOllamaProvider(provider)) { return formattedApiHost + '/chat' @@ -341,8 +376,8 @@ const ProviderSetting: FC = ({ providerId, isOnboarding = false }) => { } if (isAzureOpenAIProvider(provider)) { - const apiVersion = provider.apiVersion || '' - const path = !['preview', 'v1'].includes(apiVersion) + const ver = provider.settings?.apiVersion || '' + const path = !['preview', 'v1'].includes(ver) ? 
`/v1/chat/completions?apiVersion=v1` : `/v1/responses?apiVersion=v1` return formattedApiHost + path @@ -355,15 +390,12 @@ const ProviderSetting: FC = ({ providerId, isOnboarding = false }) => { if (isGeminiProvider(provider)) { return formattedApiHost + '/models' } - if (isOpenAIProvider(provider)) { + if (isOpenAIResponsesProvider(provider)) { return formattedApiHost + '/responses' } if (isVertexProvider(provider)) { return formattedApiHost + '/publishers/google' } - if (isAIGatewayProvider(provider)) { - return formattedApiHost + '/language-model' - } return formattedApiHost } @@ -390,12 +422,12 @@ const ProviderSetting: FC = ({ providerId, isOnboarding = false }) => { if (provider.id === 'copilot') { return } - setApiHost(provider.apiHost) - }, [provider.apiHost, provider.id]) + setApiHost(providerApiHost) + }, [providerApiHost, provider.id]) useEffect(() => { - setAnthropicHost(provider.anthropicApiHost) - }, [provider.anthropicApiHost]) + setAnthropicHost(providerAnthropicHost) + }, [providerAnthropicHost]) const canConfigureAnthropicHost = useMemo(() => { if (isCherryIN) { @@ -405,17 +437,16 @@ const ProviderSetting: FC = ({ providerId, isOnboarding = false }) => { return true } return ( - provider.type !== 'anthropic' && isSystemProviderId(provider.id) && isAnthropicCompatibleProviderId(provider.id) + !isAnthropicProvider(provider) && isSystemProviderId(provider.id) && isAnthropicCompatibleProviderId(provider.id) ) }, [isCherryIN, provider]) const anthropicHostPreview = useMemo(() => { - const rawHost = anthropicApiHost ?? provider.anthropicApiHost - // AI SDK uses the baseURL with /v1, then appends /messages + const rawHost = anthropicApiHost ?? 
providerAnthropicHost const normalizedHost = formatApiHost(rawHost) return `${normalizedHost}/messages` - }, [anthropicApiHost, provider.anthropicApiHost]) + }, [anthropicApiHost, providerAnthropicHost]) const hostSelectorOptions = useMemo(() => { const options: { value: HostField; label: string }[] = [ @@ -448,13 +479,13 @@ const ProviderSetting: FC = ({ providerId, isOnboarding = false }) => { {fancyProviderName} {officialWebsite && ( - + )} - {(!isSystemProvider(provider) || isSupportAnthropicPromptCacheProvider(provider)) && ( + {(!isSystemProvider(provider) || isAnthropicSupportedProvider(provider)) && (