Merge branch 'feature/1.5-local-model' into 'main'

Feature/1.5 local model

See merge request apipark/APIPark!199
This commit is contained in:
刘健
2025-02-17 10:45:56 +08:00
160 changed files with 6794 additions and 92 deletions
@@ -1,7 +1,9 @@
package ai
package ai_provider_local
var (
ollamaSvg = `<?xml version="1.0" standalone="no"?>
OllamaBase = "http://apipark-ollama:11434"
OllamaConfig = "{\n \"mirostat\": 0,\n \"mirostat_eta\": 0.1,\n \"mirostat_tau\": 5.0,\n \"num_ctx\": 4096,\n \"repeat_last_n\":64,\n \"repeat_penalty\": 1.1,\n \"temperature\": 0.7,\n \"seed\": 42,\n \"num_predict\": 42,\n \"top_k\": 40,\n \"top_p\": 0.9,\n \"min_p\": 0.5\n}\n"
OllamaSvg = `<?xml version="1.0" standalone="no"?>
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 20010904//EN"
"http://www.w3.org/TR/2001/REC-SVG-20010904/DTD/svg10.dtd">
<svg version="1.0" xmlns="http://www.w3.org/2000/svg"
@@ -0,0 +1,38 @@
model: claude-3-5-haiku-20241022
label:
en_US: claude-3-5-haiku-20241022
model_type: llm
features:
- agent-thought
- tool-call
- stream-tool-call
model_properties:
mode: chat
context_size: 200000
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_tokens
use_template: max_tokens
required: true
default: 8192
min: 1
max: 8192
- name: response_format
use_template: response_format
pricing:
input: '1.00'
output: '5.00'
unit: '0.000001'
currency: USD
@@ -0,0 +1,40 @@
model: claude-3-5-sonnet-20241022
label:
en_US: claude-3-5-sonnet-20241022
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
- document
model_properties:
mode: chat
context_size: 200000
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_tokens
use_template: max_tokens
required: true
default: 8192
min: 1
max: 8192
- name: response_format
use_template: response_format
pricing:
input: '3.00'
output: '15.00'
unit: '0.000001'
currency: USD
@@ -0,0 +1,26 @@
model: ai21.jamba-1-5-large-v1:0
label:
en_US: Jamba 1.5 Large
model_type: llm
model_properties:
mode: completion
context_size: 256000
parameter_rules:
- name: temperature
use_template: temperature
default: 1
min: 0.0
max: 2.0
- name: top_p
use_template: top_p
- name: max_gen_len
use_template: max_tokens
required: true
default: 4096
min: 1
max: 4096
pricing:
input: '0.002'
output: '0.008'
unit: '0.001'
currency: USD
@@ -0,0 +1,26 @@
model: ai21.jamba-1-5-mini-v1:0
label:
en_US: Jamba 1.5 Mini
model_type: llm
model_properties:
mode: completion
context_size: 256000
parameter_rules:
- name: temperature
use_template: temperature
default: 1
min: 0.0
max: 2.0
- name: top_p
use_template: top_p
- name: max_gen_len
use_template: max_tokens
required: true
default: 4096
min: 1
max: 4096
pricing:
input: '0.0002'
output: '0.0004'
unit: '0.001'
currency: USD
@@ -0,0 +1,53 @@
model: amazon.nova-lite-v1:0
label:
en_US: Nova Lite V1
model_type: llm
features:
- agent-thought
- tool-call
- stream-tool-call
- vision
model_properties:
mode: chat
context_size: 300000
parameter_rules:
- name: max_new_tokens
use_template: max_tokens
required: true
default: 2048
min: 1
max: 5000
- name: temperature
use_template: temperature
required: false
type: float
default: 1
min: 0.0
max: 1.0
help:
zh_Hans: 生成内容的随机性。
en_US: The amount of randomness injected into the response.
- name: top_p
required: false
type: float
default: 0.999
min: 0.000
max: 1.000
help:
zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
- name: top_k
required: false
type: int
default: 0
min: 0
# tip docs from aws has error, max value is 500
max: 500
help:
zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
pricing:
input: '0.00006'
output: '0.00024'
unit: '0.001'
currency: USD
@@ -0,0 +1,52 @@
model: amazon.nova-micro-v1:0
label:
en_US: Nova Micro V1
model_type: llm
features:
- agent-thought
- tool-call
- stream-tool-call
model_properties:
mode: chat
context_size: 128000
parameter_rules:
- name: max_new_tokens
use_template: max_tokens
required: true
default: 2048
min: 1
max: 5000
- name: temperature
use_template: temperature
required: false
type: float
default: 1
min: 0.0
max: 1.0
help:
zh_Hans: 生成内容的随机性。
en_US: The amount of randomness injected into the response.
- name: top_p
required: false
type: float
default: 0.999
min: 0.000
max: 1.000
help:
zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
- name: top_k
required: false
type: int
default: 0
min: 0
# tip docs from aws has error, max value is 500
max: 500
help:
zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
pricing:
input: '0.000035'
output: '0.00014'
unit: '0.001'
currency: USD
@@ -0,0 +1,53 @@
model: amazon.nova-pro-v1:0
label:
en_US: Nova Pro V1
model_type: llm
features:
- agent-thought
- tool-call
- stream-tool-call
- vision
model_properties:
mode: chat
context_size: 300000
parameter_rules:
- name: max_new_tokens
use_template: max_tokens
required: true
default: 2048
min: 1
max: 5000
- name: temperature
use_template: temperature
required: false
type: float
default: 1
min: 0.0
max: 1.0
help:
zh_Hans: 生成内容的随机性。
en_US: The amount of randomness injected into the response.
- name: top_p
required: false
type: float
default: 0.999
min: 0.000
max: 1.000
help:
zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
- name: top_k
required: false
type: int
default: 0
min: 0
# tip docs from aws has error, max value is 500
max: 500
help:
zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
pricing:
input: '0.0008'
output: '0.0032'
unit: '0.001'
currency: USD
@@ -0,0 +1,60 @@
model: anthropic.claude-3-5-haiku-20241022-v1:0
label:
en_US: Claude 3.5 Haiku
model_type: llm
features:
- agent-thought
- tool-call
- stream-tool-call
model_properties:
mode: chat
context_size: 200000
# docs: https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-anthropic-claude-messages.html
parameter_rules:
- name: max_tokens
use_template: max_tokens
required: true
type: int
default: 8192
min: 1
max: 8192
help:
zh_Hans: 停止前生成的最大令牌数。请注意,Anthropic Claude 模型可能会在达到 max_tokens 的值之前停止生成令牌。不同的 Anthropic Claude 模型对此参数具有不同的最大值。
en_US: The maximum number of tokens to generate before stopping. Note that Anthropic Claude models might stop generating tokens before reaching the value of max_tokens. Different Anthropic Claude models have different maximum values for this parameter.
# docs: https://docs.anthropic.com/claude/docs/system-prompts
- name: temperature
use_template: temperature
required: false
type: float
default: 1
min: 0.0
max: 1.0
help:
zh_Hans: 生成内容的随机性。
en_US: The amount of randomness injected into the response.
- name: top_p
required: false
type: float
default: 0.999
min: 0.000
max: 1.000
help:
zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
- name: top_k
required: false
type: int
default: 0
min: 0
# tip docs from aws has error, max value is 500
max: 500
help:
zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
- name: response_format
use_template: response_format
pricing:
input: '0.001'
output: '0.005'
unit: '0.001'
currency: USD
@@ -0,0 +1,60 @@
model: anthropic.claude-3-5-sonnet-20241022-v2:0
label:
en_US: Claude 3.5 Sonnet V2
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
model_properties:
mode: chat
context_size: 200000
# docs: https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-anthropic-claude-messages.html
parameter_rules:
- name: max_tokens
use_template: max_tokens
required: true
type: int
default: 8192
min: 1
max: 8192
help:
zh_Hans: 停止前生成的最大令牌数。请注意,Anthropic Claude 模型可能会在达到 max_tokens 的值之前停止生成令牌。不同的 Anthropic Claude 模型对此参数具有不同的最大值。
en_US: The maximum number of tokens to generate before stopping. Note that Anthropic Claude models might stop generating tokens before reaching the value of max_tokens. Different Anthropic Claude models have different maximum values for this parameter.
- name: temperature
use_template: temperature
required: false
type: float
default: 1
min: 0.0
max: 1.0
help:
zh_Hans: 生成内容的随机性。
en_US: The amount of randomness injected into the response.
- name: top_p
required: false
type: float
default: 0.999
min: 0.000
max: 1.000
help:
zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
- name: top_k
required: false
type: int
default: 0
min: 0
# tip docs from aws has error, max value is 500
max: 500
help:
zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
- name: response_format
use_template: response_format
pricing:
input: '0.003'
output: '0.015'
unit: '0.001'
currency: USD
@@ -0,0 +1,60 @@
model: eu.anthropic.claude-3-5-sonnet-20241022-v2:0
label:
en_US: Claude 3.5 Sonnet V2(EU.Cross Region Inference)
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
model_properties:
mode: chat
context_size: 200000
# docs: https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-anthropic-claude-messages.html
parameter_rules:
- name: max_tokens
use_template: max_tokens
required: true
type: int
default: 4096
min: 1
max: 4096
help:
zh_Hans: 停止前生成的最大令牌数。请注意,Anthropic Claude 模型可能会在达到 max_tokens 的值之前停止生成令牌。不同的 Anthropic Claude 模型对此参数具有不同的最大值。
en_US: The maximum number of tokens to generate before stopping. Note that Anthropic Claude models might stop generating tokens before reaching the value of max_tokens. Different Anthropic Claude models have different maximum values for this parameter.
- name: temperature
use_template: temperature
required: false
type: float
default: 1
min: 0.0
max: 1.0
help:
zh_Hans: 生成内容的随机性。
en_US: The amount of randomness injected into the response.
- name: top_p
required: false
type: float
default: 0.999
min: 0.000
max: 1.000
help:
zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
- name: top_k
required: false
type: int
default: 0
min: 0
# tip docs from aws has error, max value is 500
max: 500
help:
zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
- name: response_format
use_template: response_format
pricing:
input: '0.003'
output: '0.015'
unit: '0.001'
currency: USD
@@ -0,0 +1,53 @@
model: us.amazon.nova-lite-v1:0
label:
en_US: Nova Lite V1 (US.Cross Region Inference)
model_type: llm
features:
- agent-thought
- tool-call
- stream-tool-call
- vision
model_properties:
mode: chat
context_size: 300000
parameter_rules:
- name: max_new_tokens
use_template: max_tokens
required: true
default: 2048
min: 1
max: 5000
- name: temperature
use_template: temperature
required: false
type: float
default: 1
min: 0.0
max: 1.0
help:
zh_Hans: 生成内容的随机性。
en_US: The amount of randomness injected into the response.
- name: top_p
required: false
type: float
default: 0.999
min: 0.000
max: 1.000
help:
zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
- name: top_k
required: false
type: int
default: 0
min: 0
# tip docs from aws has error, max value is 500
max: 500
help:
zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
pricing:
input: '0.00006'
output: '0.00024'
unit: '0.001'
currency: USD
@@ -0,0 +1,52 @@
model: us.amazon.nova-micro-v1:0
label:
en_US: Nova Micro V1 (US.Cross Region Inference)
model_type: llm
features:
- agent-thought
- tool-call
- stream-tool-call
model_properties:
mode: chat
context_size: 128000
parameter_rules:
- name: max_new_tokens
use_template: max_tokens
required: true
default: 2048
min: 1
max: 5000
- name: temperature
use_template: temperature
required: false
type: float
default: 1
min: 0.0
max: 1.0
help:
zh_Hans: 生成内容的随机性。
en_US: The amount of randomness injected into the response.
- name: top_p
required: false
type: float
default: 0.999
min: 0.000
max: 1.000
help:
zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
- name: top_k
required: false
type: int
default: 0
min: 0
# tip docs from aws has error, max value is 500
max: 500
help:
zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
pricing:
input: '0.000035'
output: '0.00014'
unit: '0.001'
currency: USD
@@ -0,0 +1,53 @@
model: us.amazon.nova-pro-v1:0
label:
en_US: Nova Pro V1 (US.Cross Region Inference)
model_type: llm
features:
- agent-thought
- tool-call
- stream-tool-call
- vision
model_properties:
mode: chat
context_size: 300000
parameter_rules:
- name: max_new_tokens
use_template: max_tokens
required: true
default: 2048
min: 1
max: 5000
- name: temperature
use_template: temperature
required: false
type: float
default: 1
min: 0.0
max: 1.0
help:
zh_Hans: 生成内容的随机性。
en_US: The amount of randomness injected into the response.
- name: top_p
required: false
type: float
default: 0.999
min: 0.000
max: 1.000
help:
zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
- name: top_k
required: false
type: int
default: 0
min: 0
# tip docs from aws has error, max value is 500
max: 500
help:
zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
pricing:
input: '0.0008'
output: '0.0032'
unit: '0.001'
currency: USD
@@ -0,0 +1,60 @@
model: us.anthropic.claude-3-5-haiku-20241022-v1:0
label:
en_US: Claude 3.5 Haiku(US.Cross Region Inference)
model_type: llm
features:
- agent-thought
- tool-call
- stream-tool-call
model_properties:
mode: chat
context_size: 200000
# docs: https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-anthropic-claude-messages.html
parameter_rules:
- name: max_tokens
use_template: max_tokens
required: true
type: int
default: 8192
min: 1
max: 8192
help:
zh_Hans: 停止前生成的最大令牌数。请注意,Anthropic Claude 模型可能会在达到 max_tokens 的值之前停止生成令牌。不同的 Anthropic Claude 模型对此参数具有不同的最大值。
en_US: The maximum number of tokens to generate before stopping. Note that Anthropic Claude models might stop generating tokens before reaching the value of max_tokens. Different Anthropic Claude models have different maximum values for this parameter.
# docs: https://docs.anthropic.com/claude/docs/system-prompts
- name: temperature
use_template: temperature
required: false
type: float
default: 1
min: 0.0
max: 1.0
help:
zh_Hans: 生成内容的随机性。
en_US: The amount of randomness injected into the response.
- name: top_p
required: false
type: float
default: 0.999
min: 0.000
max: 1.000
help:
zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
- name: top_k
required: false
type: int
default: 0
min: 0
# tip docs from aws has error, max value is 500
max: 500
help:
zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
- name: response_format
use_template: response_format
pricing:
input: '0.001'
output: '0.005'
unit: '0.001'
currency: USD
@@ -0,0 +1,60 @@
model: us.anthropic.claude-3-5-sonnet-20241022-v2:0
label:
en_US: Claude 3.5 Sonnet V2(US.Cross Region Inference)
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
model_properties:
mode: chat
context_size: 200000
# docs: https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-anthropic-claude-messages.html
parameter_rules:
- name: max_tokens
use_template: max_tokens
required: true
type: int
default: 8192
min: 1
max: 8192
help:
zh_Hans: 停止前生成的最大令牌数。请注意,Anthropic Claude 模型可能会在达到 max_tokens 的值之前停止生成令牌。不同的 Anthropic Claude 模型对此参数具有不同的最大值。
en_US: The maximum number of tokens to generate before stopping. Note that Anthropic Claude models might stop generating tokens before reaching the value of max_tokens. Different Anthropic Claude models have different maximum values for this parameter.
- name: temperature
use_template: temperature
required: false
type: float
default: 1
min: 0.0
max: 1.0
help:
zh_Hans: 生成内容的随机性。
en_US: The amount of randomness injected into the response.
- name: top_p
required: false
type: float
default: 0.999
min: 0.000
max: 1.000
help:
zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
- name: top_k
required: false
type: int
default: 0
min: 0
# tip docs from aws has error, max value is 500
max: 500
help:
zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
- name: response_format
use_template: response_format
pricing:
input: '0.003'
output: '0.015'
unit: '0.001'
currency: USD
@@ -0,0 +1,29 @@
model: us.meta.llama3-2-11b-instruct-v1:0
label:
en_US: US Meta Llama 3.2 11B Instruct
model_type: llm
features:
- vision
- tool-call
model_properties:
mode: completion
context_size: 128000
parameter_rules:
- name: temperature
use_template: temperature
default: 0.5
min: 0.0
max: 1
- name: top_p
use_template: top_p
- name: max_gen_len
use_template: max_tokens
required: true
default: 512
min: 1
max: 2048
pricing:
input: '0.00035'
output: '0.00035'
unit: '0.001'
currency: USD
@@ -0,0 +1,26 @@
model: us.meta.llama3-2-1b-instruct-v1:0
label:
en_US: US Meta Llama 3.2 1B Instruct
model_type: llm
model_properties:
mode: completion
context_size: 128000
parameter_rules:
- name: temperature
use_template: temperature
default: 0.5
min: 0.0
max: 1
- name: top_p
use_template: top_p
- name: max_gen_len
use_template: max_tokens
required: true
default: 512
min: 1
max: 2048
pricing:
input: '0.0001'
output: '0.0001'
unit: '0.001'
currency: USD
@@ -0,0 +1,26 @@
model: us.meta.llama3-2-3b-instruct-v1:0
label:
en_US: US Meta Llama 3.2 3B Instruct
model_type: llm
model_properties:
mode: completion
context_size: 128000
parameter_rules:
- name: temperature
use_template: temperature
default: 0.5
min: 0.0
max: 1
- name: top_p
use_template: top_p
- name: max_gen_len
use_template: max_tokens
required: true
default: 512
min: 1
max: 2048
pricing:
input: '0.00015'
output: '0.00015'
unit: '0.001'
currency: USD
@@ -0,0 +1,31 @@
model: us.meta.llama3-2-90b-instruct-v1:0
label:
en_US: US Meta Llama 3.2 90B Instruct
model_type: llm
features:
- tool-call
model_properties:
mode: completion
context_size: 128000
parameter_rules:
- name: temperature
use_template: temperature
default: 0.5
min: 0.0
max: 1
- name: top_p
use_template: top_p
default: 0.9
min: 0
max: 1
- name: max_gen_len
use_template: max_tokens
required: true
default: 512
min: 1
max: 2048
pricing:
input: '0.002'
output: '0.002'
unit: '0.001'
currency: USD
@@ -0,0 +1,21 @@
model: deepseek-reasoner
label:
zh_Hans: deepseek-reasoner
en_US: deepseek-reasoner
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 64000
parameter_rules:
- name: max_tokens
use_template: max_tokens
min: 1
max: 8192
default: 4096
pricing:
input: "4"
output: "16"
unit: "0.000001"
currency: RMB
@@ -0,0 +1,46 @@
model: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
label:
zh_Hans: Llama 3.2 11B Vision Instruct
en_US: Llama 3.2 11B Vision Instruct
model_type: llm
features:
- agent-thought
- tool-call
model_properties:
mode: chat
context_size: 131072
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
- name: max_tokens
use_template: max_tokens
- name: context_length_exceeded_behavior
default: None
label:
zh_Hans: 上下文长度超出行为
en_US: Context Length Exceeded Behavior
help:
zh_Hans: 上下文长度超出行为
en_US: Context Length Exceeded Behavior
type: string
options:
- None
- truncate
- error
- name: response_format
use_template: response_format
pricing:
input: '0.2'
output: '0.2'
unit: '0.000001'
currency: USD
@@ -0,0 +1,46 @@
model: accounts/fireworks/models/llama-v3p2-1b-instruct
label:
zh_Hans: Llama 3.2 1B Instruct
en_US: Llama 3.2 1B Instruct
model_type: llm
features:
- agent-thought
- tool-call
model_properties:
mode: chat
context_size: 131072
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
- name: max_tokens
use_template: max_tokens
- name: context_length_exceeded_behavior
default: None
label:
zh_Hans: 上下文长度超出行为
en_US: Context Length Exceeded Behavior
help:
zh_Hans: 上下文长度超出行为
en_US: Context Length Exceeded Behavior
type: string
options:
- None
- truncate
- error
- name: response_format
use_template: response_format
pricing:
input: '0.1'
output: '0.1'
unit: '0.000001'
currency: USD
@@ -0,0 +1,46 @@
model: accounts/fireworks/models/llama-v3p2-3b-instruct
label:
zh_Hans: Llama 3.2 3B Instruct
en_US: Llama 3.2 3B Instruct
model_type: llm
features:
- agent-thought
- tool-call
model_properties:
mode: chat
context_size: 131072
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
- name: max_tokens
use_template: max_tokens
- name: context_length_exceeded_behavior
default: None
label:
zh_Hans: 上下文长度超出行为
en_US: Context Length Exceeded Behavior
help:
zh_Hans: 上下文长度超出行为
en_US: Context Length Exceeded Behavior
type: string
options:
- None
- truncate
- error
- name: response_format
use_template: response_format
pricing:
input: '0.1'
output: '0.1'
unit: '0.000001'
currency: USD
@@ -0,0 +1,46 @@
model: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
label:
zh_Hans: Llama 3.2 90B Vision Instruct
en_US: Llama 3.2 90B Vision Instruct
model_type: llm
features:
- agent-thought
- tool-call
model_properties:
mode: chat
context_size: 131072
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
- name: max_tokens
use_template: max_tokens
- name: context_length_exceeded_behavior
default: None
label:
zh_Hans: 上下文长度超出行为
en_US: Context Length Exceeded Behavior
help:
zh_Hans: 上下文长度超出行为
en_US: Context Length Exceeded Behavior
type: string
options:
- None
- truncate
- error
- name: response_format
use_template: response_format
pricing:
input: '0.9'
output: '0.9'
unit: '0.000001'
currency: USD
@@ -0,0 +1,46 @@
model: accounts/fireworks/models/qwen2p5-72b-instruct
label:
zh_Hans: Qwen2.5 72B Instruct
en_US: Qwen2.5 72B Instruct
model_type: llm
features:
- agent-thought
- tool-call
model_properties:
mode: chat
context_size: 32768
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
- name: max_tokens
use_template: max_tokens
- name: context_length_exceeded_behavior
default: None
label:
zh_Hans: 上下文长度超出行为
en_US: Context Length Exceeded Behavior
help:
zh_Hans: 上下文长度超出行为
en_US: Context Length Exceeded Behavior
type: string
options:
- None
- truncate
- error
- name: response_format
use_template: response_format
pricing:
input: '0.9'
output: '0.9'
unit: '0.000001'
currency: USD
@@ -0,0 +1,41 @@
model: gemini-1.5-flash-001
label:
en_US: Gemini 1.5 Flash 001
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
- document
- video
- audio
model_properties:
mode: chat
context_size: 1048576
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_output_tokens
use_template: max_tokens
default: 8192
min: 1
max: 8192
- name: json_schema
use_template: json_schema
pricing:
input: '0.00'
output: '0.00'
unit: '0.000001'
currency: USD
@@ -0,0 +1,41 @@
model: gemini-1.5-flash-002
label:
en_US: Gemini 1.5 Flash 002
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
- document
- video
- audio
model_properties:
mode: chat
context_size: 1048576
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_output_tokens
use_template: max_tokens
default: 8192
min: 1
max: 8192
- name: json_schema
use_template: json_schema
pricing:
input: '0.00'
output: '0.00'
unit: '0.000001'
currency: USD
@@ -0,0 +1,41 @@
model: gemini-1.5-flash-8b-exp-0924
label:
en_US: Gemini 1.5 Flash 8B 0924
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
- document
- video
- audio
model_properties:
mode: chat
context_size: 1048576
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_output_tokens
use_template: max_tokens
default: 8192
min: 1
max: 8192
- name: json_schema
use_template: json_schema
pricing:
input: '0.00'
output: '0.00'
unit: '0.000001'
currency: USD
@@ -0,0 +1,41 @@
model: gemini-1.5-flash
label:
en_US: Gemini 1.5 Flash
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
- document
- video
- audio
model_properties:
mode: chat
context_size: 1048576
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_output_tokens
use_template: max_tokens
default: 8192
min: 1
max: 8192
- name: json_schema
use_template: json_schema
pricing:
input: '0.00'
output: '0.00'
unit: '0.000001'
currency: USD
@@ -0,0 +1,41 @@
model: gemini-1.5-pro-001
label:
en_US: Gemini 1.5 Pro 001
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
- document
- video
- audio
model_properties:
mode: chat
context_size: 2097152
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_output_tokens
use_template: max_tokens
default: 8192
min: 1
max: 8192
- name: json_schema
use_template: json_schema
pricing:
input: '0.00'
output: '0.00'
unit: '0.000001'
currency: USD
@@ -0,0 +1,41 @@
model: gemini-1.5-pro-002
label:
en_US: Gemini 1.5 Pro 002
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
- document
- video
- audio
model_properties:
mode: chat
context_size: 2097152
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_output_tokens
use_template: max_tokens
default: 8192
min: 1
max: 8192
- name: json_schema
use_template: json_schema
pricing:
input: '0.00'
output: '0.00'
unit: '0.000001'
currency: USD
@@ -0,0 +1,41 @@
model: gemini-1.5-pro
label:
en_US: Gemini 1.5 Pro
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
- document
- video
- audio
model_properties:
mode: chat
context_size: 2097152
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_output_tokens
use_template: max_tokens
default: 8192
min: 1
max: 8192
- name: json_schema
use_template: json_schema
pricing:
input: '0.00'
output: '0.00'
unit: '0.000001'
currency: USD
@@ -0,0 +1,41 @@
model: gemini-2.0-flash-001
label:
en_US: Gemini 2.0 Flash 001
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
- document
- video
- audio
model_properties:
mode: chat
context_size: 1048576
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_output_tokens
use_template: max_tokens
default: 8192
min: 1
max: 8192
- name: json_schema
use_template: json_schema
pricing:
input: '0.00'
output: '0.00'
unit: '0.000001'
currency: USD
@@ -0,0 +1,41 @@
model: gemini-2.0-flash-exp
label:
en_US: Gemini 2.0 Flash Exp
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
- document
- video
- audio
model_properties:
mode: chat
context_size: 1048576
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_output_tokens
use_template: max_tokens
default: 8192
min: 1
max: 8192
- name: json_schema
use_template: json_schema
pricing:
input: '0.00'
output: '0.00'
unit: '0.000001'
currency: USD
@@ -0,0 +1,41 @@
model: gemini-2.0-flash-lite-preview-02-05
label:
en_US: Gemini 2.0 Flash Lite Preview 0205
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
- document
- video
- audio
model_properties:
mode: chat
context_size: 1048576
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_output_tokens
use_template: max_tokens
default: 8192
min: 1
max: 8192
- name: json_schema
use_template: json_schema
pricing:
input: '0.00'
output: '0.00'
unit: '0.000001'
currency: USD
@@ -0,0 +1,39 @@
model: gemini-2.0-flash-thinking-exp-01-21
label:
en_US: Gemini 2.0 Flash Thinking Exp 01-21
model_type: llm
features:
- agent-thought
- vision
- document
- video
- audio
model_properties:
mode: chat
context_size: 32767
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_output_tokens
use_template: max_tokens
default: 8192
min: 1
max: 8192
- name: json_schema
use_template: json_schema
pricing:
input: '0.00'
output: '0.00'
unit: '0.000001'
currency: USD
@@ -0,0 +1,39 @@
model: gemini-2.0-flash-thinking-exp-1219
label:
en_US: Gemini 2.0 Flash Thinking Exp 1219
model_type: llm
features:
- agent-thought
- vision
- document
- video
- audio
model_properties:
mode: chat
context_size: 32767
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_output_tokens
use_template: max_tokens
default: 8192
min: 1
max: 8192
- name: json_schema
use_template: json_schema
pricing:
input: '0.00'
output: '0.00'
unit: '0.000001'
currency: USD
@@ -0,0 +1,41 @@
model: gemini-2.0-pro-exp-02-05
label:
en_US: Gemini 2.0 pro exp 02-05
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
- document
- video
- audio
model_properties:
mode: chat
context_size: 1048576
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_output_tokens
use_template: max_tokens
default: 8192
min: 1
max: 8192
- name: json_schema
use_template: json_schema
pricing:
input: '0.00'
output: '0.00'
unit: '0.000001'
currency: USD
@@ -0,0 +1,41 @@
model: gemini-exp-1114
label:
en_US: Gemini exp 1114
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
- document
- video
- audio
model_properties:
mode: chat
context_size: 32767
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_output_tokens
use_template: max_tokens
default: 8192
min: 1
max: 8192
- name: json_schema
use_template: json_schema
pricing:
input: '0.00'
output: '0.00'
unit: '0.000001'
currency: USD
@@ -0,0 +1,41 @@
model: gemini-exp-1121
label:
en_US: Gemini exp 1121
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
- document
- video
- audio
model_properties:
mode: chat
context_size: 32767
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_output_tokens
use_template: max_tokens
default: 8192
min: 1
max: 8192
- name: json_schema
use_template: json_schema
pricing:
input: '0.00'
output: '0.00'
unit: '0.000001'
currency: USD
@@ -0,0 +1,41 @@
model: gemini-exp-1206
label:
en_US: Gemini exp 1206
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
- document
- video
- audio
model_properties:
mode: chat
context_size: 2097152
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_output_tokens
use_template: max_tokens
default: 8192
min: 1
max: 8192
- name: json_schema
use_template: json_schema
pricing:
input: '0.00'
output: '0.00'
unit: '0.000001'
currency: USD
@@ -0,0 +1,41 @@
model: learnlm-1.5-pro-experimental
label:
en_US: LearnLM 1.5 Pro Experimental
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
- document
- video
- audio
model_properties:
mode: chat
context_size: 32767
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_output_tokens
use_template: max_tokens
default: 8192
min: 1
max: 8192
- name: json_schema
use_template: json_schema
pricing:
input: '0.00'
output: '0.00'
unit: '0.000001'
currency: USD
@@ -0,0 +1,36 @@
model: deepseek-r1-distill-llama-70b
label:
en_US: DeepSeek R1 Distill Llama 70b
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 128000
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: max_tokens
use_template: max_tokens
default: 512
min: 1
max: 8192
- name: response_format
label:
zh_Hans: 回复格式
en_US: Response Format
type: string
help:
zh_Hans: 指定模型必须输出的格式
en_US: specifying the format that the model must output
required: false
options:
- text
- json_object
pricing:
input: '3.00'
output: '3.00'
unit: '0.000001'
currency: USD
@@ -0,0 +1,37 @@
model: gemma-7b-it
label:
zh_Hans: Gemma 7B Instruction Tuned
en_US: Gemma 7B Instruction Tuned
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 8192
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: max_tokens
use_template: max_tokens
default: 512
min: 1
max: 8192
- name: response_format
label:
zh_Hans: 回复格式
en_US: Response Format
type: string
help:
zh_Hans: 指定模型必须输出的格式
en_US: specifying the format that the model must output
required: false
options:
- text
- json_object
pricing:
input: '0.05'
output: '0.1'
unit: '0.000001'
currency: USD
@@ -0,0 +1,37 @@
model: gemma2-9b-it
label:
zh_Hans: Gemma 2 9B Instruction Tuned
en_US: Gemma 2 9B Instruction Tuned
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 8192
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: max_tokens
use_template: max_tokens
default: 512
min: 1
max: 8192
- name: response_format
label:
zh_Hans: 回复格式
en_US: Response Format
type: string
help:
zh_Hans: 指定模型必须输出的格式
en_US: specifying the format that the model must output
required: false
options:
- text
- json_object
pricing:
input: '0.05'
output: '0.1'
unit: '0.000001'
currency: USD
@@ -0,0 +1,38 @@
model: llama-3.2-11b-text-preview
deprecated: true
label:
zh_Hans: Llama 3.2 11B Text (Preview)
en_US: Llama 3.2 11B Text (Preview)
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 131072
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: max_tokens
use_template: max_tokens
default: 512
min: 1
max: 8192
- name: response_format
label:
zh_Hans: 回复格式
en_US: Response Format
type: string
help:
zh_Hans: 指定模型必须输出的格式
en_US: specifying the format that the model must output
required: false
options:
- text
- json_object
pricing:
input: '0.05'
output: '0.1'
unit: '0.000001'
currency: USD
@@ -0,0 +1,38 @@
model: llama-3.2-11b-vision-preview
label:
zh_Hans: Llama 3.2 11B Vision (Preview)
en_US: Llama 3.2 11B Vision (Preview)
model_type: llm
features:
- agent-thought
- vision
model_properties:
mode: chat
context_size: 131072
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: max_tokens
use_template: max_tokens
default: 512
min: 1
max: 8192
- name: response_format
label:
zh_Hans: 回复格式
en_US: Response Format
type: string
help:
zh_Hans: 指定模型必须输出的格式
en_US: specifying the format that the model must output
required: false
options:
- text
- json_object
pricing:
input: '0.05'
output: '0.1'
unit: '0.000001'
currency: USD
@@ -0,0 +1,37 @@
model: llama-3.2-1b-preview
label:
zh_Hans: Llama 3.2 1B Text (Preview)
en_US: Llama 3.2 1B Text (Preview)
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 131072
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: max_tokens
use_template: max_tokens
default: 512
min: 1
max: 8192
- name: response_format
label:
zh_Hans: 回复格式
en_US: Response Format
type: string
help:
zh_Hans: 指定模型必须输出的格式
en_US: specifying the format that the model must output
required: false
options:
- text
- json_object
pricing:
input: '0.05'
output: '0.1'
unit: '0.000001'
currency: USD
@@ -0,0 +1,37 @@
model: llama-3.2-3b-preview
label:
zh_Hans: Llama 3.2 3B Text (Preview)
en_US: Llama 3.2 3B Text (Preview)
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 131072
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: max_tokens
use_template: max_tokens
default: 512
min: 1
max: 8192
- name: response_format
label:
zh_Hans: 回复格式
en_US: Response Format
type: string
help:
zh_Hans: 指定模型必须输出的格式
en_US: specifying the format that the model must output
required: false
options:
- text
- json_object
pricing:
input: '0.05'
output: '0.1'
unit: '0.000001'
currency: USD
@@ -0,0 +1,38 @@
model: llama-3.2-90b-text-preview
depraceted: true
label:
zh_Hans: Llama 3.2 90B Text (Preview)
en_US: Llama 3.2 90B Text (Preview)
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 131072
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: max_tokens
use_template: max_tokens
default: 512
min: 1
max: 8192
- name: response_format
label:
zh_Hans: 回复格式
en_US: Response Format
type: string
help:
zh_Hans: 指定模型必须输出的格式
en_US: specifying the format that the model must output
required: false
options:
- text
- json_object
pricing:
input: '0.05'
output: '0.1'
unit: '0.000001'
currency: USD
@@ -0,0 +1,38 @@
model: llama-3.2-90b-vision-preview
label:
zh_Hans: Llama 3.2 90B Vision (Preview)
en_US: Llama 3.2 90B Vision (Preview)
model_type: llm
features:
- agent-thought
- vision
model_properties:
mode: chat
context_size: 131072
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: max_tokens
use_template: max_tokens
default: 512
min: 1
max: 8192
- name: response_format
label:
zh_Hans: 回复格式
en_US: Response Format
type: string
help:
zh_Hans: 指定模型必须输出的格式
en_US: specifying the format that the model must output
required: false
options:
- text
- json_object
pricing:
input: '0.05'
output: '0.1'
unit: '0.000001'
currency: USD
@@ -0,0 +1,38 @@
model: llama-3.3-70b-specdec
label:
zh_Hans: Llama 3.3 70B Specdec
en_US: Llama 3.3 70B Specdec
model_type: llm
features:
- agent-thought
- multi-tool-call
model_properties:
mode: chat
context_size: 131072
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: max_tokens
use_template: max_tokens
default: 1024
min: 1
max: 32768
- name: response_format
label:
zh_Hans: 回复格式
en_US: Response Format
type: string
help:
zh_Hans: 指定模型必须输出的格式
en_US: specifying the format that the model must output
required: false
options:
- text
- json_object
pricing:
input: "0.05"
output: "0.1"
unit: "0.000001"
currency: USD
@@ -0,0 +1,38 @@
model: llama-3.3-70b-versatile
label:
zh_Hans: Llama 3.3 70B Versatile
en_US: Llama 3.3 70B Versatile
model_type: llm
features:
- agent-thought
- multi-tool-call
model_properties:
mode: chat
context_size: 131072
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: max_tokens
use_template: max_tokens
default: 1024
min: 1
max: 32768
- name: response_format
label:
zh_Hans: 回复格式
en_US: Response Format
type: string
help:
zh_Hans: 指定模型必须输出的格式
en_US: specifying the format that the model must output
required: false
options:
- text
- json_object
pricing:
input: "0.05"
output: "0.1"
unit: "0.000001"
currency: USD
@@ -0,0 +1,37 @@
model: llama-guard-3-8b
label:
zh_Hans: Llama-Guard-3-8B
en_US: Llama-Guard-3-8B
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 8192
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: max_tokens
use_template: max_tokens
default: 512
min: 1
max: 8192
- name: response_format
label:
zh_Hans: 回复格式
en_US: Response Format
type: string
help:
zh_Hans: 指定模型必须输出的格式
en_US: specifying the format that the model must output
required: false
options:
- text
- json_object
pricing:
input: '0.20'
output: '0.20'
unit: '0.000001'
currency: USD
@@ -0,0 +1,38 @@
model: llama3-groq-70b-8192-tool-use-preview
label:
zh_Hans: Llama3-groq-70b-8192-tool-use (PREVIEW)
en_US: Llama3-groq-70b-8192-tool-use (PREVIEW)
model_type: llm
features:
- agent-thought
- multi-tool-call
model_properties:
mode: chat
context_size: 8192
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: max_tokens
use_template: max_tokens
default: 512
min: 1
max: 8192
- name: response_format
label:
zh_Hans: 回复格式
en_US: Response Format
type: string
help:
zh_Hans: 指定模型必须输出的格式
en_US: specifying the format that the model must output
required: false
options:
- text
- json_object
pricing:
input: '0.05'
output: '0.08'
unit: '0.000001'
currency: USD
@@ -0,0 +1,38 @@
model: hunyuan-functioncall
label:
zh_Hans: hunyuan-functioncall
en_US: hunyuan-functioncall
model_type: llm
features:
- agent-thought
- tool-call
- multi-tool-call
- stream-tool-call
model_properties:
mode: chat
context_size: 32000
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: max_tokens
use_template: max_tokens
default: 1024
min: 1
max: 32000
- name: enable_enhance
label:
zh_Hans: 功能增强
en_US: Enable Enhancement
type: boolean
help:
zh_Hans: 功能增强(如搜索)开关,关闭时将直接由主模型生成回复内容,可以降低响应时延(对于流式输出时的首字时延尤为明显)。但在少数场景里,回复效果可能会下降。
en_US: Allow the model to perform external search to enhance the generation results.
required: false
default: true
pricing:
input: '0.004'
output: '0.008'
unit: '0.001'
currency: RMB
@@ -0,0 +1,38 @@
model: hunyuan-large-longcontext
label:
zh_Hans: hunyuan-large-longcontext
en_US: hunyuan-large-longcontext
model_type: llm
features:
- agent-thought
- tool-call
- multi-tool-call
- stream-tool-call
model_properties:
mode: chat
context_size: 134000
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: max_tokens
use_template: max_tokens
default: 1024
min: 1
max: 134000
- name: enable_enhance
label:
zh_Hans: 功能增强
en_US: Enable Enhancement
type: boolean
help:
zh_Hans: 功能增强(如搜索)开关,关闭时将直接由主模型生成回复内容,可以降低响应时延(对于流式输出时的首字时延尤为明显)。但在少数场景里,回复效果可能会下降。
en_US: Allow the model to perform external search to enhance the generation results.
required: false
default: true
pricing:
input: '0.006'
output: '0.018'
unit: '0.001'
currency: RMB
@@ -0,0 +1,38 @@
model: hunyuan-large-role
label:
zh_Hans: hunyuan-large-role
en_US: hunyuan-large-role
model_type: llm
features:
- agent-thought
- tool-call
- multi-tool-call
- stream-tool-call
model_properties:
mode: chat
context_size: 32000
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: max_tokens
use_template: max_tokens
default: 1024
min: 1
max: 32000
- name: enable_enhance
label:
zh_Hans: 功能增强
en_US: Enable Enhancement
type: boolean
help:
zh_Hans: 功能增强(如搜索)开关,关闭时将直接由主模型生成回复内容,可以降低响应时延(对于流式输出时的首字时延尤为明显)。但在少数场景里,回复效果可能会下降。
en_US: Allow the model to perform external search to enhance the generation results.
required: false
default: true
pricing:
input: '0.004'
output: '0.008'
unit: '0.001'
currency: RMB
@@ -0,0 +1,38 @@
model: hunyuan-large
label:
zh_Hans: hunyuan-large
en_US: hunyuan-large
model_type: llm
features:
- agent-thought
- tool-call
- multi-tool-call
- stream-tool-call
model_properties:
mode: chat
context_size: 32000
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: max_tokens
use_template: max_tokens
default: 1024
min: 1
max: 32000
- name: enable_enhance
label:
zh_Hans: 功能增强
en_US: Enable Enhancement
type: boolean
help:
zh_Hans: 功能增强(如搜索)开关,关闭时将直接由主模型生成回复内容,可以降低响应时延(对于流式输出时的首字时延尤为明显)。但在少数场景里,回复效果可能会下降。
en_US: Allow the model to perform external search to enhance the generation results.
required: false
default: true
pricing:
input: '0.004'
output: '0.012'
unit: '0.001'
currency: RMB
@@ -0,0 +1,38 @@
model: hunyuan-role
label:
zh_Hans: hunyuan-role
en_US: hunyuan-role
model_type: llm
features:
- agent-thought
- tool-call
- multi-tool-call
- stream-tool-call
model_properties:
mode: chat
context_size: 32000
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: max_tokens
use_template: max_tokens
default: 1024
min: 1
max: 32000
- name: enable_enhance
label:
zh_Hans: 功能增强
en_US: Enable Enhancement
type: boolean
help:
zh_Hans: 功能增强(如搜索)开关,关闭时将直接由主模型生成回复内容,可以降低响应时延(对于流式输出时的首字时延尤为明显)。但在少数场景里,回复效果可能会下降。
en_US: Allow the model to perform external search to enhance the generation results.
required: false
default: true
pricing:
input: '0.004'
output: '0.008'
unit: '0.001'
currency: RMB
@@ -0,0 +1,38 @@
model: hunyuan-turbo-latest
label:
zh_Hans: hunyuan-turbo-latest
en_US: hunyuan-turbo-latest
model_type: llm
features:
- agent-thought
- tool-call
- multi-tool-call
- stream-tool-call
model_properties:
mode: chat
context_size: 32000
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: max_tokens
use_template: max_tokens
default: 1024
min: 1
max: 32000
- name: enable_enhance
label:
zh_Hans: 功能增强
en_US: Enable Enhancement
type: boolean
help:
zh_Hans: 功能增强(如搜索)开关,关闭时将直接由主模型生成回复内容,可以降低响应时延(对于流式输出时的首字时延尤为明显)。但在少数场景里,回复效果可能会下降。
en_US: Allow the model to perform external search to enhance the generation results.
required: false
default: true
pricing:
input: '0.015'
output: '0.05'
unit: '0.001'
currency: RMB
@@ -0,0 +1,39 @@
model: hunyuan-turbo-vision
label:
zh_Hans: hunyuan-turbo-vision
en_US: hunyuan-turbo-vision
model_type: llm
features:
- agent-thought
- tool-call
- multi-tool-call
- stream-tool-call
- vision
model_properties:
mode: chat
context_size: 8000
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: max_tokens
use_template: max_tokens
default: 1024
min: 1
max: 8000
- name: enable_enhance
label:
zh_Hans: 功能增强
en_US: Enable Enhancement
type: boolean
help:
zh_Hans: 功能增强(如搜索)开关,关闭时将直接由主模型生成回复内容,可以降低响应时延(对于流式输出时的首字时延尤为明显)。但在少数场景里,回复效果可能会下降。
en_US: Allow the model to perform external search to enhance the generation results.
required: false
default: true
pricing:
input: '0.08'
output: '0.08'
unit: '0.001'
currency: RMB
@@ -0,0 +1,44 @@
model: abab6.5t-chat
label:
en_US: Abab6.5t-Chat
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 8192
parameter_rules:
- name: temperature
use_template: temperature
min: 0.01
max: 1
default: 0.9
- name: top_p
use_template: top_p
min: 0.01
max: 1
default: 0.95
- name: max_tokens
use_template: max_tokens
required: true
default: 3072
min: 1
max: 8192
- name: mask_sensitive_info
type: boolean
default: true
label:
zh_Hans: 隐私保护
en_US: Moderate
help:
zh_Hans: 对输出中易涉及隐私问题的文本信息进行打码,目前包括但不限于邮箱、域名、链接、证件号、家庭住址等,默认true,即开启打码
en_US: Mask the sensitive info of the generated content, such as email/domain/link/address/phone/id..
- name: presence_penalty
use_template: presence_penalty
- name: frequency_penalty
use_template: frequency_penalty
pricing:
input: '0.005'
output: '0.005'
unit: '0.001'
currency: RMB
@@ -0,0 +1,46 @@
model: abab7-chat-preview
label:
en_US: Abab7-chat-preview
model_type: llm
features:
- agent-thought
- tool-call
- stream-tool-call
model_properties:
mode: chat
context_size: 245760
parameter_rules:
- name: temperature
use_template: temperature
min: 0.01
max: 1
default: 0.1
- name: top_p
use_template: top_p
min: 0.01
max: 1
default: 0.95
- name: max_tokens
use_template: max_tokens
required: true
default: 2048
min: 1
max: 245760
- name: mask_sensitive_info
type: boolean
default: true
label:
zh_Hans: 隐私保护
en_US: Moderate
help:
zh_Hans: 对输出中易涉及隐私问题的文本信息进行打码,目前包括但不限于邮箱、域名、链接、证件号、家庭住址等,默认true,即开启打码
en_US: Mask the sensitive info of the generated content, such as email/domain/link/address/phone/id..
- name: presence_penalty
use_template: presence_penalty
- name: frequency_penalty
use_template: frequency_penalty
pricing:
input: '0.1'
output: '0.1'
unit: '0.001'
currency: RMB
@@ -0,0 +1,46 @@
model: minimax-text-01
label:
en_US: Minimax-Text-01
model_type: llm
features:
- agent-thought
- tool-call
- stream-tool-call
model_properties:
mode: chat
context_size: 1000192
parameter_rules:
- name: temperature
use_template: temperature
min: 0.01
max: 1
default: 0.1
- name: top_p
use_template: top_p
min: 0.01
max: 1
default: 0.95
- name: max_tokens
use_template: max_tokens
required: true
default: 2048
min: 1
max: 1000192
- name: mask_sensitive_info
type: boolean
default: true
label:
zh_Hans: 隐私保护
en_US: Moderate
help:
zh_Hans: 对输出中易涉及隐私问题的文本信息进行打码,目前包括但不限于邮箱、域名、链接、证件号、家庭住址等,默认true,即开启打码
en_US: Mask the sensitive info of the generated content, such as email/domain/link/address/phone/id..
- name: presence_penalty
use_template: presence_penalty
- name: frequency_penalty
use_template: frequency_penalty
pricing:
input: '0.001'
output: '0.008'
unit: '0.001'
currency: RMB
@@ -0,0 +1,52 @@
model: pixtral-large-2411
label:
zh_Hans: pixtral-large-2411
en_US: pixtral-large-2411
model_type: llm
features:
- agent-thought
- vision
model_properties:
mode: chat
context_size: 128000
parameter_rules:
- name: temperature
use_template: temperature
default: 0.7
min: 0
max: 1
- name: top_p
use_template: top_p
default: 1
min: 0
max: 1
- name: max_tokens
use_template: max_tokens
default: 8192
min: 1
max: 8192
- name: safe_prompt
default: false
type: boolean
help:
en_US: Whether to inject a safety prompt before all conversations.
zh_Hans: 是否开启提示词审查
label:
en_US: SafePrompt
zh_Hans: 提示词审查
- name: random_seed
type: int
help:
en_US: The seed to use for random sampling. If set, different calls will generate deterministic results.
zh_Hans: 当开启随机数种子以后,你可以通过指定一个固定的种子来使得回答结果更加稳定
label:
en_US: RandomSeed
zh_Hans: 随机数种子
default: 0
min: 0
max: 2147483647
pricing:
input: '0.008'
output: '0.024'
unit: '0.001'
currency: USD
@@ -0,0 +1,52 @@
model: pixtral-large-latest
label:
zh_Hans: pixtral-large-latest
en_US: pixtral-large-latest
model_type: llm
features:
- agent-thought
- vision
model_properties:
mode: chat
context_size: 128000
parameter_rules:
- name: temperature
use_template: temperature
default: 0.7
min: 0
max: 1
- name: top_p
use_template: top_p
default: 1
min: 0
max: 1
- name: max_tokens
use_template: max_tokens
default: 8192
min: 1
max: 8192
- name: safe_prompt
default: false
type: boolean
help:
en_US: Whether to inject a safety prompt before all conversations.
zh_Hans: 是否开启提示词审查
label:
en_US: SafePrompt
zh_Hans: 提示词审查
- name: random_seed
type: int
help:
en_US: The seed to use for random sampling. If set, different calls will generate deterministic results.
zh_Hans: 当开启随机数种子以后,你可以通过指定一个固定的种子来使得回答结果更加稳定
label:
en_US: RandomSeed
zh_Hans: 随机数种子
default: 0
min: 0
max: 2147483647
pricing:
input: '0.008'
output: '0.024'
unit: '0.001'
currency: USD
@@ -0,0 +1,41 @@
model: Sao10K/L3-8B-Stheno-v3.2
label:
zh_Hans: L3 8B Stheno V3.2
en_US: L3 8B Stheno V3.2
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 8192
parameter_rules:
- name: temperature
use_template: temperature
min: 0
max: 2
default: 1
- name: top_p
use_template: top_p
min: 0
max: 1
default: 1
- name: max_tokens
use_template: max_tokens
min: 1
max: 2048
default: 512
- name: frequency_penalty
use_template: frequency_penalty
min: -2
max: 2
default: 0
- name: presence_penalty
use_template: presence_penalty
min: -2
max: 2
default: 0
pricing:
input: '0.0005'
output: '0.0005'
unit: '0.0001'
currency: USD
@@ -0,0 +1,41 @@
model: deepseek/deepseek-r1
label:
zh_Hans: DeepSeek R1
en_US: DeepSeek R1
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 64000
parameter_rules:
- name: temperature
use_template: temperature
min: 0
max: 2
default: 1
- name: top_p
use_template: top_p
min: 0
max: 1
default: 1
- name: max_tokens
use_template: max_tokens
min: 1
max: 2048
default: 512
- name: frequency_penalty
use_template: frequency_penalty
min: -2
max: 2
default: 0
- name: presence_penalty
use_template: presence_penalty
min: -2
max: 2
default: 0
pricing:
input: '0.04'
output: '0.04'
unit: '0.0001'
currency: USD
@@ -0,0 +1,41 @@
model: deepseek/deepseek_v3
label:
zh_Hans: DeepSeek V3
en_US: DeepSeek V3
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 64000
parameter_rules:
- name: temperature
use_template: temperature
min: 0
max: 2
default: 1
- name: top_p
use_template: top_p
min: 0
max: 1
default: 1
- name: max_tokens
use_template: max_tokens
min: 1
max: 2048
default: 512
- name: frequency_penalty
use_template: frequency_penalty
min: -2
max: 2
default: 0
- name: presence_penalty
use_template: presence_penalty
min: -2
max: 2
default: 0
pricing:
input: '0.0089'
output: '0.0089'
unit: '0.0001'
currency: USD
@@ -0,0 +1,41 @@
model: sao10k/l3-8b-lunaris
label:
zh_Hans: "Sao10k L3 8B Lunaris"
en_US: "Sao10k L3 8B Lunaris"
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 8192
parameter_rules:
- name: temperature
use_template: temperature
min: 0
max: 2
default: 1
- name: top_p
use_template: top_p
min: 0
max: 1
default: 1
- name: max_tokens
use_template: max_tokens
min: 1
max: 2048
default: 512
- name: frequency_penalty
use_template: frequency_penalty
min: -2
max: 2
default: 0
- name: presence_penalty
use_template: presence_penalty
min: -2
max: 2
default: 0
pricing:
input: '0.0005'
output: '0.0005'
unit: '0.0001'
currency: USD
@@ -0,0 +1,41 @@
model: sao10k/l31-70b-euryale-v2.2
label:
zh_Hans: L31 70B Euryale V2.2
en_US: L31 70B Euryale V2.2
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 16000
parameter_rules:
- name: temperature
use_template: temperature
min: 0
max: 2
default: 1
- name: top_p
use_template: top_p
min: 0
max: 1
default: 1
- name: max_tokens
use_template: max_tokens
min: 1
max: 2048
default: 512
- name: frequency_penalty
use_template: frequency_penalty
min: -2
max: 2
default: 0
- name: presence_penalty
use_template: presence_penalty
min: -2
max: 2
default: 0
pricing:
input: '0.0148'
output: '0.0148'
unit: '0.0001'
currency: USD
@@ -0,0 +1,41 @@
model: meta-llama/llama-3.1-8b-instruct-bf16
label:
zh_Hans: Llama 3.1 8B Instruct BF16
en_US: Llama 3.1 8B Instruct BF16
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 8192
parameter_rules:
- name: temperature
use_template: temperature
min: 0
max: 2
default: 1
- name: top_p
use_template: top_p
min: 0
max: 1
default: 1
- name: max_tokens
use_template: max_tokens
min: 1
max: 2048
default: 512
- name: frequency_penalty
use_template: frequency_penalty
min: -2
max: 2
default: 0
- name: presence_penalty
use_template: presence_penalty
min: -2
max: 2
default: 0
pricing:
input: '0.0006'
output: '0.0006'
unit: '0.0001'
currency: USD
@@ -0,0 +1,41 @@
model: meta-llama/llama-3.1-8b-instruct-max
label:
zh_Hans: "Llama3.1 8B Instruct Max\t"
en_US: "Llama3.1 8B Instruct Max\t"
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 16384
parameter_rules:
- name: temperature
use_template: temperature
min: 0
max: 2
default: 1
- name: top_p
use_template: top_p
min: 0
max: 1
default: 1
- name: max_tokens
use_template: max_tokens
min: 1
max: 2048
default: 512
- name: frequency_penalty
use_template: frequency_penalty
min: -2
max: 2
default: 0
- name: presence_penalty
use_template: presence_penalty
min: -2
max: 2
default: 0
pricing:
input: '0.0005'
output: '0.0005'
unit: '0.0001'
currency: USD
@@ -0,0 +1,41 @@
model: meta-llama/llama-3.2-11b-vision-instruct
label:
zh_Hans: "Llama 3.2 11B Vision Instruct\t"
en_US: "Llama 3.2 11B Vision Instruct\t"
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 32768
parameter_rules:
- name: temperature
use_template: temperature
min: 0
max: 2
default: 1
- name: top_p
use_template: top_p
min: 0
max: 1
default: 1
- name: max_tokens
use_template: max_tokens
min: 1
max: 2048
default: 512
- name: frequency_penalty
use_template: frequency_penalty
min: -2
max: 2
default: 0
- name: presence_penalty
use_template: presence_penalty
min: -2
max: 2
default: 0
pricing:
input: '0.0006'
output: '0.0006'
unit: '0.0001'
currency: USD
@@ -0,0 +1,41 @@
model: meta-llama/llama-3.2-1b-instruct
label:
zh_Hans: "Llama 3.2 1B Instruct\t"
en_US: "Llama 3.2 1B Instruct\t"
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 131000
parameter_rules:
- name: temperature
use_template: temperature
min: 0
max: 2
default: 1
- name: top_p
use_template: top_p
min: 0
max: 1
default: 1
- name: max_tokens
use_template: max_tokens
min: 1
max: 2048
default: 512
- name: frequency_penalty
use_template: frequency_penalty
min: -2
max: 2
default: 0
- name: presence_penalty
use_template: presence_penalty
min: -2
max: 2
default: 0
pricing:
input: '0.0002'
output: '0.0002'
unit: '0.0001'
currency: USD
@@ -0,0 +1,41 @@
model: meta-llama/llama-3.2-3b-instruct
label:
zh_Hans: Llama 3.2 3B Instruct
en_US: Llama 3.2 3B Instruct
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 32768
parameter_rules:
- name: temperature
use_template: temperature
min: 0
max: 2
default: 1
- name: top_p
use_template: top_p
min: 0
max: 1
default: 1
- name: max_tokens
use_template: max_tokens
min: 1
max: 2048
default: 512
- name: frequency_penalty
use_template: frequency_penalty
min: -2
max: 2
default: 0
- name: presence_penalty
use_template: presence_penalty
min: -2
max: 2
default: 0
pricing:
input: '0.0003'
output: '0.0005'
unit: '0.0001'
currency: USD
@@ -0,0 +1,41 @@
model: meta-llama/llama-3.3-70b-instruct
label:
zh_Hans: Llama 3.3 70B Instruct
en_US: Llama 3.3 70B Instruct
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 131072
parameter_rules:
- name: temperature
use_template: temperature
min: 0
max: 2
default: 1
- name: top_p
use_template: top_p
min: 0
max: 1
default: 1
- name: max_tokens
use_template: max_tokens
min: 1
max: 2048
default: 512
- name: frequency_penalty
use_template: frequency_penalty
min: -2
max: 2
default: 0
- name: presence_penalty
use_template: presence_penalty
min: -2
max: 2
default: 0
pricing:
input: '0.0039'
output: '0.0039'
unit: '0.0001'
currency: USD
@@ -1,69 +0,0 @@
from collections.abc import Generator
from typing import Optional, Union
from core.model_runtime.entities.llm_entities import LLMResult
from core.model_runtime.entities.message_entities import PromptMessage, PromptMessageTool
from core.model_runtime.entities.model_entities import AIModelEntity
from core.model_runtime.model_providers.openai_api_compatible.llm.llm import OAIAPICompatLargeLanguageModel
class NovitaLargeLanguageModel(OAIAPICompatLargeLanguageModel):
def _update_endpoint_url(self, credentials: dict):
credentials["endpoint_url"] = "https://api.novita.ai/v3/openai"
credentials["extra_headers"] = {"X-Novita-Source": "dify.ai"}
return credentials
def _invoke(
self,
model: str,
credentials: dict,
prompt_messages: list[PromptMessage],
model_parameters: dict,
tools: Optional[list[PromptMessageTool]] = None,
stop: Optional[list[str]] = None,
stream: bool = True,
user: Optional[str] = None,
) -> Union[LLMResult, Generator]:
cred_with_endpoint = self._update_endpoint_url(credentials=credentials)
return super()._invoke(model, cred_with_endpoint, prompt_messages, model_parameters, tools, stop, stream, user)
def validate_credentials(self, model: str, credentials: dict) -> None:
cred_with_endpoint = self._update_endpoint_url(credentials=credentials)
self._add_custom_parameters(credentials, model)
return super().validate_credentials(model, cred_with_endpoint)
@classmethod
def _add_custom_parameters(cls, credentials: dict, model: str) -> None:
credentials["mode"] = "chat"
def _generate(
self,
model: str,
credentials: dict,
prompt_messages: list[PromptMessage],
model_parameters: dict,
tools: Optional[list[PromptMessageTool]] = None,
stop: Optional[list[str]] = None,
stream: bool = True,
user: Optional[str] = None,
) -> Union[LLMResult, Generator]:
cred_with_endpoint = self._update_endpoint_url(credentials=credentials)
return super()._generate(
model, cred_with_endpoint, prompt_messages, model_parameters, tools, stop, stream, user
)
def get_customizable_model_schema(self, model: str, credentials: dict) -> AIModelEntity:
cred_with_endpoint = self._update_endpoint_url(credentials=credentials)
return super().get_customizable_model_schema(model, cred_with_endpoint)
def get_num_tokens(
self,
model: str,
credentials: dict,
prompt_messages: list[PromptMessage],
tools: Optional[list[PromptMessageTool]] = None,
) -> int:
cred_with_endpoint = self._update_endpoint_url(credentials=credentials)
return super().get_num_tokens(model, cred_with_endpoint, prompt_messages, tools)
@@ -0,0 +1,41 @@
model: mistralai/mistral-nemo
label:
zh_Hans: Mistral Nemo
en_US: Mistral Nemo
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 131072
parameter_rules:
- name: temperature
use_template: temperature
min: 0
max: 2
default: 1
- name: top_p
use_template: top_p
min: 0
max: 1
default: 1
- name: max_tokens
use_template: max_tokens
min: 1
max: 2048
default: 512
- name: frequency_penalty
use_template: frequency_penalty
min: -2
max: 2
default: 0
- name: presence_penalty
use_template: presence_penalty
min: -2
max: 2
default: 0
pricing:
input: '0.0017'
output: '0.0017'
unit: '0.0001'
currency: USD
@@ -0,0 +1,41 @@
model: openchat/openchat-7b
label:
zh_Hans: OpenChat 7B
en_US: OpenChat 7B
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 4096
parameter_rules:
- name: temperature
use_template: temperature
min: 0
max: 2
default: 1
- name: top_p
use_template: top_p
min: 0
max: 1
default: 1
- name: max_tokens
use_template: max_tokens
min: 1
max: 2048
default: 512
- name: frequency_penalty
use_template: frequency_penalty
min: -2
max: 2
default: 0
- name: presence_penalty
use_template: presence_penalty
min: -2
max: 2
default: 0
pricing:
input: '0.0006'
output: '0.0006'
unit: '0.0001'
currency: USD
@@ -0,0 +1,41 @@
model: qwen/qwen-2-72b-instruct
label:
zh_Hans: Qwen2 72B Instruct
en_US: Qwen2 72B Instruct
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 32768
parameter_rules:
- name: temperature
use_template: temperature
min: 0
max: 2
default: 1
- name: top_p
use_template: top_p
min: 0
max: 1
default: 1
- name: max_tokens
use_template: max_tokens
min: 1
max: 2048
default: 512
- name: frequency_penalty
use_template: frequency_penalty
min: -2
max: 2
default: 0
- name: presence_penalty
use_template: presence_penalty
min: -2
max: 2
default: 0
pricing:
input: '0.0034'
output: '0.0039'
unit: '0.0001'
currency: USD
@@ -0,0 +1,41 @@
model: qwen/qwen-2-7b-instruct
label:
zh_Hans: Qwen 2 7B Instruct
en_US: Qwen 2 7B Instruct
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 32768
parameter_rules:
- name: temperature
use_template: temperature
min: 0
max: 2
default: 1
- name: top_p
use_template: top_p
min: 0
max: 1
default: 1
- name: max_tokens
use_template: max_tokens
min: 1
max: 2048
default: 512
- name: frequency_penalty
use_template: frequency_penalty
min: -2
max: 2
default: 0
- name: presence_penalty
use_template: presence_penalty
min: -2
max: 2
default: 0
pricing:
input: '0.00054'
output: '0.00054'
unit: '0.0001'
currency: USD
@@ -0,0 +1,41 @@
model: qwen/qwen-2-vl-72b-instruct
label:
zh_Hans: Qwen 2 VL 72B Instruct
en_US: Qwen 2 VL 72B Instruct
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 32768
parameter_rules:
- name: temperature
use_template: temperature
min: 0
max: 2
default: 1
- name: top_p
use_template: top_p
min: 0
max: 1
default: 1
- name: max_tokens
use_template: max_tokens
min: 1
max: 2048
default: 512
- name: frequency_penalty
use_template: frequency_penalty
min: -2
max: 2
default: 0
- name: presence_penalty
use_template: presence_penalty
min: -2
max: 2
default: 0
pricing:
input: '0.0045'
output: '0.0045'
unit: '0.0001'
currency: USD
@@ -0,0 +1,41 @@
model: qwen/qwen-2.5-72b-instruct
label:
zh_Hans: Qwen 2.5 72B Instruct
en_US: Qwen 2.5 72B Instruct
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 32000
parameter_rules:
- name: temperature
use_template: temperature
min: 0
max: 2
default: 1
- name: top_p
use_template: top_p
min: 0
max: 1
default: 1
- name: max_tokens
use_template: max_tokens
min: 1
max: 2048
default: 512
- name: frequency_penalty
use_template: frequency_penalty
min: -2
max: 2
default: 0
- name: presence_penalty
use_template: presence_penalty
min: -2
max: 2
default: 0
pricing:
input: '0.0038'
output: '0.004'
unit: '0.0001'
currency: USD
@@ -0,0 +1,35 @@
model: deepseek-ai/deepseek-r1
label:
en_US: deepseek-ai/deepseek-r1
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 128000
parameter_rules:
- name: temperature
use_template: temperature
min: 0
max: 1
default: 0.5
- name: top_p
use_template: top_p
min: 0
max: 1
default: 1
- name: max_tokens
use_template: max_tokens
min: 1
max: 1024
default: 1024
- name: frequency_penalty
use_template: frequency_penalty
min: -2
max: 2
default: 0
- name: presence_penalty
use_template: presence_penalty
min: -2
max: 2
default: 0
@@ -0,0 +1,47 @@
model: gpt-4o-2024-11-20
label:
zh_Hans: gpt-4o-2024-11-20
en_US: gpt-4o-2024-11-20
model_type: llm
features:
- multi-tool-call
- agent-thought
- stream-tool-call
- vision
model_properties:
mode: chat
context_size: 128000
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: presence_penalty
use_template: presence_penalty
- name: frequency_penalty
use_template: frequency_penalty
- name: max_tokens
use_template: max_tokens
default: 16384
min: 1
max: 16384
- name: response_format
label:
zh_Hans: 回复格式
en_US: Response Format
type: string
help:
zh_Hans: 指定模型必须输出的格式
en_US: specifying the format that the model must output
required: false
options:
- text
- json_object
- json_schema
- name: json_schema
use_template: json_schema
pricing:
input: '2.50'
output: '10.00'
unit: '0.000001'
currency: USD
@@ -0,0 +1,44 @@
model: gpt-4o-audio-preview
label:
zh_Hans: gpt-4o-audio-preview
en_US: gpt-4o-audio-preview
model_type: llm
features:
- multi-tool-call
- agent-thought
- stream-tool-call
- audio
model_properties:
mode: chat
context_size: 128000
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: presence_penalty
use_template: presence_penalty
- name: frequency_penalty
use_template: frequency_penalty
- name: max_tokens
use_template: max_tokens
default: 16384
min: 1
max: 16384
- name: response_format
label:
zh_Hans: 回复格式
en_US: Response Format
type: string
help:
zh_Hans: 指定模型必须输出的格式
en_US: specifying the format that the model must output
required: false
options:
- text
- json_object
pricing:
input: '5.00'
output: '15.00'
unit: '0.000001'
currency: USD
@@ -0,0 +1,48 @@
model: o1-2024-12-17
label:
en_US: o1-2024-12-17
model_type: llm
features:
- multi-tool-call
- agent-thought
- stream-tool-call
- vision
model_properties:
mode: chat
context_size: 200000
parameter_rules:
- name: max_tokens
use_template: max_tokens
default: 50000
min: 1
max: 50000
- name: reasoning_effort
label:
zh_Hans: 推理工作
en_US: reasoning_effort
type: string
help:
zh_Hans: 限制推理模型的推理工作
en_US: constrains effort on reasoning for reasoning models
required: false
options:
- low
- medium
- high
- name: response_format
label:
zh_Hans: 回复格式
en_US: response_format
type: string
help:
zh_Hans: 指定模型必须输出的格式
en_US: specifying the format that the model must output
required: false
options:
- text
- json_object
pricing:
input: '15.00'
output: '60.00'
unit: '0.000001'
currency: USD
@@ -0,0 +1,49 @@
model: o1
label:
zh_Hans: o1
en_US: o1
model_type: llm
features:
- multi-tool-call
- agent-thought
- stream-tool-call
- vision
model_properties:
mode: chat
context_size: 200000
parameter_rules:
- name: max_tokens
use_template: max_tokens
default: 50000
min: 1
max: 50000
- name: reasoning_effort
label:
zh_Hans: 推理工作
en_US: reasoning_effort
type: string
help:
zh_Hans: 限制推理模型的推理工作
en_US: constrains effort on reasoning for reasoning models
required: false
options:
- low
- medium
- high
- name: response_format
label:
zh_Hans: 回复格式
en_US: response_format
type: string
help:
zh_Hans: 指定模型必须输出的格式
en_US: specifying the format that the model must output
required: false
options:
- text
- json_object
pricing:
input: '15.00'
output: '60.00'
unit: '0.000001'
currency: USD
@@ -0,0 +1,46 @@
model: o3-mini-2025-01-31
label:
zh_Hans: o3-mini-2025-01-31
en_US: o3-mini-2025-01-31
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 200000
parameter_rules:
- name: max_tokens
use_template: max_tokens
default: 100000
min: 1
max: 100000
- name: reasoning_effort
label:
zh_Hans: 推理工作
en_US: reasoning_effort
type: string
help:
zh_Hans: 限制推理模型的推理工作
en_US: constrains effort on reasoning for reasoning models
required: false
options:
- low
- medium
- high
- name: response_format
label:
zh_Hans: 回复格式
en_US: response_format
type: string
help:
zh_Hans: 指定模型必须输出的格式
en_US: specifying the format that the model must output
required: false
options:
- text
- json_object
pricing:
input: '1.10'
output: '4.40'
unit: '0.000001'
currency: USD
@@ -0,0 +1,46 @@
model: o3-mini
label:
zh_Hans: o3-mini
en_US: o3-mini
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 200000
parameter_rules:
- name: max_tokens
use_template: max_tokens
default: 100000
min: 1
max: 100000
- name: reasoning_effort
label:
zh_Hans: 推理工作
en_US: reasoning_effort
type: string
help:
zh_Hans: 限制推理模型的推理工作
en_US: constrains effort on reasoning for reasoning models
required: false
options:
- low
- medium
- high
- name: response_format
label:
zh_Hans: 回复格式
en_US: response_format
type: string
help:
zh_Hans: 指定模型必须输出的格式
en_US: specifying the format that the model must output
required: false
options:
- text
- json_object
pricing:
input: '1.10'
output: '4.40'
unit: '0.000001'
currency: USD
@@ -0,0 +1,38 @@
model: anthropic/claude-3-5-haiku
label:
en_US: claude-3-5-haiku
model_type: llm
features:
- agent-thought
- tool-call
- stream-tool-call
model_properties:
mode: chat
context_size: 200000
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_tokens
use_template: max_tokens
required: true
default: 8192
min: 1
max: 8192
- name: response_format
use_template: response_format
pricing:
input: "1"
output: "5"
unit: "0.000001"
currency: USD
@@ -0,0 +1,59 @@
model: deepseek/deepseek-r1
label:
en_US: deepseek-r1
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 163840
parameter_rules:
- name: temperature
use_template: temperature
type: float
default: 1
min: 0.0
max: 2.0
help:
zh_Hans: 控制生成结果的多样性和随机性。数值越小,越严谨;数值越大,越发散。
en_US: Control the diversity and randomness of generated results. The smaller the value, the more rigorous it is; the larger the value, the more divergent it is.
- name: max_tokens
use_template: max_tokens
type: int
default: 4096
min: 1
max: 4096
help:
zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
- name: top_p
use_template: top_p
type: float
default: 1
min: 0.01
max: 1.00
help:
zh_Hans: 控制生成结果的随机性。数值越小,随机性越弱;数值越大,随机性越强。一般而言,top_p 和 temperature 两个参数选择一个进行调整即可。
en_US: Control the randomness of generated results. The smaller the value, the weaker the randomness; the larger the value, the stronger the randomness. Generally speaking, you can adjust one of the two parameters top_p and temperature.
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: frequency_penalty
use_template: frequency_penalty
default: 0
min: -2.0
max: 2.0
help:
zh_Hans: 介于 -2.0 和 2.0 之间的数字。如果该值为正,那么新 token 会根据其在已有文本中的出现频率受到相应的惩罚,降低模型重复相同内容的可能性。
en_US: A number between -2.0 and 2.0. If the value is positive, new tokens are penalized based on their frequency of occurrence in existing text, reducing the likelihood that the model will repeat the same content.
pricing:
input: "3"
output: "8"
unit: "0.000001"
currency: USD
@@ -0,0 +1,46 @@
model: meta-llama/llama-3.2-11b-vision-instruct
label:
zh_Hans: llama-3.2-11b-vision-instruct
en_US: llama-3.2-11b-vision-instruct
model_type: llm
features:
- agent-thought
- vision
model_properties:
mode: chat
context_size: 131072
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
- name: max_tokens
use_template: max_tokens
- name: context_length_exceeded_behavior
default: None
label:
zh_Hans: 上下文长度超出行为
en_US: Context Length Exceeded Behavior
help:
zh_Hans: 上下文长度超出行为
en_US: Context Length Exceeded Behavior
type: string
options:
- None
- truncate
- error
- name: response_format
use_template: response_format
pricing:
input: '0.055'
output: '0.055'
unit: '0.000001'
currency: USD
@@ -0,0 +1,45 @@
model: meta-llama/llama-3.2-1b-instruct
label:
zh_Hans: llama-3.2-1b-instruct
en_US: llama-3.2-1b-instruct
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 131072
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
- name: max_tokens
use_template: max_tokens
- name: context_length_exceeded_behavior
default: None
label:
zh_Hans: 上下文长度超出行为
en_US: Context Length Exceeded Behavior
help:
zh_Hans: 上下文长度超出行为
en_US: Context Length Exceeded Behavior
type: string
options:
- None
- truncate
- error
- name: response_format
use_template: response_format
pricing:
input: '0.01'
output: '0.02'
unit: '0.000001'
currency: USD
@@ -0,0 +1,45 @@
model: meta-llama/llama-3.2-3b-instruct
label:
zh_Hans: llama-3.2-3b-instruct
en_US: llama-3.2-3b-instruct
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 131072
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
- name: max_tokens
use_template: max_tokens
- name: context_length_exceeded_behavior
default: None
label:
zh_Hans: 上下文长度超出行为
en_US: Context Length Exceeded Behavior
help:
zh_Hans: 上下文长度超出行为
en_US: Context Length Exceeded Behavior
type: string
options:
- None
- truncate
- error
- name: response_format
use_template: response_format
pricing:
input: '0.03'
output: '0.05'
unit: '0.000001'
currency: USD
@@ -0,0 +1,46 @@
model: meta-llama/llama-3.2-90b-vision-instruct
label:
zh_Hans: llama-3.2-90b-vision-instruct
en_US: llama-3.2-90b-vision-instruct
model_type: llm
features:
- agent-thought
- vision
model_properties:
mode: chat
context_size: 131072
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
- name: max_tokens
use_template: max_tokens
- name: context_length_exceeded_behavior
default: None
label:
zh_Hans: 上下文长度超出行为
en_US: Context Length Exceeded Behavior
help:
zh_Hans: 上下文长度超出行为
en_US: Context Length Exceeded Behavior
type: string
options:
- None
- truncate
- error
- name: response_format
use_template: response_format
pricing:
input: '0.35'
output: '0.4'
unit: '0.000001'
currency: USD
@@ -0,0 +1,49 @@
model: openai/o3-mini-2025-01-31
label:
en_US: o3-mini-2025-01-31
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 200000
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: presence_penalty
use_template: presence_penalty
- name: frequency_penalty
use_template: frequency_penalty
- name: max_tokens
use_template: max_tokens
default: 512
min: 1
max: 100000
- name: response_format
label:
zh_Hans: 回复格式
en_US: response_format
type: string
help:
zh_Hans: 指定模型必须输出的格式
en_US: specifying the format that the model must output
required: false
options:
- text
- json_object
pricing:
input: "1.10"
output: "4.40"
unit: "0.000001"
currency: USD

Some files were not shown because too many files have changed in this diff Show More