diff --git a/ai-provider/model-runtime/loader.go b/ai-provider/model-runtime/loader.go
index d6d7d314..1d6ee446 100644
--- a/ai-provider/model-runtime/loader.go
+++ b/ai-provider/model-runtime/loader.go
@@ -4,8 +4,9 @@ import (
"embed"
"encoding/json"
"fmt"
- "github.com/eolinker/eosc"
"strings"
+
+ "github.com/eolinker/eosc"
)
func init() {
diff --git a/ai-provider/model-runtime/model-providers/baichuan/assets/icon_l_en.svg b/ai-provider/model-runtime/model-providers/baichuan/assets/icon_l_en.svg
new file mode 100644
index 00000000..7ff6b5a6
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/baichuan/assets/icon_l_en.svg
@@ -0,0 +1,19 @@
+
diff --git a/ai-provider/model-runtime/model-providers/baichuan/assets/icon_s_en.svg b/ai-provider/model-runtime/model-providers/baichuan/assets/icon_s_en.svg
new file mode 100644
index 00000000..4ddcd267
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/baichuan/assets/icon_s_en.svg
@@ -0,0 +1,11 @@
+
diff --git a/ai-provider/model-runtime/model-providers/baichuan/baichuan.yaml b/ai-provider/model-runtime/model-providers/baichuan/baichuan.yaml
new file mode 100644
index 00000000..285ac656
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/baichuan/baichuan.yaml
@@ -0,0 +1,30 @@
+provider: baichuan
+label:
+ en_US: Baichuan
+icon_small:
+ en_US: icon_s_en.svg
+icon_large:
+ en_US: icon_l_en.svg
+background: "#FFF6F2"
+help:
+ title:
+ en_US: Get your API Key from BAICHUAN AI
+ zh_Hans: 从百川智能获取您的 API Key
+ url:
+ en_US: https://www.baichuan-ai.com
+supported_model_types:
+ - llm
+ - text-embedding
+configurate_methods:
+ - predefined-model
+provider_credential_schema:
+ credential_form_schemas:
+ - variable: api_key
+ label:
+ en_US: API Key
+ type: secret-input
+ required: true
+ placeholder:
+ zh_Hans: 在此输入您的 API Key
+ en_US: Enter your API Key
+address: https://api.openai.com
diff --git a/ai-provider/model-runtime/model-providers/baichuan/llm/baichuan2-53b.yaml b/ai-provider/model-runtime/model-providers/baichuan/llm/baichuan2-53b.yaml
new file mode 100644
index 00000000..8360dd5f
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/baichuan/llm/baichuan2-53b.yaml
@@ -0,0 +1,46 @@
+model: baichuan2-53b
+label:
+ en_US: Baichuan2-53B
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 32000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ required: false
+ - name: max_tokens
+ use_template: max_tokens
+ required: true
+ default: 1000
+ min: 1
+ max: 4000
+ - name: presence_penalty
+ use_template: presence_penalty
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ default: 1
+ min: 1
+ max: 2
+ - name: with_search_enhance
+ label:
+ zh_Hans: 搜索增强
+ en_US: Search Enhance
+ type: boolean
+ help:
+ zh_Hans: 允许模型自行进行外部搜索,以增强生成结果。
+ en_US: Allow the model to perform external search to enhance the generation results.
+ required: false
+deprecated: true
diff --git a/ai-provider/model-runtime/model-providers/baichuan/llm/baichuan2-turbo-192k.yaml b/ai-provider/model-runtime/model-providers/baichuan/llm/baichuan2-turbo-192k.yaml
new file mode 100644
index 00000000..0ce0265c
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/baichuan/llm/baichuan2-turbo-192k.yaml
@@ -0,0 +1,46 @@
+model: baichuan2-turbo-192k
+label:
+ en_US: Baichuan2-Turbo-192K
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 192000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ required: false
+ - name: max_tokens
+ use_template: max_tokens
+ required: true
+ default: 8000
+ min: 1
+ max: 192000
+ - name: presence_penalty
+ use_template: presence_penalty
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ default: 1
+ min: 1
+ max: 2
+ - name: with_search_enhance
+ label:
+ zh_Hans: 搜索增强
+ en_US: Search Enhance
+ type: boolean
+ help:
+ zh_Hans: 允许模型自行进行外部搜索,以增强生成结果。
+ en_US: Allow the model to perform external search to enhance the generation results.
+ required: false
+deprecated: true
diff --git a/ai-provider/model-runtime/model-providers/baichuan/llm/baichuan2-turbo.yaml b/ai-provider/model-runtime/model-providers/baichuan/llm/baichuan2-turbo.yaml
new file mode 100644
index 00000000..ccb4ee8b
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/baichuan/llm/baichuan2-turbo.yaml
@@ -0,0 +1,41 @@
+model: baichuan2-turbo
+label:
+ en_US: Baichuan2-Turbo
+model_type: llm
+features:
+ - agent-thought
+ - multi-tool-call
+model_properties:
+ mode: chat
+ context_size: 32000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ default: 0.3
+ - name: top_p
+ use_template: top_p
+ default: 0.85
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ min: 0
+ max: 20
+ default: 5
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ required: false
+ - name: max_tokens
+ use_template: max_tokens
+ default: 2048
+ - name: with_search_enhance
+ label:
+ zh_Hans: 搜索增强
+ en_US: Search Enhance
+ type: boolean
+ help:
+ zh_Hans: 允许模型自行进行外部搜索,以增强生成结果。
+ en_US: Allow the model to perform external search to enhance the generation results.
+ required: false
diff --git a/ai-provider/model-runtime/model-providers/baichuan/llm/baichuan3-turbo-128k.yaml b/ai-provider/model-runtime/model-providers/baichuan/llm/baichuan3-turbo-128k.yaml
new file mode 100644
index 00000000..d9cd086e
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/baichuan/llm/baichuan3-turbo-128k.yaml
@@ -0,0 +1,53 @@
+model: baichuan3-turbo-128k
+label:
+ en_US: Baichuan3-Turbo-128k
+model_type: llm
+features:
+ - agent-thought
+ - multi-tool-call
+model_properties:
+ mode: chat
+ context_size: 128000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ default: 0.3
+ - name: top_p
+ use_template: top_p
+ default: 0.85
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ min: 0
+ max: 20
+ default: 5
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ required: false
+ - name: max_tokens
+ use_template: max_tokens
+ default: 2048
+ - name: res_format
+ label:
+ zh_Hans: 回复格式
+ en_US: Response Format
+ type: string
+ help:
+ zh_Hans: 指定模型必须输出的格式
+ en_US: Specifies the format that the model must output.
+ required: false
+ options:
+ - text
+ - json_object
+ - name: with_search_enhance
+ label:
+ zh_Hans: 搜索增强
+ en_US: Search Enhance
+ type: boolean
+ help:
+ zh_Hans: 允许模型自行进行外部搜索,以增强生成结果。
+ en_US: Allow the model to perform external search to enhance the generation results.
+ required: false
diff --git a/ai-provider/model-runtime/model-providers/baichuan/llm/baichuan3-turbo.yaml b/ai-provider/model-runtime/model-providers/baichuan/llm/baichuan3-turbo.yaml
new file mode 100644
index 00000000..58f9b39a
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/baichuan/llm/baichuan3-turbo.yaml
@@ -0,0 +1,53 @@
+model: baichuan3-turbo
+label:
+ en_US: Baichuan3-Turbo
+model_type: llm
+features:
+ - agent-thought
+ - multi-tool-call
+model_properties:
+ mode: chat
+ context_size: 32000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ default: 0.3
+ - name: top_p
+ use_template: top_p
+ default: 0.85
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ min: 0
+ max: 20
+ default: 5
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ required: false
+ - name: max_tokens
+ use_template: max_tokens
+ default: 2048
+ - name: res_format
+ label:
+ zh_Hans: 回复格式
+ en_US: Response Format
+ type: string
+ help:
+ zh_Hans: 指定模型必须输出的格式
+ en_US: Specifies the format that the model must output.
+ required: false
+ options:
+ - text
+ - json_object
+ - name: with_search_enhance
+ label:
+ zh_Hans: 搜索增强
+ en_US: Search Enhance
+ type: boolean
+ help:
+ zh_Hans: 允许模型自行进行外部搜索,以增强生成结果。
+ en_US: Allow the model to perform external search to enhance the generation results.
+ required: false
diff --git a/ai-provider/model-runtime/model-providers/baichuan/llm/baichuan4.yaml b/ai-provider/model-runtime/model-providers/baichuan/llm/baichuan4.yaml
new file mode 100644
index 00000000..6a1135e1
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/baichuan/llm/baichuan4.yaml
@@ -0,0 +1,53 @@
+model: baichuan4
+label:
+ en_US: Baichuan4
+model_type: llm
+features:
+ - agent-thought
+ - multi-tool-call
+model_properties:
+ mode: chat
+ context_size: 32000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ default: 0.3
+ - name: top_p
+ use_template: top_p
+ default: 0.85
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ min: 0
+ max: 20
+ default: 5
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ required: false
+ - name: max_tokens
+ use_template: max_tokens
+ default: 2048
+ - name: res_format
+ label:
+ zh_Hans: 回复格式
+ en_US: Response Format
+ type: string
+ help:
+ zh_Hans: 指定模型必须输出的格式
+ en_US: Specifies the format that the model must output.
+ required: false
+ options:
+ - text
+ - json_object
+ - name: with_search_enhance
+ label:
+ zh_Hans: 搜索增强
+ en_US: Search Enhance
+ type: boolean
+ help:
+ zh_Hans: 允许模型自行进行外部搜索,以增强生成结果。
+ en_US: Allow the model to perform external search to enhance the generation results.
+ required: false
diff --git a/ai-provider/model-runtime/model-providers/bedrock/assets/icon_l_en.svg b/ai-provider/model-runtime/model-providers/bedrock/assets/icon_l_en.svg
new file mode 100644
index 00000000..667db508
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/bedrock/assets/icon_l_en.svg
@@ -0,0 +1,14 @@
+
diff --git a/ai-provider/model-runtime/model-providers/bedrock/assets/icon_s_en.svg b/ai-provider/model-runtime/model-providers/bedrock/assets/icon_s_en.svg
new file mode 100644
index 00000000..6a0235af
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/bedrock/assets/icon_s_en.svg
@@ -0,0 +1,15 @@
+
diff --git a/ai-provider/model-runtime/model-providers/bedrock/bedrock.yaml b/ai-provider/model-runtime/model-providers/bedrock/bedrock.yaml
new file mode 100644
index 00000000..93e3efe9
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/bedrock/bedrock.yaml
@@ -0,0 +1,90 @@
+provider: bedrock
+label:
+ en_US: AWS
+description:
+ en_US: AWS Bedrock's models.
+icon_small:
+ en_US: icon_s_en.svg
+icon_large:
+ en_US: icon_l_en.svg
+background: "#FCFDFF"
+help:
+ title:
+ en_US: Get your Access Key and Secret Access Key from AWS Console
+ url:
+ en_US: https://console.aws.amazon.com/
+supported_model_types:
+ - llm
+ - text-embedding
+configurate_methods:
+ - predefined-model
+provider_credential_schema:
+ credential_form_schemas:
+ - variable: aws_access_key_id
+ required: false
+ label:
+ en_US: Access Key (If not provided, credentials are obtained from the running environment.)
+ zh_Hans: Access Key
+ type: secret-input
+ placeholder:
+ en_US: Enter your Access Key
+ zh_Hans: 在此输入您的 Access Key
+ - variable: aws_secret_access_key
+ required: false
+ label:
+ en_US: Secret Access Key
+ zh_Hans: Secret Access Key
+ type: secret-input
+ placeholder:
+ en_US: Enter your Secret Access Key
+ zh_Hans: 在此输入您的 Secret Access Key
+ - variable: aws_region
+ required: true
+ label:
+ en_US: AWS Region
+ zh_Hans: AWS 地区
+ type: select
+ default: us-east-1
+ options:
+ - value: us-east-1
+ label:
+ en_US: US East (N. Virginia)
+ zh_Hans: 美国东部 (弗吉尼亚北部)
+ - value: us-west-2
+ label:
+ en_US: US West (Oregon)
+ zh_Hans: 美国西部 (俄勒冈州)
+ - value: ap-southeast-1
+ label:
+ en_US: Asia Pacific (Singapore)
+ zh_Hans: 亚太地区 (新加坡)
+ - value: ap-northeast-1
+ label:
+ en_US: Asia Pacific (Tokyo)
+ zh_Hans: 亚太地区 (东京)
+ - value: eu-central-1
+ label:
+ en_US: Europe (Frankfurt)
+ zh_Hans: 欧洲 (法兰克福)
+ - value: eu-west-2
+ label:
+ en_US: Europe (London)
+ zh_Hans: 欧洲西部 (伦敦)
+ - value: us-gov-west-1
+ label:
+ en_US: AWS GovCloud (US-West)
+ zh_Hans: AWS GovCloud (US-West)
+ - value: ap-southeast-2
+ label:
+ en_US: Asia Pacific (Sydney)
+ zh_Hans: 亚太地区 (悉尼)
+ - variable: model_for_validation
+ required: false
+ label:
+ en_US: Available Model Name
+ zh_Hans: 可用模型名称
+ type: text-input
+ placeholder:
+ en_US: A model you have access to (e.g. amazon.titan-text-lite-v1) for validation.
+ zh_Hans: 为了进行验证,请输入一个您可用的模型名称 (例如:amazon.titan-text-lite-v1)
+address: https://api.openai.com
\ No newline at end of file
diff --git a/ai-provider/model-runtime/model-providers/bedrock/llm/ai21.j2-mid-v1.yaml b/ai-provider/model-runtime/model-providers/bedrock/llm/ai21.j2-mid-v1.yaml
new file mode 100644
index 00000000..65dad029
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/bedrock/llm/ai21.j2-mid-v1.yaml
@@ -0,0 +1,47 @@
+model: ai21.j2-mid-v1
+label:
+ en_US: J2 Mid V1
+model_type: llm
+model_properties:
+ mode: completion
+ context_size: 8191
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: topP
+ use_template: top_p
+ - name: maxTokens
+ use_template: max_tokens
+ required: true
+ default: 2048
+ min: 1
+ max: 2048
+ - name: count_penalty
+ label:
+ en_US: Count Penalty
+ required: false
+ type: float
+ default: 0
+ min: 0
+ max: 1
+ - name: presence_penalty
+ label:
+ en_US: Presence Penalty
+ required: false
+ type: float
+ default: 0
+ min: 0
+ max: 5
+ - name: frequency_penalty
+ label:
+ en_US: Frequency Penalty
+ required: false
+ type: float
+ default: 0
+ min: 0
+ max: 500
+pricing:
+ input: '0.00'
+ output: '0.00'
+ unit: '0.000001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/bedrock/llm/ai21.j2-ultra-v1.yaml b/ai-provider/model-runtime/model-providers/bedrock/llm/ai21.j2-ultra-v1.yaml
new file mode 100644
index 00000000..b72f8064
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/bedrock/llm/ai21.j2-ultra-v1.yaml
@@ -0,0 +1,47 @@
+model: ai21.j2-ultra-v1
+label:
+ en_US: J2 Ultra V1
+model_type: llm
+model_properties:
+ mode: completion
+ context_size: 8191
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: topP
+ use_template: top_p
+ - name: maxTokens
+ use_template: max_tokens
+ required: true
+ default: 2048
+ min: 1
+ max: 2048
+ - name: count_penalty
+ label:
+ en_US: Count Penalty
+ required: false
+ type: float
+ default: 0
+ min: 0
+ max: 1
+ - name: presence_penalty
+ label:
+ en_US: Presence Penalty
+ required: false
+ type: float
+ default: 0
+ min: 0
+ max: 5
+ - name: frequency_penalty
+ label:
+ en_US: Frequency Penalty
+ required: false
+ type: float
+ default: 0
+ min: 0
+ max: 500
+pricing:
+ input: '0.00'
+ output: '0.00'
+ unit: '0.000001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/bedrock/llm/amazon.titan-text-express-v1.yaml b/ai-provider/model-runtime/model-providers/bedrock/llm/amazon.titan-text-express-v1.yaml
new file mode 100644
index 00000000..543c16d5
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/bedrock/llm/amazon.titan-text-express-v1.yaml
@@ -0,0 +1,23 @@
+model: amazon.titan-text-express-v1
+label:
+ en_US: Titan Text G1 - Express
+model_type: llm
+model_properties:
+ mode: chat
+ context_size: 8192
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: topP
+ use_template: top_p
+ - name: maxTokenCount
+ use_template: max_tokens
+ required: true
+ default: 2048
+ min: 1
+ max: 8000
+pricing:
+ input: '0.0008'
+ output: '0.0016'
+ unit: '0.001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/bedrock/llm/amazon.titan-text-lite-v1.yaml b/ai-provider/model-runtime/model-providers/bedrock/llm/amazon.titan-text-lite-v1.yaml
new file mode 100644
index 00000000..2c6151c2
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/bedrock/llm/amazon.titan-text-lite-v1.yaml
@@ -0,0 +1,23 @@
+model: amazon.titan-text-lite-v1
+label:
+ en_US: Titan Text G1 - Lite
+model_type: llm
+model_properties:
+ mode: chat
+ context_size: 4096
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: topP
+ use_template: top_p
+ - name: maxTokenCount
+ use_template: max_tokens
+ required: true
+ default: 2048
+ min: 1
+ max: 2048
+pricing:
+ input: '0.0003'
+ output: '0.0004'
+ unit: '0.001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/bedrock/llm/anthropic.claude-3-haiku-v1.yaml b/ai-provider/model-runtime/model-providers/bedrock/llm/anthropic.claude-3-haiku-v1.yaml
new file mode 100644
index 00000000..c2d5eb64
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/bedrock/llm/anthropic.claude-3-haiku-v1.yaml
@@ -0,0 +1,61 @@
+model: anthropic.claude-3-haiku-20240307-v1:0
+label:
+ en_US: Claude 3 Haiku
+model_type: llm
+features:
+ - agent-thought
+ - vision
+ - tool-call
+ - stream-tool-call
+model_properties:
+ mode: chat
+ context_size: 200000
+# docs: https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-anthropic-claude-messages.html
+parameter_rules:
+ - name: max_tokens
+ use_template: max_tokens
+ required: true
+ type: int
+ default: 4096
+ min: 1
+ max: 4096
+ help:
+ zh_Hans: 停止前生成的最大令牌数。请注意,Anthropic Claude 模型可能会在达到 max_tokens 的值之前停止生成令牌。不同的 Anthropic Claude 模型对此参数具有不同的最大值。
+ en_US: The maximum number of tokens to generate before stopping. Note that Anthropic Claude models might stop generating tokens before reaching the value of max_tokens. Different Anthropic Claude models have different maximum values for this parameter.
+ # docs: https://docs.anthropic.com/claude/docs/system-prompts
+ - name: temperature
+ use_template: temperature
+ required: false
+ type: float
+ default: 1
+ min: 0.0
+ max: 1.0
+ help:
+ zh_Hans: 生成内容的随机性。
+ en_US: The amount of randomness injected into the response.
+ - name: top_p
+ required: false
+ type: float
+ default: 0.999
+ min: 0.000
+ max: 1.000
+ help:
+ zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
+ en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
+ - name: top_k
+ required: false
+ type: int
+ default: 0
+ min: 0
+ # NOTE: the AWS docs are incorrect here; the actual max value is 500
+ max: 500
+ help:
+ zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
+ en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.00025'
+ output: '0.00125'
+ unit: '0.001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/bedrock/llm/anthropic.claude-3-opus-v1.yaml b/ai-provider/model-runtime/model-providers/bedrock/llm/anthropic.claude-3-opus-v1.yaml
new file mode 100644
index 00000000..f90fa042
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/bedrock/llm/anthropic.claude-3-opus-v1.yaml
@@ -0,0 +1,61 @@
+model: anthropic.claude-3-opus-20240229-v1:0
+label:
+ en_US: Claude 3 Opus
+model_type: llm
+features:
+ - agent-thought
+ - vision
+ - tool-call
+ - stream-tool-call
+model_properties:
+ mode: chat
+ context_size: 200000
+# docs: https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-anthropic-claude-messages.html
+parameter_rules:
+ - name: max_tokens
+ use_template: max_tokens
+ required: true
+ type: int
+ default: 4096
+ min: 1
+ max: 4096
+ help:
+ zh_Hans: 停止前生成的最大令牌数。请注意,Anthropic Claude 模型可能会在达到 max_tokens 的值之前停止生成令牌。不同的 Anthropic Claude 模型对此参数具有不同的最大值。
+ en_US: The maximum number of tokens to generate before stopping. Note that Anthropic Claude models might stop generating tokens before reaching the value of max_tokens. Different Anthropic Claude models have different maximum values for this parameter.
+ # docs: https://docs.anthropic.com/claude/docs/system-prompts
+ - name: temperature
+ use_template: temperature
+ required: false
+ type: float
+ default: 1
+ min: 0.0
+ max: 1.0
+ help:
+ zh_Hans: 生成内容的随机性。
+ en_US: The amount of randomness injected into the response.
+ - name: top_p
+ required: false
+ type: float
+ default: 0.999
+ min: 0.000
+ max: 1.000
+ help:
+ zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
+ en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
+ - name: top_k
+ required: false
+ type: int
+ default: 0
+ min: 0
+ # NOTE: the AWS docs are incorrect here; the actual max value is 500
+ max: 500
+ help:
+ zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
+ en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.015'
+ output: '0.075'
+ unit: '0.001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/bedrock/llm/anthropic.claude-3-sonnet-v1.5.yaml b/ai-provider/model-runtime/model-providers/bedrock/llm/anthropic.claude-3-sonnet-v1.5.yaml
new file mode 100644
index 00000000..dad0d6b6
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/bedrock/llm/anthropic.claude-3-sonnet-v1.5.yaml
@@ -0,0 +1,60 @@
+model: anthropic.claude-3-5-sonnet-20240620-v1:0
+label:
+ en_US: Claude 3.5 Sonnet
+model_type: llm
+features:
+ - agent-thought
+ - vision
+ - tool-call
+ - stream-tool-call
+model_properties:
+ mode: chat
+ context_size: 200000
+# docs: https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-anthropic-claude-messages.html
+parameter_rules:
+ - name: max_tokens
+ use_template: max_tokens
+ required: true
+ type: int
+ default: 4096
+ min: 1
+ max: 4096
+ help:
+ zh_Hans: 停止前生成的最大令牌数。请注意,Anthropic Claude 模型可能会在达到 max_tokens 的值之前停止生成令牌。不同的 Anthropic Claude 模型对此参数具有不同的最大值。
+ en_US: The maximum number of tokens to generate before stopping. Note that Anthropic Claude models might stop generating tokens before reaching the value of max_tokens. Different Anthropic Claude models have different maximum values for this parameter.
+ - name: temperature
+ use_template: temperature
+ required: false
+ type: float
+ default: 1
+ min: 0.0
+ max: 1.0
+ help:
+ zh_Hans: 生成内容的随机性。
+ en_US: The amount of randomness injected into the response.
+ - name: top_p
+ required: false
+ type: float
+ default: 0.999
+ min: 0.000
+ max: 1.000
+ help:
+ zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
+ en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
+ - name: top_k
+ required: false
+ type: int
+ default: 0
+ min: 0
+ # NOTE: the AWS docs are incorrect here; the actual max value is 500
+ max: 500
+ help:
+ zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
+ en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.003'
+ output: '0.015'
+ unit: '0.001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/bedrock/llm/anthropic.claude-3-sonnet-v1.yaml b/ai-provider/model-runtime/model-providers/bedrock/llm/anthropic.claude-3-sonnet-v1.yaml
new file mode 100644
index 00000000..962def80
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/bedrock/llm/anthropic.claude-3-sonnet-v1.yaml
@@ -0,0 +1,60 @@
+model: anthropic.claude-3-sonnet-20240229-v1:0
+label:
+ en_US: Claude 3 Sonnet
+model_type: llm
+features:
+ - agent-thought
+ - vision
+ - tool-call
+ - stream-tool-call
+model_properties:
+ mode: chat
+ context_size: 200000
+# docs: https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-anthropic-claude-messages.html
+parameter_rules:
+ - name: max_tokens
+ use_template: max_tokens
+ required: true
+ type: int
+ default: 4096
+ min: 1
+ max: 4096
+ help:
+ zh_Hans: 停止前生成的最大令牌数。请注意,Anthropic Claude 模型可能会在达到 max_tokens 的值之前停止生成令牌。不同的 Anthropic Claude 模型对此参数具有不同的最大值。
+ en_US: The maximum number of tokens to generate before stopping. Note that Anthropic Claude models might stop generating tokens before reaching the value of max_tokens. Different Anthropic Claude models have different maximum values for this parameter.
+ - name: temperature
+ use_template: temperature
+ required: false
+ type: float
+ default: 1
+ min: 0.0
+ max: 1.0
+ help:
+ zh_Hans: 生成内容的随机性。
+ en_US: The amount of randomness injected into the response.
+ - name: top_p
+ required: false
+ type: float
+ default: 0.999
+ min: 0.000
+ max: 1.000
+ help:
+ zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
+ en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
+ - name: top_k
+ required: false
+ type: int
+ default: 0
+ min: 0
+ # NOTE: the AWS docs are incorrect here; the actual max value is 500
+ max: 500
+ help:
+ zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
+ en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.003'
+ output: '0.015'
+ unit: '0.001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/bedrock/llm/anthropic.claude-instant-v1.yaml b/ai-provider/model-runtime/model-providers/bedrock/llm/anthropic.claude-instant-v1.yaml
new file mode 100644
index 00000000..8422f079
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/bedrock/llm/anthropic.claude-instant-v1.yaml
@@ -0,0 +1,52 @@
+model: anthropic.claude-instant-v1
+label:
+ en_US: Claude Instant 1
+model_type: llm
+model_properties:
+ mode: chat
+ context_size: 100000
+parameter_rules:
+ - name: max_tokens
+ use_template: max_tokens
+ required: true
+ type: int
+ default: 4096
+ min: 1
+ max: 4096
+ help:
+ zh_Hans: 停止前生成的最大令牌数。请注意,Anthropic Claude 模型可能会在达到 max_tokens 的值之前停止生成令牌。不同的 Anthropic Claude 模型对此参数具有不同的最大值。
+ en_US: The maximum number of tokens to generate before stopping. Note that Anthropic Claude models might stop generating tokens before reaching the value of max_tokens. Different Anthropic Claude models have different maximum values for this parameter.
+ - name: temperature
+ use_template: temperature
+ required: false
+ type: float
+ default: 1
+ min: 0.0
+ max: 1.0
+ help:
+ zh_Hans: 生成内容的随机性。
+ en_US: The amount of randomness injected into the response.
+ - name: top_p
+ required: false
+ type: float
+ default: 0.999
+ min: 0.000
+ max: 1.000
+ help:
+ zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
+ en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
+ - name: top_k
+ required: false
+ type: int
+ default: 0
+ min: 0
+ # NOTE: the AWS docs are incorrect here; the actual max value is 500
+ max: 500
+ help:
+ zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
+ en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
+pricing:
+ input: '0.0008'
+ output: '0.0024'
+ unit: '0.001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/bedrock/llm/anthropic.claude-v1.yaml b/ai-provider/model-runtime/model-providers/bedrock/llm/anthropic.claude-v1.yaml
new file mode 100644
index 00000000..6a714b10
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/bedrock/llm/anthropic.claude-v1.yaml
@@ -0,0 +1,53 @@
+model: anthropic.claude-v1
+label:
+ en_US: Claude 1
+model_type: llm
+model_properties:
+ mode: chat
+ context_size: 100000
+parameter_rules:
+ - name: max_tokens
+ use_template: max_tokens
+ required: true
+ type: int
+ default: 4096
+ min: 1
+ max: 4096
+ help:
+ zh_Hans: 停止前生成的最大令牌数。请注意,Anthropic Claude 模型可能会在达到 max_tokens 的值之前停止生成令牌。不同的 Anthropic Claude 模型对此参数具有不同的最大值。
+ en_US: The maximum number of tokens to generate before stopping. Note that Anthropic Claude models might stop generating tokens before reaching the value of max_tokens. Different Anthropic Claude models have different maximum values for this parameter.
+ - name: temperature
+ use_template: temperature
+ required: false
+ type: float
+ default: 1
+ min: 0.0
+ max: 1.0
+ help:
+ zh_Hans: 生成内容的随机性。
+ en_US: The amount of randomness injected into the response.
+ - name: top_p
+ required: false
+ type: float
+ default: 0.999
+ min: 0.000
+ max: 1.000
+ help:
+ zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
+ en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
+ - name: top_k
+ required: false
+ type: int
+ default: 0
+ min: 0
+ # NOTE: the AWS docs are incorrect here; the actual max value is 500
+ max: 500
+ help:
+ zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
+ en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
+pricing:
+ input: '0.008'
+ output: '0.024'
+ unit: '0.001'
+ currency: USD
+deprecated: true
diff --git a/ai-provider/model-runtime/model-providers/bedrock/llm/anthropic.claude-v2.1.yaml b/ai-provider/model-runtime/model-providers/bedrock/llm/anthropic.claude-v2.1.yaml
new file mode 100644
index 00000000..70294e4a
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/bedrock/llm/anthropic.claude-v2.1.yaml
@@ -0,0 +1,54 @@
+model: anthropic.claude-v2:1
+label:
+ en_US: Claude 2.1
+model_type: llm
+model_properties:
+ mode: chat
+ context_size: 200000
+parameter_rules:
+ - name: max_tokens
+ use_template: max_tokens
+ required: true
+ type: int
+ default: 4096
+ min: 1
+ max: 4096
+ help:
+ zh_Hans: 停止前生成的最大令牌数。请注意,Anthropic Claude 模型可能会在达到 max_tokens 的值之前停止生成令牌。不同的 Anthropic Claude 模型对此参数具有不同的最大值。
+ en_US: The maximum number of tokens to generate before stopping. Note that Anthropic Claude models might stop generating tokens before reaching the value of max_tokens. Different Anthropic Claude models have different maximum values for this parameter.
+ - name: temperature
+ use_template: temperature
+ required: false
+ type: float
+ default: 1
+ min: 0.0
+ max: 1.0
+ help:
+ zh_Hans: 生成内容的随机性。
+ en_US: The amount of randomness injected into the response.
+ - name: top_p
+ required: false
+ type: float
+ default: 0.999
+ min: 0.000
+ max: 1.000
+ help:
+ zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
+ en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
+ - name: top_k
+ required: false
+ type: int
+ default: 0
+ min: 0
+ # NOTE: the AWS docs are incorrect here; the actual max value is 500
+ max: 500
+ help:
+ zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
+ en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.008'
+ output: '0.024'
+ unit: '0.001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/bedrock/llm/anthropic.claude-v2.yaml b/ai-provider/model-runtime/model-providers/bedrock/llm/anthropic.claude-v2.yaml
new file mode 100644
index 00000000..0a8ea61b
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/bedrock/llm/anthropic.claude-v2.yaml
@@ -0,0 +1,54 @@
+model: anthropic.claude-v2
+label:
+ en_US: Claude 2
+model_type: llm
+model_properties:
+ mode: chat
+ context_size: 100000
+parameter_rules:
+ - name: max_tokens
+ use_template: max_tokens
+ required: true
+ type: int
+ default: 4096
+ min: 1
+ max: 4096
+ help:
+ zh_Hans: 停止前生成的最大令牌数。请注意,Anthropic Claude 模型可能会在达到 max_tokens 的值之前停止生成令牌。不同的 Anthropic Claude 模型对此参数具有不同的最大值。
+ en_US: The maximum number of tokens to generate before stopping. Note that Anthropic Claude models might stop generating tokens before reaching the value of max_tokens. Different Anthropic Claude models have different maximum values for this parameter.
+ - name: temperature
+ use_template: temperature
+ required: false
+ type: float
+ default: 1
+ min: 0.0
+ max: 1.0
+ help:
+ zh_Hans: 生成内容的随机性。
+ en_US: The amount of randomness injected into the response.
+ - name: top_p
+ required: false
+ type: float
+ default: 0.999
+ min: 0.000
+ max: 1.000
+ help:
+ zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
+ en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
+ - name: top_k
+ required: false
+ type: int
+ default: 0
+ min: 0
+ # Note: the AWS docs state this limit incorrectly; the actual max value is 500
+ max: 500
+ help:
+ zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
+ en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.008'
+ output: '0.024'
+ unit: '0.001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/bedrock/llm/cohere.command-light-text-v14.yaml b/ai-provider/model-runtime/model-providers/bedrock/llm/cohere.command-light-text-v14.yaml
new file mode 100644
index 00000000..74500095
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/bedrock/llm/cohere.command-light-text-v14.yaml
@@ -0,0 +1,35 @@
+model: cohere.command-light-text-v14
+label:
+ en_US: Command Light Text V14
+model_type: llm
+model_properties:
+ mode: completion
+ context_size: 4096
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: p
+ use_template: top_p
+ - name: k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ required: false
+ min: 0
+ max: 500
+ default: 0
+ - name: max_tokens
+ use_template: max_tokens
+ required: true
+ default: 4096
+ min: 1
+ max: 4096
+pricing:
+ input: '0.0003'
+ output: '0.0006'
+ unit: '0.001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/bedrock/llm/cohere.command-r-plus-v1.0.yaml b/ai-provider/model-runtime/model-providers/bedrock/llm/cohere.command-r-plus-v1.0.yaml
new file mode 100644
index 00000000..3c0bb4e8
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/bedrock/llm/cohere.command-r-plus-v1.0.yaml
@@ -0,0 +1,44 @@
+model: cohere.command-r-plus-v1:0
+label:
+ en_US: Command R+
+model_type: llm
+features:
+ - tool-call
+ #- stream-tool-call
+model_properties:
+ mode: chat
+ context_size: 128000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ max: 5.0
+ - name: p
+ use_template: top_p
+ default: 0.75
+ min: 0.01
+ max: 0.99
+ - name: k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ required: false
+ default: 0
+ min: 0
+ max: 500
+ - name: presence_penalty
+ use_template: presence_penalty
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ - name: max_tokens
+ use_template: max_tokens
+ default: 1024
+ max: 4096
+pricing:
+ input: '3'
+ output: '15'
+ unit: '0.000001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/bedrock/llm/cohere.command-r-v1.0.yaml b/ai-provider/model-runtime/model-providers/bedrock/llm/cohere.command-r-v1.0.yaml
new file mode 100644
index 00000000..a34f4831
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/bedrock/llm/cohere.command-r-v1.0.yaml
@@ -0,0 +1,43 @@
+model: cohere.command-r-v1:0
+label:
+ en_US: Command R
+model_type: llm
+features:
+ - tool-call
+model_properties:
+ mode: chat
+ context_size: 128000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ max: 5.0
+ - name: p
+ use_template: top_p
+ default: 0.75
+ min: 0.01
+ max: 0.99
+ - name: k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ required: false
+ default: 0
+ min: 0
+ max: 500
+ - name: presence_penalty
+ use_template: presence_penalty
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ - name: max_tokens
+ use_template: max_tokens
+ default: 1024
+ max: 4096
+pricing:
+ input: '0.5'
+ output: '1.5'
+ unit: '0.000001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/bedrock/llm/cohere.command-text-v14.yaml b/ai-provider/model-runtime/model-providers/bedrock/llm/cohere.command-text-v14.yaml
new file mode 100644
index 00000000..6aea5be1
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/bedrock/llm/cohere.command-text-v14.yaml
@@ -0,0 +1,32 @@
+model: cohere.command-text-v14
+label:
+ en_US: Command Text V14
+model_type: llm
+model_properties:
+ mode: completion
+ context_size: 4096
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: p
+ use_template: top_p
+ - name: k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ required: false
+ - name: max_tokens
+ use_template: max_tokens
+ required: true
+ default: 4096
+ min: 1
+ max: 4096
+pricing:
+ input: '0.0015'
+ output: '0.0020'
+ unit: '0.001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/bedrock/llm/eu.anthropic.claude-3-haiku-v1.yaml b/ai-provider/model-runtime/model-providers/bedrock/llm/eu.anthropic.claude-3-haiku-v1.yaml
new file mode 100644
index 00000000..24a65ef1
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/bedrock/llm/eu.anthropic.claude-3-haiku-v1.yaml
@@ -0,0 +1,59 @@
+model: eu.anthropic.claude-3-haiku-20240307-v1:0
+label:
+ en_US: Claude 3 Haiku(EU.Cross Region Inference)
+model_type: llm
+features:
+ - agent-thought
+ - vision
+ - tool-call
+ - stream-tool-call
+model_properties:
+ mode: chat
+ context_size: 200000
+# docs: https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-anthropic-claude-messages.html
+parameter_rules:
+ - name: max_tokens
+ use_template: max_tokens
+ required: true
+ type: int
+ default: 4096
+ min: 1
+ max: 4096
+ help:
+ zh_Hans: 停止前生成的最大令牌数。请注意,Anthropic Claude 模型可能会在达到 max_tokens 的值之前停止生成令牌。不同的 Anthropic Claude 模型对此参数具有不同的最大值。
+ en_US: The maximum number of tokens to generate before stopping. Note that Anthropic Claude models might stop generating tokens before reaching the value of max_tokens. Different Anthropic Claude models have different maximum values for this parameter.
+ # docs: https://docs.anthropic.com/claude/docs/system-prompts
+ - name: temperature
+ use_template: temperature
+ required: false
+ type: float
+ default: 1
+ min: 0.0
+ max: 1.0
+ help:
+ zh_Hans: 生成内容的随机性。
+ en_US: The amount of randomness injected into the response.
+ - name: top_p
+ required: false
+ type: float
+ default: 0.999
+ min: 0.000
+ max: 1.000
+ help:
+ zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
+ en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
+ - name: top_k
+ required: false
+ type: int
+ default: 0
+ min: 0
+ # Note: the AWS docs state this limit incorrectly; the actual max value is 500
+ max: 500
+ help:
+ zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
+ en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
+pricing:
+ input: '0.00025'
+ output: '0.00125'
+ unit: '0.001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/bedrock/llm/eu.anthropic.claude-3-sonnet-v1.5.yaml b/ai-provider/model-runtime/model-providers/bedrock/llm/eu.anthropic.claude-3-sonnet-v1.5.yaml
new file mode 100644
index 00000000..e3d25c7d
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/bedrock/llm/eu.anthropic.claude-3-sonnet-v1.5.yaml
@@ -0,0 +1,58 @@
+model: eu.anthropic.claude-3-5-sonnet-20240620-v1:0
+label:
+ en_US: Claude 3.5 Sonnet(EU.Cross Region Inference)
+model_type: llm
+features:
+ - agent-thought
+ - vision
+ - tool-call
+ - stream-tool-call
+model_properties:
+ mode: chat
+ context_size: 200000
+# docs: https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-anthropic-claude-messages.html
+parameter_rules:
+ - name: max_tokens
+ use_template: max_tokens
+ required: true
+ type: int
+ default: 4096
+ min: 1
+ max: 4096
+ help:
+ zh_Hans: 停止前生成的最大令牌数。请注意,Anthropic Claude 模型可能会在达到 max_tokens 的值之前停止生成令牌。不同的 Anthropic Claude 模型对此参数具有不同的最大值。
+ en_US: The maximum number of tokens to generate before stopping. Note that Anthropic Claude models might stop generating tokens before reaching the value of max_tokens. Different Anthropic Claude models have different maximum values for this parameter.
+ - name: temperature
+ use_template: temperature
+ required: false
+ type: float
+ default: 1
+ min: 0.0
+ max: 1.0
+ help:
+ zh_Hans: 生成内容的随机性。
+ en_US: The amount of randomness injected into the response.
+ - name: top_p
+ required: false
+ type: float
+ default: 0.999
+ min: 0.000
+ max: 1.000
+ help:
+ zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
+ en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
+ - name: top_k
+ required: false
+ type: int
+ default: 0
+ min: 0
+ # Note: the AWS docs state this limit incorrectly; the actual max value is 500
+ max: 500
+ help:
+ zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
+ en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
+pricing:
+ input: '0.003'
+ output: '0.015'
+ unit: '0.001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/bedrock/llm/eu.anthropic.claude-3-sonnet-v1.yaml b/ai-provider/model-runtime/model-providers/bedrock/llm/eu.anthropic.claude-3-sonnet-v1.yaml
new file mode 100644
index 00000000..9a06a4ad
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/bedrock/llm/eu.anthropic.claude-3-sonnet-v1.yaml
@@ -0,0 +1,58 @@
+model: eu.anthropic.claude-3-sonnet-20240229-v1:0
+label:
+ en_US: Claude 3 Sonnet(EU.Cross Region Inference)
+model_type: llm
+features:
+ - agent-thought
+ - vision
+ - tool-call
+ - stream-tool-call
+model_properties:
+ mode: chat
+ context_size: 200000
+# docs: https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-anthropic-claude-messages.html
+parameter_rules:
+ - name: max_tokens
+ use_template: max_tokens
+ required: true
+ type: int
+ default: 4096
+ min: 1
+ max: 4096
+ help:
+ zh_Hans: 停止前生成的最大令牌数。请注意,Anthropic Claude 模型可能会在达到 max_tokens 的值之前停止生成令牌。不同的 Anthropic Claude 模型对此参数具有不同的最大值。
+ en_US: The maximum number of tokens to generate before stopping. Note that Anthropic Claude models might stop generating tokens before reaching the value of max_tokens. Different Anthropic Claude models have different maximum values for this parameter.
+ - name: temperature
+ use_template: temperature
+ required: false
+ type: float
+ default: 1
+ min: 0.0
+ max: 1.0
+ help:
+ zh_Hans: 生成内容的随机性。
+ en_US: The amount of randomness injected into the response.
+ - name: top_p
+ required: false
+ type: float
+ default: 0.999
+ min: 0.000
+ max: 1.000
+ help:
+ zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
+ en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
+ - name: top_k
+ required: false
+ type: int
+ default: 0
+ min: 0
+ # Note: the AWS docs state this limit incorrectly; the actual max value is 500
+ max: 500
+ help:
+ zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
+ en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
+pricing:
+ input: '0.003'
+ output: '0.015'
+ unit: '0.001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/bedrock/llm/meta.llama2-13b-chat-v1.yaml b/ai-provider/model-runtime/model-providers/bedrock/llm/meta.llama2-13b-chat-v1.yaml
new file mode 100644
index 00000000..a8d3704c
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/bedrock/llm/meta.llama2-13b-chat-v1.yaml
@@ -0,0 +1,23 @@
+model: meta.llama2-13b-chat-v1
+label:
+ en_US: Llama 2 Chat 13B
+model_type: llm
+model_properties:
+ mode: chat
+ context_size: 4096
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: max_gen_len
+ use_template: max_tokens
+ required: true
+ default: 2048
+ min: 1
+ max: 2048
+pricing:
+ input: '0.00075'
+ output: '0.00100'
+ unit: '0.001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/bedrock/llm/meta.llama2-70b-chat-v1.yaml b/ai-provider/model-runtime/model-providers/bedrock/llm/meta.llama2-70b-chat-v1.yaml
new file mode 100644
index 00000000..77525e63
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/bedrock/llm/meta.llama2-70b-chat-v1.yaml
@@ -0,0 +1,23 @@
+model: meta.llama2-70b-chat-v1
+label:
+ en_US: Llama 2 Chat 70B
+model_type: llm
+model_properties:
+ mode: chat
+ context_size: 4096
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: max_gen_len
+ use_template: max_tokens
+ required: true
+ default: 2048
+ min: 1
+ max: 2048
+pricing:
+ input: '0.00195'
+ output: '0.00256'
+ unit: '0.001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/bedrock/llm/meta.llama3-1-405b-instruct-v1.0.yaml b/ai-provider/model-runtime/model-providers/bedrock/llm/meta.llama3-1-405b-instruct-v1.0.yaml
new file mode 100644
index 00000000..401de65f
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/bedrock/llm/meta.llama3-1-405b-instruct-v1.0.yaml
@@ -0,0 +1,25 @@
+model: meta.llama3-1-405b-instruct-v1:0
+label:
+ en_US: Llama 3.1 405B Instruct
+model_type: llm
+model_properties:
+ mode: completion
+ context_size: 128000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ default: 0.5
+ - name: top_p
+ use_template: top_p
+ default: 0.9
+ - name: max_gen_len
+ use_template: max_tokens
+ required: true
+ default: 512
+ min: 1
+ max: 2048
+pricing:
+ input: '0.00532'
+ output: '0.016'
+ unit: '0.001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/bedrock/llm/meta.llama3-1-70b-instruct-v1.0.yaml b/ai-provider/model-runtime/model-providers/bedrock/llm/meta.llama3-1-70b-instruct-v1.0.yaml
new file mode 100644
index 00000000..10bfa7b1
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/bedrock/llm/meta.llama3-1-70b-instruct-v1.0.yaml
@@ -0,0 +1,25 @@
+model: meta.llama3-1-70b-instruct-v1:0
+label:
+ en_US: Llama 3.1 Instruct 70B
+model_type: llm
+model_properties:
+ mode: completion
+ context_size: 128000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ default: 0.5
+ - name: top_p
+ use_template: top_p
+ default: 0.9
+ - name: max_gen_len
+ use_template: max_tokens
+ required: true
+ default: 512
+ min: 1
+ max: 2048
+pricing:
+ input: '0.00265'
+ output: '0.0035'
+ unit: '0.001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/bedrock/llm/meta.llama3-1-8b-instruct-v1.0.yaml b/ai-provider/model-runtime/model-providers/bedrock/llm/meta.llama3-1-8b-instruct-v1.0.yaml
new file mode 100644
index 00000000..81cd5324
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/bedrock/llm/meta.llama3-1-8b-instruct-v1.0.yaml
@@ -0,0 +1,25 @@
+model: meta.llama3-1-8b-instruct-v1:0
+label:
+ en_US: Llama 3.1 Instruct 8B
+model_type: llm
+model_properties:
+ mode: completion
+ context_size: 128000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ default: 0.5
+ - name: top_p
+ use_template: top_p
+ default: 0.9
+ - name: max_gen_len
+ use_template: max_tokens
+ required: true
+ default: 512
+ min: 1
+ max: 2048
+pricing:
+ input: '0.0003'
+ output: '0.0006'
+ unit: '0.001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/bedrock/llm/meta.llama3-70b-instruct-v1.yaml b/ai-provider/model-runtime/model-providers/bedrock/llm/meta.llama3-70b-instruct-v1.yaml
new file mode 100644
index 00000000..20466269
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/bedrock/llm/meta.llama3-70b-instruct-v1.yaml
@@ -0,0 +1,23 @@
+model: meta.llama3-70b-instruct-v1:0
+label:
+ en_US: Llama 3 Instruct 70B
+model_type: llm
+model_properties:
+ mode: completion
+ context_size: 8192
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: max_gen_len
+ use_template: max_tokens
+ required: true
+ default: 512
+ min: 1
+ max: 2048
+pricing:
+ input: '0.00265'
+ output: '0.0035'
+ unit: '0.00001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/bedrock/llm/meta.llama3-8b-instruct-v1.yaml b/ai-provider/model-runtime/model-providers/bedrock/llm/meta.llama3-8b-instruct-v1.yaml
new file mode 100644
index 00000000..dd4f666a
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/bedrock/llm/meta.llama3-8b-instruct-v1.yaml
@@ -0,0 +1,23 @@
+model: meta.llama3-8b-instruct-v1:0
+label:
+ en_US: Llama 3 Instruct 8B
+model_type: llm
+model_properties:
+ mode: completion
+ context_size: 8192
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: max_gen_len
+ use_template: max_tokens
+ required: true
+ default: 512
+ min: 1
+ max: 2048
+pricing:
+ input: '0.0004'
+ output: '0.0006'
+ unit: '0.0001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/bedrock/llm/mistral.mistral-7b-instruct-v0.2.yaml b/ai-provider/model-runtime/model-providers/bedrock/llm/mistral.mistral-7b-instruct-v0.2.yaml
new file mode 100644
index 00000000..175c14da
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/bedrock/llm/mistral.mistral-7b-instruct-v0.2.yaml
@@ -0,0 +1,39 @@
+model: mistral.mistral-7b-instruct-v0:2
+label:
+ en_US: Mistral 7B Instruct
+model_type: llm
+model_properties:
+ mode: completion
+ context_size: 32000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ required: false
+ default: 0.5
+ - name: top_p
+ use_template: top_p
+ required: false
+ default: 0.9
+ - name: top_k
+ use_template: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ required: false
+ default: 50
+ max: 200
+ - name: max_tokens
+ use_template: max_tokens
+ required: true
+ default: 512
+ min: 1
+ max: 8192
+pricing:
+ input: '0.00015'
+ output: '0.0002'
+ unit: '0.00001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/bedrock/llm/mistral.mistral-large-2402-v1.0.yaml b/ai-provider/model-runtime/model-providers/bedrock/llm/mistral.mistral-large-2402-v1.0.yaml
new file mode 100644
index 00000000..65eed592
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/bedrock/llm/mistral.mistral-large-2402-v1.0.yaml
@@ -0,0 +1,30 @@
+model: mistral.mistral-large-2402-v1:0
+label:
+ en_US: Mistral Large
+model_type: llm
+features:
+ - tool-call
+ - agent-thought
+model_properties:
+ mode: completion
+ context_size: 32000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ required: false
+ default: 0.7
+ - name: top_p
+ use_template: top_p
+ required: false
+ default: 1
+ - name: max_tokens
+ use_template: max_tokens
+ required: true
+ default: 512
+ min: 1
+ max: 4096
+pricing:
+ input: '0.008'
+ output: '0.024'
+ unit: '0.001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/bedrock/llm/mistral.mistral-large-2407-v1.0.yaml b/ai-provider/model-runtime/model-providers/bedrock/llm/mistral.mistral-large-2407-v1.0.yaml
new file mode 100644
index 00000000..19d7843a
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/bedrock/llm/mistral.mistral-large-2407-v1.0.yaml
@@ -0,0 +1,29 @@
+model: mistral.mistral-large-2407-v1:0
+label:
+ en_US: Mistral Large 2 (24.07)
+model_type: llm
+features:
+ - tool-call
+model_properties:
+ mode: completion
+ context_size: 128000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ required: false
+ default: 0.7
+ - name: top_p
+ use_template: top_p
+ required: false
+ default: 1
+ - name: max_tokens
+ use_template: max_tokens
+ required: true
+ default: 512
+ min: 1
+ max: 8192
+pricing:
+ input: '0.003'
+ output: '0.009'
+ unit: '0.001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/bedrock/llm/mistral.mistral-small-2402-v1.0.yaml b/ai-provider/model-runtime/model-providers/bedrock/llm/mistral.mistral-small-2402-v1.0.yaml
new file mode 100644
index 00000000..b97c2a94
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/bedrock/llm/mistral.mistral-small-2402-v1.0.yaml
@@ -0,0 +1,29 @@
+model: mistral.mistral-small-2402-v1:0
+label:
+ en_US: Mistral Small
+model_type: llm
+features:
+ - tool-call
+model_properties:
+ mode: completion
+ context_size: 32000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ required: false
+ default: 0.7
+ - name: top_p
+ use_template: top_p
+ required: false
+ default: 1
+ - name: max_tokens
+ use_template: max_tokens
+ required: true
+ default: 512
+ min: 1
+ max: 4096
+pricing:
+ input: '0.001'
+ output: '0.03'
+ unit: '0.001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/bedrock/llm/mistral.mixtral-8x7b-instruct-v0.1.yaml b/ai-provider/model-runtime/model-providers/bedrock/llm/mistral.mixtral-8x7b-instruct-v0.1.yaml
new file mode 100644
index 00000000..03ec7edd
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/bedrock/llm/mistral.mixtral-8x7b-instruct-v0.1.yaml
@@ -0,0 +1,39 @@
+model: mistral.mixtral-8x7b-instruct-v0:1
+label:
+ en_US: Mixtral 8X7B Instruct
+model_type: llm
+model_properties:
+ mode: completion
+ context_size: 32000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ required: false
+ default: 0.5
+ - name: top_p
+ use_template: top_p
+ required: false
+ default: 0.9
+ - name: top_k
+ use_template: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ required: false
+ default: 50
+ max: 200
+ - name: max_tokens
+ use_template: max_tokens
+ required: true
+ default: 512
+ min: 1
+ max: 8192
+pricing:
+ input: '0.00045'
+ output: '0.0007'
+ unit: '0.00001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/bedrock/llm/us.anthropic.claude-3-haiku-v1.yaml b/ai-provider/model-runtime/model-providers/bedrock/llm/us.anthropic.claude-3-haiku-v1.yaml
new file mode 100644
index 00000000..9247f469
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/bedrock/llm/us.anthropic.claude-3-haiku-v1.yaml
@@ -0,0 +1,59 @@
+model: us.anthropic.claude-3-haiku-20240307-v1:0
+label:
+ en_US: Claude 3 Haiku(US.Cross Region Inference)
+model_type: llm
+features:
+ - agent-thought
+ - vision
+ - tool-call
+ - stream-tool-call
+model_properties:
+ mode: chat
+ context_size: 200000
+# docs: https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-anthropic-claude-messages.html
+parameter_rules:
+ - name: max_tokens
+ use_template: max_tokens
+ required: true
+ type: int
+ default: 4096
+ min: 1
+ max: 4096
+ help:
+ zh_Hans: 停止前生成的最大令牌数。请注意,Anthropic Claude 模型可能会在达到 max_tokens 的值之前停止生成令牌。不同的 Anthropic Claude 模型对此参数具有不同的最大值。
+ en_US: The maximum number of tokens to generate before stopping. Note that Anthropic Claude models might stop generating tokens before reaching the value of max_tokens. Different Anthropic Claude models have different maximum values for this parameter.
+ # docs: https://docs.anthropic.com/claude/docs/system-prompts
+ - name: temperature
+ use_template: temperature
+ required: false
+ type: float
+ default: 1
+ min: 0.0
+ max: 1.0
+ help:
+ zh_Hans: 生成内容的随机性。
+ en_US: The amount of randomness injected into the response.
+ - name: top_p
+ required: false
+ type: float
+ default: 0.999
+ min: 0.000
+ max: 1.000
+ help:
+ zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
+ en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
+ - name: top_k
+ required: false
+ type: int
+ default: 0
+ min: 0
+ # Note: the AWS docs state this limit incorrectly; the actual max value is 500
+ max: 500
+ help:
+ zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
+ en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
+pricing:
+ input: '0.00025'
+ output: '0.00125'
+ unit: '0.001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/bedrock/llm/us.anthropic.claude-3-opus-v1.yaml b/ai-provider/model-runtime/model-providers/bedrock/llm/us.anthropic.claude-3-opus-v1.yaml
new file mode 100644
index 00000000..f9854d51
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/bedrock/llm/us.anthropic.claude-3-opus-v1.yaml
@@ -0,0 +1,59 @@
+model: us.anthropic.claude-3-opus-20240229-v1:0
+label:
+ en_US: Claude 3 Opus(US.Cross Region Inference)
+model_type: llm
+features:
+ - agent-thought
+ - vision
+ - tool-call
+ - stream-tool-call
+model_properties:
+ mode: chat
+ context_size: 200000
+# docs: https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-anthropic-claude-messages.html
+parameter_rules:
+ - name: max_tokens
+ use_template: max_tokens
+ required: true
+ type: int
+ default: 4096
+ min: 1
+ max: 4096
+ help:
+ zh_Hans: 停止前生成的最大令牌数。请注意,Anthropic Claude 模型可能会在达到 max_tokens 的值之前停止生成令牌。不同的 Anthropic Claude 模型对此参数具有不同的最大值。
+ en_US: The maximum number of tokens to generate before stopping. Note that Anthropic Claude models might stop generating tokens before reaching the value of max_tokens. Different Anthropic Claude models have different maximum values for this parameter.
+ # docs: https://docs.anthropic.com/claude/docs/system-prompts
+ - name: temperature
+ use_template: temperature
+ required: false
+ type: float
+ default: 1
+ min: 0.0
+ max: 1.0
+ help:
+ zh_Hans: 生成内容的随机性。
+ en_US: The amount of randomness injected into the response.
+ - name: top_p
+ required: false
+ type: float
+ default: 0.999
+ min: 0.000
+ max: 1.000
+ help:
+ zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
+ en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
+ - name: top_k
+ required: false
+ type: int
+ default: 0
+ min: 0
+ # Note: the AWS docs state this limit incorrectly; the actual max value is 500
+ max: 500
+ help:
+ zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
+ en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
+pricing:
+ input: '0.015'
+ output: '0.075'
+ unit: '0.001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/bedrock/llm/us.anthropic.claude-3-sonnet-v1.5.yaml b/ai-provider/model-runtime/model-providers/bedrock/llm/us.anthropic.claude-3-sonnet-v1.5.yaml
new file mode 100644
index 00000000..fbcab2d5
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/bedrock/llm/us.anthropic.claude-3-sonnet-v1.5.yaml
@@ -0,0 +1,58 @@
+model: us.anthropic.claude-3-5-sonnet-20240620-v1:0
+label:
+ en_US: Claude 3.5 Sonnet(US.Cross Region Inference)
+model_type: llm
+features:
+ - agent-thought
+ - vision
+ - tool-call
+ - stream-tool-call
+model_properties:
+ mode: chat
+ context_size: 200000
+# docs: https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-anthropic-claude-messages.html
+parameter_rules:
+ - name: max_tokens
+ use_template: max_tokens
+ required: true
+ type: int
+ default: 4096
+ min: 1
+ max: 4096
+ help:
+ zh_Hans: 停止前生成的最大令牌数。请注意,Anthropic Claude 模型可能会在达到 max_tokens 的值之前停止生成令牌。不同的 Anthropic Claude 模型对此参数具有不同的最大值。
+ en_US: The maximum number of tokens to generate before stopping. Note that Anthropic Claude models might stop generating tokens before reaching the value of max_tokens. Different Anthropic Claude models have different maximum values for this parameter.
+ - name: temperature
+ use_template: temperature
+ required: false
+ type: float
+ default: 1
+ min: 0.0
+ max: 1.0
+ help:
+ zh_Hans: 生成内容的随机性。
+ en_US: The amount of randomness injected into the response.
+ - name: top_p
+ required: false
+ type: float
+ default: 0.999
+ min: 0.000
+ max: 1.000
+ help:
+ zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
+ en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
+ - name: top_k
+ required: false
+ type: int
+ default: 0
+ min: 0
+ # Note: the AWS docs state this limit incorrectly; the actual max value is 500
+ max: 500
+ help:
+ zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
+ en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
+pricing:
+ input: '0.003'
+ output: '0.015'
+ unit: '0.001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/bedrock/llm/us.anthropic.claude-3-sonnet-v1.yaml b/ai-provider/model-runtime/model-providers/bedrock/llm/us.anthropic.claude-3-sonnet-v1.yaml
new file mode 100644
index 00000000..9f5a1501
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/bedrock/llm/us.anthropic.claude-3-sonnet-v1.yaml
@@ -0,0 +1,58 @@
+model: us.anthropic.claude-3-sonnet-20240229-v1:0
+label:
+ en_US: Claude 3 Sonnet(US.Cross Region Inference)
+model_type: llm
+features:
+ - agent-thought
+ - vision
+ - tool-call
+ - stream-tool-call
+model_properties:
+ mode: chat
+ context_size: 200000
+# docs: https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-anthropic-claude-messages.html
+parameter_rules:
+ - name: max_tokens
+ use_template: max_tokens
+ required: true
+ type: int
+ default: 4096
+ min: 1
+ max: 4096
+ help:
+ zh_Hans: 停止前生成的最大令牌数。请注意,Anthropic Claude 模型可能会在达到 max_tokens 的值之前停止生成令牌。不同的 Anthropic Claude 模型对此参数具有不同的最大值。
+ en_US: The maximum number of tokens to generate before stopping. Note that Anthropic Claude models might stop generating tokens before reaching the value of max_tokens. Different Anthropic Claude models have different maximum values for this parameter.
+ - name: temperature
+ use_template: temperature
+ required: false
+ type: float
+ default: 1
+ min: 0.0
+ max: 1.0
+ help:
+ zh_Hans: 生成内容的随机性。
+ en_US: The amount of randomness injected into the response.
+ - name: top_p
+ required: false
+ type: float
+ default: 0.999
+ min: 0.000
+ max: 1.000
+ help:
+ zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
+ en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
+ - name: top_k
+ required: false
+ type: int
+ default: 0
+ min: 0
+ # NOTE: the AWS docs are wrong about this limit; the actual max value is 500
+ max: 500
+ help:
+ zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
+ en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
+pricing:
+ input: '0.003'
+ output: '0.015'
+ unit: '0.001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/chatglm/assets/icon_l_en.svg b/ai-provider/model-runtime/model-providers/chatglm/assets/icon_l_en.svg
new file mode 100644
index 00000000..a824d43d
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/chatglm/assets/icon_l_en.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/ai-provider/model-runtime/model-providers/chatglm/assets/icon_s_en.svg b/ai-provider/model-runtime/model-providers/chatglm/assets/icon_s_en.svg
new file mode 100644
index 00000000..466b4fce
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/chatglm/assets/icon_s_en.svg
@@ -0,0 +1,9 @@
+
\ No newline at end of file
diff --git a/ai-provider/model-runtime/model-providers/chatglm/chatglm.yaml b/ai-provider/model-runtime/model-providers/chatglm/chatglm.yaml
new file mode 100644
index 00000000..715febcd
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/chatglm/chatglm.yaml
@@ -0,0 +1,29 @@
+provider: chatglm
+label:
+ en_US: ChatGLM
+icon_small:
+ en_US: icon_s_en.svg
+icon_large:
+ en_US: icon_l_en.svg
+background: "#F4F7FF"
+help:
+ title:
+ en_US: Deploy ChatGLM locally
+ zh_Hans: 部署您的本地 ChatGLM
+ url:
+ en_US: https://github.com/THUDM/ChatGLM3
+supported_model_types:
+ - llm
+configurate_methods:
+ - predefined-model
+provider_credential_schema:
+ credential_form_schemas:
+ - variable: api_base
+ label:
+ en_US: API URL
+ type: text-input
+ required: true
+ placeholder:
+ zh_Hans: 在此输入您的 API URL
+ en_US: Enter your API URL
+address: https://api.openai.com
\ No newline at end of file
diff --git a/ai-provider/model-runtime/model-providers/chatglm/llm/chatglm2-6b-32k.yaml b/ai-provider/model-runtime/model-providers/chatglm/llm/chatglm2-6b-32k.yaml
new file mode 100644
index 00000000..d1075d74
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/chatglm/llm/chatglm2-6b-32k.yaml
@@ -0,0 +1,21 @@
+model: chatglm2-6b-32k
+label:
+ en_US: ChatGLM2-6B-32K
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 32000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ required: false
+ - name: max_tokens
+ use_template: max_tokens
+ required: true
+ default: 2000
+ min: 1
+ max: 32000
diff --git a/ai-provider/model-runtime/model-providers/chatglm/llm/chatglm2-6b.yaml b/ai-provider/model-runtime/model-providers/chatglm/llm/chatglm2-6b.yaml
new file mode 100644
index 00000000..e3cfeb90
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/chatglm/llm/chatglm2-6b.yaml
@@ -0,0 +1,21 @@
+model: chatglm2-6b
+label:
+ en_US: ChatGLM2-6B
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 2000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ required: false
+ - name: max_tokens
+ use_template: max_tokens
+ required: true
+ default: 256
+ min: 1
+ max: 2000
diff --git a/ai-provider/model-runtime/model-providers/chatglm/llm/chatglm3-6b-32k.yaml b/ai-provider/model-runtime/model-providers/chatglm/llm/chatglm3-6b-32k.yaml
new file mode 100644
index 00000000..6f347435
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/chatglm/llm/chatglm3-6b-32k.yaml
@@ -0,0 +1,22 @@
+model: chatglm3-6b-32k
+label:
+ en_US: ChatGLM3-6B-32K
+model_type: llm
+features:
+ - tool-call
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 32000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ required: false
+ - name: max_tokens
+ use_template: max_tokens
+ required: true
+ default: 8000
+ min: 1
+ max: 32000
diff --git a/ai-provider/model-runtime/model-providers/chatglm/llm/chatglm3-6b.yaml b/ai-provider/model-runtime/model-providers/chatglm/llm/chatglm3-6b.yaml
new file mode 100644
index 00000000..d6d87e2e
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/chatglm/llm/chatglm3-6b.yaml
@@ -0,0 +1,22 @@
+model: chatglm3-6b
+label:
+ en_US: ChatGLM3-6B
+model_type: llm
+features:
+ - tool-call
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 8000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ required: false
+ - name: max_tokens
+ use_template: max_tokens
+ required: true
+ default: 256
+ min: 1
+ max: 8000
diff --git a/ai-provider/model-runtime/model-providers/cohere/assets/icon_l_en.svg b/ai-provider/model-runtime/model-providers/cohere/assets/icon_l_en.svg
new file mode 100644
index 00000000..9c176896
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/cohere/assets/icon_l_en.svg
@@ -0,0 +1,11 @@
+
diff --git a/ai-provider/model-runtime/model-providers/cohere/assets/icon_s_en.svg b/ai-provider/model-runtime/model-providers/cohere/assets/icon_s_en.svg
new file mode 100644
index 00000000..28fe96d3
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/cohere/assets/icon_s_en.svg
@@ -0,0 +1,16 @@
+
diff --git a/ai-provider/model-runtime/model-providers/cohere/cohere.yaml b/ai-provider/model-runtime/model-providers/cohere/cohere.yaml
new file mode 100644
index 00000000..51c01fb6
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/cohere/cohere.yaml
@@ -0,0 +1,92 @@
+provider: cohere
+label:
+ zh_Hans: Cohere
+ en_US: Cohere
+icon_small:
+ en_US: icon_s_en.svg
+icon_large:
+ en_US: icon_l_en.svg
+background: "#ECE9E3"
+help:
+ title:
+ en_US: Get your API key from cohere
+ zh_Hans: 从 cohere 获取 API Key
+ url:
+ en_US: https://dashboard.cohere.com/api-keys
+supported_model_types:
+ - llm
+ - text-embedding
+ - rerank
+configurate_methods:
+ - predefined-model
+ - customizable-model
+provider_credential_schema:
+ credential_form_schemas:
+ - variable: api_key
+ label:
+ zh_Hans: API Key
+ en_US: API Key
+ type: secret-input
+ required: true
+ placeholder:
+ zh_Hans: 在此输入您的 API Key
+ en_US: Enter your API Key
+ show_on: [ ]
+ - variable: base_url
+ label:
+ zh_Hans: API Base
+ en_US: API Base
+ type: text-input
+ required: false
+ placeholder:
+ zh_Hans: 在此输入您的 API Base,如 https://api.cohere.ai/v1
+ en_US: Enter your API Base, e.g. https://api.cohere.ai/v1
+model_credential_schema:
+ model:
+ label:
+ en_US: Model Name
+ zh_Hans: 模型名称
+ placeholder:
+ en_US: Enter your model name
+ zh_Hans: 输入模型名称
+ credential_form_schemas:
+ - variable: mode
+ show_on:
+ - variable: __model_type
+ value: llm
+ label:
+ en_US: Completion mode
+ type: select
+ required: false
+ default: chat
+ placeholder:
+ zh_Hans: 选择对话类型
+ en_US: Select completion mode
+ options:
+ - value: completion
+ label:
+ en_US: Completion
+ zh_Hans: 补全
+ - value: chat
+ label:
+ en_US: Chat
+ zh_Hans: 对话
+ - variable: api_key
+ label:
+ en_US: API Key
+ type: secret-input
+ required: true
+ placeholder:
+ zh_Hans: 在此输入您的 API Key
+ en_US: Enter your API Key
+ - variable: base_url
+ label:
+ zh_Hans: API Base
+ en_US: API Base
+ type: text-input
+ required: false
+ placeholder:
+ zh_Hans: 在此输入您的 API Base,如 https://api.cohere.ai/v1
+ en_US: Enter your API Base, e.g. https://api.cohere.ai/v1
+
+address: https://api.openai.com
\ No newline at end of file
diff --git a/ai-provider/model-runtime/model-providers/cohere/llm/command-chat.yaml b/ai-provider/model-runtime/model-providers/cohere/llm/command-chat.yaml
new file mode 100644
index 00000000..5f233f35
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/cohere/llm/command-chat.yaml
@@ -0,0 +1,62 @@
+model: command-chat
+label:
+ zh_Hans: command-chat
+ en_US: command-chat
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 4096
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ max: 5.0
+ - name: p
+ use_template: top_p
+ default: 0.75
+ min: 0.01
+ max: 0.99
+ - name: k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ required: false
+ default: 0
+ min: 0
+ max: 500
+ - name: max_tokens
+ use_template: max_tokens
+ default: 1024
+ max: 4096
+ - name: preamble_override
+ label:
+ zh_Hans: 前导文本
+ en_US: Preamble
+ type: string
+ help:
+ zh_Hans: 当指定时,将使用提供的前导文本替换默认的 Cohere 前导文本。
+ en_US: When specified, the default Cohere preamble will be replaced with the provided one.
+ required: false
+ - name: prompt_truncation
+ label:
+ zh_Hans: 提示截断
+ en_US: Prompt Truncation
+ type: string
+ help:
+ zh_Hans: 指定如何构造 Prompt。当 prompt_truncation 设置为 "AUTO" 时,将会丢弃一些来自聊天记录的元素,以尝试构造一个符合模型上下文长度限制的 Prompt。
+ en_US: Dictates how the prompt will be constructed. With prompt_truncation set to "AUTO", some elements from chat histories will be dropped in an attempt to construct a prompt that fits within the model's context length limit.
+ required: true
+ default: 'AUTO'
+ options:
+ - 'AUTO'
+ - 'OFF'
+pricing:
+ input: '1.0'
+ output: '2.0'
+ unit: '0.000001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/cohere/llm/command-light-chat.yaml b/ai-provider/model-runtime/model-providers/cohere/llm/command-light-chat.yaml
new file mode 100644
index 00000000..b5f00487
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/cohere/llm/command-light-chat.yaml
@@ -0,0 +1,62 @@
+model: command-light-chat
+label:
+ zh_Hans: command-light-chat
+ en_US: command-light-chat
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 4096
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ max: 5.0
+ - name: p
+ use_template: top_p
+ default: 0.75
+ min: 0.01
+ max: 0.99
+ - name: k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ required: false
+ default: 0
+ min: 0
+ max: 500
+ - name: max_tokens
+ use_template: max_tokens
+ default: 1024
+ max: 4096
+ - name: preamble_override
+ label:
+ zh_Hans: 前导文本
+ en_US: Preamble
+ type: string
+ help:
+ zh_Hans: 当指定时,将使用提供的前导文本替换默认的 Cohere 前导文本。
+ en_US: When specified, the default Cohere preamble will be replaced with the provided one.
+ required: false
+ - name: prompt_truncation
+ label:
+ zh_Hans: 提示截断
+ en_US: Prompt Truncation
+ type: string
+ help:
+ zh_Hans: 指定如何构造 Prompt。当 prompt_truncation 设置为 "AUTO" 时,将会丢弃一些来自聊天记录的元素,以尝试构造一个符合模型上下文长度限制的 Prompt。
+ en_US: Dictates how the prompt will be constructed. With prompt_truncation set to "AUTO", some elements from chat histories will be dropped in an attempt to construct a prompt that fits within the model's context length limit.
+ required: true
+ default: 'AUTO'
+ options:
+ - 'AUTO'
+ - 'OFF'
+pricing:
+ input: '0.3'
+ output: '0.6'
+ unit: '0.000001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/cohere/llm/command-light-nightly-chat.yaml b/ai-provider/model-runtime/model-providers/cohere/llm/command-light-nightly-chat.yaml
new file mode 100644
index 00000000..1c96b240
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/cohere/llm/command-light-nightly-chat.yaml
@@ -0,0 +1,62 @@
+model: command-light-nightly-chat
+label:
+ zh_Hans: command-light-nightly-chat
+ en_US: command-light-nightly-chat
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 4096
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ max: 5.0
+ - name: p
+ use_template: top_p
+ default: 0.75
+ min: 0.01
+ max: 0.99
+ - name: k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ required: false
+ default: 0
+ min: 0
+ max: 500
+ - name: max_tokens
+ use_template: max_tokens
+ default: 1024
+ max: 4096
+ - name: preamble_override
+ label:
+ zh_Hans: 前导文本
+ en_US: Preamble
+ type: string
+ help:
+ zh_Hans: 当指定时,将使用提供的前导文本替换默认的 Cohere 前导文本。
+ en_US: When specified, the default Cohere preamble will be replaced with the provided one.
+ required: false
+ - name: prompt_truncation
+ label:
+ zh_Hans: 提示截断
+ en_US: Prompt Truncation
+ type: string
+ help:
+ zh_Hans: 指定如何构造 Prompt。当 prompt_truncation 设置为 "AUTO" 时,将会丢弃一些来自聊天记录的元素,以尝试构造一个符合模型上下文长度限制的 Prompt。
+ en_US: Dictates how the prompt will be constructed. With prompt_truncation set to "AUTO", some elements from chat histories will be dropped in an attempt to construct a prompt that fits within the model's context length limit.
+ required: true
+ default: 'AUTO'
+ options:
+ - 'AUTO'
+ - 'OFF'
+pricing:
+ input: '0.3'
+ output: '0.6'
+ unit: '0.000001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/cohere/llm/command-light-nightly.yaml b/ai-provider/model-runtime/model-providers/cohere/llm/command-light-nightly.yaml
new file mode 100644
index 00000000..4616f766
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/cohere/llm/command-light-nightly.yaml
@@ -0,0 +1,44 @@
+model: command-light-nightly
+label:
+ zh_Hans: command-light-nightly
+ en_US: command-light-nightly
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: completion
+ context_size: 4096
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ max: 5.0
+ - name: p
+ use_template: top_p
+ default: 0.75
+ min: 0.01
+ max: 0.99
+ - name: k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ required: false
+ default: 0
+ min: 0
+ max: 500
+ - name: presence_penalty
+ use_template: presence_penalty
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ - name: max_tokens
+ use_template: max_tokens
+ default: 1024
+ max: 4096
+pricing:
+ input: '0.3'
+ output: '0.6'
+ unit: '0.000001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/cohere/llm/command-light.yaml b/ai-provider/model-runtime/model-providers/cohere/llm/command-light.yaml
new file mode 100644
index 00000000..161756b3
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/cohere/llm/command-light.yaml
@@ -0,0 +1,44 @@
+model: command-light
+label:
+ zh_Hans: command-light
+ en_US: command-light
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: completion
+ context_size: 4096
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ max: 5.0
+ - name: p
+ use_template: top_p
+ default: 0.75
+ min: 0.01
+ max: 0.99
+ - name: k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ required: false
+ default: 0
+ min: 0
+ max: 500
+ - name: presence_penalty
+ use_template: presence_penalty
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ - name: max_tokens
+ use_template: max_tokens
+ default: 1024
+ max: 4096
+pricing:
+ input: '0.3'
+ output: '0.6'
+ unit: '0.000001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/cohere/llm/command-nightly-chat.yaml b/ai-provider/model-runtime/model-providers/cohere/llm/command-nightly-chat.yaml
new file mode 100644
index 00000000..739e09e7
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/cohere/llm/command-nightly-chat.yaml
@@ -0,0 +1,62 @@
+model: command-nightly-chat
+label:
+ zh_Hans: command-nightly-chat
+ en_US: command-nightly-chat
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 4096
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ max: 5.0
+ - name: p
+ use_template: top_p
+ default: 0.75
+ min: 0.01
+ max: 0.99
+ - name: k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ required: false
+ default: 0
+ min: 0
+ max: 500
+ - name: max_tokens
+ use_template: max_tokens
+ default: 1024
+ max: 4096
+ - name: preamble_override
+ label:
+ zh_Hans: 前导文本
+ en_US: Preamble
+ type: string
+ help:
+ zh_Hans: 当指定时,将使用提供的前导文本替换默认的 Cohere 前导文本。
+ en_US: When specified, the default Cohere preamble will be replaced with the provided one.
+ required: false
+ - name: prompt_truncation
+ label:
+ zh_Hans: 提示截断
+ en_US: Prompt Truncation
+ type: string
+ help:
+ zh_Hans: 指定如何构造 Prompt。当 prompt_truncation 设置为 "AUTO" 时,将会丢弃一些来自聊天记录的元素,以尝试构造一个符合模型上下文长度限制的 Prompt。
+ en_US: Dictates how the prompt will be constructed. With prompt_truncation set to "AUTO", some elements from chat histories will be dropped in an attempt to construct a prompt that fits within the model's context length limit.
+ required: true
+ default: 'AUTO'
+ options:
+ - 'AUTO'
+ - 'OFF'
+pricing:
+ input: '1.0'
+ output: '2.0'
+ unit: '0.000001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/cohere/llm/command-nightly.yaml b/ai-provider/model-runtime/model-providers/cohere/llm/command-nightly.yaml
new file mode 100644
index 00000000..1e025e40
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/cohere/llm/command-nightly.yaml
@@ -0,0 +1,44 @@
+model: command-nightly
+label:
+ zh_Hans: command-nightly
+ en_US: command-nightly
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: completion
+ context_size: 4096
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ max: 5.0
+ - name: p
+ use_template: top_p
+ default: 0.75
+ min: 0.01
+ max: 0.99
+ - name: k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ required: false
+ default: 0
+ min: 0
+ max: 500
+ - name: presence_penalty
+ use_template: presence_penalty
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ - name: max_tokens
+ use_template: max_tokens
+ default: 1024
+ max: 4096
+pricing:
+ input: '1.0'
+ output: '2.0'
+ unit: '0.000001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/cohere/llm/command-r-plus.yaml b/ai-provider/model-runtime/model-providers/cohere/llm/command-r-plus.yaml
new file mode 100644
index 00000000..617e6853
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/cohere/llm/command-r-plus.yaml
@@ -0,0 +1,45 @@
+model: command-r-plus
+label:
+ en_US: command-r-plus
+model_type: llm
+features:
+ - multi-tool-call
+ - agent-thought
+ - stream-tool-call
+model_properties:
+ mode: chat
+ context_size: 128000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ max: 5.0
+ - name: p
+ use_template: top_p
+ default: 0.75
+ min: 0.01
+ max: 0.99
+ - name: k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ required: false
+ default: 0
+ min: 0
+ max: 500
+ - name: presence_penalty
+ use_template: presence_penalty
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ - name: max_tokens
+ use_template: max_tokens
+ default: 1024
+ max: 4096
+pricing:
+ input: '3'
+ output: '15'
+ unit: '0.000001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/cohere/llm/command-r.yaml b/ai-provider/model-runtime/model-providers/cohere/llm/command-r.yaml
new file mode 100644
index 00000000..c3668044
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/cohere/llm/command-r.yaml
@@ -0,0 +1,45 @@
+model: command-r
+label:
+ en_US: command-r
+model_type: llm
+features:
+ - multi-tool-call
+ - agent-thought
+ - stream-tool-call
+model_properties:
+ mode: chat
+ context_size: 128000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ max: 5.0
+ - name: p
+ use_template: top_p
+ default: 0.75
+ min: 0.01
+ max: 0.99
+ - name: k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ required: false
+ default: 0
+ min: 0
+ max: 500
+ - name: presence_penalty
+ use_template: presence_penalty
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ - name: max_tokens
+ use_template: max_tokens
+ default: 1024
+ max: 4096
+pricing:
+ input: '0.5'
+ output: '1.5'
+ unit: '0.000001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/cohere/llm/command.yaml b/ai-provider/model-runtime/model-providers/cohere/llm/command.yaml
new file mode 100644
index 00000000..0cac7c35
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/cohere/llm/command.yaml
@@ -0,0 +1,44 @@
+model: command
+label:
+ zh_Hans: command
+ en_US: command
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: completion
+ context_size: 4096
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ max: 5.0
+ - name: p
+ use_template: top_p
+ default: 0.75
+ min: 0.01
+ max: 0.99
+ - name: k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ required: false
+ default: 0
+ min: 0
+ max: 500
+ - name: presence_penalty
+ use_template: presence_penalty
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ - name: max_tokens
+ use_template: max_tokens
+ default: 1024
+ max: 4096
+pricing:
+ input: '1.0'
+ output: '2.0'
+ unit: '0.000001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/deepseek/assets/icon_l_en.svg b/ai-provider/model-runtime/model-providers/deepseek/assets/icon_l_en.svg
new file mode 100644
index 00000000..42549440
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/deepseek/assets/icon_l_en.svg
@@ -0,0 +1,22 @@
+
diff --git a/ai-provider/model-runtime/model-providers/deepseek/assets/icon_s_en.svg b/ai-provider/model-runtime/model-providers/deepseek/assets/icon_s_en.svg
new file mode 100644
index 00000000..aa854a75
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/deepseek/assets/icon_s_en.svg
@@ -0,0 +1,3 @@
+
diff --git a/ai-provider/model-runtime/model-providers/deepseek/deepseek.yaml b/ai-provider/model-runtime/model-providers/deepseek/deepseek.yaml
new file mode 100644
index 00000000..b77f2c7b
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/deepseek/deepseek.yaml
@@ -0,0 +1,42 @@
+provider: deepseek
+label:
+ en_US: deepseek
+ zh_Hans: 深度求索
+description:
+ en_US: Models provided by deepseek, such as deepseek-chat and deepseek-coder.
+ zh_Hans: 深度求索提供的模型,例如 deepseek-chat、deepseek-coder 。
+icon_small:
+ en_US: icon_s_en.svg
+icon_large:
+ en_US: icon_l_en.svg
+background: "#c0cdff"
+help:
+ title:
+ en_US: Get your API Key from deepseek
+ zh_Hans: 从深度求索获取 API Key
+ url:
+ en_US: https://platform.deepseek.com/api_keys
+supported_model_types:
+ - llm
+configurate_methods:
+ - predefined-model
+provider_credential_schema:
+ credential_form_schemas:
+ - variable: api_key
+ label:
+ en_US: API Key
+ type: secret-input
+ required: true
+ placeholder:
+ zh_Hans: 在此输入您的 API Key
+ en_US: Enter your API Key
+ - variable: endpoint_url
+ label:
+ zh_Hans: 自定义 API endpoint 地址
+ en_US: Custom API endpoint URL
+ type: text-input
+ required: false
+ placeholder:
+ zh_Hans: Base URL, e.g. https://api.deepseek.com/v1 or https://api.deepseek.com
+ en_US: Base URL, e.g. https://api.deepseek.com/v1 or https://api.deepseek.com
+address: https://api.openai.com
\ No newline at end of file
diff --git a/ai-provider/model-runtime/model-providers/deepseek/llm/deepseek-chat.yaml b/ai-provider/model-runtime/model-providers/deepseek/llm/deepseek-chat.yaml
new file mode 100644
index 00000000..4973ac8a
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/deepseek/llm/deepseek-chat.yaml
@@ -0,0 +1,78 @@
+model: deepseek-chat
+label:
+ zh_Hans: deepseek-chat
+ en_US: deepseek-chat
+model_type: llm
+features:
+ - agent-thought
+ - multi-tool-call
+ - stream-tool-call
+model_properties:
+ mode: chat
+ context_size: 128000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 1
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 控制生成结果的多样性和随机性。数值越小,越严谨;数值越大,越发散。
+ en_US: Control the diversity and randomness of generated results. The smaller the value, the more rigorous it is; the larger the value, the more divergent it is.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 4096
+ min: 1
+ max: 8192
+ help:
+ zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+ en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 1
+ min: 0.01
+ max: 1.00
+ help:
+ zh_Hans: 控制生成结果的随机性。数值越小,随机性越弱;数值越大,随机性越强。一般而言,top_p 和 temperature 两个参数选择一个进行调整即可。
+ en_US: Control the randomness of generated results. The smaller the value, the weaker the randomness; the larger the value, the stronger the randomness. Generally speaking, you can adjust one of the two parameters top_p and temperature.
+ - name: logprobs
+ help:
+ zh_Hans: 是否返回所输出 token 的对数概率。如果为 true,则在 message 的 content 中返回每个输出 token 的对数概率。
+ en_US: Whether to return the log probability of the output token. If true, returns the log probability of each output token in the content of the message.
+ type: boolean
+ - name: top_logprobs
+ type: int
+ default: 0
+ min: 0
+ max: 20
+ help:
+ zh_Hans: 一个介于 0 到 20 之间的整数 N,指定每个输出位置返回输出概率 top N 的 token,且返回这些 token 的对数概率。指定此参数时,logprobs 必须为 true。
+ en_US: An integer N between 0 and 20, specifying that each output position returns the top N tokens with output probability, and returns the logarithmic probability of these tokens. When specifying this parameter, logprobs must be true.
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ default: 0
+ min: -2.0
+ max: 2.0
+ help:
+ zh_Hans: 介于 -2.0 和 2.0 之间的数字。如果该值为正,那么新 token 会根据其在已有文本中的出现频率受到相应的惩罚,降低模型重复相同内容的可能性。
+ en_US: A number between -2.0 and 2.0. If the value is positive, new tokens are penalized based on their frequency of occurrence in existing text, reducing the likelihood that the model will repeat the same content.
+ - name: response_format
+ label:
+ zh_Hans: 回复格式
+ en_US: Response Format
+ type: string
+ help:
+ zh_Hans: 指定模型必须输出的格式
+ en_US: Specifies the format that the model must output.
+ required: false
+ options:
+ - text
+ - json_object
+pricing:
+ input: '1'
+ output: '2'
+ unit: '0.000001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/deepseek/llm/deepseek-coder.yaml b/ai-provider/model-runtime/model-providers/deepseek/llm/deepseek-coder.yaml
new file mode 100644
index 00000000..caafeada
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/deepseek/llm/deepseek-coder.yaml
@@ -0,0 +1,28 @@
+model: deepseek-coder
+label:
+ zh_Hans: deepseek-coder
+ en_US: deepseek-coder
+model_type: llm
+features:
+ - agent-thought
+ - multi-tool-call
+ - stream-tool-call
+model_properties:
+ mode: chat
+ context_size: 128000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ min: 0
+ max: 1
+ default: 0.5
+ - name: top_p
+ use_template: top_p
+ min: 0
+ max: 1
+ default: 1
+ - name: max_tokens
+ use_template: max_tokens
+ min: 1
+ max: 4096
+ default: 1024
diff --git a/ai-provider/model-runtime/model-providers/fireworks/assets/icon_l_en.svg b/ai-provider/model-runtime/model-providers/fireworks/assets/icon_l_en.svg
new file mode 100644
index 00000000..582605cc
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/fireworks/assets/icon_l_en.svg
@@ -0,0 +1,3 @@
+
\ No newline at end of file
diff --git a/ai-provider/model-runtime/model-providers/fireworks/assets/icon_s_en.svg b/ai-provider/model-runtime/model-providers/fireworks/assets/icon_s_en.svg
new file mode 100644
index 00000000..86eeba66
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/fireworks/assets/icon_s_en.svg
@@ -0,0 +1,5 @@
+
diff --git a/ai-provider/model-runtime/model-providers/fireworks/fireworks.yaml b/ai-provider/model-runtime/model-providers/fireworks/fireworks.yaml
new file mode 100644
index 00000000..fcfba8e4
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/fireworks/fireworks.yaml
@@ -0,0 +1,31 @@
+provider: fireworks
+label:
+ zh_Hans: Fireworks AI
+ en_US: Fireworks AI
+icon_small:
+ en_US: icon_s_en.svg
+icon_large:
+ en_US: icon_l_en.svg
+background: "#FCFDFF"
+help:
+ title:
+ en_US: Get your API Key from Fireworks AI
+ zh_Hans: 从 Fireworks AI 获取 API Key
+ url:
+ en_US: https://fireworks.ai/account/api-keys
+supported_model_types:
+ - llm
+ - text-embedding
+configurate_methods:
+ - predefined-model
+provider_credential_schema:
+ credential_form_schemas:
+ - variable: fireworks_api_key
+ label:
+ en_US: API Key
+ type: secret-input
+ required: true
+ placeholder:
+ zh_Hans: 在此输入您的 API Key
+ en_US: Enter your API Key
+address: https://api.openai.com
\ No newline at end of file
diff --git a/ai-provider/model-runtime/model-providers/fireworks/llm/firefunction-v1.yaml b/ai-provider/model-runtime/model-providers/fireworks/llm/firefunction-v1.yaml
new file mode 100644
index 00000000..f6bac128
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/fireworks/llm/firefunction-v1.yaml
@@ -0,0 +1,46 @@
+model: accounts/fireworks/models/firefunction-v1
+label:
+ zh_Hans: Firefunction V1
+ en_US: Firefunction V1
+model_type: llm
+features:
+ - agent-thought
+ - tool-call
+model_properties:
+ mode: chat
+ context_size: 32768
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ - name: max_tokens
+ use_template: max_tokens
+ - name: context_length_exceeded_behavior
+ default: None
+ label:
+ zh_Hans: 上下文长度超出行为
+ en_US: Context Length Exceeded Behavior
+ help:
+ zh_Hans: 上下文长度超出行为
+ en_US: Context Length Exceeded Behavior
+ type: string
+ options:
+ - None
+ - truncate
+ - error
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.5'
+ output: '0.5'
+ unit: '0.000001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/fireworks/llm/firefunction-v2.yaml b/ai-provider/model-runtime/model-providers/fireworks/llm/firefunction-v2.yaml
new file mode 100644
index 00000000..2979cb46
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/fireworks/llm/firefunction-v2.yaml
@@ -0,0 +1,46 @@
+model: accounts/fireworks/models/firefunction-v2
+label:
+ zh_Hans: Firefunction V2
+ en_US: Firefunction V2
+model_type: llm
+features:
+ - agent-thought
+ - tool-call
+model_properties:
+ mode: chat
+ context_size: 8192
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ - name: max_tokens
+ use_template: max_tokens
+ - name: context_length_exceeded_behavior
+ default: None
+ label:
+ zh_Hans: 上下文长度超出行为
+ en_US: Context Length Exceeded Behavior
+ help:
+ zh_Hans: 上下文长度超出行为
+ en_US: Context Length Exceeded Behavior
+ type: string
+ options:
+ - None
+ - truncate
+ - error
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.9'
+ output: '0.9'
+ unit: '0.000001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/fireworks/llm/gemma2-9b-it.yaml b/ai-provider/model-runtime/model-providers/fireworks/llm/gemma2-9b-it.yaml
new file mode 100644
index 00000000..ee41a7e2
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/fireworks/llm/gemma2-9b-it.yaml
@@ -0,0 +1,45 @@
+model: accounts/fireworks/models/gemma2-9b-it
+label:
+ zh_Hans: Gemma2 9B Instruct
+ en_US: Gemma2 9B Instruct
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 8192
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ - name: max_tokens
+ use_template: max_tokens
+ - name: context_length_exceeded_behavior
+ default: None
+ label:
+ zh_Hans: 上下文长度超出行为
+ en_US: Context Length Exceeded Behavior
+ help:
+ zh_Hans: 上下文长度超出行为
+ en_US: Context Length Exceeded Behavior
+ type: string
+ options:
+ - None
+ - truncate
+ - error
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.2'
+ output: '0.2'
+ unit: '0.000001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/fireworks/llm/llama-v3-70b-instruct-hf.yaml b/ai-provider/model-runtime/model-providers/fireworks/llm/llama-v3-70b-instruct-hf.yaml
new file mode 100644
index 00000000..2ae89b88
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/fireworks/llm/llama-v3-70b-instruct-hf.yaml
@@ -0,0 +1,46 @@
+model: accounts/fireworks/models/llama-v3-70b-instruct-hf
+label:
+ zh_Hans: Llama3 70B Instruct(HF version)
+ en_US: Llama3 70B Instruct(HF version)
+model_type: llm
+features:
+ - agent-thought
+ - tool-call
+model_properties:
+ mode: chat
+ context_size: 8192
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ - name: max_tokens
+ use_template: max_tokens
+ - name: context_length_exceeded_behavior
+ default: None
+ label:
+ zh_Hans: 上下文长度超出行为
+ en_US: Context Length Exceeded Behavior
+ help:
+ zh_Hans: 上下文长度超出行为
+ en_US: Context Length Exceeded Behavior
+ type: string
+ options:
+ - None
+ - truncate
+ - error
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.9'
+ output: '0.9'
+ unit: '0.000001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/fireworks/llm/llama-v3-70b-instruct.yaml b/ai-provider/model-runtime/model-providers/fireworks/llm/llama-v3-70b-instruct.yaml
new file mode 100644
index 00000000..7c24b08c
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/fireworks/llm/llama-v3-70b-instruct.yaml
@@ -0,0 +1,46 @@
+model: accounts/fireworks/models/llama-v3-70b-instruct
+label:
+ zh_Hans: Llama3 70B Instruct
+ en_US: Llama3 70B Instruct
+model_type: llm
+features:
+ - agent-thought
+ - tool-call
+model_properties:
+ mode: chat
+ context_size: 8192
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ - name: max_tokens
+ use_template: max_tokens
+ - name: context_length_exceeded_behavior
+ default: None
+ label:
+ zh_Hans: 上下文长度超出行为
+ en_US: Context Length Exceeded Behavior
+ help:
+ zh_Hans: 上下文长度超出行为
+ en_US: Context Length Exceeded Behavior
+ type: string
+ options:
+ - None
+ - truncate
+ - error
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.9'
+ output: '0.9'
+ unit: '0.000001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/fireworks/llm/llama-v3-8b-instruct-hf.yaml b/ai-provider/model-runtime/model-providers/fireworks/llm/llama-v3-8b-instruct-hf.yaml
new file mode 100644
index 00000000..83507ef3
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/fireworks/llm/llama-v3-8b-instruct-hf.yaml
@@ -0,0 +1,46 @@
+model: accounts/fireworks/models/llama-v3-8b-instruct-hf
+label:
+ zh_Hans: Llama3 8B Instruct(HF version)
+ en_US: Llama3 8B Instruct(HF version)
+model_type: llm
+features:
+ - agent-thought
+ - tool-call
+model_properties:
+ mode: chat
+ context_size: 8192
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ - name: max_tokens
+ use_template: max_tokens
+ - name: context_length_exceeded_behavior
+ default: None
+ label:
+ zh_Hans: 上下文长度超出行为
+ en_US: Context Length Exceeded Behavior
+ help:
+ zh_Hans: 上下文长度超出行为
+ en_US: Context Length Exceeded Behavior
+ type: string
+ options:
+ - None
+ - truncate
+ - error
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.2'
+ output: '0.2'
+ unit: '0.000001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/fireworks/llm/llama-v3-8b-instruct.yaml b/ai-provider/model-runtime/model-providers/fireworks/llm/llama-v3-8b-instruct.yaml
new file mode 100644
index 00000000..d8ac9537
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/fireworks/llm/llama-v3-8b-instruct.yaml
@@ -0,0 +1,46 @@
+model: accounts/fireworks/models/llama-v3-8b-instruct
+label:
+ zh_Hans: Llama3 8B Instruct
+ en_US: Llama3 8B Instruct
+model_type: llm
+features:
+ - agent-thought
+ - tool-call
+model_properties:
+ mode: chat
+ context_size: 8192
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ - name: max_tokens
+ use_template: max_tokens
+ - name: context_length_exceeded_behavior
+ default: None
+ label:
+ zh_Hans: 上下文长度超出行为
+ en_US: Context Length Exceeded Behavior
+ help:
+ zh_Hans: 上下文长度超出行为
+ en_US: Context Length Exceeded Behavior
+ type: string
+ options:
+ - None
+ - truncate
+ - error
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.2'
+ output: '0.2'
+ unit: '0.000001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/fireworks/llm/llama-v3p1-405b-instruct.yaml b/ai-provider/model-runtime/model-providers/fireworks/llm/llama-v3p1-405b-instruct.yaml
new file mode 100644
index 00000000..c4ddb3e9
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/fireworks/llm/llama-v3p1-405b-instruct.yaml
@@ -0,0 +1,46 @@
+model: accounts/fireworks/models/llama-v3p1-405b-instruct
+label:
+ zh_Hans: Llama3.1 405B Instruct
+ en_US: Llama3.1 405B Instruct
+model_type: llm
+features:
+ - agent-thought
+ - tool-call
+model_properties:
+ mode: chat
+ context_size: 131072
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ - name: max_tokens
+ use_template: max_tokens
+ - name: context_length_exceeded_behavior
+ default: None
+ label:
+ zh_Hans: 上下文长度超出行为
+ en_US: Context Length Exceeded Behavior
+ help:
+ zh_Hans: 上下文长度超出行为
+ en_US: Context Length Exceeded Behavior
+ type: string
+ options:
+ - None
+ - truncate
+ - error
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '3'
+ output: '3'
+ unit: '0.000001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/fireworks/llm/llama-v3p1-70b-instruct.yaml b/ai-provider/model-runtime/model-providers/fireworks/llm/llama-v3p1-70b-instruct.yaml
new file mode 100644
index 00000000..62f84f87
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/fireworks/llm/llama-v3p1-70b-instruct.yaml
@@ -0,0 +1,46 @@
+model: accounts/fireworks/models/llama-v3p1-70b-instruct
+label:
+ zh_Hans: Llama3.1 70B Instruct
+ en_US: Llama3.1 70B Instruct
+model_type: llm
+features:
+ - agent-thought
+ - tool-call
+model_properties:
+ mode: chat
+ context_size: 131072
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ - name: max_tokens
+ use_template: max_tokens
+ - name: context_length_exceeded_behavior
+ default: None
+ label:
+ zh_Hans: 上下文长度超出行为
+ en_US: Context Length Exceeded Behavior
+ help:
+ zh_Hans: 上下文长度超出行为
+ en_US: Context Length Exceeded Behavior
+ type: string
+ options:
+ - None
+ - truncate
+ - error
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.2'
+ output: '0.2'
+ unit: '0.000001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/fireworks/llm/llama-v3p1-8b-instruct.yaml b/ai-provider/model-runtime/model-providers/fireworks/llm/llama-v3p1-8b-instruct.yaml
new file mode 100644
index 00000000..9bb99c91
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/fireworks/llm/llama-v3p1-8b-instruct.yaml
@@ -0,0 +1,46 @@
+model: accounts/fireworks/models/llama-v3p1-8b-instruct
+label:
+ zh_Hans: Llama3.1 8B Instruct
+ en_US: Llama3.1 8B Instruct
+model_type: llm
+features:
+ - agent-thought
+ - tool-call
+model_properties:
+ mode: chat
+ context_size: 131072
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ - name: max_tokens
+ use_template: max_tokens
+ - name: context_length_exceeded_behavior
+ default: None
+ label:
+ zh_Hans: 上下文长度超出行为
+ en_US: Context Length Exceeded Behavior
+ help:
+ zh_Hans: 上下文长度超出行为
+ en_US: Context Length Exceeded Behavior
+ type: string
+ options:
+ - None
+ - truncate
+ - error
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.2'
+ output: '0.2'
+ unit: '0.000001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/fireworks/llm/mixtral-8x22b-instruct.yaml b/ai-provider/model-runtime/model-providers/fireworks/llm/mixtral-8x22b-instruct.yaml
new file mode 100644
index 00000000..87d977e2
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/fireworks/llm/mixtral-8x22b-instruct.yaml
@@ -0,0 +1,46 @@
+model: accounts/fireworks/models/mixtral-8x22b-instruct
+label:
+ zh_Hans: Mixtral MoE 8x22B Instruct
+ en_US: Mixtral MoE 8x22B Instruct
+model_type: llm
+features:
+ - agent-thought
+ - tool-call
+model_properties:
+ mode: chat
+ context_size: 65536
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ - name: max_tokens
+ use_template: max_tokens
+ - name: context_length_exceeded_behavior
+ default: None
+ label:
+ zh_Hans: 上下文长度超出行为
+ en_US: Context Length Exceeded Behavior
+ help:
+ zh_Hans: 上下文长度超出行为
+ en_US: Context Length Exceeded Behavior
+ type: string
+ options:
+ - None
+ - truncate
+ - error
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '1.2'
+ output: '1.2'
+ unit: '0.000001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/fireworks/llm/mixtral-8x7b-instruct-hf.yaml b/ai-provider/model-runtime/model-providers/fireworks/llm/mixtral-8x7b-instruct-hf.yaml
new file mode 100644
index 00000000..e3d5a908
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/fireworks/llm/mixtral-8x7b-instruct-hf.yaml
@@ -0,0 +1,46 @@
+model: accounts/fireworks/models/mixtral-8x7b-instruct-hf
+label:
+ zh_Hans: Mixtral MoE 8x7B Instruct(HF version)
+ en_US: Mixtral MoE 8x7B Instruct(HF version)
+model_type: llm
+features:
+ - agent-thought
+ - tool-call
+model_properties:
+ mode: chat
+ context_size: 32768
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ - name: max_tokens
+ use_template: max_tokens
+ - name: context_length_exceeded_behavior
+ default: None
+ label:
+ zh_Hans: 上下文长度超出行为
+ en_US: Context Length Exceeded Behavior
+ help:
+ zh_Hans: 上下文长度超出行为
+ en_US: Context Length Exceeded Behavior
+ type: string
+ options:
+ - None
+ - truncate
+ - error
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.5'
+ output: '0.5'
+ unit: '0.000001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/fireworks/llm/mixtral-8x7b-instruct.yaml b/ai-provider/model-runtime/model-providers/fireworks/llm/mixtral-8x7b-instruct.yaml
new file mode 100644
index 00000000..45f632ce
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/fireworks/llm/mixtral-8x7b-instruct.yaml
@@ -0,0 +1,46 @@
+model: accounts/fireworks/models/mixtral-8x7b-instruct
+label:
+ zh_Hans: Mixtral MoE 8x7B Instruct
+ en_US: Mixtral MoE 8x7B Instruct
+model_type: llm
+features:
+ - agent-thought
+ - tool-call
+model_properties:
+ mode: chat
+ context_size: 32768
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ - name: max_tokens
+ use_template: max_tokens
+ - name: context_length_exceeded_behavior
+ default: None
+ label:
+ zh_Hans: 上下文长度超出行为
+ en_US: Context Length Exceeded Behavior
+ help:
+ zh_Hans: 上下文长度超出行为
+ en_US: Context Length Exceeded Behavior
+ type: string
+ options:
+ - None
+ - truncate
+ - error
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.5'
+ output: '0.5'
+ unit: '0.000001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/fireworks/llm/mythomax-l2-13b.yaml b/ai-provider/model-runtime/model-providers/fireworks/llm/mythomax-l2-13b.yaml
new file mode 100644
index 00000000..9c3486ba
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/fireworks/llm/mythomax-l2-13b.yaml
@@ -0,0 +1,46 @@
+model: accounts/fireworks/models/mythomax-l2-13b
+label:
+ zh_Hans: MythoMax L2 13b
+ en_US: MythoMax L2 13b
+model_type: llm
+features:
+ - agent-thought
+ - tool-call
+model_properties:
+ mode: chat
+ context_size: 4096
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ - name: max_tokens
+ use_template: max_tokens
+ - name: context_length_exceeded_behavior
+ default: None
+ label:
+ zh_Hans: 上下文长度超出行为
+ en_US: Context Length Exceeded Behavior
+ help:
+ zh_Hans: 上下文长度超出行为
+ en_US: Context Length Exceeded Behavior
+ type: string
+ options:
+ - None
+ - truncate
+ - error
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.2'
+ output: '0.2'
+ unit: '0.000001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/fireworks/llm/phi-3-vision-128k-instruct.yaml b/ai-provider/model-runtime/model-providers/fireworks/llm/phi-3-vision-128k-instruct.yaml
new file mode 100644
index 00000000..e399f2ed
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/fireworks/llm/phi-3-vision-128k-instruct.yaml
@@ -0,0 +1,46 @@
+model: accounts/fireworks/models/phi-3-vision-128k-instruct
+label:
+ zh_Hans: Phi3.5 Vision Instruct
+ en_US: Phi3.5 Vision Instruct
+model_type: llm
+features:
+ - agent-thought
+ - vision
+model_properties:
+ mode: chat
+ context_size: 8192
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ - name: max_tokens
+ use_template: max_tokens
+ - name: context_length_exceeded_behavior
+ default: None
+ label:
+ zh_Hans: 上下文长度超出行为
+ en_US: Context Length Exceeded Behavior
+ help:
+ zh_Hans: 上下文长度超出行为
+ en_US: Context Length Exceeded Behavior
+ type: string
+ options:
+ - None
+ - truncate
+ - error
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.2'
+ output: '0.2'
+ unit: '0.000001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/fireworks/llm/yi-large.yaml b/ai-provider/model-runtime/model-providers/fireworks/llm/yi-large.yaml
new file mode 100644
index 00000000..bb4b6f99
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/fireworks/llm/yi-large.yaml
@@ -0,0 +1,45 @@
+model: accounts/yi-01-ai/models/yi-large
+label:
+ zh_Hans: Yi-Large
+ en_US: Yi-Large
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 32768
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ - name: max_tokens
+ use_template: max_tokens
+ - name: context_length_exceeded_behavior
+ default: None
+ label:
+ zh_Hans: 上下文长度超出行为
+ en_US: Context Length Exceeded Behavior
+ help:
+ zh_Hans: 上下文长度超出行为
+ en_US: Context Length Exceeded Behavior
+ type: string
+ options:
+ - None
+ - truncate
+ - error
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '3'
+ output: '3'
+ unit: '0.000001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/google/google.yaml b/ai-provider/model-runtime/model-providers/google/google.yaml
index 030dbe2b..1d45610f 100644
--- a/ai-provider/model-runtime/model-providers/google/google.yaml
+++ b/ai-provider/model-runtime/model-providers/google/google.yaml
@@ -29,4 +29,13 @@ provider_credential_schema:
placeholder:
zh_Hans: 在此输入您的 API Key
en_US: Enter your API Key
-address: https://api.openai.com
\ No newline at end of file
+ - variable: google_api_base
+ label:
+ zh_Hans: API Base
+ en_US: API Base
+ type: text-input
+ required: false
+ placeholder:
+ zh_Hans: 在此输入您的 API Base, 如:https://generativelanguage.googleapis.com
+ en_US: Enter your API Base, e.g. https://generativelanguage.googleapis.com
+address: https://generativelanguage.googleapis.com
\ No newline at end of file
diff --git a/ai-provider/model-runtime/model-providers/groq/assets/icon_l_en.svg b/ai-provider/model-runtime/model-providers/groq/assets/icon_l_en.svg
new file mode 100644
index 00000000..2505a5f4
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/groq/assets/icon_l_en.svg
@@ -0,0 +1,11 @@
+
diff --git a/ai-provider/model-runtime/model-providers/groq/assets/icon_s_en.svg b/ai-provider/model-runtime/model-providers/groq/assets/icon_s_en.svg
new file mode 100644
index 00000000..087f37e4
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/groq/assets/icon_s_en.svg
@@ -0,0 +1,4 @@
+
diff --git a/ai-provider/model-runtime/model-providers/groq/groq.yaml b/ai-provider/model-runtime/model-providers/groq/groq.yaml
new file mode 100644
index 00000000..3c707267
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/groq/groq.yaml
@@ -0,0 +1,33 @@
+provider: groq
+label:
+ zh_Hans: GroqCloud
+ en_US: GroqCloud
+description:
+ en_US: GroqCloud provides access to the Groq Cloud API, which hosts models like Llama 2 and Mixtral.
+ zh_Hans: GroqCloud 提供对 Groq Cloud API 的访问,其中托管了 Llama 2 和 Mixtral 等模型。
+icon_small:
+ en_US: icon_s_en.svg
+icon_large:
+ en_US: icon_l_en.svg
+background: "#F5F5F4"
+help:
+ title:
+ en_US: Get your API Key from GroqCloud
+ zh_Hans: 从 GroqCloud 获取 API Key
+ url:
+ en_US: https://console.groq.com/
+supported_model_types:
+ - llm
+configurate_methods:
+ - predefined-model
+provider_credential_schema:
+ credential_form_schemas:
+ - variable: api_key
+ label:
+ en_US: API Key
+ type: secret-input
+ required: true
+ placeholder:
+ zh_Hans: 在此输入您的 API Key
+ en_US: Enter your API Key
+address: https://api.openai.com
\ No newline at end of file
diff --git a/ai-provider/model-runtime/model-providers/groq/llm/llama-3.1-405b-reasoning.yaml b/ai-provider/model-runtime/model-providers/groq/llm/llama-3.1-405b-reasoning.yaml
new file mode 100644
index 00000000..217785ce
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/groq/llm/llama-3.1-405b-reasoning.yaml
@@ -0,0 +1,25 @@
+model: llama-3.1-405b-reasoning
+label:
+ zh_Hans: Llama-3.1-405b-reasoning
+ en_US: Llama-3.1-405b-reasoning
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 131072
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: max_tokens
+ use_template: max_tokens
+ default: 512
+ min: 1
+ max: 8192
+pricing:
+ input: '0.05'
+ output: '0.1'
+ unit: '0.000001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/groq/llm/llama-3.1-70b-versatile.yaml b/ai-provider/model-runtime/model-providers/groq/llm/llama-3.1-70b-versatile.yaml
new file mode 100644
index 00000000..ab5f6ab0
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/groq/llm/llama-3.1-70b-versatile.yaml
@@ -0,0 +1,25 @@
+model: llama-3.1-70b-versatile
+label:
+ zh_Hans: Llama-3.1-70b-versatile
+ en_US: Llama-3.1-70b-versatile
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 131072
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: max_tokens
+ use_template: max_tokens
+ default: 512
+ min: 1
+ max: 8192
+pricing:
+ input: '0.05'
+ output: '0.1'
+ unit: '0.000001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/groq/llm/llama-3.1-8b-instant.yaml b/ai-provider/model-runtime/model-providers/groq/llm/llama-3.1-8b-instant.yaml
new file mode 100644
index 00000000..a82e6453
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/groq/llm/llama-3.1-8b-instant.yaml
@@ -0,0 +1,25 @@
+model: llama-3.1-8b-instant
+label:
+ zh_Hans: Llama-3.1-8b-instant
+ en_US: Llama-3.1-8b-instant
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 131072
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: max_tokens
+ use_template: max_tokens
+ default: 512
+ min: 1
+ max: 8192
+pricing:
+ input: '0.05'
+ output: '0.1'
+ unit: '0.000001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/groq/llm/llama2-70b-4096.yaml b/ai-provider/model-runtime/model-providers/groq/llm/llama2-70b-4096.yaml
new file mode 100644
index 00000000..384912b0
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/groq/llm/llama2-70b-4096.yaml
@@ -0,0 +1,25 @@
+model: llama2-70b-4096
+label:
+ zh_Hans: Llama-2-70B-4096
+ en_US: Llama-2-70B-4096
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 4096
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: max_tokens
+ use_template: max_tokens
+ default: 512
+ min: 1
+ max: 4096
+pricing:
+ input: '0.7'
+ output: '0.8'
+ unit: '0.000001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/groq/llm/llama3-70b-8192.yaml b/ai-provider/model-runtime/model-providers/groq/llm/llama3-70b-8192.yaml
new file mode 100644
index 00000000..91d0e307
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/groq/llm/llama3-70b-8192.yaml
@@ -0,0 +1,25 @@
+model: llama3-70b-8192
+label:
+ zh_Hans: Llama-3-70B-8192
+ en_US: Llama-3-70B-8192
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 8192
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: max_tokens
+ use_template: max_tokens
+ default: 512
+ min: 1
+ max: 8192
+pricing:
+ input: '0.59'
+ output: '0.79'
+ unit: '0.000001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/groq/llm/llama3-8b-8192.yaml b/ai-provider/model-runtime/model-providers/groq/llm/llama3-8b-8192.yaml
new file mode 100644
index 00000000..b6154f76
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/groq/llm/llama3-8b-8192.yaml
@@ -0,0 +1,25 @@
+model: llama3-8b-8192
+label:
+ zh_Hans: Llama-3-8B-8192
+ en_US: Llama-3-8B-8192
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 8192
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: max_tokens
+ use_template: max_tokens
+ default: 512
+ min: 1
+ max: 8192
+pricing:
+ input: '0.05'
+ output: '0.08'
+ unit: '0.000001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/groq/llm/mixtral-8x7b-instruct-v0.1.yaml b/ai-provider/model-runtime/model-providers/groq/llm/mixtral-8x7b-instruct-v0.1.yaml
new file mode 100644
index 00000000..0dc6678f
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/groq/llm/mixtral-8x7b-instruct-v0.1.yaml
@@ -0,0 +1,25 @@
+model: mixtral-8x7b-32768
+label:
+ zh_Hans: Mixtral-8x7b-Instruct-v0.1
+ en_US: Mixtral-8x7b-Instruct-v0.1
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 32768
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: max_tokens
+ use_template: max_tokens
+ default: 512
+ min: 1
+ max: 20480
+pricing:
+ input: '0.27'
+ output: '0.27'
+ unit: '0.000001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/hunyuan/assets/icon_l_en.svg b/ai-provider/model-runtime/model-providers/hunyuan/assets/icon_l_en.svg
new file mode 100644
index 00000000..7d32aa9a
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/hunyuan/assets/icon_l_en.svg
@@ -0,0 +1,992 @@
+
+
+
diff --git a/ai-provider/model-runtime/model-providers/hunyuan/assets/icon_s_en.svg b/ai-provider/model-runtime/model-providers/hunyuan/assets/icon_s_en.svg
new file mode 100644
index 00000000..c07fe0ac
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/hunyuan/assets/icon_s_en.svg
@@ -0,0 +1,909 @@
+
+
+
diff --git a/ai-provider/model-runtime/model-providers/hunyuan/hunyuan.yaml b/ai-provider/model-runtime/model-providers/hunyuan/hunyuan.yaml
new file mode 100644
index 00000000..ef3773cc
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/hunyuan/hunyuan.yaml
@@ -0,0 +1,42 @@
+provider: hunyuan
+label:
+ zh_Hans: 腾讯混元
+ en_US: Hunyuan
+description:
+ en_US: Models provided by Tencent Hunyuan, such as hunyuan-standard, hunyuan-standard-256k, hunyuan-pro and hunyuan-lite.
+ zh_Hans: 腾讯混元提供的模型,例如 hunyuan-standard、hunyuan-standard-256k、hunyuan-pro 和 hunyuan-lite。
+icon_small:
+ en_US: icon_s_en.svg
+icon_large:
+ en_US: icon_l_en.svg
+background: "#F6F7F7"
+help:
+ title:
+ en_US: Get your API Key from Tencent Hunyuan
+ zh_Hans: 从腾讯混元获取 API Key
+ url:
+ en_US: https://console.cloud.tencent.com/cam/capi
+supported_model_types:
+ - llm
+ - text-embedding
+configurate_methods:
+ - predefined-model
+provider_credential_schema:
+ credential_form_schemas:
+ - variable: secret_id
+ label:
+ en_US: Secret ID
+ type: secret-input
+ required: true
+ placeholder:
+ zh_Hans: 在此输入您的 Secret ID
+ en_US: Enter your Secret ID
+ - variable: secret_key
+ label:
+ en_US: Secret Key
+ type: secret-input
+ required: true
+ placeholder:
+ zh_Hans: 在此输入您的 Secret Key
+ en_US: Enter your Secret Key
+address: https://api.openai.com
\ No newline at end of file
diff --git a/ai-provider/model-runtime/model-providers/hunyuan/llm/hunyuan-lite.yaml b/ai-provider/model-runtime/model-providers/hunyuan/llm/hunyuan-lite.yaml
new file mode 100644
index 00000000..4f5a5dfb
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/hunyuan/llm/hunyuan-lite.yaml
@@ -0,0 +1,28 @@
+model: hunyuan-lite
+label:
+ zh_Hans: hunyuan-lite
+ en_US: hunyuan-lite
+model_type: llm
+features:
+ - agent-thought
+ - tool-call
+ - multi-tool-call
+ - stream-tool-call
+model_properties:
+ mode: chat
+ context_size: 256000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: max_tokens
+ use_template: max_tokens
+ default: 1024
+ min: 1
+ max: 256000
+pricing:
+ input: '0.00'
+ output: '0.00'
+ unit: '0.001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/hunyuan/llm/hunyuan-pro.yaml b/ai-provider/model-runtime/model-providers/hunyuan/llm/hunyuan-pro.yaml
new file mode 100644
index 00000000..b173ffbe
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/hunyuan/llm/hunyuan-pro.yaml
@@ -0,0 +1,38 @@
+model: hunyuan-pro
+label:
+ zh_Hans: hunyuan-pro
+ en_US: hunyuan-pro
+model_type: llm
+features:
+ - agent-thought
+ - tool-call
+ - multi-tool-call
+ - stream-tool-call
+model_properties:
+ mode: chat
+ context_size: 32000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: max_tokens
+ use_template: max_tokens
+ default: 1024
+ min: 1
+ max: 32000
+ - name: enable_enhance
+ label:
+ zh_Hans: 功能增强
+ en_US: Enable Enhancement
+ type: boolean
+ help:
+ zh_Hans: 功能增强(如搜索)开关,关闭时将直接由主模型生成回复内容,可以降低响应时延(对于流式输出时的首字时延尤为明显)。但在少数场景里,回复效果可能会下降。
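+ en_US: Toggles enhancement features such as search. When disabled, the main model generates the reply directly, which reduces response latency (most noticeably the time to first token in streaming output), but reply quality may drop in a few scenarios.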
+ required: false
+ default: true
+pricing:
+ input: '0.03'
+ output: '0.10'
+ unit: '0.001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/hunyuan/llm/hunyuan-standard-256k.yaml b/ai-provider/model-runtime/model-providers/hunyuan/llm/hunyuan-standard-256k.yaml
new file mode 100644
index 00000000..1f94a862
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/hunyuan/llm/hunyuan-standard-256k.yaml
@@ -0,0 +1,38 @@
+model: hunyuan-standard-256k
+label:
+ zh_Hans: hunyuan-standard-256k
+ en_US: hunyuan-standard-256k
+model_type: llm
+features:
+ - agent-thought
+ - tool-call
+ - multi-tool-call
+ - stream-tool-call
+model_properties:
+ mode: chat
+ context_size: 256000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: max_tokens
+ use_template: max_tokens
+ default: 1024
+ min: 1
+ max: 256000
+ - name: enable_enhance
+ label:
+ zh_Hans: 功能增强
+ en_US: Enable Enhancement
+ type: boolean
+ help:
+ zh_Hans: 功能增强(如搜索)开关,关闭时将直接由主模型生成回复内容,可以降低响应时延(对于流式输出时的首字时延尤为明显)。但在少数场景里,回复效果可能会下降。
+ en_US: Allow the model to perform external search to enhance the generation results.
+ required: false
+ default: true
+pricing:
+ input: '0.015'
+ output: '0.06'
+ unit: '0.001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/hunyuan/llm/hunyuan-standard.yaml b/ai-provider/model-runtime/model-providers/hunyuan/llm/hunyuan-standard.yaml
new file mode 100644
index 00000000..1db25930
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/hunyuan/llm/hunyuan-standard.yaml
@@ -0,0 +1,38 @@
+model: hunyuan-standard
+label:
+ zh_Hans: hunyuan-standard
+ en_US: hunyuan-standard
+model_type: llm
+features:
+ - agent-thought
+ - tool-call
+ - multi-tool-call
+ - stream-tool-call
+model_properties:
+ mode: chat
+ context_size: 32000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: max_tokens
+ use_template: max_tokens
+ default: 1024
+ min: 1
+ max: 32000
+ - name: enable_enhance
+ label:
+ zh_Hans: 功能增强
+ en_US: Enable Enhancement
+ type: boolean
+ help:
+ zh_Hans: 功能增强(如搜索)开关,关闭时将直接由主模型生成回复内容,可以降低响应时延(对于流式输出时的首字时延尤为明显)。但在少数场景里,回复效果可能会下降。
+ en_US: Allow the model to perform external search to enhance the generation results.
+ required: false
+ default: true
+pricing:
+ input: '0.0045'
+ output: '0.0005'
+ unit: '0.001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/hunyuan/llm/hunyuan-turbo.yaml b/ai-provider/model-runtime/model-providers/hunyuan/llm/hunyuan-turbo.yaml
new file mode 100644
index 00000000..4837fed4
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/hunyuan/llm/hunyuan-turbo.yaml
@@ -0,0 +1,38 @@
+model: hunyuan-turbo
+label:
+ zh_Hans: hunyuan-turbo
+ en_US: hunyuan-turbo
+model_type: llm
+features:
+ - agent-thought
+ - tool-call
+ - multi-tool-call
+ - stream-tool-call
+model_properties:
+ mode: chat
+ context_size: 32000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: max_tokens
+ use_template: max_tokens
+ default: 1024
+ min: 1
+ max: 32000
+ - name: enable_enhance
+ label:
+ zh_Hans: 功能增强
+ en_US: Enable Enhancement
+ type: boolean
+ help:
+ zh_Hans: 功能增强(如搜索)开关,关闭时将直接由主模型生成回复内容,可以降低响应时延(对于流式输出时的首字时延尤为明显)。但在少数场景里,回复效果可能会下降。
+ en_US: Allow the model to perform external search to enhance the generation results.
+ required: false
+ default: true
+pricing:
+ input: '0.015'
+ output: '0.05'
+ unit: '0.001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/hunyuan/llm/hunyuan-vision.yaml b/ai-provider/model-runtime/model-providers/hunyuan/llm/hunyuan-vision.yaml
new file mode 100644
index 00000000..9edc7f47
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/hunyuan/llm/hunyuan-vision.yaml
@@ -0,0 +1,39 @@
+model: hunyuan-vision
+label:
+ zh_Hans: hunyuan-vision
+ en_US: hunyuan-vision
+model_type: llm
+features:
+ - agent-thought
+ - tool-call
+ - multi-tool-call
+ - stream-tool-call
+ - vision
+model_properties:
+ mode: chat
+ context_size: 8000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: max_tokens
+ use_template: max_tokens
+ default: 1024
+ min: 1
+ max: 8000
+ - name: enable_enhance
+ label:
+ zh_Hans: 功能增强
+ en_US: Enable Enhancement
+ type: boolean
+ help:
+ zh_Hans: 功能增强(如搜索)开关,关闭时将直接由主模型生成回复内容,可以降低响应时延(对于流式输出时的首字时延尤为明显)。但在少数场景里,回复效果可能会下降。
+ en_US: Allow the model to perform external search to enhance the generation results.
+ required: false
+ default: true
+pricing:
+ input: '0.018'
+ output: '0.018'
+ unit: '0.001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/leptonai/assets/icon_l_en.svg b/ai-provider/model-runtime/model-providers/leptonai/assets/icon_l_en.svg
new file mode 100644
index 00000000..a8cc1bdc
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/leptonai/assets/icon_l_en.svg
@@ -0,0 +1,9219 @@
+
+
+
diff --git a/ai-provider/model-runtime/model-providers/leptonai/assets/icon_s_en.svg b/ai-provider/model-runtime/model-providers/leptonai/assets/icon_s_en.svg
new file mode 100644
index 00000000..6e24e9e5
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/leptonai/assets/icon_s_en.svg
@@ -0,0 +1,1080 @@
+
+
+
diff --git a/ai-provider/model-runtime/model-providers/leptonai/leptonai.yaml b/ai-provider/model-runtime/model-providers/leptonai/leptonai.yaml
new file mode 100644
index 00000000..ce485d74
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/leptonai/leptonai.yaml
@@ -0,0 +1,30 @@
+provider: leptonai
+label:
+ zh_Hans: Lepton AI
+ en_US: Lepton AI
+icon_small:
+ en_US: icon_s_en.svg
+icon_large:
+ en_US: icon_l_en.svg
+background: "#F5F5F4"
+help:
+ title:
+ en_US: Get your API Key from Lepton AI
+ zh_Hans: 从 Lepton AI 获取 API Key
+ url:
+ en_US: https://dashboard.lepton.ai
+supported_model_types:
+ - llm
+configurate_methods:
+ - predefined-model
+provider_credential_schema:
+ credential_form_schemas:
+ - variable: api_key
+ label:
+ en_US: API Key
+ type: secret-input
+ required: true
+ placeholder:
+ zh_Hans: 在此输入您的 API Key
+ en_US: Enter your API Key
+address: https://api.openai.com
\ No newline at end of file
diff --git a/ai-provider/model-runtime/model-providers/leptonai/llm/gemma-7b.yaml b/ai-provider/model-runtime/model-providers/leptonai/llm/gemma-7b.yaml
new file mode 100644
index 00000000..2d69067a
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/leptonai/llm/gemma-7b.yaml
@@ -0,0 +1,20 @@
+model: gemma-7b
+label:
+ zh_Hans: gemma-7b
+ en_US: gemma-7b
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 8192
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: max_tokens
+ use_template: max_tokens
+ default: 1024
+ min: 1
+ max: 1024
diff --git a/ai-provider/model-runtime/model-providers/leptonai/llm/llama2-13b.yaml b/ai-provider/model-runtime/model-providers/leptonai/llm/llama2-13b.yaml
new file mode 100644
index 00000000..307f1ea8
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/leptonai/llm/llama2-13b.yaml
@@ -0,0 +1,20 @@
+model: llama2-13b
+label:
+ zh_Hans: llama2-13b
+ en_US: llama2-13b
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 4096
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: max_tokens
+ use_template: max_tokens
+ default: 1024
+ min: 1
+ max: 1024
diff --git a/ai-provider/model-runtime/model-providers/leptonai/llm/llama2-7b.yaml b/ai-provider/model-runtime/model-providers/leptonai/llm/llama2-7b.yaml
new file mode 100644
index 00000000..bd471e59
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/leptonai/llm/llama2-7b.yaml
@@ -0,0 +1,20 @@
+model: llama2-7b
+label:
+ zh_Hans: llama2-7b
+ en_US: llama2-7b
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 4096
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: max_tokens
+ use_template: max_tokens
+ default: 1024
+ min: 1
+ max: 1024
diff --git a/ai-provider/model-runtime/model-providers/leptonai/llm/llama3-70b.yaml b/ai-provider/model-runtime/model-providers/leptonai/llm/llama3-70b.yaml
new file mode 100644
index 00000000..9c20eb6c
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/leptonai/llm/llama3-70b.yaml
@@ -0,0 +1,20 @@
+model: llama3-70b
+label:
+ zh_Hans: llama3-70b
+ en_US: llama3-70b
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 8192
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: max_tokens
+ use_template: max_tokens
+ default: 1024
+ min: 1
+ max: 1024
diff --git a/ai-provider/model-runtime/model-providers/leptonai/llm/mistral-7b.yaml b/ai-provider/model-runtime/model-providers/leptonai/llm/mistral-7b.yaml
new file mode 100644
index 00000000..f2b46ff9
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/leptonai/llm/mistral-7b.yaml
@@ -0,0 +1,20 @@
+model: mistral-7b
+label:
+ zh_Hans: mistral-7b
+ en_US: mistral-7b
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 8192
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: max_tokens
+ use_template: max_tokens
+ default: 1024
+ min: 1
+ max: 1024
diff --git a/ai-provider/model-runtime/model-providers/leptonai/llm/mixtral-8x7b.yaml b/ai-provider/model-runtime/model-providers/leptonai/llm/mixtral-8x7b.yaml
new file mode 100644
index 00000000..de788ac2
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/leptonai/llm/mixtral-8x7b.yaml
@@ -0,0 +1,20 @@
+model: mixtral-8x7b
+label:
+ zh_Hans: mixtral-8x7b
+ en_US: mixtral-8x7b
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 32000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: max_tokens
+ use_template: max_tokens
+ default: 1024
+ min: 1
+ max: 1024
diff --git a/ai-provider/model-runtime/model-providers/minimax/assets/icon_l_en.svg b/ai-provider/model-runtime/model-providers/minimax/assets/icon_l_en.svg
new file mode 100644
index 00000000..abf67ea4
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/minimax/assets/icon_l_en.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/ai-provider/model-runtime/model-providers/minimax/assets/icon_s_en.svg b/ai-provider/model-runtime/model-providers/minimax/assets/icon_s_en.svg
new file mode 100644
index 00000000..7de6a2ad
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/minimax/assets/icon_s_en.svg
@@ -0,0 +1,41 @@
+
+
+
diff --git a/ai-provider/model-runtime/model-providers/minimax/llm/abab5-chat.yaml b/ai-provider/model-runtime/model-providers/minimax/llm/abab5-chat.yaml
new file mode 100644
index 00000000..2c1f79e2
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/minimax/llm/abab5-chat.yaml
@@ -0,0 +1,38 @@
+model: abab5-chat
+label:
+ en_US: Abab5-Chat
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 6144
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: max_tokens
+ use_template: max_tokens
+ required: true
+ default: 6144
+ min: 1
+ max: 6144
+ - name: mask_sensitive_info
+ type: boolean
+ default: true
+ label:
+ zh_Hans: 隐私保护
+ en_US: Moderate
+ help:
+ zh_Hans: 对输出中易涉及隐私问题的文本信息进行打码,目前包括但不限于邮箱、域名、链接、证件号、家庭住址等,默认true,即开启打码
+ en_US: Mask the sensitive info of the generated content, such as email/domain/link/address/phone/id.
+ - name: presence_penalty
+ use_template: presence_penalty
+ - name: frequency_penalty
+ use_template: frequency_penalty
+pricing:
+ input: '0.015'
+ output: '0.015'
+ unit: '0.001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/minimax/llm/abab5.5-chat.yaml b/ai-provider/model-runtime/model-providers/minimax/llm/abab5.5-chat.yaml
new file mode 100644
index 00000000..6d29be0d
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/minimax/llm/abab5.5-chat.yaml
@@ -0,0 +1,53 @@
+model: abab5.5-chat
+label:
+ en_US: Abab5.5-Chat
+model_type: llm
+features:
+ - agent-thought
+ - tool-call
+ - stream-tool-call
+model_properties:
+ mode: chat
+ context_size: 16384
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ min: 0.01
+ max: 1
+ default: 0.9
+ - name: top_p
+ use_template: top_p
+ min: 0.01
+ max: 1
+ default: 0.95
+ - name: max_tokens
+ use_template: max_tokens
+ required: true
+ default: 6144
+ min: 1
+ max: 16384
+ - name: mask_sensitive_info
+ type: boolean
+ default: true
+ label:
+ zh_Hans: 隐私保护
+ en_US: Moderate
+ help:
+ zh_Hans: 对输出中易涉及隐私问题的文本信息进行打码,目前包括但不限于邮箱、域名、链接、证件号、家庭住址等,默认true,即开启打码
+ en_US: Mask the sensitive info of the generated content, such as email/domain/link/address/phone/id.
+ - name: presence_penalty
+ use_template: presence_penalty
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ - name: plugin_web_search
+ required: false
+ default: false
+ type: boolean
+ label:
+ en_US: Enable Web Search
+ zh_Hans: 开启网页搜索
+pricing:
+ input: '0.015'
+ output: '0.015'
+ unit: '0.001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/minimax/llm/abab5.5s-chat.yaml b/ai-provider/model-runtime/model-providers/minimax/llm/abab5.5s-chat.yaml
new file mode 100644
index 00000000..aa42bb57
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/minimax/llm/abab5.5s-chat.yaml
@@ -0,0 +1,44 @@
+model: abab5.5s-chat
+label:
+ en_US: Abab5.5s-Chat
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 8192
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ min: 0.01
+ max: 1
+ default: 0.9
+ - name: top_p
+ use_template: top_p
+ min: 0.01
+ max: 1
+ default: 0.95
+ - name: max_tokens
+ use_template: max_tokens
+ required: true
+ default: 3072
+ min: 1
+ max: 8192
+ - name: mask_sensitive_info
+ type: boolean
+ default: true
+ label:
+ zh_Hans: 隐私保护
+ en_US: Moderate
+ help:
+ zh_Hans: 对输出中易涉及隐私问题的文本信息进行打码,目前包括但不限于邮箱、域名、链接、证件号、家庭住址等,默认true,即开启打码
+ en_US: Mask the sensitive info of the generated content, such as email/domain/link/address/phone/id.
+ - name: presence_penalty
+ use_template: presence_penalty
+ - name: frequency_penalty
+ use_template: frequency_penalty
+pricing:
+ input: '0.005'
+ output: '0.005'
+ unit: '0.001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/minimax/llm/abab6-chat.yaml b/ai-provider/model-runtime/model-providers/minimax/llm/abab6-chat.yaml
new file mode 100644
index 00000000..9188b6b5
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/minimax/llm/abab6-chat.yaml
@@ -0,0 +1,46 @@
+model: abab6-chat
+label:
+ en_US: Abab6-Chat
+model_type: llm
+features:
+ - agent-thought
+ - tool-call
+ - stream-tool-call
+model_properties:
+ mode: chat
+ context_size: 32768
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ min: 0.01
+ max: 1
+ default: 0.1
+ - name: top_p
+ use_template: top_p
+ min: 0.01
+ max: 1
+ default: 0.9
+ - name: max_tokens
+ use_template: max_tokens
+ required: true
+ default: 2048
+ min: 1
+ max: 32768
+ - name: mask_sensitive_info
+ type: boolean
+ default: true
+ label:
+ zh_Hans: 隐私保护
+ en_US: Moderate
+ help:
+ zh_Hans: 对输出中易涉及隐私问题的文本信息进行打码,目前包括但不限于邮箱、域名、链接、证件号、家庭住址等,默认true,即开启打码
+ en_US: Mask the sensitive info of the generated content, such as email/domain/link/address/phone/id.
+ - name: presence_penalty
+ use_template: presence_penalty
+ - name: frequency_penalty
+ use_template: frequency_penalty
+pricing:
+ input: '0.1'
+ output: '0.1'
+ unit: '0.001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/minimax/llm/abab6.5-chat.yaml b/ai-provider/model-runtime/model-providers/minimax/llm/abab6.5-chat.yaml
new file mode 100644
index 00000000..5d717d5f
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/minimax/llm/abab6.5-chat.yaml
@@ -0,0 +1,46 @@
+model: abab6.5-chat
+label:
+ en_US: Abab6.5-Chat
+model_type: llm
+features:
+ - agent-thought
+ - tool-call
+ - stream-tool-call
+model_properties:
+ mode: chat
+ context_size: 8192
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ min: 0.01
+ max: 1
+ default: 0.1
+ - name: top_p
+ use_template: top_p
+ min: 0.01
+ max: 1
+ default: 0.95
+ - name: max_tokens
+ use_template: max_tokens
+ required: true
+ default: 2048
+ min: 1
+ max: 8192
+ - name: mask_sensitive_info
+ type: boolean
+ default: true
+ label:
+ zh_Hans: 隐私保护
+ en_US: Moderate
+ help:
+ zh_Hans: 对输出中易涉及隐私问题的文本信息进行打码,目前包括但不限于邮箱、域名、链接、证件号、家庭住址等,默认true,即开启打码
+ en_US: Mask the sensitive info of the generated content, such as email/domain/link/address/phone/id.
+ - name: presence_penalty
+ use_template: presence_penalty
+ - name: frequency_penalty
+ use_template: frequency_penalty
+pricing:
+ input: '0.03'
+ output: '0.03'
+ unit: '0.001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/minimax/llm/abab6.5s-chat.yaml b/ai-provider/model-runtime/model-providers/minimax/llm/abab6.5s-chat.yaml
new file mode 100644
index 00000000..4631fe67
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/minimax/llm/abab6.5s-chat.yaml
@@ -0,0 +1,46 @@
+model: abab6.5s-chat
+label:
+ en_US: Abab6.5s-Chat
+model_type: llm
+features:
+ - agent-thought
+ - tool-call
+ - stream-tool-call
+model_properties:
+ mode: chat
+ context_size: 245760
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ min: 0.01
+ max: 1
+ default: 0.1
+ - name: top_p
+ use_template: top_p
+ min: 0.01
+ max: 1
+ default: 0.95
+ - name: max_tokens
+ use_template: max_tokens
+ required: true
+ default: 2048
+ min: 1
+ max: 245760
+ - name: mask_sensitive_info
+ type: boolean
+ default: true
+ label:
+ zh_Hans: 隐私保护
+ en_US: Moderate
+ help:
+ zh_Hans: 对输出中易涉及隐私问题的文本信息进行打码,目前包括但不限于邮箱、域名、链接、证件号、家庭住址等,默认true,即开启打码
+ en_US: Mask the sensitive info of the generated content, such as email/domain/link/address/phone/id.
+ - name: presence_penalty
+ use_template: presence_penalty
+ - name: frequency_penalty
+ use_template: frequency_penalty
+pricing:
+ input: '0.01'
+ output: '0.01'
+ unit: '0.001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/minimax/minimax.yaml b/ai-provider/model-runtime/model-providers/minimax/minimax.yaml
new file mode 100644
index 00000000..c78ab330
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/minimax/minimax.yaml
@@ -0,0 +1,38 @@
+provider: minimax
+label:
+ en_US: Minimax
+icon_small:
+ en_US: icon_s_en.svg
+icon_large:
+ en_US: icon_l_en.svg
+background: "#FFEFEF"
+help:
+ title:
+ en_US: Get your API Key from Minimax
+ zh_Hans: 从 Minimax 获取您的 API Key
+ url:
+ en_US: https://api.minimax.chat/user-center/basic-information/interface-key
+supported_model_types:
+ - llm
+ - text-embedding
+configurate_methods:
+ - predefined-model
+provider_credential_schema:
+ credential_form_schemas:
+ - variable: minimax_api_key
+ label:
+ en_US: API Key
+ type: secret-input
+ required: true
+ placeholder:
+ zh_Hans: 在此输入您的 API Key
+ en_US: Enter your API Key
+ - variable: minimax_group_id
+ label:
+ en_US: Group ID
+ type: text-input
+ required: true
+ placeholder:
+ zh_Hans: 在此输入您的 Group ID
+ en_US: Enter your group ID
+address: https://api.openai.com
\ No newline at end of file
diff --git a/ai-provider/model-runtime/model-providers/mistralai/assets/icon_l_en.svg b/ai-provider/model-runtime/model-providers/mistralai/assets/icon_l_en.svg
new file mode 100644
index 00000000..0fc9afdb
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/mistralai/assets/icon_l_en.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/ai-provider/model-runtime/model-providers/mistralai/assets/icon_s_en.svg b/ai-provider/model-runtime/model-providers/mistralai/assets/icon_s_en.svg
new file mode 100644
index 00000000..7a3feba5
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/mistralai/assets/icon_s_en.svg
@@ -0,0 +1,116 @@
+
+
+
diff --git a/ai-provider/model-runtime/model-providers/mistralai/llm/codestral-latest.yaml b/ai-provider/model-runtime/model-providers/mistralai/llm/codestral-latest.yaml
new file mode 100644
index 00000000..5f126023
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/mistralai/llm/codestral-latest.yaml
@@ -0,0 +1,51 @@
+model: codestral-latest
+label:
+ zh_Hans: codestral-latest
+ en_US: codestral-latest
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 32000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ default: 0.7
+ min: 0
+ max: 1
+ - name: top_p
+ use_template: top_p
+ default: 1
+ min: 0
+ max: 1
+ - name: max_tokens
+ use_template: max_tokens
+ default: 1024
+ min: 1
+ max: 4096
+ - name: safe_prompt
+ default: false
+ type: boolean
+ help:
+ en_US: Whether to inject a safety prompt before all conversations.
+ zh_Hans: 是否开启提示词审查
+ label:
+ en_US: SafePrompt
+ zh_Hans: 提示词审查
+ - name: random_seed
+ type: int
+ help:
+ en_US: The seed to use for random sampling. If set, different calls will generate deterministic results.
+ zh_Hans: 当开启随机数种子以后,你可以通过指定一个固定的种子来使得回答结果更加稳定
+ label:
+ en_US: RandomSeed
+ zh_Hans: 随机数种子
+ default: 0
+ min: 0
+ max: 2147483647
+pricing:
+ input: '0.008'
+ output: '0.024'
+ unit: '0.001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/mistralai/llm/mistral-embed.yaml b/ai-provider/model-runtime/model-providers/mistralai/llm/mistral-embed.yaml
new file mode 100644
index 00000000..d759103d
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/mistralai/llm/mistral-embed.yaml
@@ -0,0 +1,51 @@
+model: mistral-embed
+label:
+ zh_Hans: mistral-embed
+ en_US: mistral-embed
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 8192
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ default: 0.7
+ min: 0
+ max: 1
+ - name: top_p
+ use_template: top_p
+ default: 1
+ min: 0
+ max: 1
+ - name: max_tokens
+ use_template: max_tokens
+ default: 1024
+ min: 1
+ max: 1024
+ - name: safe_prompt
+ default: false
+ type: boolean
+ help:
+ en_US: Whether to inject a safety prompt before all conversations.
+ zh_Hans: 是否开启提示词审查
+ label:
+ en_US: SafePrompt
+ zh_Hans: 提示词审查
+ - name: random_seed
+ type: int
+ help:
+ en_US: The seed to use for random sampling. If set, different calls will generate deterministic results.
+ zh_Hans: 当开启随机数种子以后,你可以通过指定一个固定的种子来使得回答结果更加稳定
+ label:
+ en_US: RandomSeed
+ zh_Hans: 随机数种子
+ default: 0
+ min: 0
+ max: 2147483647
+pricing:
+ input: '0.008'
+ output: '0.024'
+ unit: '0.001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/mistralai/llm/mistral-large-latest.yaml b/ai-provider/model-runtime/model-providers/mistralai/llm/mistral-large-latest.yaml
new file mode 100644
index 00000000..a0d07a2b
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/mistralai/llm/mistral-large-latest.yaml
@@ -0,0 +1,51 @@
+model: mistral-large-latest
+label:
+ zh_Hans: mistral-large-latest
+ en_US: mistral-large-latest
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 32000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ default: 0.7
+ min: 0
+ max: 1
+ - name: top_p
+ use_template: top_p
+ default: 1
+ min: 0
+ max: 1
+ - name: max_tokens
+ use_template: max_tokens
+ default: 1024
+ min: 1
+ max: 8000
+ - name: safe_prompt
+ default: false
+ type: boolean
+ help:
+ en_US: Whether to inject a safety prompt before all conversations.
+ zh_Hans: 是否开启提示词审查
+ label:
+ en_US: SafePrompt
+ zh_Hans: 提示词审查
+ - name: random_seed
+ type: int
+ help:
+ en_US: The seed to use for random sampling. If set, different calls will generate deterministic results.
+ zh_Hans: 当开启随机数种子以后,你可以通过指定一个固定的种子来使得回答结果更加稳定
+ label:
+ en_US: RandomSeed
+ zh_Hans: 随机数种子
+ default: 0
+ min: 0
+ max: 2147483647
+pricing:
+ input: '0.008'
+ output: '0.024'
+ unit: '0.001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/mistralai/llm/mistral-medium-latest.yaml b/ai-provider/model-runtime/model-providers/mistralai/llm/mistral-medium-latest.yaml
new file mode 100644
index 00000000..7c744089
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/mistralai/llm/mistral-medium-latest.yaml
@@ -0,0 +1,51 @@
+model: mistral-medium-latest
+label:
+ zh_Hans: mistral-medium-latest
+ en_US: mistral-medium-latest
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 32000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ default: 0.7
+ min: 0
+ max: 1
+ - name: top_p
+ use_template: top_p
+ default: 1
+ min: 0
+ max: 1
+ - name: max_tokens
+ use_template: max_tokens
+ default: 1024
+ min: 1
+ max: 8000
+ - name: safe_prompt
+ default: false
+ type: boolean
+ help:
+ en_US: Whether to inject a safety prompt before all conversations.
+ zh_Hans: 是否开启提示词审查
+ label:
+ en_US: SafePrompt
+ zh_Hans: 提示词审查
+ - name: random_seed
+ type: int
+ help:
+ en_US: The seed to use for random sampling. If set, different calls will generate deterministic results.
+ zh_Hans: 当开启随机数种子以后,你可以通过指定一个固定的种子来使得回答结果更加稳定
+ label:
+ en_US: RandomSeed
+ zh_Hans: 随机数种子
+ default: 0
+ min: 0
+ max: 2147483647
+pricing:
+ input: '0.0027'
+ output: '0.0081'
+ unit: '0.001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/mistralai/llm/mistral-small-latest.yaml b/ai-provider/model-runtime/model-providers/mistralai/llm/mistral-small-latest.yaml
new file mode 100644
index 00000000..865e6102
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/mistralai/llm/mistral-small-latest.yaml
@@ -0,0 +1,51 @@
+model: mistral-small-latest
+label:
+ zh_Hans: mistral-small-latest
+ en_US: mistral-small-latest
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 32000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ default: 0.7
+ min: 0
+ max: 1
+ - name: top_p
+ use_template: top_p
+ default: 1
+ min: 0
+ max: 1
+ - name: max_tokens
+ use_template: max_tokens
+ default: 1024
+ min: 1
+ max: 8000
+ - name: safe_prompt
+ default: false
+ type: boolean
+ help:
+ en_US: Whether to inject a safety prompt before all conversations.
+ zh_Hans: 是否开启提示词审查
+ label:
+ en_US: SafePrompt
+ zh_Hans: 提示词审查
+ - name: random_seed
+ type: int
+ help:
+ en_US: The seed to use for random sampling. If set, different calls will generate deterministic results.
+ zh_Hans: 当开启随机数种子以后,你可以通过指定一个固定的种子来使得回答结果更加稳定
+ label:
+ en_US: RandomSeed
+ zh_Hans: 随机数种子
+ default: 0
+ min: 0
+ max: 2147483647
+pricing:
+ input: '0.002'
+ output: '0.006'
+ unit: '0.001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/mistralai/llm/open-codestral-mamba.yaml b/ai-provider/model-runtime/model-providers/mistralai/llm/open-codestral-mamba.yaml
new file mode 100644
index 00000000..d7ffb9ea
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/mistralai/llm/open-codestral-mamba.yaml
@@ -0,0 +1,51 @@
+model: open-codestral-mamba
+label:
+ zh_Hans: open-codestral-mamba
+ en_US: open-codestral-mamba
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 256000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ default: 0.7
+ min: 0
+ max: 1
+ - name: top_p
+ use_template: top_p
+ default: 1
+ min: 0
+ max: 1
+ - name: max_tokens
+ use_template: max_tokens
+ default: 1024
+ min: 1
+ max: 16384
+ - name: safe_prompt
+ default: false
+ type: boolean
+ help:
+ en_US: Whether to inject a safety prompt before all conversations.
+ zh_Hans: 是否开启提示词审查
+ label:
+ en_US: SafePrompt
+ zh_Hans: 提示词审查
+ - name: random_seed
+ type: int
+ help:
+ en_US: The seed to use for random sampling. If set, different calls will generate deterministic results.
+ zh_Hans: 当开启随机数种子以后,你可以通过指定一个固定的种子来使得回答结果更加稳定
+ label:
+ en_US: RandomSeed
+ zh_Hans: 随机数种子
+ default: 0
+ min: 0
+ max: 2147483647
+pricing:
+ input: '0.008'
+ output: '0.024'
+ unit: '0.001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/mistralai/llm/open-mistral-7b.yaml b/ai-provider/model-runtime/model-providers/mistralai/llm/open-mistral-7b.yaml
new file mode 100644
index 00000000..ac292269
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/mistralai/llm/open-mistral-7b.yaml
@@ -0,0 +1,51 @@
+model: open-mistral-7b
+label:
+ zh_Hans: open-mistral-7b
+ en_US: open-mistral-7b
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 8000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ default: 0.7
+ min: 0
+ max: 1
+ - name: top_p
+ use_template: top_p
+ default: 1
+ min: 0
+ max: 1
+ - name: max_tokens
+ use_template: max_tokens
+ default: 1024
+ min: 1
+ max: 2048
+ - name: safe_prompt
+ default: false
+ type: boolean
+ help:
+ en_US: Whether to inject a safety prompt before all conversations.
+ zh_Hans: 是否开启提示词审查
+ label:
+ en_US: SafePrompt
+ zh_Hans: 提示词审查
+ - name: random_seed
+ type: int
+ help:
+ en_US: The seed to use for random sampling. If set, different calls will generate deterministic results.
+ zh_Hans: 当开启随机数种子以后,你可以通过指定一个固定的种子来使得回答结果更加稳定
+ label:
+ en_US: RandomSeed
+ zh_Hans: 随机数种子
+ default: 0
+ min: 0
+ max: 2147483647
+pricing:
+ input: '0.00025'
+ output: '0.00025'
+ unit: '0.001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/mistralai/llm/open-mistral-nemo.yaml b/ai-provider/model-runtime/model-providers/mistralai/llm/open-mistral-nemo.yaml
new file mode 100644
index 00000000..dcda4fbc
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/mistralai/llm/open-mistral-nemo.yaml
@@ -0,0 +1,51 @@
+model: open-mistral-nemo
+label:
+ zh_Hans: open-mistral-nemo
+ en_US: open-mistral-nemo
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 128000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ default: 0.7
+ min: 0
+ max: 1
+ - name: top_p
+ use_template: top_p
+ default: 1
+ min: 0
+ max: 1
+ - name: max_tokens
+ use_template: max_tokens
+ default: 1024
+ min: 1
+ max: 8192
+ - name: safe_prompt
+ default: false
+ type: boolean
+ help:
+ en_US: Whether to inject a safety prompt before all conversations.
+ zh_Hans: 是否开启提示词审查
+ label:
+ en_US: SafePrompt
+ zh_Hans: 提示词审查
+ - name: random_seed
+ type: int
+ help:
+ en_US: The seed to use for random sampling. If set, different calls will generate deterministic results.
+ zh_Hans: 当开启随机数种子以后,你可以通过指定一个固定的种子来使得回答结果更加稳定
+ label:
+ en_US: RandomSeed
+ zh_Hans: 随机数种子
+ default: 0
+ min: 0
+ max: 2147483647
+pricing:
+ input: '0.008'
+ output: '0.024'
+ unit: '0.001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/mistralai/llm/open-mixtral-8x22b.yaml b/ai-provider/model-runtime/model-providers/mistralai/llm/open-mixtral-8x22b.yaml
new file mode 100644
index 00000000..325fafd4
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/mistralai/llm/open-mixtral-8x22b.yaml
@@ -0,0 +1,51 @@
+model: open-mixtral-8x22b
+label:
+ zh_Hans: open-mixtral-8x22b
+ en_US: open-mixtral-8x22b
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 64000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ default: 0.7
+ min: 0
+ max: 1
+ - name: top_p
+ use_template: top_p
+ default: 1
+ min: 0
+ max: 1
+ - name: max_tokens
+ use_template: max_tokens
+ default: 1024
+ min: 1
+ max: 8000
+ - name: safe_prompt
+ default: false
+ type: boolean
+ help:
+ en_US: Whether to inject a safety prompt before all conversations.
+ zh_Hans: 是否开启提示词审查
+ label:
+ en_US: SafePrompt
+ zh_Hans: 提示词审查
+ - name: random_seed
+ type: int
+ help:
+ en_US: The seed to use for random sampling. If set, different calls will generate deterministic results.
+ zh_Hans: 当开启随机数种子以后,你可以通过指定一个固定的种子来使得回答结果更加稳定
+ label:
+ en_US: RandomSeed
+ zh_Hans: 随机数种子
+ default: 0
+ min: 0
+ max: 2147483647
+pricing:
+ input: '0.002'
+ output: '0.006'
+ unit: '0.001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/mistralai/llm/open-mixtral-8x7b.yaml b/ai-provider/model-runtime/model-providers/mistralai/llm/open-mixtral-8x7b.yaml
new file mode 100644
index 00000000..d217e5e7
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/mistralai/llm/open-mixtral-8x7b.yaml
@@ -0,0 +1,51 @@
+model: open-mixtral-8x7b
+label:
+ zh_Hans: open-mixtral-8x7b
+ en_US: open-mixtral-8x7b
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 32000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ default: 0.7
+ min: 0
+ max: 1
+ - name: top_p
+ use_template: top_p
+ default: 1
+ min: 0
+ max: 1
+ - name: max_tokens
+ use_template: max_tokens
+ default: 1024
+ min: 1
+ max: 8000
+ - name: safe_prompt
+ default: false
+ type: boolean
+ help:
+ en_US: Whether to inject a safety prompt before all conversations.
+ zh_Hans: 是否开启提示词审查
+ label:
+ en_US: SafePrompt
+ zh_Hans: 提示词审查
+ - name: random_seed
+ type: int
+ help:
+ en_US: The seed to use for random sampling. If set, different calls will generate deterministic results.
+ zh_Hans: 当开启随机数种子以后,你可以通过指定一个固定的种子来使得回答结果更加稳定
+ label:
+ en_US: RandomSeed
+ zh_Hans: 随机数种子
+ default: 0
+ min: 0
+ max: 2147483647
+pricing:
+ input: '0.0007'
+ output: '0.0007'
+ unit: '0.001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/mistralai/llm/pixtral-12b-2409.yaml b/ai-provider/model-runtime/model-providers/mistralai/llm/pixtral-12b-2409.yaml
new file mode 100644
index 00000000..0b002b49
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/mistralai/llm/pixtral-12b-2409.yaml
@@ -0,0 +1,51 @@
+model: pixtral-12b-2409
+label:
+ zh_Hans: pixtral-12b-2409
+ en_US: pixtral-12b-2409
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 128000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ default: 0.7
+ min: 0
+ max: 1
+ - name: top_p
+ use_template: top_p
+ default: 1
+ min: 0
+ max: 1
+ - name: max_tokens
+ use_template: max_tokens
+ default: 1024
+ min: 1
+ max: 8192
+ - name: safe_prompt
+ default: false
+ type: boolean
+ help:
+ en_US: Whether to inject a safety prompt before all conversations.
+ zh_Hans: 是否开启提示词审查
+ label:
+ en_US: SafePrompt
+ zh_Hans: 提示词审查
+ - name: random_seed
+ type: int
+ help:
+ en_US: The seed to use for random sampling. If set, different calls will generate deterministic results.
+ zh_Hans: 当开启随机数种子以后,你可以通过指定一个固定的种子来使得回答结果更加稳定
+ label:
+ en_US: RandomSeed
+ zh_Hans: 随机数种子
+ default: 0
+ min: 0
+ max: 2147483647
+pricing:
+ input: '0.008'
+ output: '0.024'
+ unit: '0.001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/mistralai/mistralai.yaml b/ai-provider/model-runtime/model-providers/mistralai/mistralai.yaml
new file mode 100644
index 00000000..c2b5a5ce
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/mistralai/mistralai.yaml
@@ -0,0 +1,32 @@
+provider: mistralai
+label:
+ en_US: MistralAI
+description:
+ en_US: Models provided by MistralAI, such as open-mistral-7b and mistral-large-latest.
+ zh_Hans: MistralAI 提供的模型,例如 open-mistral-7b 和 mistral-large-latest。
+icon_small:
+ en_US: icon_s_en.svg
+icon_large:
+ en_US: icon_l_en.svg
+background: "#FFFFFF"
+help:
+ title:
+ en_US: Get your API Key from MistralAI
+ zh_Hans: 从 MistralAI 获取 API Key
+ url:
+ en_US: https://console.mistral.ai/api-keys/
+supported_model_types:
+ - llm
+configurate_methods:
+ - predefined-model
+provider_credential_schema:
+ credential_form_schemas:
+ - variable: api_key
+ label:
+ en_US: API Key
+ type: secret-input
+ required: true
+ placeholder:
+ zh_Hans: 在此输入您的 API Key
+ en_US: Enter your API Key
+address: https://api.openai.com  # NOTE(review): provider is mistralai but this is the OpenAI host - confirm intended
\ No newline at end of file
diff --git a/ai-provider/model-runtime/moonshot/assets/icon_l_en.svg b/ai-provider/model-runtime/model-providers/moonshot/assets/icon_l_en.svg
similarity index 100%
rename from ai-provider/model-runtime/moonshot/assets/icon_l_en.svg
rename to ai-provider/model-runtime/model-providers/moonshot/assets/icon_l_en.svg
diff --git a/ai-provider/model-runtime/moonshot/assets/icon_s_en.svg b/ai-provider/model-runtime/model-providers/moonshot/assets/icon_s_en.svg
similarity index 100%
rename from ai-provider/model-runtime/moonshot/assets/icon_s_en.svg
rename to ai-provider/model-runtime/model-providers/moonshot/assets/icon_s_en.svg
diff --git a/ai-provider/model-runtime/moonshot/llm/moonshot-v1-128k.yaml b/ai-provider/model-runtime/model-providers/moonshot/llm/moonshot-v1-128k.yaml
similarity index 100%
rename from ai-provider/model-runtime/moonshot/llm/moonshot-v1-128k.yaml
rename to ai-provider/model-runtime/model-providers/moonshot/llm/moonshot-v1-128k.yaml
diff --git a/ai-provider/model-runtime/moonshot/llm/moonshot-v1-32k.yaml b/ai-provider/model-runtime/model-providers/moonshot/llm/moonshot-v1-32k.yaml
similarity index 100%
rename from ai-provider/model-runtime/moonshot/llm/moonshot-v1-32k.yaml
rename to ai-provider/model-runtime/model-providers/moonshot/llm/moonshot-v1-32k.yaml
diff --git a/ai-provider/model-runtime/moonshot/llm/moonshot-v1-8k.yaml b/ai-provider/model-runtime/model-providers/moonshot/llm/moonshot-v1-8k.yaml
similarity index 100%
rename from ai-provider/model-runtime/moonshot/llm/moonshot-v1-8k.yaml
rename to ai-provider/model-runtime/model-providers/moonshot/llm/moonshot-v1-8k.yaml
diff --git a/ai-provider/model-runtime/moonshot/moonshot.yaml b/ai-provider/model-runtime/model-providers/moonshot/moonshot.yaml
similarity index 100%
rename from ai-provider/model-runtime/moonshot/moonshot.yaml
rename to ai-provider/model-runtime/model-providers/moonshot/moonshot.yaml
diff --git a/ai-provider/model-runtime/model-providers/novita/assets/icon_l_en.svg b/ai-provider/model-runtime/model-providers/novita/assets/icon_l_en.svg
new file mode 100644
index 00000000..5c92cdbc
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/novita/assets/icon_l_en.svg
@@ -0,0 +1,19 @@
+
diff --git a/ai-provider/model-runtime/model-providers/novita/assets/icon_s_en.svg b/ai-provider/model-runtime/model-providers/novita/assets/icon_s_en.svg
new file mode 100644
index 00000000..798c1d63
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/novita/assets/icon_s_en.svg
@@ -0,0 +1,10 @@
+
diff --git a/ai-provider/model-runtime/model-providers/novita/llm/Nous-Hermes-2-Mixtral-8x7B-DPO.yaml b/ai-provider/model-runtime/model-providers/novita/llm/Nous-Hermes-2-Mixtral-8x7B-DPO.yaml
new file mode 100644
index 00000000..7ff30458
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/novita/llm/Nous-Hermes-2-Mixtral-8x7B-DPO.yaml
@@ -0,0 +1,41 @@
+model: Nous-Hermes-2-Mixtral-8x7B-DPO
+label:
+ zh_Hans: Nous-Hermes-2-Mixtral-8x7B-DPO
+ en_US: Nous-Hermes-2-Mixtral-8x7B-DPO
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 32768
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ min: 0
+ max: 2
+ default: 1
+ - name: top_p
+ use_template: top_p
+ min: 0
+ max: 1
+ default: 1
+ - name: max_tokens
+ use_template: max_tokens
+ min: 1
+ max: 2048
+ default: 512
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ min: -2
+ max: 2
+ default: 0
+ - name: presence_penalty
+ use_template: presence_penalty
+ min: -2
+ max: 2
+ default: 0
+pricing:
+ input: '0.0027'
+ output: '0.0027'
+ unit: '0.0001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/novita/llm/airoboros-l2-70b.yaml b/ai-provider/model-runtime/model-providers/novita/llm/airoboros-l2-70b.yaml
new file mode 100644
index 00000000..b5994184
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/novita/llm/airoboros-l2-70b.yaml
@@ -0,0 +1,41 @@
+model: jondurbin/airoboros-l2-70b
+label:
+ zh_Hans: jondurbin/airoboros-l2-70b
+ en_US: jondurbin/airoboros-l2-70b
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 4096
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ min: 0
+ max: 2
+ default: 1
+ - name: top_p
+ use_template: top_p
+ min: 0
+ max: 1
+ default: 1
+ - name: max_tokens
+ use_template: max_tokens
+ min: 1
+ max: 2048
+ default: 512
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ min: -2
+ max: 2
+ default: 0
+ - name: presence_penalty
+ use_template: presence_penalty
+ min: -2
+ max: 2
+ default: 0
+pricing:
+ input: '0.005'
+ output: '0.005'
+ unit: '0.0001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/novita/llm/dolphin-mixtral-8x22b.yaml b/ai-provider/model-runtime/model-providers/novita/llm/dolphin-mixtral-8x22b.yaml
new file mode 100644
index 00000000..72a181f5
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/novita/llm/dolphin-mixtral-8x22b.yaml
@@ -0,0 +1,41 @@
+model: cognitivecomputations/dolphin-mixtral-8x22b
+label:
+ zh_Hans: cognitivecomputations/dolphin-mixtral-8x22b
+ en_US: cognitivecomputations/dolphin-mixtral-8x22b
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 16000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ min: 0
+ max: 2
+ default: 1
+ - name: top_p
+ use_template: top_p
+ min: 0
+ max: 1
+ default: 1
+ - name: max_tokens
+ use_template: max_tokens
+ min: 1
+ max: 2048
+ default: 512
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ min: -2
+ max: 2
+ default: 0
+ - name: presence_penalty
+ use_template: presence_penalty
+ min: -2
+ max: 2
+ default: 0
+pricing:
+ input: '0.009'
+ output: '0.009'
+ unit: '0.0001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/novita/llm/gemma-2-9b-it.yaml b/ai-provider/model-runtime/model-providers/novita/llm/gemma-2-9b-it.yaml
new file mode 100644
index 00000000..d1749bc8
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/novita/llm/gemma-2-9b-it.yaml
@@ -0,0 +1,41 @@
+model: google/gemma-2-9b-it
+label:
+ zh_Hans: google/gemma-2-9b-it
+ en_US: google/gemma-2-9b-it
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 8192
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ min: 0
+ max: 2
+ default: 1
+ - name: top_p
+ use_template: top_p
+ min: 0
+ max: 1
+ default: 1
+ - name: max_tokens
+ use_template: max_tokens
+ min: 1
+ max: 2048
+ default: 512
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ min: -2
+ max: 2
+ default: 0
+ - name: presence_penalty
+ use_template: presence_penalty
+ min: -2
+ max: 2
+ default: 0
+pricing:
+ input: '0.0008'
+ output: '0.0008'
+ unit: '0.0001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/novita/llm/hermes-2-pro-llama-3-8b.yaml b/ai-provider/model-runtime/model-providers/novita/llm/hermes-2-pro-llama-3-8b.yaml
new file mode 100644
index 00000000..8b3228e5
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/novita/llm/hermes-2-pro-llama-3-8b.yaml
@@ -0,0 +1,41 @@
+model: nousresearch/hermes-2-pro-llama-3-8b
+label:
+ zh_Hans: nousresearch/hermes-2-pro-llama-3-8b
+ en_US: nousresearch/hermes-2-pro-llama-3-8b
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 8192
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ min: 0
+ max: 2
+ default: 1
+ - name: top_p
+ use_template: top_p
+ min: 0
+ max: 1
+ default: 1
+ - name: max_tokens
+ use_template: max_tokens
+ min: 1
+ max: 2048
+ default: 512
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ min: -2
+ max: 2
+ default: 0
+ - name: presence_penalty
+ use_template: presence_penalty
+ min: -2
+ max: 2
+ default: 0
+pricing:
+ input: '0.0014'
+ output: '0.0014'
+ unit: '0.0001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/novita/llm/l3-70b-euryale-v2.1.yaml b/ai-provider/model-runtime/model-providers/novita/llm/l3-70b-euryale-v2.1.yaml
new file mode 100644
index 00000000..5e27941c
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/novita/llm/l3-70b-euryale-v2.1.yaml
@@ -0,0 +1,41 @@
+model: sao10k/l3-70b-euryale-v2.1
+label:
+ zh_Hans: sao10k/l3-70b-euryale-v2.1
+ en_US: sao10k/l3-70b-euryale-v2.1
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 16000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ min: 0
+ max: 2
+ default: 1
+ - name: top_p
+ use_template: top_p
+ min: 0
+ max: 1
+ default: 1
+ - name: max_tokens
+ use_template: max_tokens
+ min: 1
+ max: 2048
+ default: 512
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ min: -2
+ max: 2
+ default: 0
+ - name: presence_penalty
+ use_template: presence_penalty
+ min: -2
+ max: 2
+ default: 0
+pricing:
+ input: '0.0148'
+ output: '0.0148'
+ unit: '0.0001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/novita/llm/llama-3-70b-instruct.yaml b/ai-provider/model-runtime/model-providers/novita/llm/llama-3-70b-instruct.yaml
new file mode 100644
index 00000000..39709e10
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/novita/llm/llama-3-70b-instruct.yaml
@@ -0,0 +1,41 @@
+model: meta-llama/llama-3-70b-instruct
+label:
+ zh_Hans: meta-llama/llama-3-70b-instruct
+ en_US: meta-llama/llama-3-70b-instruct
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 8192
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ min: 0
+ max: 2
+ default: 1
+ - name: top_p
+ use_template: top_p
+ min: 0
+ max: 1
+ default: 1
+ - name: max_tokens
+ use_template: max_tokens
+ min: 1
+ max: 2048
+ default: 512
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ min: -2
+ max: 2
+ default: 0
+ - name: presence_penalty
+ use_template: presence_penalty
+ min: -2
+ max: 2
+ default: 0
+pricing:
+ input: '0.0051'
+ output: '0.0074'
+ unit: '0.0001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/novita/llm/llama-3-8b-instruct.yaml b/ai-provider/model-runtime/model-providers/novita/llm/llama-3-8b-instruct.yaml
new file mode 100644
index 00000000..9b5e5df4
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/novita/llm/llama-3-8b-instruct.yaml
@@ -0,0 +1,41 @@
+model: meta-llama/llama-3-8b-instruct
+label:
+ zh_Hans: meta-llama/llama-3-8b-instruct
+ en_US: meta-llama/llama-3-8b-instruct
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 8192
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ min: 0
+ max: 2
+ default: 1
+ - name: top_p
+ use_template: top_p
+ min: 0
+ max: 1
+ default: 1
+ - name: max_tokens
+ use_template: max_tokens
+ min: 1
+ max: 2048
+ default: 512
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ min: -2
+ max: 2
+ default: 0
+ - name: presence_penalty
+ use_template: presence_penalty
+ min: -2
+ max: 2
+ default: 0
+pricing:
+ input: '0.00063'
+ output: '0.00063'
+ unit: '0.0001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/novita/llm/llama-3.1-405b-instruct.yaml b/ai-provider/model-runtime/model-providers/novita/llm/llama-3.1-405b-instruct.yaml
new file mode 100644
index 00000000..c5a45271
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/novita/llm/llama-3.1-405b-instruct.yaml
@@ -0,0 +1,41 @@
+model: meta-llama/llama-3.1-405b-instruct
+label:
+ zh_Hans: meta-llama/llama-3.1-405b-instruct
+ en_US: meta-llama/llama-3.1-405b-instruct
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 32768
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ min: 0
+ max: 2
+ default: 1
+ - name: top_p
+ use_template: top_p
+ min: 0
+ max: 1
+ default: 1
+ - name: max_tokens
+ use_template: max_tokens
+ min: 1
+ max: 2048
+ default: 512
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ min: -2
+ max: 2
+ default: 0
+ - name: presence_penalty
+ use_template: presence_penalty
+ min: -2
+ max: 2
+ default: 0
+pricing:
+ input: '0.03'
+ output: '0.05'
+ unit: '0.0001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/novita/llm/llama-3.1-70b-instruct.yaml b/ai-provider/model-runtime/model-providers/novita/llm/llama-3.1-70b-instruct.yaml
new file mode 100644
index 00000000..3a5c29c4
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/novita/llm/llama-3.1-70b-instruct.yaml
@@ -0,0 +1,41 @@
+model: meta-llama/llama-3.1-70b-instruct
+label:
+ zh_Hans: meta-llama/llama-3.1-70b-instruct
+ en_US: meta-llama/llama-3.1-70b-instruct
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 8192
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ min: 0
+ max: 2
+ default: 1
+ - name: top_p
+ use_template: top_p
+ min: 0
+ max: 1
+ default: 1
+ - name: max_tokens
+ use_template: max_tokens
+ min: 1
+ max: 2048
+ default: 512
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ min: -2
+ max: 2
+ default: 0
+ - name: presence_penalty
+ use_template: presence_penalty
+ min: -2
+ max: 2
+ default: 0
+pricing:
+ input: '0.0055'
+ output: '0.0076'
+ unit: '0.0001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/novita/llm/llama-3.1-8b-instruct.yaml b/ai-provider/model-runtime/model-providers/novita/llm/llama-3.1-8b-instruct.yaml
new file mode 100644
index 00000000..e6ef772a
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/novita/llm/llama-3.1-8b-instruct.yaml
@@ -0,0 +1,41 @@
+model: meta-llama/llama-3.1-8b-instruct
+label:
+ zh_Hans: meta-llama/llama-3.1-8b-instruct
+ en_US: meta-llama/llama-3.1-8b-instruct
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 8192
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ min: 0
+ max: 2
+ default: 1
+ - name: top_p
+ use_template: top_p
+ min: 0
+ max: 1
+ default: 1
+ - name: max_tokens
+ use_template: max_tokens
+ min: 1
+ max: 2048
+ default: 512
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ min: -2
+ max: 2
+ default: 0
+ - name: presence_penalty
+ use_template: presence_penalty
+ min: -2
+ max: 2
+ default: 0
+pricing:
+ input: '0.001'
+ output: '0.001'
+ unit: '0.0001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/novita/llm/llm.py b/ai-provider/model-runtime/model-providers/novita/llm/llm.py
new file mode 100644
index 00000000..23367ed1
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/novita/llm/llm.py
@@ -0,0 +1,69 @@
+from collections.abc import Generator
+from typing import Optional, Union
+
+from core.model_runtime.entities.llm_entities import LLMResult
+from core.model_runtime.entities.message_entities import PromptMessage, PromptMessageTool
+from core.model_runtime.entities.model_entities import AIModelEntity
+from core.model_runtime.model_providers.openai_api_compatible.llm.llm import OAIAPICompatLargeLanguageModel
+
+
+class NovitaLargeLanguageModel(OAIAPICompatLargeLanguageModel):  # OpenAI-compatible LLM; every override below first injects the Novita endpoint
+ def _update_endpoint_url(self, credentials: dict):  # mutates the passed dict in place and returns that same object
+ credentials["endpoint_url"] = "https://api.novita.ai/v3/openai"
+ credentials["extra_headers"] = {"X-Novita-Source": "dify.ai"}  # replaces any existing "extra_headers" value
+ return credentials
+
+ def _invoke(  # injects the Novita endpoint, then delegates to the parent _invoke
+ self,
+ model: str,
+ credentials: dict,
+ prompt_messages: list[PromptMessage],
+ model_parameters: dict,
+ tools: Optional[list[PromptMessageTool]] = None,
+ stop: Optional[list[str]] = None,
+ stream: bool = True,
+ user: Optional[str] = None,
+ ) -> Union[LLMResult, Generator]:
+ cred_with_endpoint = self._update_endpoint_url(credentials=credentials)
+ return super()._invoke(model, cred_with_endpoint, prompt_messages, model_parameters, tools, stop, stream, user)
+
+ def validate_credentials(self, model: str, credentials: dict) -> None:  # injects endpoint and "mode", then delegates to the parent
+ cred_with_endpoint = self._update_endpoint_url(credentials=credentials)
+ self._add_custom_parameters(credentials, model)  # same dict object as cred_with_endpoint, so "mode" reaches the parent call
+ return super().validate_credentials(model, cred_with_endpoint)
+
+ @classmethod
+ def _add_custom_parameters(cls, credentials: dict, model: str) -> None:  # `model` is not read; "mode" is always set to "chat"
+ credentials["mode"] = "chat"
+
+ def _generate(  # injects the Novita endpoint, then delegates to the parent _generate
+ self,
+ model: str,
+ credentials: dict,
+ prompt_messages: list[PromptMessage],
+ model_parameters: dict,
+ tools: Optional[list[PromptMessageTool]] = None,
+ stop: Optional[list[str]] = None,
+ stream: bool = True,
+ user: Optional[str] = None,
+ ) -> Union[LLMResult, Generator]:
+ cred_with_endpoint = self._update_endpoint_url(credentials=credentials)
+ return super()._generate(
+ model, cred_with_endpoint, prompt_messages, model_parameters, tools, stop, stream, user
+ )
+
+ def get_customizable_model_schema(self, model: str, credentials: dict) -> AIModelEntity:  # injects the endpoint, then delegates to the parent
+ cred_with_endpoint = self._update_endpoint_url(credentials=credentials)
+
+ return super().get_customizable_model_schema(model, cred_with_endpoint)
+
+ def get_num_tokens(  # injects the Novita endpoint, then delegates to the parent get_num_tokens
+ self,
+ model: str,
+ credentials: dict,
+ prompt_messages: list[PromptMessage],
+ tools: Optional[list[PromptMessageTool]] = None,
+ ) -> int:
+ cred_with_endpoint = self._update_endpoint_url(credentials=credentials)
+
+ return super().get_num_tokens(model, cred_with_endpoint, prompt_messages, tools)
diff --git a/ai-provider/model-runtime/model-providers/novita/llm/lzlv_70b.yaml b/ai-provider/model-runtime/model-providers/novita/llm/lzlv_70b.yaml
new file mode 100644
index 00000000..0cc68a8c
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/novita/llm/lzlv_70b.yaml
@@ -0,0 +1,41 @@
+model: lzlv_70b
+label:
+ zh_Hans: lzlv_70b
+ en_US: lzlv_70b
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 4096
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ min: 0
+ max: 2
+ default: 1
+ - name: top_p
+ use_template: top_p
+ min: 0
+ max: 1
+ default: 1
+ - name: max_tokens
+ use_template: max_tokens
+ min: 1
+ max: 2048
+ default: 512
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ min: -2
+ max: 2
+ default: 0
+ - name: presence_penalty
+ use_template: presence_penalty
+ min: -2
+ max: 2
+ default: 0
+pricing:
+ input: '0.0058'
+ output: '0.0078'
+ unit: '0.0001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/novita/llm/midnight-rose-70b.yaml b/ai-provider/model-runtime/model-providers/novita/llm/midnight-rose-70b.yaml
new file mode 100644
index 00000000..19876bee
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/novita/llm/midnight-rose-70b.yaml
@@ -0,0 +1,41 @@
+model: sophosympatheia/midnight-rose-70b
+label:
+ zh_Hans: sophosympatheia/midnight-rose-70b
+ en_US: sophosympatheia/midnight-rose-70b
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 4096
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ min: 0
+ max: 2
+ default: 1
+ - name: top_p
+ use_template: top_p
+ min: 0
+ max: 1
+ default: 1
+ - name: max_tokens
+ use_template: max_tokens
+ min: 1
+ max: 2048
+ default: 512
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ min: -2
+ max: 2
+ default: 0
+ - name: presence_penalty
+ use_template: presence_penalty
+ min: -2
+ max: 2
+ default: 0
+pricing:
+ input: '0.008'
+ output: '0.008'
+ unit: '0.0001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/novita/llm/mistral-7b-instruct.yaml b/ai-provider/model-runtime/model-providers/novita/llm/mistral-7b-instruct.yaml
new file mode 100644
index 00000000..6fba47bc
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/novita/llm/mistral-7b-instruct.yaml
@@ -0,0 +1,41 @@
+model: mistralai/mistral-7b-instruct
+label:
+ zh_Hans: mistralai/mistral-7b-instruct
+ en_US: mistralai/mistral-7b-instruct
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 32768
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ min: 0
+ max: 2
+ default: 1
+ - name: top_p
+ use_template: top_p
+ min: 0
+ max: 1
+ default: 1
+ - name: max_tokens
+ use_template: max_tokens
+ min: 1
+ max: 2048
+ default: 512
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ min: -2
+ max: 2
+ default: 0
+ - name: presence_penalty
+ use_template: presence_penalty
+ min: -2
+ max: 2
+ default: 0
+pricing:
+ input: '0.00059'
+ output: '0.00059'
+ unit: '0.0001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/novita/llm/mythomax-l2-13b.yaml b/ai-provider/model-runtime/model-providers/novita/llm/mythomax-l2-13b.yaml
new file mode 100644
index 00000000..7e4ac3ff
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/novita/llm/mythomax-l2-13b.yaml
@@ -0,0 +1,41 @@
+model: gryphe/mythomax-l2-13b
+label:
+ zh_Hans: gryphe/mythomax-l2-13b
+ en_US: gryphe/mythomax-l2-13b
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 4096
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ min: 0
+ max: 2
+ default: 1
+ - name: top_p
+ use_template: top_p
+ min: 0
+ max: 1
+ default: 1
+ - name: max_tokens
+ use_template: max_tokens
+ min: 1
+ max: 2048
+ default: 512
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ min: -2
+ max: 2
+ default: 0
+ - name: presence_penalty
+ use_template: presence_penalty
+ min: -2
+ max: 2
+ default: 0
+pricing:
+ input: '0.00119'
+ output: '0.00119'
+ unit: '0.0001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/novita/llm/nous-hermes-llama2-13b.yaml b/ai-provider/model-runtime/model-providers/novita/llm/nous-hermes-llama2-13b.yaml
new file mode 100644
index 00000000..75671c41
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/novita/llm/nous-hermes-llama2-13b.yaml
@@ -0,0 +1,41 @@
+model: nousresearch/nous-hermes-llama2-13b
+label:
+ zh_Hans: nousresearch/nous-hermes-llama2-13b
+ en_US: nousresearch/nous-hermes-llama2-13b
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 4096
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ min: 0
+ max: 2
+ default: 1
+ - name: top_p
+ use_template: top_p
+ min: 0
+ max: 1
+ default: 1
+ - name: max_tokens
+ use_template: max_tokens
+ min: 1
+ max: 2048
+ default: 512
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ min: -2
+ max: 2
+ default: 0
+ - name: presence_penalty
+ use_template: presence_penalty
+ min: -2
+ max: 2
+ default: 0
+pricing:
+ input: '0.0017'
+ output: '0.0017'
+ unit: '0.0001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/novita/llm/openhermes-2.5-mistral-7b.yaml b/ai-provider/model-runtime/model-providers/novita/llm/openhermes-2.5-mistral-7b.yaml
new file mode 100644
index 00000000..8b0deba4
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/novita/llm/openhermes-2.5-mistral-7b.yaml
@@ -0,0 +1,41 @@
+model: teknium/openhermes-2.5-mistral-7b
+label:
+ zh_Hans: teknium/openhermes-2.5-mistral-7b
+ en_US: teknium/openhermes-2.5-mistral-7b
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 4096
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ min: 0
+ max: 2
+ default: 1
+ - name: top_p
+ use_template: top_p
+ min: 0
+ max: 1
+ default: 1
+ - name: max_tokens
+ use_template: max_tokens
+ min: 1
+ max: 2048
+ default: 512
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ min: -2
+ max: 2
+ default: 0
+ - name: presence_penalty
+ use_template: presence_penalty
+ min: -2
+ max: 2
+ default: 0
+pricing:
+ input: '0.0017'
+ output: '0.0017'
+ unit: '0.0001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/novita/llm/wizardlm-2-8x22b.yaml b/ai-provider/model-runtime/model-providers/novita/llm/wizardlm-2-8x22b.yaml
new file mode 100644
index 00000000..ef42568e
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/novita/llm/wizardlm-2-8x22b.yaml
@@ -0,0 +1,41 @@
+model: microsoft/wizardlm-2-8x22b
+label:
+ zh_Hans: microsoft/wizardlm-2-8x22b
+ en_US: microsoft/wizardlm-2-8x22b
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 65535
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ min: 0
+ max: 2
+ default: 1
+ - name: top_p
+ use_template: top_p
+ min: 0
+ max: 1
+ default: 1
+ - name: max_tokens
+ use_template: max_tokens
+ min: 1
+ max: 2048
+ default: 512
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ min: -2
+ max: 2
+ default: 0
+ - name: presence_penalty
+ use_template: presence_penalty
+ min: -2
+ max: 2
+ default: 0
+pricing:
+ input: '0.0064'
+ output: '0.0064'
+ unit: '0.0001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/novita/novita.yaml b/ai-provider/model-runtime/model-providers/novita/novita.yaml
new file mode 100644
index 00000000..41a6c5db
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/novita/novita.yaml
@@ -0,0 +1,32 @@
+provider: novita
+label:
+ en_US: novita.ai
+description:
+ en_US: An LLM API that matches various application scenarios with high cost-effectiveness.
+ zh_Hans: 适配多种海外应用场景的高性价比 LLM API
+icon_small:
+ en_US: icon_s_en.svg
+icon_large:
+ en_US: icon_l_en.svg
+background: "#eadeff"
+help:
+ title:
+ en_US: Get your API key from novita.ai
+ zh_Hans: 从 novita.ai 获取 API Key
+ url:
+ en_US: https://novita.ai/settings#key-management?utm_source=dify&utm_medium=ch&utm_campaign=api
+supported_model_types:
+ - llm
+configurate_methods:
+ - predefined-model
+provider_credential_schema:
+ credential_form_schemas:
+ - variable: api_key
+ required: true
+ label:
+ en_US: API Key
+ type: secret-input
+ placeholder:
+ zh_Hans: 在此输入您的 API Key
+ en_US: Enter your API Key
+address: https://api.openai.com  # NOTE(review): this provider's llm.py targets https://api.novita.ai/v3/openai - confirm this OpenAI host is intended
\ No newline at end of file
diff --git a/ai-provider/model-runtime/model-providers/nvidia/assets/icon_l_en.svg b/ai-provider/model-runtime/model-providers/nvidia/assets/icon_l_en.svg
new file mode 100644
index 00000000..97d94e43
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/nvidia/assets/icon_l_en.svg
@@ -0,0 +1,2 @@
+
\ No newline at end of file
diff --git a/ai-provider/model-runtime/model-providers/nvidia/assets/icon_s_en.svg b/ai-provider/model-runtime/model-providers/nvidia/assets/icon_s_en.svg
new file mode 100644
index 00000000..562ce462
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/nvidia/assets/icon_s_en.svg
@@ -0,0 +1,3 @@
+
diff --git a/ai-provider/model-runtime/model-providers/nvidia/llm/arctic.yaml b/ai-provider/model-runtime/model-providers/nvidia/llm/arctic.yaml
new file mode 100644
index 00000000..7f53ae58
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/nvidia/llm/arctic.yaml
@@ -0,0 +1,36 @@
+model: snowflake/arctic
+label:
+ zh_Hans: snowflake/arctic
+ en_US: snowflake/arctic
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 4000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ min: 0
+ max: 1
+ default: 0.5
+ - name: top_p
+ use_template: top_p
+ min: 0
+ max: 1
+ default: 1
+ - name: max_tokens
+ use_template: max_tokens
+ min: 1
+ max: 1024
+ default: 1024
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ min: -2
+ max: 2
+ default: 0
+ - name: presence_penalty
+ use_template: presence_penalty
+ min: -2
+ max: 2
+ default: 0
diff --git a/ai-provider/model-runtime/model-providers/nvidia/llm/codegemma-7b.yaml b/ai-provider/model-runtime/model-providers/nvidia/llm/codegemma-7b.yaml
new file mode 100644
index 00000000..57446224
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/nvidia/llm/codegemma-7b.yaml
@@ -0,0 +1,36 @@
+model: google/codegemma-7b
+label:
+ zh_Hans: google/codegemma-7b
+ en_US: google/codegemma-7b
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 8192
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ min: 0
+ max: 1
+ default: 0.5
+ - name: top_p
+ use_template: top_p
+ min: 0
+ max: 1
+ default: 1
+ - name: max_tokens
+ use_template: max_tokens
+ min: 1
+ max: 1024
+ default: 1024
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ min: -2
+ max: 2
+ default: 0
+ - name: presence_penalty
+ use_template: presence_penalty
+ min: -2
+ max: 2
+ default: 0
diff --git a/ai-provider/model-runtime/model-providers/nvidia/llm/fuyu-8b.yaml b/ai-provider/model-runtime/model-providers/nvidia/llm/fuyu-8b.yaml
new file mode 100644
index 00000000..6ae524c6
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/nvidia/llm/fuyu-8b.yaml
@@ -0,0 +1,27 @@
+model: fuyu-8b
+label:
+ zh_Hans: fuyu-8b
+ en_US: fuyu-8b
+model_type: llm
+features:
+ - agent-thought
+ - vision
+model_properties:
+ mode: chat
+ context_size: 16000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ default: 0.2
+ min: 0.1
+ max: 1
+ - name: top_p
+ use_template: top_p
+ default: 0.7
+ min: 0.1
+ max: 1
+ - name: max_tokens
+ use_template: max_tokens
+ default: 1024
+ min: 1
+ max: 1024
diff --git a/ai-provider/model-runtime/model-providers/nvidia/llm/gemma-7b.yaml b/ai-provider/model-runtime/model-providers/nvidia/llm/gemma-7b.yaml
new file mode 100644
index 00000000..794b820b
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/nvidia/llm/gemma-7b.yaml
@@ -0,0 +1,36 @@
+model: google/gemma-7b
+label:
+ zh_Hans: google/gemma-7b
+ en_US: google/gemma-7b
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 8192
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ min: 0
+ max: 1
+ default: 0.5
+ - name: top_p
+ use_template: top_p
+ min: 0
+ max: 1
+ default: 1
+ - name: max_tokens
+ use_template: max_tokens
+ min: 1
+ max: 1024
+ default: 1024
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ min: -2
+ max: 2
+ default: 0
+ - name: presence_penalty
+ use_template: presence_penalty
+ min: -2
+ max: 2
+ default: 0
diff --git a/ai-provider/model-runtime/model-providers/nvidia/llm/llama-3.1-405b.yaml b/ai-provider/model-runtime/model-providers/nvidia/llm/llama-3.1-405b.yaml
new file mode 100644
index 00000000..5472de99
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/nvidia/llm/llama-3.1-405b.yaml
@@ -0,0 +1,36 @@
+model: meta/llama-3.1-405b-instruct
+label:
+ zh_Hans: meta/llama-3.1-405b-instruct
+ en_US: meta/llama-3.1-405b-instruct
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 131072
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ min: 0
+ max: 1
+ default: 0.5
+ - name: top_p
+ use_template: top_p
+ min: 0
+ max: 1
+ default: 1
+ - name: max_tokens
+ use_template: max_tokens
+ min: 1
+ max: 4096
+ default: 1024
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ min: -2
+ max: 2
+ default: 0
+ - name: presence_penalty
+ use_template: presence_penalty
+ min: -2
+ max: 2
+ default: 0
diff --git a/ai-provider/model-runtime/model-providers/nvidia/llm/llama-3.1-70b.yaml b/ai-provider/model-runtime/model-providers/nvidia/llm/llama-3.1-70b.yaml
new file mode 100644
index 00000000..16af0554
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/nvidia/llm/llama-3.1-70b.yaml
@@ -0,0 +1,36 @@
+model: meta/llama-3.1-70b-instruct
+label:
+ zh_Hans: meta/llama-3.1-70b-instruct
+ en_US: meta/llama-3.1-70b-instruct
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 131072
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ min: 0
+ max: 1
+ default: 0.5
+ - name: top_p
+ use_template: top_p
+ min: 0
+ max: 1
+ default: 1
+ - name: max_tokens
+ use_template: max_tokens
+ min: 1
+ max: 4096
+ default: 1024
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ min: -2
+ max: 2
+ default: 0
+ - name: presence_penalty
+ use_template: presence_penalty
+ min: -2
+ max: 2
+ default: 0
diff --git a/ai-provider/model-runtime/model-providers/nvidia/llm/llama-3.1-8b.yaml b/ai-provider/model-runtime/model-providers/nvidia/llm/llama-3.1-8b.yaml
new file mode 100644
index 00000000..f2d43dc3
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/nvidia/llm/llama-3.1-8b.yaml
@@ -0,0 +1,36 @@
+model: meta/llama-3.1-8b-instruct
+label:
+ zh_Hans: meta/llama-3.1-8b-instruct
+ en_US: meta/llama-3.1-8b-instruct
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 131072
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ min: 0
+ max: 1
+ default: 0.5
+ - name: top_p
+ use_template: top_p
+ min: 0
+ max: 1
+ default: 1
+ - name: max_tokens
+ use_template: max_tokens
+ min: 1
+ max: 4096
+ default: 1024
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ min: -2
+ max: 2
+ default: 0
+ - name: presence_penalty
+ use_template: presence_penalty
+ min: -2
+ max: 2
+ default: 0
diff --git a/ai-provider/model-runtime/model-providers/nvidia/llm/llama2-70b.yaml b/ai-provider/model-runtime/model-providers/nvidia/llm/llama2-70b.yaml
new file mode 100644
index 00000000..9fba816b
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/nvidia/llm/llama2-70b.yaml
@@ -0,0 +1,36 @@
+model: meta/llama2-70b
+label:
+ zh_Hans: meta/llama2-70b
+ en_US: meta/llama2-70b
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 4096
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ min: 0
+ max: 1
+ default: 0.5
+ - name: top_p
+ use_template: top_p
+ min: 0
+ max: 1
+ default: 1
+ - name: max_tokens
+ use_template: max_tokens
+ min: 1
+ max: 1024
+ default: 1024
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ min: -2
+ max: 2
+ default: 0
+ - name: presence_penalty
+ use_template: presence_penalty
+ min: -2
+ max: 2
+ default: 0
diff --git a/ai-provider/model-runtime/model-providers/nvidia/llm/llama3-70b.yaml b/ai-provider/model-runtime/model-providers/nvidia/llm/llama3-70b.yaml
new file mode 100644
index 00000000..4d591d42
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/nvidia/llm/llama3-70b.yaml
@@ -0,0 +1,36 @@
+model: meta/llama3-70b-instruct
+label:
+ zh_Hans: meta/llama3-70b-instruct
+ en_US: meta/llama3-70b-instruct
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 8192
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ min: 0
+ max: 1
+ default: 0.5
+ - name: top_p
+ use_template: top_p
+ min: 0
+ max: 1
+ default: 1
+ - name: max_tokens
+ use_template: max_tokens
+ min: 1
+ max: 1024
+ default: 1024
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ min: -2
+ max: 2
+ default: 0
+ - name: presence_penalty
+ use_template: presence_penalty
+ min: -2
+ max: 2
+ default: 0
diff --git a/ai-provider/model-runtime/model-providers/nvidia/llm/llama3-8b.yaml b/ai-provider/model-runtime/model-providers/nvidia/llm/llama3-8b.yaml
new file mode 100644
index 00000000..01395666
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/nvidia/llm/llama3-8b.yaml
@@ -0,0 +1,36 @@
+model: meta/llama3-8b-instruct
+label:
+ zh_Hans: meta/llama3-8b-instruct
+ en_US: meta/llama3-8b-instruct
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 8192
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ min: 0
+ max: 1
+ default: 0.5
+ - name: top_p
+ use_template: top_p
+ min: 0
+ max: 1
+ default: 1
+ - name: max_tokens
+ use_template: max_tokens
+ min: 1
+ max: 1024
+ default: 1024
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ min: -2
+ max: 2
+ default: 0
+ - name: presence_penalty
+ use_template: presence_penalty
+ min: -2
+ max: 2
+ default: 0
diff --git a/ai-provider/model-runtime/model-providers/nvidia/llm/mistral-large.yaml b/ai-provider/model-runtime/model-providers/nvidia/llm/mistral-large.yaml
new file mode 100644
index 00000000..3e14d221
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/nvidia/llm/mistral-large.yaml
@@ -0,0 +1,36 @@
+model: mistralai/mistral-large
+label:
+ zh_Hans: mistralai/mistral-large
+ en_US: mistralai/mistral-large
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 32000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ min: 0
+ max: 1
+ default: 0.5
+ - name: top_p
+ use_template: top_p
+ min: 0
+ max: 1
+ default: 1
+ - name: max_tokens
+ use_template: max_tokens
+ min: 1
+ max: 1024
+ default: 1024
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ min: -2
+ max: 2
+ default: 0
+ - name: presence_penalty
+ use_template: presence_penalty
+ min: -2
+ max: 2
+ default: 0
diff --git a/ai-provider/model-runtime/model-providers/nvidia/llm/mistralai_mixtral-8x7b-instruct-v0.1.yaml b/ai-provider/model-runtime/model-providers/nvidia/llm/mistralai_mixtral-8x7b-instruct-v0.1.yaml
new file mode 100644
index 00000000..d2c4dc5d
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/nvidia/llm/mistralai_mixtral-8x7b-instruct-v0.1.yaml
@@ -0,0 +1,36 @@
+model: mistralai/mixtral-8x7b-instruct-v0.1
+label:
+ zh_Hans: mistralai/mixtral-8x7b-instruct-v0.1
+ en_US: mistralai/mixtral-8x7b-instruct-v0.1
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 32768
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ min: 0
+ max: 1
+ default: 0.5
+ - name: top_p
+ use_template: top_p
+ min: 0
+ max: 1
+ default: 1
+ - name: max_tokens
+ use_template: max_tokens
+ min: 1
+ max: 1024
+ default: 1024
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ min: -2
+ max: 2
+ default: 0
+ - name: presence_penalty
+ use_template: presence_penalty
+ min: -2
+ max: 2
+ default: 0
diff --git a/ai-provider/model-runtime/model-providers/nvidia/llm/mixtral-8x22b-instruct-v0.1.yaml b/ai-provider/model-runtime/model-providers/nvidia/llm/mixtral-8x22b-instruct-v0.1.yaml
new file mode 100644
index 00000000..05500c03
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/nvidia/llm/mixtral-8x22b-instruct-v0.1.yaml
@@ -0,0 +1,36 @@
+model: mistralai/mixtral-8x22b-instruct-v0.1
+label:
+ zh_Hans: mistralai/mixtral-8x22b-instruct-v0.1
+ en_US: mistralai/mixtral-8x22b-instruct-v0.1
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 64000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ min: 0
+ max: 1
+ default: 0.5
+ - name: top_p
+ use_template: top_p
+ min: 0
+ max: 1
+ default: 1
+ - name: max_tokens
+ use_template: max_tokens
+ min: 1
+ max: 1024
+ default: 1024
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ min: -2
+ max: 2
+ default: 0
+ - name: presence_penalty
+ use_template: presence_penalty
+ min: -2
+ max: 2
+ default: 0
diff --git a/ai-provider/model-runtime/model-providers/nvidia/llm/nemotron-4-340b-instruct.yaml b/ai-provider/model-runtime/model-providers/nvidia/llm/nemotron-4-340b-instruct.yaml
new file mode 100644
index 00000000..e5537cd2
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/nvidia/llm/nemotron-4-340b-instruct.yaml
@@ -0,0 +1,36 @@
+model: nvidia/nemotron-4-340b-instruct
+label:
+ zh_Hans: nvidia/nemotron-4-340b-instruct
+ en_US: nvidia/nemotron-4-340b-instruct
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 131072
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ min: 0
+ max: 1
+ default: 0.5
+ - name: top_p
+ use_template: top_p
+ min: 0
+ max: 1
+ default: 1
+ - name: max_tokens
+ use_template: max_tokens
+ min: 1
+ max: 4096
+ default: 1024
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ min: -2
+ max: 2
+ default: 0
+ - name: presence_penalty
+ use_template: presence_penalty
+ min: -2
+ max: 2
+ default: 0
diff --git a/ai-provider/model-runtime/model-providers/nvidia/llm/phi-3-medium-128k-instruct.yaml b/ai-provider/model-runtime/model-providers/nvidia/llm/phi-3-medium-128k-instruct.yaml
new file mode 100644
index 00000000..0c5538d1
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/nvidia/llm/phi-3-medium-128k-instruct.yaml
@@ -0,0 +1,36 @@
+model: microsoft/phi-3-medium-128k-instruct
+label:
+ zh_Hans: microsoft/phi-3-medium-128k-instruct
+ en_US: microsoft/phi-3-medium-128k-instruct
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 131072
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ min: 0
+ max: 1
+ default: 0.5
+ - name: top_p
+ use_template: top_p
+ min: 0
+ max: 1
+ default: 1
+ - name: max_tokens
+ use_template: max_tokens
+ min: 1
+ max: 4096
+ default: 1024
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ min: -2
+ max: 2
+ default: 0
+ - name: presence_penalty
+ use_template: presence_penalty
+ min: -2
+ max: 2
+ default: 0
diff --git a/ai-provider/model-runtime/model-providers/nvidia/llm/phi-3-mini-128k-instruct.yaml b/ai-provider/model-runtime/model-providers/nvidia/llm/phi-3-mini-128k-instruct.yaml
new file mode 100644
index 00000000..1eb1c51d
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/nvidia/llm/phi-3-mini-128k-instruct.yaml
@@ -0,0 +1,36 @@
+model: microsoft/phi-3-mini-128k-instruct
+label:
+ zh_Hans: microsoft/phi-3-mini-128k-instruct
+ en_US: microsoft/phi-3-mini-128k-instruct
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 131072
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ min: 0
+ max: 1
+ default: 0.5
+ - name: top_p
+ use_template: top_p
+ min: 0
+ max: 1
+ default: 1
+ - name: max_tokens
+ use_template: max_tokens
+ min: 1
+ max: 4096
+ default: 1024
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ min: -2
+ max: 2
+ default: 0
+ - name: presence_penalty
+ use_template: presence_penalty
+ min: -2
+ max: 2
+ default: 0
diff --git a/ai-provider/model-runtime/model-providers/nvidia/llm/recurrentgemma-2b.yaml b/ai-provider/model-runtime/model-providers/nvidia/llm/recurrentgemma-2b.yaml
new file mode 100644
index 00000000..73fcce39
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/nvidia/llm/recurrentgemma-2b.yaml
@@ -0,0 +1,37 @@
+model: google/recurrentgemma-2b
+label:
+ zh_Hans: google/recurrentgemma-2b
+ en_US: google/recurrentgemma-2b
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 2048
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ min: 0
+ max: 1
+ default: 0.2
+ - name: top_p
+ use_template: top_p
+ min: 0
+ max: 1
+ default: 0.7
+ - name: max_tokens
+ use_template: max_tokens
+ min: 1
+ max: 1024
+ default: 1024
+ - name: random_seed
+ type: int
+ help:
+ en_US: The seed to use for random sampling. If set, different calls will generate deterministic results.
+ zh_Hans: 当开启随机数种子以后,你可以通过指定一个固定的种子来使得回答结果更加稳定
+ label:
+ en_US: Seed
+ zh_Hans: 种子
+ default: 0
+ min: 0
+ max: 2147483647
diff --git a/ai-provider/model-runtime/model-providers/nvidia/nvidia.yaml b/ai-provider/model-runtime/model-providers/nvidia/nvidia.yaml
new file mode 100644
index 00000000..2cfdf42a
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/nvidia/nvidia.yaml
@@ -0,0 +1,34 @@
+provider: nvidia
+label:
+ en_US: API Catalog
+description:
+ en_US: API Catalog
+ zh_Hans: API Catalog
+icon_small:
+ en_US: icon_s_en.svg
+icon_large:
+ en_US: icon_l_en.svg
+background: "#FFFFFF"
+help:
+ title:
+ en_US: Get your API Key from NVIDIA
+ zh_Hans: 从 NVIDIA 获取 API Key
+ url:
+ en_US: https://build.nvidia.com/explore/discover
+supported_model_types:
+ - llm
+ - text-embedding
+ - rerank
+configurate_methods:
+ - predefined-model
+provider_credential_schema:
+ credential_form_schemas:
+ - variable: api_key
+ label:
+ en_US: API Key
+ type: secret-input
+ required: true
+ placeholder:
+ zh_Hans: 在此输入您的 API Key
+ en_US: Enter your API Key
+address: https://api.openai.com
\ No newline at end of file
diff --git a/ai-provider/model-runtime/model-providers/oci/assets/icon_l_en.svg b/ai-provider/model-runtime/model-providers/oci/assets/icon_l_en.svg
new file mode 100644
index 00000000..0981dfcf
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/oci/assets/icon_l_en.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/ai-provider/model-runtime/model-providers/oci/assets/icon_s_en.svg b/ai-provider/model-runtime/model-providers/oci/assets/icon_s_en.svg
new file mode 100644
index 00000000..0981dfcf
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/oci/assets/icon_s_en.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/ai-provider/model-runtime/model-providers/oci/llm/cohere.command-r-16k.yaml b/ai-provider/model-runtime/model-providers/oci/llm/cohere.command-r-16k.yaml
new file mode 100644
index 00000000..eb60cbcd
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/oci/llm/cohere.command-r-16k.yaml
@@ -0,0 +1,52 @@
+model: cohere.command-r-16k
+label:
+ en_US: cohere.command-r-16k v1.2
+model_type: llm
+features:
+ - multi-tool-call
+ - agent-thought
+ - stream-tool-call
+model_properties:
+ mode: chat
+ context_size: 128000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ default: 1
+ max: 1.0
+ - name: topP
+ use_template: top_p
+ default: 0.75
+ min: 0
+ max: 1
+ - name: topK
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ required: false
+ default: 0
+ min: 0
+ max: 500
+ - name: presencePenalty
+ use_template: presence_penalty
+ min: 0
+ max: 1
+ default: 0
+ - name: frequencyPenalty
+ use_template: frequency_penalty
+ min: 0
+ max: 1
+ default: 0
+ - name: maxTokens
+ use_template: max_tokens
+ default: 600
+ max: 4000
+pricing:
+ input: '0.004'
+ output: '0.004'
+ unit: '0.0001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/oci/llm/cohere.command-r-plus.yaml b/ai-provider/model-runtime/model-providers/oci/llm/cohere.command-r-plus.yaml
new file mode 100644
index 00000000..df31b0d0
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/oci/llm/cohere.command-r-plus.yaml
@@ -0,0 +1,52 @@
+model: cohere.command-r-plus
+label:
+ en_US: cohere.command-r-plus v1.2
+model_type: llm
+features:
+ - multi-tool-call
+ - agent-thought
+ - stream-tool-call
+model_properties:
+ mode: chat
+ context_size: 128000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ default: 1
+ max: 1.0
+ - name: topP
+ use_template: top_p
+ default: 0.75
+ min: 0
+ max: 1
+ - name: topK
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ required: false
+ default: 0
+ min: 0
+ max: 500
+ - name: presencePenalty
+ use_template: presence_penalty
+ min: 0
+ max: 1
+ default: 0
+ - name: frequencyPenalty
+ use_template: frequency_penalty
+ min: 0
+ max: 1
+ default: 0
+ - name: maxTokens
+ use_template: max_tokens
+ default: 600
+ max: 4000
+pricing:
+ input: '0.0219'
+ output: '0.0219'
+ unit: '0.0001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/oci/llm/meta.llama-3-70b-instruct.yaml b/ai-provider/model-runtime/model-providers/oci/llm/meta.llama-3-70b-instruct.yaml
new file mode 100644
index 00000000..dd5be107
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/oci/llm/meta.llama-3-70b-instruct.yaml
@@ -0,0 +1,51 @@
+model: meta.llama-3-70b-instruct
+label:
+ zh_Hans: meta.llama-3-70b-instruct
+ en_US: meta.llama-3-70b-instruct
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 131072
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ default: 1
+ max: 2.0
+ - name: topP
+ use_template: top_p
+ default: 0.75
+ min: 0
+ max: 1
+ - name: topK
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ required: false
+ default: 0
+ min: 0
+ max: 500
+ - name: presencePenalty
+ use_template: presence_penalty
+ min: -2
+ max: 2
+ default: 0
+ - name: frequencyPenalty
+ use_template: frequency_penalty
+ min: -2
+ max: 2
+ default: 0
+ - name: maxTokens
+ use_template: max_tokens
+ default: 600
+ max: 8000
+pricing:
+ input: '0.015'
+ output: '0.015'
+ unit: '0.0001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/oci/oci.yaml b/ai-provider/model-runtime/model-providers/oci/oci.yaml
new file mode 100644
index 00000000..39b21468
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/oci/oci.yaml
@@ -0,0 +1,43 @@
+provider: oci
+label:
+ en_US: OCIGenerativeAI
+description:
+ en_US: Models provided by OCI, such as Cohere Command R and Cohere Command R+.
+ zh_Hans: OCI 提供的模型,例如 Cohere Command R 和 Cohere Command R+。
+icon_small:
+ en_US: icon_s_en.svg
+icon_large:
+ en_US: icon_l_en.svg
+background: "#FFFFFF"
+help:
+ title:
+ en_US: Get your API Key from OCI
+ zh_Hans: 从 OCI 获取 API Key
+ url:
+ en_US: https://docs.cloud.oracle.com/Content/API/Concepts/sdkconfig.htm
+supported_model_types:
+ - llm
+ - text-embedding
+ #- rerank
+configurate_methods:
+ - predefined-model
+ #- customizable-model
+provider_credential_schema:
+ credential_form_schemas:
+ - variable: oci_config_content
+ label:
+ en_US: oci api key config file's content
+ type: text-input
+ required: true
+ placeholder:
+ zh_Hans: 在此输入您的 oci api key config 文件的内容(base64.b64encode("user_ocid/fingerprint/tenancy_ocid/region/compartment_ocid".encode('utf-8')) )
+ en_US: Enter your oci api key config file's content(base64.b64encode("user_ocid/fingerprint/tenancy_ocid/region/compartment_ocid".encode('utf-8')) )
+ - variable: oci_key_content
+ label:
+ en_US: oci api key file's content
+ type: text-input
+ required: true
+ placeholder:
+ zh_Hans: 在此输入您的 oci api key 文件的内容(base64.b64encode("pem file content".encode('utf-8')))
+ en_US: Enter your oci api key file's content(base64.b64encode("pem file content".encode('utf-8')))
+address: https://api.openai.com
\ No newline at end of file
diff --git a/ai-provider/model-runtime/model-providers/openrouter/assets/openrouter.svg b/ai-provider/model-runtime/model-providers/openrouter/assets/openrouter.svg
new file mode 100644
index 00000000..2e9590d9
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/openrouter/assets/openrouter.svg
@@ -0,0 +1,11 @@
+
diff --git a/ai-provider/model-runtime/model-providers/openrouter/assets/openrouter_square.svg b/ai-provider/model-runtime/model-providers/openrouter/assets/openrouter_square.svg
new file mode 100644
index 00000000..ed81fc04
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/openrouter/assets/openrouter_square.svg
@@ -0,0 +1,10 @@
+
diff --git a/ai-provider/model-runtime/model-providers/openrouter/llm/claude-3-5-sonnet.yaml b/ai-provider/model-runtime/model-providers/openrouter/llm/claude-3-5-sonnet.yaml
new file mode 100644
index 00000000..40558854
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/openrouter/llm/claude-3-5-sonnet.yaml
@@ -0,0 +1,39 @@
+model: anthropic/claude-3.5-sonnet
+label:
+ en_US: claude-3.5-sonnet
+model_type: llm
+features:
+ - agent-thought
+ - vision
+ - tool-call
+ - stream-tool-call
+model_properties:
+ mode: chat
+ context_size: 200000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ required: false
+ - name: max_tokens
+ use_template: max_tokens
+ required: true
+ default: 4096
+ min: 1
+ max: 4096
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: "3.00"
+ output: "15.00"
+ unit: "0.000001"
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/openrouter/llm/claude-3-haiku.yaml b/ai-provider/model-runtime/model-providers/openrouter/llm/claude-3-haiku.yaml
new file mode 100644
index 00000000..ce17d412
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/openrouter/llm/claude-3-haiku.yaml
@@ -0,0 +1,39 @@
+model: anthropic/claude-3-haiku
+label:
+ en_US: claude-3-haiku
+model_type: llm
+features:
+ - agent-thought
+ - vision
+ - tool-call
+ - stream-tool-call
+model_properties:
+ mode: chat
+ context_size: 200000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ required: false
+ - name: max_tokens
+ use_template: max_tokens
+ required: true
+ default: 4096
+ min: 1
+ max: 4096
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: "0.25"
+ output: "1.25"
+ unit: "0.000001"
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/openrouter/llm/claude-3-opus.yaml b/ai-provider/model-runtime/model-providers/openrouter/llm/claude-3-opus.yaml
new file mode 100644
index 00000000..68a92219
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/openrouter/llm/claude-3-opus.yaml
@@ -0,0 +1,39 @@
+model: anthropic/claude-3-opus
+label:
+ en_US: claude-3-opus
+model_type: llm
+features:
+ - agent-thought
+ - vision
+ - tool-call
+ - stream-tool-call
+model_properties:
+ mode: chat
+ context_size: 200000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ required: false
+ - name: max_tokens
+ use_template: max_tokens
+ required: true
+ default: 4096
+ min: 1
+ max: 4096
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: "15.00"
+ output: "75.00"
+ unit: "0.000001"
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/openrouter/llm/claude-3-sonnet.yaml b/ai-provider/model-runtime/model-providers/openrouter/llm/claude-3-sonnet.yaml
new file mode 100644
index 00000000..ede88459
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/openrouter/llm/claude-3-sonnet.yaml
@@ -0,0 +1,39 @@
+model: anthropic/claude-3-sonnet
+label:
+ en_US: claude-3-sonnet
+model_type: llm
+features:
+ - agent-thought
+ - vision
+ - tool-call
+ - stream-tool-call
+model_properties:
+ mode: chat
+ context_size: 200000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ required: false
+ - name: max_tokens
+ use_template: max_tokens
+ required: true
+ default: 4096
+ min: 1
+ max: 4096
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: "3.00"
+ output: "15.00"
+ unit: "0.000001"
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/openrouter/llm/command-r-plus.yaml b/ai-provider/model-runtime/model-providers/openrouter/llm/command-r-plus.yaml
new file mode 100644
index 00000000..a23eb269
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/openrouter/llm/command-r-plus.yaml
@@ -0,0 +1,45 @@
+model: cohere/command-r-plus
+label:
+ en_US: command-r-plus
+model_type: llm
+features:
+ - multi-tool-call
+ - agent-thought
+ - stream-tool-call
+model_properties:
+ mode: chat
+ context_size: 128000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ max: 5.0
+ - name: top_p
+ use_template: top_p
+ default: 0.75
+ min: 0.01
+ max: 0.99
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ required: false
+ default: 0
+ min: 0
+ max: 500
+ - name: presence_penalty
+ use_template: presence_penalty
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ - name: max_tokens
+ use_template: max_tokens
+ default: 1024
+ max: 4096
+pricing:
+ input: "3"
+ output: "15"
+ unit: "0.000001"
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/openrouter/llm/command-r.yaml b/ai-provider/model-runtime/model-providers/openrouter/llm/command-r.yaml
new file mode 100644
index 00000000..7165bf29
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/openrouter/llm/command-r.yaml
@@ -0,0 +1,45 @@
+model: cohere/command-r
+label:
+ en_US: command-r
+model_type: llm
+features:
+ - multi-tool-call
+ - agent-thought
+ - stream-tool-call
+model_properties:
+ mode: chat
+ context_size: 128000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ max: 5.0
+ - name: top_p
+ use_template: top_p
+ default: 0.75
+ min: 0.01
+ max: 0.99
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ required: false
+ default: 0
+ min: 0
+ max: 500
+ - name: presence_penalty
+ use_template: presence_penalty
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ - name: max_tokens
+ use_template: max_tokens
+ default: 1024
+ max: 4096
+pricing:
+ input: "0.5"
+ output: "1.5"
+ unit: "0.000001"
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/openrouter/llm/deepseek-chat.yaml b/ai-provider/model-runtime/model-providers/openrouter/llm/deepseek-chat.yaml
new file mode 100644
index 00000000..7a1dea69
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/openrouter/llm/deepseek-chat.yaml
@@ -0,0 +1,50 @@
+model: deepseek/deepseek-chat
+label:
+ en_US: deepseek-chat
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 32000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 1
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 控制生成结果的多样性和随机性。数值越小,越严谨;数值越大,越发散。
+ en_US: Control the diversity and randomness of generated results. The smaller the value, the more rigorous it is; the larger the value, the more divergent it is.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 4096
+ min: 1
+ max: 4096
+ help:
+ zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+ en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 1
+ min: 0.01
+ max: 1.00
+ help:
+ zh_Hans: 控制生成结果的随机性。数值越小,随机性越弱;数值越大,随机性越强。一般而言,top_p 和 temperature 两个参数选择一个进行调整即可。
+ en_US: Control the randomness of generated results. The smaller the value, the weaker the randomness; the larger the value, the stronger the randomness. Generally speaking, you can adjust one of the two parameters top_p and temperature.
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ default: 0
+ min: -2.0
+ max: 2.0
+ help:
+ zh_Hans: 介于 -2.0 和 2.0 之间的数字。如果该值为正,那么新 token 会根据其在已有文本中的出现频率受到相应的惩罚,降低模型重复相同内容的可能性。
+ en_US: A number between -2.0 and 2.0. If the value is positive, new tokens are penalized based on their frequency of occurrence in existing text, reducing the likelihood that the model will repeat the same content.
+pricing:
+ input: "0.14"
+ output: "0.28"
+ unit: "0.000001"
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/openrouter/llm/deepseek-coder.yaml b/ai-provider/model-runtime/model-providers/openrouter/llm/deepseek-coder.yaml
new file mode 100644
index 00000000..c05f4769
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/openrouter/llm/deepseek-coder.yaml
@@ -0,0 +1,30 @@
+model: deepseek/deepseek-coder
+label:
+ en_US: deepseek-coder
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 32000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ min: 0
+ max: 1
+ default: 0.5
+ - name: top_p
+ use_template: top_p
+ min: 0
+ max: 1
+ default: 1
+ - name: max_tokens
+ use_template: max_tokens
+ min: 1
+ max: 4096
+ default: 1024
+pricing:
+ input: "0.14"
+ output: "0.28"
+ unit: "0.000001"
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/openrouter/llm/gemini-1.5-flash.yaml b/ai-provider/model-runtime/model-providers/openrouter/llm/gemini-1.5-flash.yaml
new file mode 100644
index 00000000..0b2f329b
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/openrouter/llm/gemini-1.5-flash.yaml
@@ -0,0 +1,39 @@
+model: google/gemini-flash-1.5
+label:
+ en_US: gemini-flash-1.5
+model_type: llm
+features:
+ - agent-thought
+ - vision
+ - tool-call
+ - stream-tool-call
+model_properties:
+ mode: chat
+ context_size: 1048576
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ required: false
+ - name: max_tokens
+ use_template: max_tokens
+ required: true
+ default: 8192
+ min: 1
+ max: 8192
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: "0.25"
+ output: "0.75"
+ unit: "0.000001"
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/openrouter/llm/gemini-1.5-pro.yaml b/ai-provider/model-runtime/model-providers/openrouter/llm/gemini-1.5-pro.yaml
new file mode 100644
index 00000000..679ce9bd
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/openrouter/llm/gemini-1.5-pro.yaml
@@ -0,0 +1,39 @@
+model: google/gemini-pro-1.5
+label:
+ en_US: gemini-pro-1.5
+model_type: llm
+features:
+ - agent-thought
+ - vision
+ - tool-call
+ - stream-tool-call
+model_properties:
+ mode: chat
+ context_size: 1048576
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ required: false
+ - name: max_tokens
+ use_template: max_tokens
+ required: true
+ default: 8192
+ min: 1
+ max: 8192
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: "2.5"
+ output: "7.5"
+ unit: "0.000001"
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/openrouter/llm/gemini-pro.yaml b/ai-provider/model-runtime/model-providers/openrouter/llm/gemini-pro.yaml
new file mode 100644
index 00000000..9f5d96c5
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/openrouter/llm/gemini-pro.yaml
@@ -0,0 +1,38 @@
+model: google/gemini-pro
+label:
+ en_US: gemini-pro
+model_type: llm
+features:
+ - agent-thought
+ - tool-call
+ - stream-tool-call
+model_properties:
+ mode: chat
+ context_size: 30720
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ required: false
+ - name: max_tokens
+ use_template: max_tokens
+ required: true
+ default: 2048
+ min: 1
+ max: 2048
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: "0.125"
+ output: "0.375"
+ unit: "0.000001"
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/openrouter/llm/gpt-3.5-turbo.yaml b/ai-provider/model-runtime/model-providers/openrouter/llm/gpt-3.5-turbo.yaml
new file mode 100644
index 00000000..186c1cc6
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/openrouter/llm/gpt-3.5-turbo.yaml
@@ -0,0 +1,42 @@
+model: openai/gpt-3.5-turbo
+label:
+ en_US: gpt-3.5-turbo
+model_type: llm
+features:
+ - multi-tool-call
+ - agent-thought
+ - stream-tool-call
+model_properties:
+ mode: chat
+ context_size: 16385
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: presence_penalty
+ use_template: presence_penalty
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ - name: max_tokens
+ use_template: max_tokens
+ default: 512
+ min: 1
+ max: 4096
+ - name: response_format
+ label:
+ zh_Hans: 回复格式
+ en_US: Response Format
+ type: string
+ help:
+ zh_Hans: 指定模型必须输出的格式
+ en_US: specifying the format that the model must output
+ required: false
+ options:
+ - text
+ - json_object
+pricing:
+ input: "0.5"
+ output: "1.5"
+ unit: "0.000001"
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/openrouter/llm/gpt-4-32k.yaml b/ai-provider/model-runtime/model-providers/openrouter/llm/gpt-4-32k.yaml
new file mode 100644
index 00000000..8c2989b3
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/openrouter/llm/gpt-4-32k.yaml
@@ -0,0 +1,57 @@
+model: openai/gpt-4-32k
+label:
+ en_US: gpt-4-32k
+model_type: llm
+features:
+ - multi-tool-call
+ - agent-thought
+ - stream-tool-call
+model_properties:
+ mode: chat
+ context_size: 32768
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: presence_penalty
+ use_template: presence_penalty
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ - name: max_tokens
+ use_template: max_tokens
+ default: 512
+ min: 1
+ max: 32768
+ - name: seed
+ label:
+ zh_Hans: 种子
+ en_US: Seed
+ type: int
+ help:
+ zh_Hans:
+ 如果指定,模型将尽最大努力进行确定性采样,使得重复的具有相同种子和参数的请求应该返回相同的结果。不能保证确定性,您应该参考 system_fingerprint
+ 响应参数来监视变化。
+ en_US:
+ If specified, model will make a best effort to sample deterministically,
+ such that repeated requests with the same seed and parameters should return
+ the same result. Determinism is not guaranteed, and you should refer to the
+ system_fingerprint response parameter to monitor changes in the backend.
+ required: false
+ - name: response_format
+ label:
+ zh_Hans: 回复格式
+ en_US: Response Format
+ type: string
+ help:
+ zh_Hans: 指定模型必须输出的格式
+ en_US: specifying the format that the model must output
+ required: false
+ options:
+ - text
+ - json_object
+pricing:
+ input: "60"
+ output: "120"
+ unit: "0.000001"
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/openrouter/llm/gpt-4.yaml b/ai-provider/model-runtime/model-providers/openrouter/llm/gpt-4.yaml
new file mode 100644
index 00000000..ef19d4f6
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/openrouter/llm/gpt-4.yaml
@@ -0,0 +1,57 @@
+model: openai/gpt-4
+label:
+ en_US: gpt-4
+model_type: llm
+features:
+ - multi-tool-call
+ - agent-thought
+ - stream-tool-call
+model_properties:
+ mode: chat
+ context_size: 8192
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: presence_penalty
+ use_template: presence_penalty
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ - name: max_tokens
+ use_template: max_tokens
+ default: 512
+ min: 1
+ max: 8192
+ - name: seed
+ label:
+ zh_Hans: 种子
+ en_US: Seed
+ type: int
+ help:
+ zh_Hans:
+ 如果指定,模型将尽最大努力进行确定性采样,使得重复的具有相同种子和参数的请求应该返回相同的结果。不能保证确定性,您应该参考 system_fingerprint
+ 响应参数来监视变化。
+ en_US:
+ If specified, model will make a best effort to sample deterministically,
+ such that repeated requests with the same seed and parameters should return
+ the same result. Determinism is not guaranteed, and you should refer to the
+ system_fingerprint response parameter to monitor changes in the backend.
+ required: false
+ - name: response_format
+ label:
+ zh_Hans: 回复格式
+ en_US: Response Format
+ type: string
+ help:
+ zh_Hans: 指定模型必须输出的格式
+ en_US: specifying the format that the model must output
+ required: false
+ options:
+ - text
+ - json_object
+pricing:
+ input: "30"
+ output: "60"
+ unit: "0.000001"
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/openrouter/llm/gpt-4o-2024-08-06.yaml b/ai-provider/model-runtime/model-providers/openrouter/llm/gpt-4o-2024-08-06.yaml
new file mode 100644
index 00000000..0be325f5
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/openrouter/llm/gpt-4o-2024-08-06.yaml
@@ -0,0 +1,44 @@
+model: openai/gpt-4o-2024-08-06
+label:
+ zh_Hans: gpt-4o-2024-08-06
+ en_US: gpt-4o-2024-08-06
+model_type: llm
+features:
+ - multi-tool-call
+ - agent-thought
+ - stream-tool-call
+ - vision
+model_properties:
+ mode: chat
+ context_size: 128000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: presence_penalty
+ use_template: presence_penalty
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ - name: max_tokens
+ use_template: max_tokens
+ default: 512
+ min: 1
+ max: 16384
+ - name: response_format
+ label:
+ zh_Hans: 回复格式
+ en_US: Response Format
+ type: string
+ help:
+ zh_Hans: 指定模型必须输出的格式
+ en_US: specifying the format that the model must output
+ required: false
+ options:
+ - text
+ - json_object
+pricing:
+ input: '2.50'
+ output: '10.00'
+ unit: '0.000001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/openrouter/llm/gpt-4o-mini.yaml b/ai-provider/model-runtime/model-providers/openrouter/llm/gpt-4o-mini.yaml
new file mode 100644
index 00000000..3b1d9564
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/openrouter/llm/gpt-4o-mini.yaml
@@ -0,0 +1,43 @@
+model: openai/gpt-4o-mini
+label:
+ en_US: gpt-4o-mini
+model_type: llm
+features:
+ - multi-tool-call
+ - agent-thought
+ - stream-tool-call
+ - vision
+model_properties:
+ mode: chat
+ context_size: 128000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: presence_penalty
+ use_template: presence_penalty
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ - name: max_tokens
+ use_template: max_tokens
+ default: 512
+ min: 1
+ max: 16384
+ - name: response_format
+ label:
+ zh_Hans: 回复格式
+ en_US: Response Format
+ type: string
+ help:
+ zh_Hans: 指定模型必须输出的格式
+ en_US: specifying the format that the model must output
+ required: false
+ options:
+ - text
+ - json_object
+pricing:
+ input: "0.15"
+ output: "0.60"
+ unit: "0.000001"
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/openrouter/llm/gpt-4o.yaml b/ai-provider/model-runtime/model-providers/openrouter/llm/gpt-4o.yaml
new file mode 100644
index 00000000..a8c97efd
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/openrouter/llm/gpt-4o.yaml
@@ -0,0 +1,43 @@
+model: openai/gpt-4o
+label:
+ en_US: gpt-4o
+model_type: llm
+features:
+ - multi-tool-call
+ - agent-thought
+ - stream-tool-call
+ - vision
+model_properties:
+ mode: chat
+ context_size: 128000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: presence_penalty
+ use_template: presence_penalty
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ - name: max_tokens
+ use_template: max_tokens
+ default: 512
+ min: 1
+ max: 4096
+ - name: response_format
+ label:
+ zh_Hans: 回复格式
+ en_US: Response Format
+ type: string
+ help:
+ zh_Hans: 指定模型必须输出的格式
+ en_US: specifying the format that the model must output
+ required: false
+ options:
+ - text
+ - json_object
+pricing:
+ input: "5.00"
+ output: "15.00"
+ unit: "0.000001"
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/openrouter/llm/llama-3-70b-instruct.yaml b/ai-provider/model-runtime/model-providers/openrouter/llm/llama-3-70b-instruct.yaml
new file mode 100644
index 00000000..b91c39e7
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/openrouter/llm/llama-3-70b-instruct.yaml
@@ -0,0 +1,23 @@
+model: meta-llama/llama-3-70b-instruct
+label:
+ en_US: llama-3-70b-instruct
+model_type: llm
+model_properties:
+ mode: completion
+ context_size: 8192
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: max_tokens
+ use_template: max_tokens
+ required: true
+ default: 512
+ min: 1
+ max: 2048
+pricing:
+ input: "0.59"
+ output: "0.79"
+ unit: "0.000001"
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/openrouter/llm/llama-3-8b-instruct.yaml b/ai-provider/model-runtime/model-providers/openrouter/llm/llama-3-8b-instruct.yaml
new file mode 100644
index 00000000..84b2c7fa
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/openrouter/llm/llama-3-8b-instruct.yaml
@@ -0,0 +1,23 @@
+model: meta-llama/llama-3-8b-instruct
+label:
+ en_US: llama-3-8b-instruct
+model_type: llm
+model_properties:
+ mode: completion
+ context_size: 8192
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: max_tokens
+ use_template: max_tokens
+ required: true
+ default: 512
+ min: 1
+ max: 2048
+pricing:
+ input: "0.07"
+ output: "0.07"
+ unit: "0.000001"
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/openrouter/llm/llama-3.1-405b-instruct.yaml b/ai-provider/model-runtime/model-providers/openrouter/llm/llama-3.1-405b-instruct.yaml
new file mode 100644
index 00000000..a489ce1b
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/openrouter/llm/llama-3.1-405b-instruct.yaml
@@ -0,0 +1,23 @@
+model: meta-llama/llama-3.1-405b-instruct
+label:
+ en_US: llama-3.1-405b-instruct
+model_type: llm
+model_properties:
+ mode: chat
+ context_size: 131072
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: max_tokens
+ use_template: max_tokens
+ required: true
+ default: 512
+ min: 1
+ max: 131072
+pricing:
+ input: "2.7"
+ output: "2.7"
+ unit: "0.000001"
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/openrouter/llm/llama-3.1-70b-instruct.yaml b/ai-provider/model-runtime/model-providers/openrouter/llm/llama-3.1-70b-instruct.yaml
new file mode 100644
index 00000000..12037411
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/openrouter/llm/llama-3.1-70b-instruct.yaml
@@ -0,0 +1,23 @@
+model: meta-llama/llama-3.1-70b-instruct
+label:
+ en_US: llama-3.1-70b-instruct
+model_type: llm
+model_properties:
+ mode: chat
+ context_size: 131072
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: max_tokens
+ use_template: max_tokens
+ required: true
+ default: 512
+ min: 1
+ max: 131072
+pricing:
+ input: "0.52"
+ output: "0.75"
+ unit: "0.000001"
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/openrouter/llm/llama-3.1-8b-instruct.yaml b/ai-provider/model-runtime/model-providers/openrouter/llm/llama-3.1-8b-instruct.yaml
new file mode 100644
index 00000000..6f06493f
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/openrouter/llm/llama-3.1-8b-instruct.yaml
@@ -0,0 +1,23 @@
+model: meta-llama/llama-3.1-8b-instruct
+label:
+ en_US: llama-3.1-8b-instruct
+model_type: llm
+model_properties:
+ mode: chat
+ context_size: 131072
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: max_tokens
+ use_template: max_tokens
+ required: true
+ default: 512
+ min: 1
+ max: 131072
+pricing:
+ input: "0.06"
+ output: "0.06"
+ unit: "0.000001"
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/openrouter/llm/mistral-7b-instruct.yaml b/ai-provider/model-runtime/model-providers/openrouter/llm/mistral-7b-instruct.yaml
new file mode 100644
index 00000000..012dfc55
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/openrouter/llm/mistral-7b-instruct.yaml
@@ -0,0 +1,30 @@
+model: mistralai/mistral-7b-instruct
+label:
+ en_US: mistral-7b-instruct
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: completion
+ context_size: 8000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ default: 0.7
+ min: 0
+ max: 1
+ - name: top_p
+ use_template: top_p
+ default: 1
+ min: 0
+ max: 1
+ - name: max_tokens
+ use_template: max_tokens
+ default: 1024
+ min: 1
+ max: 2048
+pricing:
+ input: "0.07"
+ output: "0.07"
+ unit: "0.000001"
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/openrouter/llm/mixtral-8x22b-instruct.yaml b/ai-provider/model-runtime/model-providers/openrouter/llm/mixtral-8x22b-instruct.yaml
new file mode 100644
index 00000000..f4eb4e45
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/openrouter/llm/mixtral-8x22b-instruct.yaml
@@ -0,0 +1,30 @@
+model: mistralai/mixtral-8x22b-instruct
+label:
+ en_US: mixtral-8x22b-instruct
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: completion
+ context_size: 64000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ default: 0.7
+ min: 0
+ max: 1
+ - name: top_p
+ use_template: top_p
+ default: 1
+ min: 0
+ max: 1
+ - name: max_tokens
+ use_template: max_tokens
+ default: 1024
+ min: 1
+ max: 8000
+pricing:
+ input: "0.65"
+ output: "0.65"
+ unit: "0.000001"
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/openrouter/llm/mixtral-8x7b-instruct.yaml b/ai-provider/model-runtime/model-providers/openrouter/llm/mixtral-8x7b-instruct.yaml
new file mode 100644
index 00000000..7871e1f7
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/openrouter/llm/mixtral-8x7b-instruct.yaml
@@ -0,0 +1,31 @@
+model: mistralai/mixtral-8x7b-instruct
+label:
+ zh_Hans: mixtral-8x7b-instruct
+ en_US: mixtral-8x7b-instruct
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: completion
+ context_size: 32000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ default: 0.7
+ min: 0
+ max: 1
+ - name: top_p
+ use_template: top_p
+ default: 1
+ min: 0
+ max: 1
+ - name: max_tokens
+ use_template: max_tokens
+ default: 1024
+ min: 1
+ max: 8000
+pricing:
+ input: "0.24"
+ output: "0.24"
+ unit: "0.000001"
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/openrouter/llm/o1-mini.yaml b/ai-provider/model-runtime/model-providers/openrouter/llm/o1-mini.yaml
new file mode 100644
index 00000000..85a918ff
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/openrouter/llm/o1-mini.yaml
@@ -0,0 +1,40 @@
+model: openai/o1-mini
+label:
+ en_US: o1-mini
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 128000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: presence_penalty
+ use_template: presence_penalty
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ - name: max_tokens
+ use_template: max_tokens
+ default: 512
+ min: 1
+ max: 65536
+ - name: response_format
+ label:
+ zh_Hans: 回复格式
+ en_US: Response Format
+ type: string
+ help:
+ zh_Hans: 指定模型必须输出的格式
+ en_US: specifying the format that the model must output
+ required: false
+ options:
+ - text
+ - json_object
+pricing:
+ input: "3.00"
+ output: "12.00"
+ unit: "0.000001"
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/openrouter/llm/o1-preview.yaml b/ai-provider/model-runtime/model-providers/openrouter/llm/o1-preview.yaml
new file mode 100644
index 00000000..74b0a511
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/openrouter/llm/o1-preview.yaml
@@ -0,0 +1,40 @@
+model: openai/o1-preview
+label:
+ en_US: o1-preview
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 128000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: presence_penalty
+ use_template: presence_penalty
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ - name: max_tokens
+ use_template: max_tokens
+ default: 512
+ min: 1
+ max: 32768
+ - name: response_format
+ label:
+ zh_Hans: 回复格式
+ en_US: Response Format
+ type: string
+ help:
+ zh_Hans: 指定模型必须输出的格式
+ en_US: specifying the format that the model must output
+ required: false
+ options:
+ - text
+ - json_object
+pricing:
+ input: "15.00"
+ output: "60.00"
+ unit: "0.000001"
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/openrouter/llm/qwen2-72b-instruct.yaml b/ai-provider/model-runtime/model-providers/openrouter/llm/qwen2-72b-instruct.yaml
new file mode 100644
index 00000000..7b75fcb0
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/openrouter/llm/qwen2-72b-instruct.yaml
@@ -0,0 +1,30 @@
+model: qwen/qwen-2-72b-instruct
+label:
+ en_US: qwen-2-72b-instruct
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: completion
+ context_size: 32768
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 512
+ min: 1
+ max: 4096
+ help:
+ zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+ en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+ - name: top_p
+ use_template: top_p
+ - name: frequency_penalty
+ use_template: frequency_penalty
+pricing:
+ input: "0.59"
+ output: "0.79"
+ unit: "0.000001"
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/openrouter/openrouter.yaml b/ai-provider/model-runtime/model-providers/openrouter/openrouter.yaml
new file mode 100644
index 00000000..6268cf07
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/openrouter/openrouter.yaml
@@ -0,0 +1,106 @@
+provider: openrouter
+label:
+ en_US: OpenRouter
+icon_small:
+ en_US: openrouter_square.svg
+icon_large:
+ en_US: openrouter.svg
+background: "#F1EFED"
+help:
+ title:
+ en_US: Get your API key from openrouter.ai
+ zh_Hans: 从 openrouter.ai 获取 API Key
+ url:
+ en_US: https://openrouter.ai/keys
+supported_model_types:
+ - llm
+configurate_methods:
+ - predefined-model
+ - customizable-model
+model_credential_schema:
+ model:
+ label:
+ en_US: Model Name
+ zh_Hans: 模型名称
+ placeholder:
+ en_US: Enter full model name
+ zh_Hans: 输入模型全称
+ credential_form_schemas:
+ - variable: api_key
+ required: true
+ label:
+ en_US: API Key
+ type: secret-input
+ placeholder:
+ zh_Hans: 在此输入您的 API Key
+ en_US: Enter your API Key
+ - variable: mode
+ show_on:
+ - variable: __model_type
+ value: llm
+ label:
+ en_US: Completion mode
+ type: select
+ required: false
+ default: chat
+ placeholder:
+ zh_Hans: 选择对话类型
+ en_US: Select completion mode
+ options:
+ - value: completion
+ label:
+ en_US: Completion
+ zh_Hans: 补全
+ - value: chat
+ label:
+ en_US: Chat
+ zh_Hans: 对话
+ - variable: context_size
+ label:
+ zh_Hans: 模型上下文长度
+ en_US: Model context size
+ required: true
+ type: text-input
+ default: "4096"
+ placeholder:
+ zh_Hans: 在此输入您的模型上下文长度
+ en_US: Enter your Model context size
+ - variable: max_tokens_to_sample
+ label:
+ zh_Hans: 最大 token 上限
+ en_US: Upper bound for max tokens
+ show_on:
+ - variable: __model_type
+ value: llm
+ default: "4096"
+ type: text-input
+ - variable: vision_support
+ show_on:
+ - variable: __model_type
+ value: llm
+ label:
+ zh_Hans: 是否支持 Vision
+ en_US: Vision Support
+ type: radio
+ required: false
+ default: "no_support"
+ options:
+ - value: "support"
+ label:
+ en_US: "Yes"
+ zh_Hans: 是
+ - value: "no_support"
+ label:
+ en_US: "No"
+ zh_Hans: 否
+provider_credential_schema:
+ credential_form_schemas:
+ - variable: api_key
+ required: true
+ label:
+ en_US: API Key
+ type: secret-input
+ placeholder:
+ zh_Hans: 在此输入您的 API Key
+ en_US: Enter your API Key
+address: https://api.openai.com
\ No newline at end of file
diff --git a/ai-provider/model-runtime/model-providers/perfxcloud/assets/icon_l_en.svg b/ai-provider/model-runtime/model-providers/perfxcloud/assets/icon_l_en.svg
new file mode 100644
index 00000000..060d9de3
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/perfxcloud/assets/icon_l_en.svg
@@ -0,0 +1,8 @@
+
+
+
diff --git a/ai-provider/model-runtime/model-providers/perfxcloud/assets/icon_s_en.svg b/ai-provider/model-runtime/model-providers/perfxcloud/assets/icon_s_en.svg
new file mode 100644
index 00000000..be0c2eeb
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/perfxcloud/assets/icon_s_en.svg
@@ -0,0 +1,8 @@
+
+
+
diff --git a/ai-provider/model-runtime/model-providers/perfxcloud/llm/Llama3-Chinese_v2.yaml b/ai-provider/model-runtime/model-providers/perfxcloud/llm/Llama3-Chinese_v2.yaml
new file mode 100644
index 00000000..bf91468f
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/perfxcloud/llm/Llama3-Chinese_v2.yaml
@@ -0,0 +1,62 @@
+model: Llama3-Chinese_v2
+label:
+ en_US: Llama3-Chinese_v2
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 8192
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.5
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+ en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected, and the generated results will be more deterministic.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 600
+ min: 1
+ max: 1248
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+ en_US: The probability threshold for nucleus sampling during generation. For example, when the value is 0.8, only the smallest set of the most likely tokens whose cumulative probability is greater than or equal to 0.8 is kept as the candidate set. The value range is (0,1.0). The larger the value, the more random the output; the lower the value, the more deterministic the output.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+ en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+ en_US: Controls the degree of repetition in the generated text. Increasing repetition_penalty reduces repetition in the output. A value of 1.0 means no penalty.
+pricing:
+ input: "0.000"
+ output: "0.000"
+ unit: "0.000"
+ currency: RMB
+deprecated: true
diff --git a/ai-provider/model-runtime/model-providers/perfxcloud/llm/Meta-Llama-3-70B-Instruct-GPTQ-Int4.yaml b/ai-provider/model-runtime/model-providers/perfxcloud/llm/Meta-Llama-3-70B-Instruct-GPTQ-Int4.yaml
new file mode 100644
index 00000000..781b837e
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/perfxcloud/llm/Meta-Llama-3-70B-Instruct-GPTQ-Int4.yaml
@@ -0,0 +1,62 @@
+model: Meta-Llama-3-70B-Instruct-GPTQ-Int4
+label:
+ en_US: Meta-Llama-3-70B-Instruct-GPTQ-Int4
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 1024
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.5
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+ en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected, and the generated results will be more deterministic.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 600
+ min: 1
+ max: 1248
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+ en_US: The probability threshold for nucleus sampling during generation. For example, when the value is 0.8, only the smallest set of the most likely tokens whose cumulative probability is greater than or equal to 0.8 is kept as the candidate set. The value range is (0,1.0). The larger the value, the more random the output; the lower the value, the more deterministic the output.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+ en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+ en_US: Controls the degree of repetition in the generated text. Increasing repetition_penalty reduces repetition in the output. A value of 1.0 means no penalty.
+pricing:
+ input: "0.000"
+ output: "0.000"
+ unit: "0.000"
+ currency: RMB
+deprecated: true
diff --git a/ai-provider/model-runtime/model-providers/perfxcloud/llm/Meta-Llama-3-8B-Instruct.yaml b/ai-provider/model-runtime/model-providers/perfxcloud/llm/Meta-Llama-3-8B-Instruct.yaml
new file mode 100644
index 00000000..67210e90
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/perfxcloud/llm/Meta-Llama-3-8B-Instruct.yaml
@@ -0,0 +1,62 @@
+model: Meta-Llama-3-8B-Instruct
+label:
+ en_US: Meta-Llama-3-8B-Instruct
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 8192
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.5
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+ en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected, and the generated results will be more deterministic.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 600
+ min: 1
+ max: 1248
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+ en_US: The probability threshold for nucleus sampling during generation. For example, when the value is 0.8, only the smallest set of the most likely tokens whose cumulative probability is greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the more random the generated output; the lower the value, the more deterministic the generated output.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+ en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+ en_US: Used to control how repetitive the model's generated output is. Increasing repetition_penalty reduces repetition in the generated text. 1.0 means no penalty.
+pricing:
+ input: "0.000"
+ output: "0.000"
+ unit: "0.000"
+ currency: RMB
+deprecated: true
diff --git a/ai-provider/model-runtime/model-providers/perfxcloud/llm/Meta-Llama-3.1-405B-Instruct-AWQ-INT4.yaml b/ai-provider/model-runtime/model-providers/perfxcloud/llm/Meta-Llama-3.1-405B-Instruct-AWQ-INT4.yaml
new file mode 100644
index 00000000..482632ff
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/perfxcloud/llm/Meta-Llama-3.1-405B-Instruct-AWQ-INT4.yaml
@@ -0,0 +1,62 @@
+model: Meta-Llama-3.1-405B-Instruct-AWQ-INT4
+label:
+ en_US: Meta-Llama-3.1-405B-Instruct-AWQ-INT4
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 410960
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.5
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+ en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected, and the generated results will be more deterministic.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 600
+ min: 1
+ max: 1248
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+ en_US: The probability threshold for nucleus sampling during generation. For example, when the value is 0.8, only the smallest set of the most likely tokens whose cumulative probability is greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the more random the generated output; the lower the value, the more deterministic the generated output.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+ en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+ en_US: Used to control how repetitive the model's generated output is. Increasing repetition_penalty reduces repetition in the generated text. 1.0 means no penalty.
+pricing:
+ input: "0.000"
+ output: "0.000"
+ unit: "0.000"
+ currency: RMB
+deprecated: true
diff --git a/ai-provider/model-runtime/model-providers/perfxcloud/llm/Meta-Llama-3.1-8B-Instruct.yaml b/ai-provider/model-runtime/model-providers/perfxcloud/llm/Meta-Llama-3.1-8B-Instruct.yaml
new file mode 100644
index 00000000..bbab4634
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/perfxcloud/llm/Meta-Llama-3.1-8B-Instruct.yaml
@@ -0,0 +1,61 @@
+model: Meta-Llama-3.1-8B-Instruct
+label:
+ en_US: Meta-Llama-3.1-8B-Instruct
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 4096
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.1
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+ en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected, and the generated results will be more deterministic.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 600
+ min: 1
+ max: 1248
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+ en_US: The probability threshold for nucleus sampling during generation. For example, when the value is 0.8, only the smallest set of the most likely tokens whose cumulative probability is greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the more random the generated output; the lower the value, the more deterministic the generated output.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+ en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+ en_US: Used to control how repetitive the model's generated output is. Increasing repetition_penalty reduces repetition in the generated text. 1.0 means no penalty.
+pricing:
+ input: "0.000"
+ output: "0.000"
+ unit: "0.000"
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/perfxcloud/llm/Qwen-14B-Chat-Int4.yaml b/ai-provider/model-runtime/model-providers/perfxcloud/llm/Qwen-14B-Chat-Int4.yaml
new file mode 100644
index 00000000..ec6d9bcc
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/perfxcloud/llm/Qwen-14B-Chat-Int4.yaml
@@ -0,0 +1,62 @@
+model: Qwen-14B-Chat-Int4
+label:
+ en_US: Qwen-14B-Chat-Int4
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 4096
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+ en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected, and the generated results will be more deterministic.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 600
+ min: 1
+ max: 1248
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+ en_US: The probability threshold for nucleus sampling during generation. For example, when the value is 0.8, only the smallest set of the most likely tokens whose cumulative probability is greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the more random the generated output; the lower the value, the more deterministic the generated output.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+ en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+ en_US: Used to control how repetitive the model's generated output is. Increasing repetition_penalty reduces repetition in the generated text. 1.0 means no penalty.
+pricing:
+ input: "0.000"
+ output: "0.000"
+ unit: "0.000"
+ currency: RMB
+deprecated: true
diff --git a/ai-provider/model-runtime/model-providers/perfxcloud/llm/Qwen1.5-110B-Chat-GPTQ-Int4.yaml b/ai-provider/model-runtime/model-providers/perfxcloud/llm/Qwen1.5-110B-Chat-GPTQ-Int4.yaml
new file mode 100644
index 00000000..b561a530
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/perfxcloud/llm/Qwen1.5-110B-Chat-GPTQ-Int4.yaml
@@ -0,0 +1,62 @@
+model: Qwen1.5-110B-Chat-GPTQ-Int4
+label:
+ en_US: Qwen1.5-110B-Chat-GPTQ-Int4
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 8192
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+ en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected, and the generated results will be more deterministic.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 128
+ min: 1
+ max: 256
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+ en_US: The probability threshold for nucleus sampling during generation. For example, when the value is 0.8, only the smallest set of the most likely tokens whose cumulative probability is greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the more random the generated output; the lower the value, the more deterministic the generated output.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+ en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+ en_US: Used to control how repetitive the model's generated output is. Increasing repetition_penalty reduces repetition in the generated text. 1.0 means no penalty.
+pricing:
+ input: "0.000"
+ output: "0.000"
+ unit: "0.000"
+ currency: RMB
+deprecated: true
diff --git a/ai-provider/model-runtime/model-providers/perfxcloud/llm/Qwen1.5-72B-Chat-GPTQ-Int4.yaml b/ai-provider/model-runtime/model-providers/perfxcloud/llm/Qwen1.5-72B-Chat-GPTQ-Int4.yaml
new file mode 100644
index 00000000..ddb6fd97
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/perfxcloud/llm/Qwen1.5-72B-Chat-GPTQ-Int4.yaml
@@ -0,0 +1,62 @@
+model: Qwen1.5-72B-Chat-GPTQ-Int4
+label:
+ en_US: Qwen1.5-72B-Chat-GPTQ-Int4
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 2048
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+ en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected, and the generated results will be more deterministic.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 600
+ min: 1
+ max: 2000
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+ en_US: The probability threshold for nucleus sampling during generation. For example, when the value is 0.8, only the smallest set of the most likely tokens whose cumulative probability is greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the more random the generated output; the lower the value, the more deterministic the generated output.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+ en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+ en_US: Used to control how repetitive the model's generated output is. Increasing repetition_penalty reduces repetition in the generated text. 1.0 means no penalty.
+pricing:
+ input: "0.000"
+ output: "0.000"
+ unit: "0.000"
+ currency: RMB
+deprecated: true
diff --git a/ai-provider/model-runtime/model-providers/perfxcloud/llm/Qwen1.5-7B.yaml b/ai-provider/model-runtime/model-providers/perfxcloud/llm/Qwen1.5-7B.yaml
new file mode 100644
index 00000000..024c79db
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/perfxcloud/llm/Qwen1.5-7B.yaml
@@ -0,0 +1,62 @@
+model: Qwen1.5-7B
+label:
+ en_US: Qwen1.5-7B
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: completion
+ context_size: 32768
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+ en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected, and the generated results will be more deterministic.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 600
+ min: 1
+ max: 2000
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+ en_US: The probability threshold for nucleus sampling during generation. For example, when the value is 0.8, only the smallest set of the most likely tokens whose cumulative probability is greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the more random the generated output; the lower the value, the more deterministic the generated output.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+ en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+ en_US: Used to control how repetitive the model's generated output is. Increasing repetition_penalty reduces repetition in the generated text. 1.0 means no penalty.
+pricing:
+ input: "0.000"
+ output: "0.000"
+ unit: "0.000"
+ currency: RMB
+deprecated: true
diff --git a/ai-provider/model-runtime/model-providers/perfxcloud/llm/Qwen2-72B-Instruct-AWQ-int4.yaml b/ai-provider/model-runtime/model-providers/perfxcloud/llm/Qwen2-72B-Instruct-AWQ-int4.yaml
new file mode 100644
index 00000000..94f661f4
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/perfxcloud/llm/Qwen2-72B-Instruct-AWQ-int4.yaml
@@ -0,0 +1,61 @@
+model: Qwen2-72B-Instruct-AWQ-int4
+label:
+ en_US: Qwen2-72B-Instruct-AWQ-int4
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 32768
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.5
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+ en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected, and the generated results will be more deterministic.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 600
+ min: 1
+ max: 1248
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+ en_US: The probability threshold for nucleus sampling during generation. For example, when the value is 0.8, only the smallest set of the most likely tokens whose cumulative probability is greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the more random the generated output; the lower the value, the more deterministic the generated output.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+ en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+ en_US: Used to control how repetitive the model's generated output is. Increasing repetition_penalty reduces repetition in the generated text. 1.0 means no penalty.
+pricing:
+ input: "0.000"
+ output: "0.000"
+ unit: "0.000"
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/perfxcloud/llm/Qwen2-72B-Instruct-GPTQ-Int4.yaml b/ai-provider/model-runtime/model-providers/perfxcloud/llm/Qwen2-72B-Instruct-GPTQ-Int4.yaml
new file mode 100644
index 00000000..a06f8d5a
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/perfxcloud/llm/Qwen2-72B-Instruct-GPTQ-Int4.yaml
@@ -0,0 +1,64 @@
+model: Qwen2-72B-Instruct-GPTQ-Int4
+label:
+ en_US: Qwen2-72B-Instruct-GPTQ-Int4
+model_type: llm
+features:
+ - multi-tool-call
+ - agent-thought
+ - stream-tool-call
+model_properties:
+ mode: chat
+ context_size: 2048
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.7
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+ en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected, and the generated results will be more deterministic.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 600
+ min: 1
+ max: 2000
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+ en_US: The probability threshold for nucleus sampling during generation. For example, when the value is 0.8, only the smallest set of the most likely tokens whose cumulative probability is greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the more random the generated output; the lower the value, the more deterministic the generated output.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+ en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+ en_US: Used to control how repetitive the model's generated output is. Increasing repetition_penalty reduces repetition in the generated text. 1.0 means no penalty.
+pricing:
+ input: "0.000"
+ output: "0.000"
+ unit: "0.000"
+ currency: RMB
+deprecated: true
diff --git a/ai-provider/model-runtime/model-providers/perfxcloud/llm/Qwen2-72B-Instruct.yaml b/ai-provider/model-runtime/model-providers/perfxcloud/llm/Qwen2-72B-Instruct.yaml
new file mode 100644
index 00000000..cea65602
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/perfxcloud/llm/Qwen2-72B-Instruct.yaml
@@ -0,0 +1,61 @@
+model: Qwen2-72B-Instruct
+label:
+ en_US: Qwen2-72B-Instruct
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 131072
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.5
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+ en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected, and the generated results will be more deterministic.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 600
+ min: 1
+ max: 1248
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+ en_US: The probability threshold for nucleus sampling during generation. For example, when the value is 0.8, only the smallest set of the most likely tokens whose cumulative probability is greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the more random the generated output; the lower the value, the more deterministic the generated output.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+ en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+ en_US: Used to control how repetitive the model's generated output is. Increasing repetition_penalty reduces repetition in the generated text. 1.0 means no penalty.
+pricing:
+ input: "0.000"
+ output: "0.000"
+ unit: "0.000"
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/perfxcloud/llm/Qwen2-7B-Instruct.yaml b/ai-provider/model-runtime/model-providers/perfxcloud/llm/Qwen2-7B-Instruct.yaml
new file mode 100644
index 00000000..43694113
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/perfxcloud/llm/Qwen2-7B-Instruct.yaml
@@ -0,0 +1,63 @@
+model: Qwen2-7B-Instruct
+label:
+ en_US: Qwen2-7B-Instruct
+model_type: llm
+features:
+ - multi-tool-call
+ - agent-thought
+ - stream-tool-call
+model_properties:
+ mode: completion
+ context_size: 32768
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+ en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected, and the generated results will be more certain.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 600
+ min: 1
+ max: 2000
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+ en_US: The probability threshold of the nucleus sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness of the generated output; the lower the value, the higher the certainty of the generated output.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+ en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+ en_US: Used to control the degree of repetition in the text generated by the model. Increasing repetition_penalty reduces repetition in the generated text. 1.0 means no penalty.
+pricing:
+ input: "0.000"
+ output: "0.000"
+ unit: "0.000"
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/perfxcloud/llm/Qwen2-7B.yaml b/ai-provider/model-runtime/model-providers/perfxcloud/llm/Qwen2-7B.yaml
new file mode 100644
index 00000000..d549ecd2
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/perfxcloud/llm/Qwen2-7B.yaml
@@ -0,0 +1,64 @@
+model: Qwen2-7B
+label:
+ en_US: Qwen2-7B
+model_type: llm
+features:
+ - multi-tool-call
+ - agent-thought
+ - stream-tool-call
+model_properties:
+ mode: completion
+ context_size: 32768
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+ en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected, and the generated results will be more certain.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 600
+ min: 1
+ max: 2000
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+ en_US: The probability threshold of the nucleus sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness of the generated output; the lower the value, the higher the certainty of the generated output.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+ en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+ en_US: Used to control the degree of repetition in the text generated by the model. Increasing repetition_penalty reduces repetition in the generated text. 1.0 means no penalty.
+pricing:
+ input: "0.000"
+ output: "0.000"
+ unit: "0.000"
+ currency: RMB
+deprecated: true
diff --git a/ai-provider/model-runtime/model-providers/perfxcloud/llm/Qwen2.5-72B-Instruct.yaml b/ai-provider/model-runtime/model-providers/perfxcloud/llm/Qwen2.5-72B-Instruct.yaml
new file mode 100644
index 00000000..15cbf01f
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/perfxcloud/llm/Qwen2.5-72B-Instruct.yaml
@@ -0,0 +1,61 @@
+model: Qwen2.5-72B-Instruct
+label:
+ en_US: Qwen2.5-72B-Instruct
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 30720
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.5
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+ en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected, and the generated results will be more certain.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 600
+ min: 1
+ max: 1248
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+ en_US: The probability threshold of the nucleus sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness of the generated output; the lower the value, the higher the certainty of the generated output.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+ en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+ en_US: Used to control the degree of repetition in the text generated by the model. Increasing repetition_penalty reduces repetition in the generated text. 1.0 means no penalty.
+pricing:
+ input: "0.000"
+ output: "0.000"
+ unit: "0.000"
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/perfxcloud/llm/Qwen2.5-7B-Instruct.yaml b/ai-provider/model-runtime/model-providers/perfxcloud/llm/Qwen2.5-7B-Instruct.yaml
new file mode 100644
index 00000000..dadc8f8f
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/perfxcloud/llm/Qwen2.5-7B-Instruct.yaml
@@ -0,0 +1,61 @@
+model: Qwen2.5-7B-Instruct
+label:
+ en_US: Qwen2.5-7B-Instruct
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 8192
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.5
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+ en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected, and the generated results will be more certain.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 600
+ min: 1
+ max: 1248
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+ en_US: The probability threshold of the nucleus sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness of the generated output; the lower the value, the higher the certainty of the generated output.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+ en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+ en_US: Used to control the degree of repetition in the text generated by the model. Increasing repetition_penalty reduces repetition in the generated text. 1.0 means no penalty.
+pricing:
+ input: "0.000"
+ output: "0.000"
+ unit: "0.000"
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/perfxcloud/llm/Reflection-Llama-3.1-70B.yaml b/ai-provider/model-runtime/model-providers/perfxcloud/llm/Reflection-Llama-3.1-70B.yaml
new file mode 100644
index 00000000..649be20b
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/perfxcloud/llm/Reflection-Llama-3.1-70B.yaml
@@ -0,0 +1,61 @@
+model: Reflection-Llama-3.1-70B
+label:
+ en_US: Reflection-Llama-3.1-70B
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 10240
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.5
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+ en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected, and the generated results will be more certain.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 600
+ min: 1
+ max: 1248
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+ en_US: The probability threshold of the nucleus sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness of the generated output; the lower the value, the higher the certainty of the generated output.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+ en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+ en_US: Used to control the degree of repetition in the text generated by the model. Increasing repetition_penalty reduces repetition in the generated text. 1.0 means no penalty.
+pricing:
+ input: "0.000"
+ output: "0.000"
+ unit: "0.000"
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/perfxcloud/llm/Yi-1_5-9B-Chat-16K.yaml b/ai-provider/model-runtime/model-providers/perfxcloud/llm/Yi-1_5-9B-Chat-16K.yaml
new file mode 100644
index 00000000..92eae680
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/perfxcloud/llm/Yi-1_5-9B-Chat-16K.yaml
@@ -0,0 +1,61 @@
+model: Yi-1_5-9B-Chat-16K
+label:
+ en_US: Yi-1_5-9B-Chat-16K
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 16384
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.5
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+ en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected, and the generated results will be more certain.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 600
+ min: 1
+ max: 1248
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+ en_US: The probability threshold of the nucleus sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness of the generated output; the lower the value, the higher the certainty of the generated output.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+ en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+ en_US: Used to control the degree of repetition in the text generated by the model. Increasing repetition_penalty reduces repetition in the generated text. 1.0 means no penalty.
+pricing:
+ input: "0.000"
+ output: "0.000"
+ unit: "0.000"
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/perfxcloud/llm/Yi-Coder-1.5B-Chat.yaml b/ai-provider/model-runtime/model-providers/perfxcloud/llm/Yi-Coder-1.5B-Chat.yaml
new file mode 100644
index 00000000..0e21ce14
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/perfxcloud/llm/Yi-Coder-1.5B-Chat.yaml
@@ -0,0 +1,61 @@
+model: Yi-Coder-1.5B-Chat
+label:
+ en_US: Yi-Coder-1.5B-Chat
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 20480
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.5
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+ en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected, and the generated results will be more certain.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 600
+ min: 1
+ max: 1248
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+ en_US: The probability threshold of the nucleus sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness of the generated output; the lower the value, the higher the certainty of the generated output.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+ en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+ en_US: Used to control the degree of repetition in the text generated by the model. Increasing repetition_penalty reduces repetition in the generated text. 1.0 means no penalty.
+pricing:
+ input: "0.000"
+ output: "0.000"
+ unit: "0.000"
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/perfxcloud/llm/Yi-Coder-9B-Chat.yaml b/ai-provider/model-runtime/model-providers/perfxcloud/llm/Yi-Coder-9B-Chat.yaml
new file mode 100644
index 00000000..23b0841c
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/perfxcloud/llm/Yi-Coder-9B-Chat.yaml
@@ -0,0 +1,61 @@
+model: Yi-Coder-9B-Chat
+label:
+ en_US: Yi-Coder-9B-Chat
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 20480
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.5
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+ en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected, and the generated results will be more certain.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 600
+ min: 1
+ max: 1248
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+ en_US: The probability threshold of the nucleus sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness of the generated output; the lower the value, the higher the certainty of the generated output.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+ en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+ en_US: Used to control the degree of repetition in the text generated by the model. Increasing repetition_penalty reduces repetition in the generated text. 1.0 means no penalty.
+pricing:
+ input: "0.000"
+ output: "0.000"
+ unit: "0.000"
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/perfxcloud/llm/chatglm3-6b.yaml b/ai-provider/model-runtime/model-providers/perfxcloud/llm/chatglm3-6b.yaml
new file mode 100644
index 00000000..75d80f78
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/perfxcloud/llm/chatglm3-6b.yaml
@@ -0,0 +1,62 @@
+model: chatglm3-6b
+label:
+ en_US: chatglm3-6b
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 8192
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.5
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+ en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected, and the generated results will be more certain.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 600
+ min: 1
+ max: 1248
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+ en_US: The probability threshold of the nucleus sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness of the generated output; the lower the value, the higher the certainty of the generated output.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+ en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+ en_US: Used to control the degree of repetition in the text generated by the model. Increasing repetition_penalty reduces repetition in the generated text. 1.0 means no penalty.
+pricing:
+ input: "0.000"
+ output: "0.000"
+ unit: "0.000"
+ currency: RMB
+deprecated: true
diff --git a/ai-provider/model-runtime/model-providers/perfxcloud/llm/deepseek-v2-chat.yaml b/ai-provider/model-runtime/model-providers/perfxcloud/llm/deepseek-v2-chat.yaml
new file mode 100644
index 00000000..fa9a7b71
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/perfxcloud/llm/deepseek-v2-chat.yaml
@@ -0,0 +1,62 @@
+model: deepseek-v2-chat
+label:
+ en_US: deepseek-v2-chat
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 4096
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.5
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+ en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected, and the generated results will be more certain.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 600
+ min: 1
+ max: 1248
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+ en_US: The probability threshold of the nucleus sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness of the generated output; the lower the value, the higher the certainty of the generated output.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+ en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+ en_US: Used to control the degree of repetition in the text generated by the model. Increasing repetition_penalty reduces repetition in the generated text. 1.0 means no penalty.
+pricing:
+ input: "0.000"
+ output: "0.000"
+ unit: "0.000"
+ currency: RMB
+deprecated: true
diff --git a/ai-provider/model-runtime/model-providers/perfxcloud/llm/deepseek-v2-lite-chat.yaml b/ai-provider/model-runtime/model-providers/perfxcloud/llm/deepseek-v2-lite-chat.yaml
new file mode 100644
index 00000000..75a26d25
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/perfxcloud/llm/deepseek-v2-lite-chat.yaml
@@ -0,0 +1,62 @@
+model: deepseek-v2-lite-chat
+label:
+ en_US: deepseek-v2-lite-chat
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 2048
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.5
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+ en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls how much the probability distribution over candidate words is smoothed when generating text. A higher temperature value flattens the peak of the probability distribution, allowing more low-probability words to be selected, so the generated results are more diverse; a lower temperature value sharpens the peak of the probability distribution, making high-probability words more likely to be selected, so the generated results are more deterministic.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 600
+ min: 1
+ max: 1248
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+ en_US: The probability threshold for nucleus sampling during generation. For example, when the value is 0.8, only the smallest set of the most likely tokens whose cumulative probability is greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the more random the output; the smaller the value, the more deterministic the output.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+ en_US: The size of the candidate set to sample from during generation. For example, when the value is 50, only the 50 highest-scoring tokens at each generation step form the candidate set for random sampling. The larger the value, the more random the output; the smaller the value, the more deterministic the output.
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+ en_US: Controls the amount of repetition in the generated text. Increasing repetition_penalty reduces repetition in the output. 1.0 means no penalty.
+pricing:
+ input: "0.000"
+ output: "0.000"
+ unit: "0.000"
+ currency: RMB
+deprecated: true
diff --git a/ai-provider/model-runtime/model-providers/perfxcloud/perfxcloud.yaml b/ai-provider/model-runtime/model-providers/perfxcloud/perfxcloud.yaml
new file mode 100644
index 00000000..e1aaeadf
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/perfxcloud/perfxcloud.yaml
@@ -0,0 +1,43 @@
+provider: perfxcloud
+label:
+ en_US: PerfXCloud
+ zh_Hans: PerfXCloud
+description:
+ en_US: PerfXCloud (Pengfeng Technology) is an AI development and deployment platform tailored for developers and enterprises, providing inference capabilities for a variety of models.
+ zh_Hans: PerfXCloud(澎峰科技)为开发者和企业量身打造的AI开发和部署平台,提供多种模型的推理能力。
+icon_small:
+ en_US: icon_s_en.svg
+icon_large:
+ en_US: icon_l_en.svg
+background: "#e3f0ff"
+help:
+ title:
+ en_US: Get your API Key from PerfXCloud
+ zh_Hans: 从 PerfXCloud 获取 API Key
+ url:
+ en_US: https://cloud.perfxlab.cn/panel/token
+supported_model_types:
+ - llm
+ - text-embedding
+configurate_methods:
+ - predefined-model
+provider_credential_schema:
+ credential_form_schemas:
+ - variable: api_key
+ label:
+ en_US: API Key
+ type: secret-input
+ required: true
+ placeholder:
+ zh_Hans: 在此输入您的 API Key
+ en_US: Enter your API Key
+ - variable: endpoint_url
+ label:
+ zh_Hans: 自定义 API endpoint 地址
+ en_US: Custom API endpoint URL
+ type: text-input
+ required: false
+ placeholder:
+ zh_Hans: Base URL, e.g. https://cloud.perfxlab.cn/v1
+ en_US: Base URL, e.g. https://cloud.perfxlab.cn/v1
+address: https://api.openai.com
\ No newline at end of file
diff --git a/ai-provider/model-runtime/model-providers/siliconflow/assets/siliconflow.svg b/ai-provider/model-runtime/model-providers/siliconflow/assets/siliconflow.svg
new file mode 100644
index 00000000..16e406f0
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/siliconflow/assets/siliconflow.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/ai-provider/model-runtime/model-providers/siliconflow/assets/siliconflow_square.svg b/ai-provider/model-runtime/model-providers/siliconflow/assets/siliconflow_square.svg
new file mode 100644
index 00000000..ad6b384f
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/siliconflow/assets/siliconflow_square.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/ai-provider/model-runtime/model-providers/siliconflow/llm/deepdeek-coder-v2-instruct.yaml b/ai-provider/model-runtime/model-providers/siliconflow/llm/deepdeek-coder-v2-instruct.yaml
new file mode 100644
index 00000000..d4431179
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/siliconflow/llm/deepdeek-coder-v2-instruct.yaml
@@ -0,0 +1,30 @@
+model: deepseek-ai/DeepSeek-Coder-V2-Instruct
+label:
+ en_US: deepseek-ai/DeepSeek-Coder-V2-Instruct
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 32768
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 512
+ min: 1
+ max: 4096
+ help:
+ zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+ en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+ - name: top_p
+ use_template: top_p
+ - name: frequency_penalty
+ use_template: frequency_penalty
+pricing:
+ input: '1.33'
+ output: '1.33'
+ unit: '0.000001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/siliconflow/llm/deepseek-v2-chat.yaml b/ai-provider/model-runtime/model-providers/siliconflow/llm/deepseek-v2-chat.yaml
new file mode 100644
index 00000000..caa6508b
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/siliconflow/llm/deepseek-v2-chat.yaml
@@ -0,0 +1,30 @@
+model: deepseek-ai/DeepSeek-V2-Chat
+label:
+ en_US: deepseek-ai/DeepSeek-V2-Chat
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 32768
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 512
+ min: 1
+ max: 4096
+ help:
+ zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+ en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+ - name: top_p
+ use_template: top_p
+ - name: frequency_penalty
+ use_template: frequency_penalty
+pricing:
+ input: '1.33'
+ output: '1.33'
+ unit: '0.000001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/siliconflow/llm/deepseek-v2.5.yaml b/ai-provider/model-runtime/model-providers/siliconflow/llm/deepseek-v2.5.yaml
new file mode 100644
index 00000000..1c8e15ae
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/siliconflow/llm/deepseek-v2.5.yaml
@@ -0,0 +1,30 @@
+model: deepseek-ai/DeepSeek-V2.5
+label:
+ en_US: deepseek-ai/DeepSeek-V2.5
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 32768
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 512
+ min: 1
+ max: 4096
+ help:
+ zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+ en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+ - name: top_p
+ use_template: top_p
+ - name: frequency_penalty
+ use_template: frequency_penalty
+pricing:
+ input: '1.33'
+ output: '1.33'
+ unit: '0.000001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/siliconflow/llm/gemma-2-27b-it.yaml b/ai-provider/model-runtime/model-providers/siliconflow/llm/gemma-2-27b-it.yaml
new file mode 100644
index 00000000..2840e3dc
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/siliconflow/llm/gemma-2-27b-it.yaml
@@ -0,0 +1,30 @@
+model: google/gemma-2-27b-it
+label:
+ en_US: google/gemma-2-27b-it
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 8196
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 512
+ min: 1
+ max: 4096
+ help:
+ zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+ en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+ - name: top_p
+ use_template: top_p
+ - name: frequency_penalty
+ use_template: frequency_penalty
+pricing:
+ input: '1.26'
+ output: '1.26'
+ unit: '0.000001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/siliconflow/llm/gemma-2-9b-it.yaml b/ai-provider/model-runtime/model-providers/siliconflow/llm/gemma-2-9b-it.yaml
new file mode 100644
index 00000000..d7e19b46
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/siliconflow/llm/gemma-2-9b-it.yaml
@@ -0,0 +1,30 @@
+model: google/gemma-2-9b-it
+label:
+ en_US: google/gemma-2-9b-it
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 8196
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 512
+ min: 1
+ max: 4096
+ help:
+ zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+ en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+ - name: top_p
+ use_template: top_p
+ - name: frequency_penalty
+ use_template: frequency_penalty
+pricing:
+ input: '0'
+ output: '0'
+ unit: '0.000001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/siliconflow/llm/glm4-9b-chat.yaml b/ai-provider/model-runtime/model-providers/siliconflow/llm/glm4-9b-chat.yaml
new file mode 100644
index 00000000..9b32a024
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/siliconflow/llm/glm4-9b-chat.yaml
@@ -0,0 +1,30 @@
+model: THUDM/glm-4-9b-chat
+label:
+ en_US: THUDM/glm-4-9b-chat
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 32768
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 512
+ min: 1
+ max: 4096
+ help:
+ zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+ en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+ - name: top_p
+ use_template: top_p
+ - name: frequency_penalty
+ use_template: frequency_penalty
+pricing:
+ input: '0'
+ output: '0'
+ unit: '0.000001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/siliconflow/llm/internlm2_5-7b-chat.yaml b/ai-provider/model-runtime/model-providers/siliconflow/llm/internlm2_5-7b-chat.yaml
new file mode 100644
index 00000000..73ad4480
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/siliconflow/llm/internlm2_5-7b-chat.yaml
@@ -0,0 +1,30 @@
+model: internlm/internlm2_5-7b-chat
+label:
+ en_US: internlm/internlm2_5-7b-chat
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 32768
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 512
+ min: 1
+ max: 4096
+ help:
+ zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+ en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+ - name: top_p
+ use_template: top_p
+ - name: frequency_penalty
+ use_template: frequency_penalty
+pricing:
+ input: '0'
+ output: '0'
+ unit: '0.000001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/siliconflow/llm/meta-mlama-3-70b-instruct.yaml b/ai-provider/model-runtime/model-providers/siliconflow/llm/meta-mlama-3-70b-instruct.yaml
new file mode 100644
index 00000000..9993d781
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/siliconflow/llm/meta-mlama-3-70b-instruct.yaml
@@ -0,0 +1,30 @@
+model: meta-llama/Meta-Llama-3-70B-Instruct
+label:
+ en_US: meta-llama/Meta-Llama-3-70B-Instruct
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 32768
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 512
+ min: 1
+ max: 4096
+ help:
+ zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+ en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+ - name: top_p
+ use_template: top_p
+ - name: frequency_penalty
+ use_template: frequency_penalty
+pricing:
+ input: '4.13'
+ output: '4.13'
+ unit: '0.000001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/siliconflow/llm/meta-mlama-3-8b-instruct.yaml b/ai-provider/model-runtime/model-providers/siliconflow/llm/meta-mlama-3-8b-instruct.yaml
new file mode 100644
index 00000000..60e37647
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/siliconflow/llm/meta-mlama-3-8b-instruct.yaml
@@ -0,0 +1,30 @@
+model: meta-llama/Meta-Llama-3-8B-Instruct
+label:
+ en_US: meta-llama/Meta-Llama-3-8B-Instruct
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 8192
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 512
+ min: 1
+ max: 4096
+ help:
+ zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+ en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+ - name: top_p
+ use_template: top_p
+ - name: frequency_penalty
+ use_template: frequency_penalty
+pricing:
+ input: '0'
+ output: '0'
+ unit: '0.000001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/siliconflow/llm/meta-mlama-3.1-405b-instruct.yaml b/ai-provider/model-runtime/model-providers/siliconflow/llm/meta-mlama-3.1-405b-instruct.yaml
new file mode 100644
index 00000000..f992660a
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/siliconflow/llm/meta-mlama-3.1-405b-instruct.yaml
@@ -0,0 +1,30 @@
+model: meta-llama/Meta-Llama-3.1-405B-Instruct
+label:
+ en_US: meta-llama/Meta-Llama-3.1-405B-Instruct
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 32768
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 512
+ min: 1
+ max: 4096
+ help:
+ zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+ en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+ - name: top_p
+ use_template: top_p
+ - name: frequency_penalty
+ use_template: frequency_penalty
+pricing:
+ input: '21'
+ output: '21'
+ unit: '0.000001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/siliconflow/llm/meta-mlama-3.1-70b-instruct.yaml b/ai-provider/model-runtime/model-providers/siliconflow/llm/meta-mlama-3.1-70b-instruct.yaml
new file mode 100644
index 00000000..1c69d63a
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/siliconflow/llm/meta-mlama-3.1-70b-instruct.yaml
@@ -0,0 +1,30 @@
+model: meta-llama/Meta-Llama-3.1-70B-Instruct
+label:
+ en_US: meta-llama/Meta-Llama-3.1-70B-Instruct
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 32768
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 512
+ min: 1
+ max: 4096
+ help:
+ zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+ en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+ - name: top_p
+ use_template: top_p
+ - name: frequency_penalty
+ use_template: frequency_penalty
+pricing:
+ input: '4.13'
+ output: '4.13'
+ unit: '0.000001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/siliconflow/llm/meta-mlama-3.1-8b-instruct.yaml b/ai-provider/model-runtime/model-providers/siliconflow/llm/meta-mlama-3.1-8b-instruct.yaml
new file mode 100644
index 00000000..a97002a5
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/siliconflow/llm/meta-mlama-3.1-8b-instruct.yaml
@@ -0,0 +1,30 @@
+model: meta-llama/Meta-Llama-3.1-8B-Instruct
+label:
+ en_US: meta-llama/Meta-Llama-3.1-8B-Instruct
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 8192
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 512
+ min: 1
+ max: 4096
+ help:
+ zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+ en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+ - name: top_p
+ use_template: top_p
+ - name: frequency_penalty
+ use_template: frequency_penalty
+pricing:
+ input: '0'
+ output: '0'
+ unit: '0.000001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/siliconflow/llm/mistral-7b-instruct-v0.2.yaml b/ai-provider/model-runtime/model-providers/siliconflow/llm/mistral-7b-instruct-v0.2.yaml
new file mode 100644
index 00000000..27664eab
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/siliconflow/llm/mistral-7b-instruct-v0.2.yaml
@@ -0,0 +1,30 @@
+model: mistralai/Mistral-7B-Instruct-v0.2
+label:
+ en_US: mistralai/Mistral-7B-Instruct-v0.2
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 32768
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 512
+ min: 1
+ max: 4096
+ help:
+ zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+ en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+ - name: top_p
+ use_template: top_p
+ - name: frequency_penalty
+ use_template: frequency_penalty
+pricing:
+ input: '0'
+ output: '0'
+ unit: '0.000001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/siliconflow/llm/mistral-8x7b-instruct-v0.1.yaml b/ai-provider/model-runtime/model-providers/siliconflow/llm/mistral-8x7b-instruct-v0.1.yaml
new file mode 100644
index 00000000..fd7aada4
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/siliconflow/llm/mistral-8x7b-instruct-v0.1.yaml
@@ -0,0 +1,30 @@
+model: mistralai/Mixtral-8x7B-Instruct-v0.1
+label:
+ en_US: mistralai/Mixtral-8x7B-Instruct-v0.1
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 32768
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 512
+ min: 1
+ max: 4096
+ help:
+ zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+ en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+ - name: top_p
+ use_template: top_p
+ - name: frequency_penalty
+ use_template: frequency_penalty
+pricing:
+ input: '1.26'
+ output: '1.26'
+ unit: '0.000001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/siliconflow/llm/qwen2-1.5b-instruct.yaml b/ai-provider/model-runtime/model-providers/siliconflow/llm/qwen2-1.5b-instruct.yaml
new file mode 100644
index 00000000..f6c976af
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/siliconflow/llm/qwen2-1.5b-instruct.yaml
@@ -0,0 +1,30 @@
+model: Qwen/Qwen2-1.5B-Instruct
+label:
+ en_US: Qwen/Qwen2-1.5B-Instruct
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 32768
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 512
+ min: 1
+ max: 4096
+ help:
+ zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+ en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+ - name: top_p
+ use_template: top_p
+ - name: frequency_penalty
+ use_template: frequency_penalty
+pricing:
+ input: '0'
+ output: '0'
+ unit: '0.000001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/siliconflow/llm/qwen2-57b-a14b-instruct.yaml b/ai-provider/model-runtime/model-providers/siliconflow/llm/qwen2-57b-a14b-instruct.yaml
new file mode 100644
index 00000000..a996e919
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/siliconflow/llm/qwen2-57b-a14b-instruct.yaml
@@ -0,0 +1,30 @@
+model: Qwen/Qwen2-57B-A14B-Instruct
+label:
+ en_US: Qwen/Qwen2-57B-A14B-Instruct
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 32768
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 512
+ min: 1
+ max: 4096
+ help:
+ zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+ en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+ - name: top_p
+ use_template: top_p
+ - name: frequency_penalty
+ use_template: frequency_penalty
+pricing:
+ input: '1.26'
+ output: '1.26'
+ unit: '0.000001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/siliconflow/llm/qwen2-72b-instruct.yaml b/ai-provider/model-runtime/model-providers/siliconflow/llm/qwen2-72b-instruct.yaml
new file mode 100644
index 00000000..a6e2c22d
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/siliconflow/llm/qwen2-72b-instruct.yaml
@@ -0,0 +1,30 @@
+model: Qwen/Qwen2-72B-Instruct
+label:
+ en_US: Qwen/Qwen2-72B-Instruct
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 32768
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 512
+ min: 1
+ max: 4096
+ help:
+ zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+ en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+ - name: top_p
+ use_template: top_p
+ - name: frequency_penalty
+ use_template: frequency_penalty
+pricing:
+ input: '4.13'
+ output: '4.13'
+ unit: '0.000001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/siliconflow/llm/qwen2-7b-instruct.yaml b/ai-provider/model-runtime/model-providers/siliconflow/llm/qwen2-7b-instruct.yaml
new file mode 100644
index 00000000..d8bea5e1
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/siliconflow/llm/qwen2-7b-instruct.yaml
@@ -0,0 +1,30 @@
+model: Qwen/Qwen2-7B-Instruct
+label:
+ en_US: Qwen/Qwen2-7B-Instruct
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 32768
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 512
+ min: 1
+ max: 4096
+ help:
+ zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+ en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+ - name: top_p
+ use_template: top_p
+ - name: frequency_penalty
+ use_template: frequency_penalty
+pricing:
+ input: '0'
+ output: '0'
+ unit: '0.000001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/siliconflow/llm/qwen2.5-14b-instruct.yaml b/ai-provider/model-runtime/model-providers/siliconflow/llm/qwen2.5-14b-instruct.yaml
new file mode 100644
index 00000000..02a40146
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/siliconflow/llm/qwen2.5-14b-instruct.yaml
@@ -0,0 +1,30 @@
+model: Qwen/Qwen2.5-14B-Instruct
+label:
+ en_US: Qwen/Qwen2.5-14B-Instruct
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 32768
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 512
+ min: 1
+ max: 8192
+ help:
+ zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+ en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+ - name: top_p
+ use_template: top_p
+ - name: frequency_penalty
+ use_template: frequency_penalty
+pricing:
+ input: '0.7'
+ output: '0.7'
+ unit: '0.000001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/siliconflow/llm/qwen2.5-32b-instruct.yaml b/ai-provider/model-runtime/model-providers/siliconflow/llm/qwen2.5-32b-instruct.yaml
new file mode 100644
index 00000000..d084617e
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/siliconflow/llm/qwen2.5-32b-instruct.yaml
@@ -0,0 +1,30 @@
+model: Qwen/Qwen2.5-32B-Instruct
+label:
+ en_US: Qwen/Qwen2.5-32B-Instruct
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 32768
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 512
+ min: 1
+ max: 8192
+ help:
+ zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+ en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+ - name: top_p
+ use_template: top_p
+ - name: frequency_penalty
+ use_template: frequency_penalty
+pricing:
+ input: '1.26'
+ output: '1.26'
+ unit: '0.000001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/siliconflow/llm/qwen2.5-72b-instruct.yaml b/ai-provider/model-runtime/model-providers/siliconflow/llm/qwen2.5-72b-instruct.yaml
new file mode 100644
index 00000000..dfbad249
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/siliconflow/llm/qwen2.5-72b-instruct.yaml
@@ -0,0 +1,30 @@
+model: Qwen/Qwen2.5-72B-Instruct
+label:
+ en_US: Qwen/Qwen2.5-72B-Instruct
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 32768
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 512
+ min: 1
+ max: 8192
+ help:
+ zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+ en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+ - name: top_p
+ use_template: top_p
+ - name: frequency_penalty
+ use_template: frequency_penalty
+pricing:
+ input: '4.13'
+ output: '4.13'
+ unit: '0.000001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/siliconflow/llm/qwen2.5-7b-instruct.yaml b/ai-provider/model-runtime/model-providers/siliconflow/llm/qwen2.5-7b-instruct.yaml
new file mode 100644
index 00000000..cdc8ffc4
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/siliconflow/llm/qwen2.5-7b-instruct.yaml
@@ -0,0 +1,30 @@
+model: Qwen/Qwen2.5-7B-Instruct
+label:
+ en_US: Qwen/Qwen2.5-7B-Instruct
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 32768
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 512
+ min: 1
+ max: 8192
+ help:
+ zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+ en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+ - name: top_p
+ use_template: top_p
+ - name: frequency_penalty
+ use_template: frequency_penalty
+pricing:
+ input: '0'
+ output: '0'
+ unit: '0.000001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/siliconflow/llm/yi-1.5-34b-chat.yaml b/ai-provider/model-runtime/model-providers/siliconflow/llm/yi-1.5-34b-chat.yaml
new file mode 100644
index 00000000..864ba46f
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/siliconflow/llm/yi-1.5-34b-chat.yaml
@@ -0,0 +1,30 @@
+model: 01-ai/Yi-1.5-34B-Chat
+label:
+ en_US: 01-ai/Yi-1.5-34B-Chat-16K
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 16384
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 512
+ min: 1
+ max: 4096
+ help:
+ zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+ en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+ - name: top_p
+ use_template: top_p
+ - name: frequency_penalty
+ use_template: frequency_penalty
+pricing:
+ input: '1.26'
+ output: '1.26'
+ unit: '0.000001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/siliconflow/llm/yi-1.5-6b-chat.yaml b/ai-provider/model-runtime/model-providers/siliconflow/llm/yi-1.5-6b-chat.yaml
new file mode 100644
index 00000000..fe4c8b4b
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/siliconflow/llm/yi-1.5-6b-chat.yaml
@@ -0,0 +1,30 @@
+model: 01-ai/Yi-1.5-6B-Chat
+label:
+ en_US: 01-ai/Yi-1.5-6B-Chat
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 4096
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 512
+ min: 1
+ max: 4096
+ help:
+ zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+ en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+ - name: top_p
+ use_template: top_p
+ - name: frequency_penalty
+ use_template: frequency_penalty
+pricing:
+ input: '0'
+ output: '0'
+ unit: '0.000001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/siliconflow/llm/yi-1.5-9b-chat.yaml b/ai-provider/model-runtime/model-providers/siliconflow/llm/yi-1.5-9b-chat.yaml
new file mode 100644
index 00000000..c61f0dc5
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/siliconflow/llm/yi-1.5-9b-chat.yaml
@@ -0,0 +1,30 @@
+model: 01-ai/Yi-1.5-9B-Chat-16K
+label:
+ en_US: 01-ai/Yi-1.5-9B-Chat-16K
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 16384
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 512
+ min: 1
+ max: 4096
+ help:
+ zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+ en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+ - name: top_p
+ use_template: top_p
+ - name: frequency_penalty
+ use_template: frequency_penalty
+pricing:
+ input: '0'
+ output: '0'
+ unit: '0.000001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/siliconflow/siliconflow.yaml b/ai-provider/model-runtime/model-providers/siliconflow/siliconflow.yaml
new file mode 100644
index 00000000..f9c932f4
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/siliconflow/siliconflow.yaml
@@ -0,0 +1,33 @@
+provider: siliconflow
+label:
+ zh_Hans: 硅基流动
+ en_US: SiliconFlow
+icon_small:
+ en_US: siliconflow_square.svg
+icon_large:
+ en_US: siliconflow.svg
+background: "#ffecff"
+help:
+ title:
+ en_US: Get your API Key from SiliconFlow
+ zh_Hans: 从 SiliconFlow 获取 API Key
+ url:
+ en_US: https://cloud.siliconflow.cn/account/ak
+supported_model_types:
+ - llm
+ - text-embedding
+ - rerank
+ - speech2text
+configurate_methods:
+ - predefined-model
+provider_credential_schema:
+ credential_form_schemas:
+ - variable: api_key
+ label:
+ en_US: API Key
+ type: secret-input
+ required: true
+ placeholder:
+ zh_Hans: 在此输入您的 API Key
+ en_US: Enter your API Key
+address: https://api.openai.com
\ No newline at end of file
diff --git a/ai-provider/model-runtime/model-providers/spark/assets/icon_l_en.svg b/ai-provider/model-runtime/model-providers/spark/assets/icon_l_en.svg
new file mode 100644
index 00000000..521c68ca
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/spark/assets/icon_l_en.svg
@@ -0,0 +1,24 @@
+
diff --git a/ai-provider/model-runtime/model-providers/spark/assets/icon_l_zh.svg b/ai-provider/model-runtime/model-providers/spark/assets/icon_l_zh.svg
new file mode 100644
index 00000000..71d85216
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/spark/assets/icon_l_zh.svg
@@ -0,0 +1,11 @@
+
diff --git a/ai-provider/model-runtime/model-providers/spark/assets/icon_s_en.svg b/ai-provider/model-runtime/model-providers/spark/assets/icon_s_en.svg
new file mode 100644
index 00000000..ef0a9131
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/spark/assets/icon_s_en.svg
@@ -0,0 +1,5 @@
+
diff --git a/ai-provider/model-runtime/model-providers/spark/llm/spark-1.5.yaml b/ai-provider/model-runtime/model-providers/spark/llm/spark-1.5.yaml
new file mode 100644
index 00000000..fcd65c24
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/spark/llm/spark-1.5.yaml
@@ -0,0 +1,34 @@
+model: spark-1.5
+deprecated: true
+label:
+ en_US: Spark V1.5
+model_type: llm
+model_properties:
+ mode: chat
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ default: 0.5
+ help:
+ zh_Hans: 核采样阈值。用于决定结果随机性,取值越高随机性越强即相同的问题得到的不同答案的可能性越高。
+ en_US: Kernel sampling threshold. Used to determine the randomness of the results. The higher the value, the stronger the randomness, that is, the higher the possibility of getting different answers to the same question.
+ - name: max_tokens
+ use_template: max_tokens
+ default: 512
+ min: 1
+ max: 4096
+ help:
+ zh_Hans: 模型回答的tokens的最大长度。
+ en_US: Maximum length of tokens for the model response.
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ default: 4
+ min: 1
+ max: 6
+ help:
+ zh_Hans: 从 k 个候选中随机选择一个(非等概率)。
+ en_US: Randomly select one from k candidates (non-equal probability).
+ required: false
diff --git a/ai-provider/model-runtime/model-providers/spark/llm/spark-2.yaml b/ai-provider/model-runtime/model-providers/spark/llm/spark-2.yaml
new file mode 100644
index 00000000..2db6805a
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/spark/llm/spark-2.yaml
@@ -0,0 +1,34 @@
+model: spark-2
+deprecated: true
+label:
+ en_US: Spark V2.0
+model_type: llm
+model_properties:
+ mode: chat
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ default: 0.5
+ help:
+ zh_Hans: 核采样阈值。用于决定结果随机性,取值越高随机性越强即相同的问题得到的不同答案的可能性越高。
+ en_US: Kernel sampling threshold. Used to determine the randomness of the results. The higher the value, the stronger the randomness, that is, the higher the possibility of getting different answers to the same question.
+ - name: max_tokens
+ use_template: max_tokens
+ default: 2048
+ min: 1
+ max: 8192
+ help:
+ zh_Hans: 模型回答的tokens的最大长度。
+ en_US: Maximum length of tokens for the model response.
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ default: 4
+ min: 1
+ max: 6
+ help:
+ zh_Hans: 从 k 个候选中随机选择一个(非等概率)。
+ en_US: Randomly select one from k candidates (non-equal probability).
+ required: false
diff --git a/ai-provider/model-runtime/model-providers/spark/llm/spark-3.5.yaml b/ai-provider/model-runtime/model-providers/spark/llm/spark-3.5.yaml
new file mode 100644
index 00000000..86617a53
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/spark/llm/spark-3.5.yaml
@@ -0,0 +1,34 @@
+model: spark-3.5
+deprecated: true
+label:
+ en_US: Spark V3.5
+model_type: llm
+model_properties:
+ mode: chat
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ default: 0.5
+ help:
+ zh_Hans: 核采样阈值。用于决定结果随机性,取值越高随机性越强即相同的问题得到的不同答案的可能性越高。
+ en_US: Kernel sampling threshold. Used to determine the randomness of the results. The higher the value, the stronger the randomness, that is, the higher the possibility of getting different answers to the same question.
+ - name: max_tokens
+ use_template: max_tokens
+ default: 2048
+ min: 1
+ max: 8192
+ help:
+ zh_Hans: 模型回答的tokens的最大长度。
+ en_US: Maximum length of tokens for the model response.
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ default: 4
+ min: 1
+ max: 6
+ help:
+ zh_Hans: 从 k 个候选中随机选择一个(非等概率)。
+ en_US: Randomly select one from k candidates (non-equal probability).
+ required: false
diff --git a/ai-provider/model-runtime/model-providers/spark/llm/spark-3.yaml b/ai-provider/model-runtime/model-providers/spark/llm/spark-3.yaml
new file mode 100644
index 00000000..9f296c68
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/spark/llm/spark-3.yaml
@@ -0,0 +1,34 @@
+model: spark-3
+deprecated: true
+label:
+ en_US: Spark V3.0
+model_type: llm
+model_properties:
+ mode: chat
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ default: 0.5
+ help:
+ zh_Hans: 核采样阈值。用于决定结果随机性,取值越高随机性越强即相同的问题得到的不同答案的可能性越高。
+ en_US: Kernel sampling threshold. Used to determine the randomness of the results. The higher the value, the stronger the randomness, that is, the higher the possibility of getting different answers to the same question.
+ - name: max_tokens
+ use_template: max_tokens
+ default: 2048
+ min: 1
+ max: 8192
+ help:
+ zh_Hans: 模型回答的tokens的最大长度。
+ en_US: Maximum length of tokens for the model response.
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ default: 4
+ min: 1
+ max: 6
+ help:
+ zh_Hans: 从 k 个候选中随机选择一个(非等概率)。
+ en_US: Randomly select one from k candidates (non-equal probability).
+ required: false
diff --git a/ai-provider/model-runtime/model-providers/spark/llm/spark-4.0-ultra.yaml b/ai-provider/model-runtime/model-providers/spark/llm/spark-4.0-ultra.yaml
new file mode 100644
index 00000000..bbf85764
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/spark/llm/spark-4.0-ultra.yaml
@@ -0,0 +1,42 @@
+model: spark-4.0-ultra
+label:
+ en_US: Spark 4.0 Ultra
+model_type: llm
+model_properties:
+ mode: chat
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ default: 0.5
+ help:
+ zh_Hans: 核采样阈值。用于决定结果随机性,取值越高随机性越强即相同的问题得到的不同答案的可能性越高。
+ en_US: Kernel sampling threshold. Used to determine the randomness of the results. The higher the value, the stronger the randomness, that is, the higher the possibility of getting different answers to the same question.
+ - name: max_tokens
+ use_template: max_tokens
+ default: 4096
+ min: 1
+ max: 8192
+ help:
+ zh_Hans: 模型回答的tokens的最大长度。
+ en_US: Maximum length of tokens for the model response.
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ default: 4
+ min: 1
+ max: 6
+ help:
+ zh_Hans: 从 k 个候选中随机选择一个(非等概率)。
+ en_US: Randomly select one from k candidates (non-equal probability).
+ required: false
+ - name: show_ref_label
+ label:
+ zh_Hans: 联网检索
+ en_US: web search
+ type: boolean
+ default: false
+ help:
+ zh_Hans: 该参数仅4.0 Ultra版本支持,当设置为true时,如果输入内容触发联网检索插件,会先返回检索信源列表,然后再返回星火回复结果,否则仅返回星火回复结果
+ en_US: The parameter is only supported in the 4.0 Ultra version. When set to true, if the input triggers the online search plugin, it will first return a list of search sources and then return the Spark response. Otherwise, it will only return the Spark response.
diff --git a/ai-provider/model-runtime/model-providers/spark/llm/spark-4.yaml b/ai-provider/model-runtime/model-providers/spark/llm/spark-4.yaml
new file mode 100644
index 00000000..4b5529e8
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/spark/llm/spark-4.yaml
@@ -0,0 +1,34 @@
+model: spark-4
+deprecated: true
+label:
+ en_US: Spark V4.0
+model_type: llm
+model_properties:
+ mode: chat
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ default: 0.5
+ help:
+ zh_Hans: 核采样阈值。用于决定结果随机性,取值越高随机性越强即相同的问题得到的不同答案的可能性越高。
+ en_US: Kernel sampling threshold. Used to determine the randomness of the results. The higher the value, the stronger the randomness, that is, the higher the possibility of getting different answers to the same question.
+ - name: max_tokens
+ use_template: max_tokens
+ default: 4096
+ min: 1
+ max: 8192
+ help:
+ zh_Hans: 模型回答的tokens的最大长度。
+ en_US: Maximum length of tokens for the model response.
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ default: 4
+ min: 1
+ max: 6
+ help:
+ zh_Hans: 从 k 个候选中随机选择一个(非等概率)。
+ en_US: Randomly select one from k candidates (non-equal probability).
+ required: false
diff --git a/ai-provider/model-runtime/model-providers/spark/llm/spark-lite.yaml b/ai-provider/model-runtime/model-providers/spark/llm/spark-lite.yaml
new file mode 100644
index 00000000..1f6141a8
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/spark/llm/spark-lite.yaml
@@ -0,0 +1,33 @@
+model: spark-lite
+label:
+ en_US: Spark Lite
+model_type: llm
+model_properties:
+ mode: chat
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ default: 0.5
+ help:
+ zh_Hans: 核采样阈值。用于决定结果随机性,取值越高随机性越强即相同的问题得到的不同答案的可能性越高。
+ en_US: Kernel sampling threshold. Used to determine the randomness of the results. The higher the value, the stronger the randomness, that is, the higher the possibility of getting different answers to the same question.
+ - name: max_tokens
+ use_template: max_tokens
+ default: 4096
+ min: 1
+ max: 4096
+ help:
+ zh_Hans: 模型回答的tokens的最大长度。
+ en_US: Maximum length of tokens for the model response.
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ default: 4
+ min: 1
+ max: 6
+ help:
+ zh_Hans: 从 k 个候选中随机选择一个(非等概率)。
+ en_US: Randomly select one from k candidates (non-equal probability).
+ required: false
diff --git a/ai-provider/model-runtime/model-providers/spark/llm/spark-max-32k.yaml b/ai-provider/model-runtime/model-providers/spark/llm/spark-max-32k.yaml
new file mode 100644
index 00000000..1a1ab684
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/spark/llm/spark-max-32k.yaml
@@ -0,0 +1,33 @@
+model: spark-max-32k
+label:
+ en_US: Spark Max-32K
+model_type: llm
+model_properties:
+ mode: chat
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ default: 0.5
+ help:
+ zh_Hans: 核采样阈值。用于决定结果随机性,取值越高随机性越强即相同的问题得到的不同答案的可能性越高。
+ en_US: Kernel sampling threshold. Used to determine the randomness of the results. The higher the value, the stronger the randomness, that is, the higher the possibility of getting different answers to the same question.
+ - name: max_tokens
+ use_template: max_tokens
+ default: 4096
+ min: 1
+ max: 8192
+ help:
+ zh_Hans: 模型回答的tokens的最大长度。
+ en_US: Maximum length of tokens for the model response.
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ default: 4
+ min: 1
+ max: 6
+ help:
+ zh_Hans: 从 k 个候选中随机选择一个(非等概率)。
+ en_US: Randomly select one from k candidates (non-equal probability).
+ required: false
diff --git a/ai-provider/model-runtime/model-providers/spark/llm/spark-max.yaml b/ai-provider/model-runtime/model-providers/spark/llm/spark-max.yaml
new file mode 100644
index 00000000..71eb2b86
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/spark/llm/spark-max.yaml
@@ -0,0 +1,33 @@
+model: spark-max
+label:
+ en_US: Spark Max
+model_type: llm
+model_properties:
+ mode: chat
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ default: 0.5
+ help:
+ zh_Hans: 核采样阈值。用于决定结果随机性,取值越高随机性越强即相同的问题得到的不同答案的可能性越高。
+ en_US: Kernel sampling threshold. Used to determine the randomness of the results. The higher the value, the stronger the randomness, that is, the higher the possibility of getting different answers to the same question.
+ - name: max_tokens
+ use_template: max_tokens
+ default: 4096
+ min: 1
+ max: 8192
+ help:
+ zh_Hans: 模型回答的tokens的最大长度。
+ en_US: Maximum length of tokens for the model response.
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ default: 4
+ min: 1
+ max: 6
+ help:
+ zh_Hans: 从 k 个候选中随机选择一个(非等概率)。
+ en_US: Randomly select one from k candidates (non-equal probability).
+ required: false
diff --git a/ai-provider/model-runtime/model-providers/spark/llm/spark-pro-128k.yaml b/ai-provider/model-runtime/model-providers/spark/llm/spark-pro-128k.yaml
new file mode 100644
index 00000000..da1fead6
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/spark/llm/spark-pro-128k.yaml
@@ -0,0 +1,33 @@
+model: spark-pro-128k
+label:
+ en_US: Spark Pro-128K
+model_type: llm
+model_properties:
+ mode: chat
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ default: 0.5
+ help:
+ zh_Hans: 核采样阈值。用于决定结果随机性,取值越高随机性越强即相同的问题得到的不同答案的可能性越高。
+ en_US: Kernel sampling threshold. Used to determine the randomness of the results. The higher the value, the stronger the randomness, that is, the higher the possibility of getting different answers to the same question.
+ - name: max_tokens
+ use_template: max_tokens
+ default: 4096
+ min: 1
+ max: 4096
+ help:
+ zh_Hans: 模型回答的tokens的最大长度。
+ en_US: Maximum length of tokens for the model response.
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ default: 4
+ min: 1
+ max: 6
+ help:
+ zh_Hans: 从 k 个候选中随机选择一个(非等概率)。
+ en_US: Randomly select one from k candidates (non-equal probability).
+ required: false
diff --git a/ai-provider/model-runtime/model-providers/spark/llm/spark-pro.yaml b/ai-provider/model-runtime/model-providers/spark/llm/spark-pro.yaml
new file mode 100644
index 00000000..9ee479f1
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/spark/llm/spark-pro.yaml
@@ -0,0 +1,33 @@
+model: spark-pro
+label:
+ en_US: Spark Pro
+model_type: llm
+model_properties:
+ mode: chat
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ default: 0.5
+ help:
+ zh_Hans: 核采样阈值。用于决定结果随机性,取值越高随机性越强即相同的问题得到的不同答案的可能性越高。
+ en_US: Kernel sampling threshold. Used to determine the randomness of the results. The higher the value, the stronger the randomness, that is, the higher the possibility of getting different answers to the same question.
+ - name: max_tokens
+ use_template: max_tokens
+ default: 4096
+ min: 1
+ max: 8192
+ help:
+ zh_Hans: 模型回答的tokens的最大长度。
+ en_US: Maximum length of tokens for the model response.
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ default: 4
+ min: 1
+ max: 6
+ help:
+ zh_Hans: 从 k 个候选中随机选择一个(非等概率)。
+ en_US: Randomly select one from k candidates (non-equal probability).
+ required: false
diff --git a/ai-provider/model-runtime/model-providers/spark/spark.yaml b/ai-provider/model-runtime/model-providers/spark/spark.yaml
new file mode 100644
index 00000000..d09db7aa
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/spark/spark.yaml
@@ -0,0 +1,47 @@
+provider: spark
+label:
+ zh_Hans: 讯飞星火
+ en_US: iFLYTEK SPARK
+icon_small:
+ en_US: icon_s_en.svg
+icon_large:
+ zh_Hans: icon_l_zh.svg
+ en_US: icon_l_en.svg
+background: "#EBF8FF"
+help:
+ title:
+ en_US: Get your API key from iFLYTEK SPARK
+ zh_Hans: 从讯飞星火获取 API Keys
+ url:
+ en_US: https://www.xfyun.cn/solutions/xinghuoAPI
+supported_model_types:
+ - llm
+configurate_methods:
+ - predefined-model
+provider_credential_schema:
+ credential_form_schemas:
+ - variable: app_id
+ label:
+ en_US: APPID
+ type: text-input
+ required: true
+ placeholder:
+ zh_Hans: 在此输入您的 APPID
+ en_US: Enter your APPID
+ - variable: api_secret
+ label:
+ en_US: APISecret
+ type: secret-input
+ required: true
+ placeholder:
+ zh_Hans: 在此输入您的 APISecret
+ en_US: Enter your APISecret
+ - variable: api_key
+ label:
+ en_US: APIKey
+ type: secret-input
+ required: true
+ placeholder:
+ zh_Hans: 在此输入您的 APIKey
+ en_US: Enter your APIKey
+address: https://api.openai.com
\ No newline at end of file
diff --git a/ai-provider/model-runtime/model-providers/stepfun/assets/icon_l_en.svg b/ai-provider/model-runtime/model-providers/stepfun/assets/icon_l_en.svg
new file mode 100644
index 00000000..f63f4fff
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/stepfun/assets/icon_l_en.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/ai-provider/model-runtime/model-providers/stepfun/assets/icon_s_en.svg b/ai-provider/model-runtime/model-providers/stepfun/assets/icon_s_en.svg
new file mode 100644
index 00000000..14f12419
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/stepfun/assets/icon_s_en.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/ai-provider/model-runtime/model-providers/stepfun/llm/step-1-128k.yaml b/ai-provider/model-runtime/model-providers/stepfun/llm/step-1-128k.yaml
new file mode 100644
index 00000000..13f7b7fd
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/stepfun/llm/step-1-128k.yaml
@@ -0,0 +1,25 @@
+model: step-1-128k
+label:
+ zh_Hans: step-1-128k
+ en_US: step-1-128k
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 128000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: max_tokens
+ use_template: max_tokens
+ default: 1024
+ min: 1
+ max: 128000
+pricing:
+ input: '0.04'
+ output: '0.20'
+ unit: '0.001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/stepfun/llm/step-1-256k.yaml b/ai-provider/model-runtime/model-providers/stepfun/llm/step-1-256k.yaml
new file mode 100644
index 00000000..f80ec985
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/stepfun/llm/step-1-256k.yaml
@@ -0,0 +1,25 @@
+model: step-1-256k
+label:
+ zh_Hans: step-1-256k
+ en_US: step-1-256k
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 256000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: max_tokens
+ use_template: max_tokens
+ default: 1024
+ min: 1
+ max: 256000
+pricing:
+ input: '0.095'
+ output: '0.300'
+ unit: '0.001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/stepfun/llm/step-1-32k.yaml b/ai-provider/model-runtime/model-providers/stepfun/llm/step-1-32k.yaml
new file mode 100644
index 00000000..96132d14
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/stepfun/llm/step-1-32k.yaml
@@ -0,0 +1,28 @@
+model: step-1-32k
+label:
+ zh_Hans: step-1-32k
+ en_US: step-1-32k
+model_type: llm
+features:
+ - agent-thought
+ - tool-call
+ - multi-tool-call
+ - stream-tool-call
+model_properties:
+ mode: chat
+ context_size: 32000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: max_tokens
+ use_template: max_tokens
+ default: 1024
+ min: 1
+ max: 32000
+pricing:
+ input: '0.015'
+ output: '0.070'
+ unit: '0.001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/stepfun/llm/step-1-8k.yaml b/ai-provider/model-runtime/model-providers/stepfun/llm/step-1-8k.yaml
new file mode 100644
index 00000000..4a4ba8d1
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/stepfun/llm/step-1-8k.yaml
@@ -0,0 +1,28 @@
+model: step-1-8k
+label:
+ zh_Hans: step-1-8k
+ en_US: step-1-8k
+model_type: llm
+features:
+ - agent-thought
+ - tool-call
+ - multi-tool-call
+ - stream-tool-call
+model_properties:
+ mode: chat
+ context_size: 8000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: max_tokens
+ use_template: max_tokens
+ default: 512
+ min: 1
+ max: 8000
+pricing:
+ input: '0.005'
+ output: '0.020'
+ unit: '0.001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/stepfun/llm/step-1-flash.yaml b/ai-provider/model-runtime/model-providers/stepfun/llm/step-1-flash.yaml
new file mode 100644
index 00000000..afb880f2
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/stepfun/llm/step-1-flash.yaml
@@ -0,0 +1,25 @@
+model: step-1-flash
+label:
+ zh_Hans: step-1-flash
+ en_US: step-1-flash
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 8000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: max_tokens
+ use_template: max_tokens
+ default: 512
+ min: 1
+ max: 8000
+pricing:
+ input: '0.001'
+ output: '0.004'
+ unit: '0.001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/stepfun/llm/step-1v-32k.yaml b/ai-provider/model-runtime/model-providers/stepfun/llm/step-1v-32k.yaml
new file mode 100644
index 00000000..08d6ad24
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/stepfun/llm/step-1v-32k.yaml
@@ -0,0 +1,28 @@
+model: step-1v-32k
+label:
+ zh_Hans: step-1v-32k
+ en_US: step-1v-32k
+model_type: llm
+features:
+ - vision
+ - tool-call
+ - multi-tool-call
+ - stream-tool-call
+model_properties:
+ mode: chat
+ context_size: 32000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: max_tokens
+ use_template: max_tokens
+ default: 1024
+ min: 1
+ max: 32000
+pricing:
+ input: '0.015'
+ output: '0.070'
+ unit: '0.001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/stepfun/llm/step-1v-8k.yaml b/ai-provider/model-runtime/model-providers/stepfun/llm/step-1v-8k.yaml
new file mode 100644
index 00000000..843d14d9
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/stepfun/llm/step-1v-8k.yaml
@@ -0,0 +1,28 @@
+model: step-1v-8k
+label:
+ zh_Hans: step-1v-8k
+ en_US: step-1v-8k
+model_type: llm
+features:
+ - vision
+ - tool-call
+ - multi-tool-call
+ - stream-tool-call
+model_properties:
+ mode: chat
+ context_size: 8192
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: max_tokens
+ use_template: max_tokens
+ default: 512
+ min: 1
+ max: 8192
+pricing:
+ input: '0.005'
+ output: '0.020'
+ unit: '0.001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/stepfun/llm/step-2-16k.yaml b/ai-provider/model-runtime/model-providers/stepfun/llm/step-2-16k.yaml
new file mode 100644
index 00000000..6f2dabbf
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/stepfun/llm/step-2-16k.yaml
@@ -0,0 +1,28 @@
+model: step-2-16k
+label:
+ zh_Hans: step-2-16k
+ en_US: step-2-16k
+model_type: llm
+features:
+ - agent-thought
+ - tool-call
+ - multi-tool-call
+ - stream-tool-call
+model_properties:
+ mode: chat
+ context_size: 16000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: max_tokens
+ use_template: max_tokens
+ default: 1024
+ min: 1
+ max: 16000
+pricing:
+ input: '0.038'
+ output: '0.120'
+ unit: '0.001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/stepfun/stepfun.yaml b/ai-provider/model-runtime/model-providers/stepfun/stepfun.yaml
new file mode 100644
index 00000000..6f49d733
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/stepfun/stepfun.yaml
@@ -0,0 +1,82 @@
+provider: stepfun
+label:
+ zh_Hans: 阶跃星辰
+ en_US: Stepfun
+description:
+ en_US: Models provided by stepfun, such as step-1-8k, step-1-32k, step-1v-8k, step-1v-32k, step-1-128k and step-1-256k.
+ zh_Hans: 阶跃星辰提供的模型,例如 step-1-8k、step-1-32k、step-1v-8k、step-1v-32k、step-1-128k 和 step-1-256k。
+icon_small:
+ en_US: icon_s_en.svg
+icon_large:
+ en_US: icon_l_en.svg
+background: "#FFFFFF"
+help:
+ title:
+ en_US: Get your API Key from stepfun
+ zh_Hans: 从 stepfun 获取 API Key
+ url:
+ en_US: https://platform.stepfun.com/interface-key
+supported_model_types:
+ - llm
+configurate_methods:
+ - predefined-model
+ - customizable-model
+provider_credential_schema:
+ credential_form_schemas:
+ - variable: api_key
+ label:
+ en_US: API Key
+ type: secret-input
+ required: true
+ placeholder:
+ zh_Hans: 在此输入您的 API Key
+ en_US: Enter your API Key
+model_credential_schema:
+ model:
+ label:
+ en_US: Model Name
+ zh_Hans: 模型名称
+ placeholder:
+ en_US: Enter your model name
+ zh_Hans: 输入模型名称
+ credential_form_schemas:
+ - variable: api_key
+ label:
+ en_US: API Key
+ type: secret-input
+ required: true
+ placeholder:
+ zh_Hans: 在此输入您的 API Key
+ en_US: Enter your API Key
+ - variable: context_size
+ label:
+ zh_Hans: 模型上下文长度
+ en_US: Model context size
+ required: true
+ type: text-input
+ default: '8192'
+ placeholder:
+ zh_Hans: 在此输入您的模型上下文长度
+ en_US: Enter your Model context size
+ - variable: max_tokens
+ label:
+ zh_Hans: 最大 token 上限
+ en_US: Upper bound for max tokens
+ default: '8192'
+ type: text-input
+ - variable: function_calling_type
+ label:
+ en_US: Function calling
+ type: select
+ required: false
+ default: no_call
+ options:
+ - value: no_call
+ label:
+ en_US: Not supported
+ zh_Hans: 不支持
+ - value: tool_call
+ label:
+ en_US: Tool Call
+ zh_Hans: Tool Call
+address: https://api.openai.com
\ No newline at end of file
diff --git a/ai-provider/model-runtime/model-providers/tongyi/assets/icon_l_en.svg b/ai-provider/model-runtime/model-providers/tongyi/assets/icon_l_en.svg
new file mode 100644
index 00000000..2a7a4f4f
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/tongyi/assets/icon_l_en.svg
@@ -0,0 +1,2 @@
+
\ No newline at end of file
diff --git a/ai-provider/model-runtime/model-providers/tongyi/assets/icon_l_zh.svg b/ai-provider/model-runtime/model-providers/tongyi/assets/icon_l_zh.svg
new file mode 100644
index 00000000..9f650f2b
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/tongyi/assets/icon_l_zh.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/ai-provider/model-runtime/model-providers/tongyi/assets/icon_s_en.svg b/ai-provider/model-runtime/model-providers/tongyi/assets/icon_s_en.svg
new file mode 100644
index 00000000..851ba565
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/tongyi/assets/icon_s_en.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-coder-turbo-0919.yaml b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-coder-turbo-0919.yaml
new file mode 100644
index 00000000..d9792e71
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-coder-turbo-0919.yaml
@@ -0,0 +1,74 @@
+model: qwen-coder-turbo-0919
+label:
+ en_US: qwen-coder-turbo-0919
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 131072
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+ en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected, and the generated results will be more deterministic.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 8192
+ min: 1
+ max: 8192
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+ en_US: The probability threshold of the nucleus sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the more random the generated output; the smaller the value, the more deterministic the generated output.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+ en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+ - name: seed
+ required: false
+ type: int
+ default: 1234
+ label:
+ zh_Hans: 随机种子
+ en_US: Random seed
+ help:
+ zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。
+ en_US: The random number seed used during generation, which controls the randomness of the content generated by the model. Supports unsigned 64-bit integers; the default value is 1234. When a seed is used, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ zh_Hans: 重复惩罚
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+ en_US: Used to control how much the model repeats itself during generation. Increasing repetition_penalty reduces repetition in the generated output. 1.0 means no penalty.
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.002'
+ output: '0.006'
+ unit: '0.001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-coder-turbo-latest.yaml b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-coder-turbo-latest.yaml
new file mode 100644
index 00000000..0b03505c
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-coder-turbo-latest.yaml
@@ -0,0 +1,74 @@
+model: qwen-coder-turbo-latest
+label:
+ en_US: qwen-coder-turbo-latest
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 131072
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+ en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected, and the generated results will be more certain.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 8192
+ min: 1
+ max: 8192
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+ en_US: The probability threshold of the nucleus sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the more random the generated output; the lower the value, the more deterministic the generated output.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+ en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+ - name: seed
+ required: false
+ type: int
+ default: 1234
+ label:
+ zh_Hans: 随机种子
+ en_US: Random seed
+ help:
+ zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。
+ en_US: The random number seed used during generation, which lets the user control the randomness of the content generated by the model. Supports unsigned 64-bit integers; the default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ zh_Hans: 重复惩罚
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+ en_US: Used to control the degree of repetition in the text generated by the model. Increasing repetition_penalty reduces repetition in the generated text. 1.0 means no penalty.
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.002'
+ output: '0.006'
+ unit: '0.001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-coder-turbo.yaml b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-coder-turbo.yaml
new file mode 100644
index 00000000..2a6c0408
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-coder-turbo.yaml
@@ -0,0 +1,74 @@
+model: qwen-coder-turbo
+label:
+ en_US: qwen-coder-turbo
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 131072
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+ en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected, and the generated results will be more certain.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 8192
+ min: 1
+ max: 8192
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+ en_US: The probability threshold of the nucleus sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the more random the generated output; the lower the value, the more deterministic the generated output.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+ en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+ - name: seed
+ required: false
+ type: int
+ default: 1234
+ label:
+ zh_Hans: 随机种子
+ en_US: Random seed
+ help:
+ zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。
+ en_US: The random number seed used during generation, which lets the user control the randomness of the content generated by the model. Supports unsigned 64-bit integers; the default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ zh_Hans: 重复惩罚
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+ en_US: Used to control the degree of repetition in the text generated by the model. Increasing repetition_penalty reduces repetition in the generated text. 1.0 means no penalty.
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.002'
+ output: '0.006'
+ unit: '0.001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-long.yaml b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-long.yaml
new file mode 100644
index 00000000..bad7f4f4
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-long.yaml
@@ -0,0 +1,77 @@
+# model docs: https://help.aliyun.com/zh/model-studio/getting-started/models#27b2b3a15d5c6
+model: qwen-long
+label:
+ en_US: qwen-long
+model_type: llm
+features:
+ - multi-tool-call
+ - agent-thought
+ - stream-tool-call
+model_properties:
+ mode: chat
+ context_size: 10000000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+ en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected, and the generated results will be more certain.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 2000
+ min: 1
+ max: 6000
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+ en_US: The probability threshold of the nucleus sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the more random the generated output; the lower the value, the more deterministic the generated output.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+ en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+ - name: seed
+ required: false
+ type: int
+ default: 1234
+ label:
+ zh_Hans: 随机种子
+ en_US: Random seed
+ help:
+ zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。
+ en_US: The random number seed used during generation, which lets the user control the randomness of the content generated by the model. Supports unsigned 64-bit integers; the default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ zh_Hans: 重复惩罚
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+ en_US: Used to control the degree of repetition in the text generated by the model. Increasing repetition_penalty reduces repetition in the generated text. 1.0 means no penalty.
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.0005'
+ output: '0.002'
+ unit: '0.001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-math-plus-0816.yaml b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-math-plus-0816.yaml
new file mode 100644
index 00000000..c14aee1e
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-math-plus-0816.yaml
@@ -0,0 +1,74 @@
+model: qwen-math-plus-0816
+label:
+ en_US: qwen-math-plus-0816
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 4096
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+ en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected, and the generated results will be more certain.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 3072
+ min: 1
+ max: 3072
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+ en_US: The probability threshold of the nucleus sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the more random the generated output; the lower the value, the more deterministic the generated output.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+ en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+ - name: seed
+ required: false
+ type: int
+ default: 1234
+ label:
+ zh_Hans: 随机种子
+ en_US: Random seed
+ help:
+ zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。
+ en_US: The random number seed used during generation, which lets the user control the randomness of the content generated by the model. Supports unsigned 64-bit integers; the default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ zh_Hans: 重复惩罚
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+ en_US: Used to control the degree of repetition in the text generated by the model. Increasing repetition_penalty reduces repetition in the generated text. 1.0 means no penalty.
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.004'
+ output: '0.012'
+ unit: '0.001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-math-plus-0919.yaml b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-math-plus-0919.yaml
new file mode 100644
index 00000000..9d74eeca
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-math-plus-0919.yaml
@@ -0,0 +1,74 @@
+model: qwen-math-plus-0919
+label:
+ en_US: qwen-math-plus-0919
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 4096
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+ en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected, and the generated results will be more certain.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 3072
+ min: 1
+ max: 3072
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+ en_US: The probability threshold of the nucleus sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the more random the generated output; the lower the value, the more deterministic the generated output.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+ en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+ - name: seed
+ required: false
+ type: int
+ default: 1234
+ label:
+ zh_Hans: 随机种子
+ en_US: Random seed
+ help:
+ zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。
+ en_US: The random number seed used during generation, which lets the user control the randomness of the content generated by the model. Supports unsigned 64-bit integers; the default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ zh_Hans: 重复惩罚
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+ en_US: Used to control the degree of repetition in the text generated by the model. Increasing repetition_penalty reduces repetition in the generated text. 1.0 means no penalty.
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.004'
+ output: '0.012'
+ unit: '0.001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-math-plus-latest.yaml b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-math-plus-latest.yaml
new file mode 100644
index 00000000..b8601a96
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-math-plus-latest.yaml
@@ -0,0 +1,74 @@
+model: qwen-math-plus-latest
+label:
+ en_US: qwen-math-plus-latest
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 4096
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+ en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected, and the generated results will be more certain.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 3072
+ min: 1
+ max: 3072
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+ en_US: The probability threshold of the nucleus sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the more random the generated output; the lower the value, the more deterministic the generated output.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+ en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+ - name: seed
+ required: false
+ type: int
+ default: 1234
+ label:
+ zh_Hans: 随机种子
+ en_US: Random seed
+ help:
+ zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。
+ en_US: The random number seed used during generation, which lets the user control the randomness of the content generated by the model. Supports unsigned 64-bit integers; the default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ zh_Hans: 重复惩罚
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+ en_US: Used to control the degree of repetition in the text generated by the model. Increasing repetition_penalty reduces repetition in the generated text. 1.0 means no penalty.
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.004'
+ output: '0.012'
+ unit: '0.001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-math-plus.yaml b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-math-plus.yaml
new file mode 100644
index 00000000..4a948be5
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-math-plus.yaml
@@ -0,0 +1,74 @@
+model: qwen-math-plus
+label:
+ en_US: qwen-math-plus
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 4096
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+ en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected, and the generated results will be more certain.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 3072
+ min: 1
+ max: 3072
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+ en_US: The probability threshold of the nucleus sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the more random the generated output; the lower the value, the more deterministic the generated output.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+ en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+ - name: seed
+ required: false
+ type: int
+ default: 1234
+ label:
+ zh_Hans: 随机种子
+ en_US: Random seed
+ help:
+ zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。
+ en_US: The random number seed used during generation, which lets the user control the randomness of the content generated by the model. Supports unsigned 64-bit integers; the default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ zh_Hans: 重复惩罚
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+ en_US: Used to control the degree of repetition in the text generated by the model. Increasing repetition_penalty reduces repetition in the generated text. 1.0 means no penalty.
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.004'
+ output: '0.012'
+ unit: '0.001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-math-turbo-0919.yaml b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-math-turbo-0919.yaml
new file mode 100644
index 00000000..bffe324a
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-math-turbo-0919.yaml
@@ -0,0 +1,74 @@
+model: qwen-math-turbo-0919
+label:
+ en_US: qwen-math-turbo-0919
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 4096
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+ en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected, and the generated results will be more certain.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 3072
+ min: 1
+ max: 3072
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+ en_US: The probability threshold of the nucleus sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the more random the generated output; the lower the value, the more deterministic the generated output.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+ en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+ - name: seed
+ required: false
+ type: int
+ default: 1234
+ label:
+ zh_Hans: 随机种子
+ en_US: Random seed
+ help:
+ zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。
+ en_US: The random number seed used during generation, which lets the user control the randomness of the content generated by the model. Supports unsigned 64-bit integers; the default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ zh_Hans: 重复惩罚
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+ en_US: Used to control the degree of repetition in the text generated by the model. Increasing repetition_penalty reduces repetition in the generated text. 1.0 means no penalty.
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.002'
+ output: '0.006'
+ unit: '0.001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-math-turbo-latest.yaml b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-math-turbo-latest.yaml
new file mode 100644
index 00000000..0747e966
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-math-turbo-latest.yaml
@@ -0,0 +1,74 @@
+model: qwen-math-turbo-latest
+label:
+ en_US: qwen-math-turbo-latest
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 4096
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+ en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected, and the generated results will be more certain.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 3072
+ min: 1
+ max: 3072
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+ en_US: The probability threshold of the nucleus sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness of the generated output; the lower the value, the higher its certainty.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+ en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+ - name: seed
+ required: false
+ type: int
+ default: 1234
+ label:
+ zh_Hans: 随机种子
+ en_US: Random seed
+ help:
+ zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。
+ en_US: The random number seed used during generation, which controls the randomness of the content generated by the model. Supports unsigned 64-bit integers; the default value is 1234. When a seed is used, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ zh_Hans: 重复惩罚
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+ en_US: Used to control the degree of repetition in the generated output. Increasing repetition_penalty reduces repetition in the generated text. 1.0 means no penalty.
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.002'
+ output: '0.006'
+ unit: '0.001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-math-turbo.yaml b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-math-turbo.yaml
new file mode 100644
index 00000000..dffb5557
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-math-turbo.yaml
@@ -0,0 +1,74 @@
+model: qwen-math-turbo
+label:
+ en_US: qwen-math-turbo
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 4096
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+ en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected, and the generated results will be more certain.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 3072
+ min: 1
+ max: 3072
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+ en_US: The probability threshold of the nucleus sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness of the generated output; the lower the value, the higher its certainty.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+ en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+ - name: seed
+ required: false
+ type: int
+ default: 1234
+ label:
+ zh_Hans: 随机种子
+ en_US: Random seed
+ help:
+ zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。
+ en_US: The random number seed used during generation, which controls the randomness of the content generated by the model. Supports unsigned 64-bit integers; the default value is 1234. When a seed is used, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ zh_Hans: 重复惩罚
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+ en_US: Used to control the degree of repetition in the generated output. Increasing repetition_penalty reduces repetition in the generated text. 1.0 means no penalty.
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.002'
+ output: '0.006'
+ unit: '0.001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-max-0107.yaml b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-max-0107.yaml
new file mode 100644
index 00000000..8ae159f1
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-max-0107.yaml
@@ -0,0 +1,76 @@
+model: qwen-max-0107
+label:
+ en_US: qwen-max-0107
+model_type: llm
+features:
+ - multi-tool-call
+ - agent-thought
+ - stream-tool-call
+model_properties:
+ mode: chat
+ context_size: 8000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+ en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected, and the generated results will be more certain.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 2000
+ min: 1
+ max: 2000
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+ en_US: The probability threshold of the nucleus sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness of the generated output; the lower the value, the higher its certainty.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+ en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+ - name: seed
+ required: false
+ type: int
+ default: 1234
+ label:
+ zh_Hans: 随机种子
+ en_US: Random seed
+ help:
+ zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。
+ en_US: The random number seed used during generation, which controls the randomness of the content generated by the model. Supports unsigned 64-bit integers; the default value is 1234. When a seed is used, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ zh_Hans: 重复惩罚
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+ en_US: Used to control the degree of repetition in the generated output. Increasing repetition_penalty reduces repetition in the generated text. 1.0 means no penalty.
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.04'
+ output: '0.12'
+ unit: '0.001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-max-0403.yaml b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-max-0403.yaml
new file mode 100644
index 00000000..93fb3725
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-max-0403.yaml
@@ -0,0 +1,76 @@
+model: qwen-max-0403
+label:
+ en_US: qwen-max-0403
+model_type: llm
+features:
+ - multi-tool-call
+ - agent-thought
+ - stream-tool-call
+model_properties:
+ mode: chat
+ context_size: 8000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+ en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected, and the generated results will be more certain.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 2000
+ min: 1
+ max: 2000
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+ en_US: The probability threshold of the nucleus sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness of the generated output; the lower the value, the higher its certainty.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+ en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+ - name: seed
+ required: false
+ type: int
+ default: 1234
+ label:
+ zh_Hans: 随机种子
+ en_US: Random seed
+ help:
+ zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。
+ en_US: The random number seed used during generation, which controls the randomness of the content generated by the model. Supports unsigned 64-bit integers; the default value is 1234. When a seed is used, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ zh_Hans: 重复惩罚
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+ en_US: Used to control the degree of repetition in the generated output. Increasing repetition_penalty reduces repetition in the generated text. 1.0 means no penalty.
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.04'
+ output: '0.12'
+ unit: '0.001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-max-0428.yaml b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-max-0428.yaml
new file mode 100644
index 00000000..a5c9d496
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-max-0428.yaml
@@ -0,0 +1,76 @@
+model: qwen-max-0428
+label:
+ en_US: qwen-max-0428
+model_type: llm
+features:
+ - multi-tool-call
+ - agent-thought
+ - stream-tool-call
+model_properties:
+ mode: chat
+ context_size: 8000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+ en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected, and the generated results will be more certain.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 2000
+ min: 1
+ max: 2000
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+ en_US: The probability threshold of the nucleus sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness of the generated output; the lower the value, the higher its certainty.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+ en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+ - name: seed
+ required: false
+ type: int
+ default: 1234
+ label:
+ zh_Hans: 随机种子
+ en_US: Random seed
+ help:
+ zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。
+ en_US: The random number seed used during generation, which controls the randomness of the content generated by the model. Supports unsigned 64-bit integers; the default value is 1234. When a seed is used, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ zh_Hans: 重复惩罚
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+ en_US: Used to control the degree of repetition in the generated output. Increasing repetition_penalty reduces repetition in the generated text. 1.0 means no penalty.
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.04'
+ output: '0.12'
+ unit: '0.001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-max-0919.yaml b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-max-0919.yaml
new file mode 100644
index 00000000..e4a6dae6
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-max-0919.yaml
@@ -0,0 +1,76 @@
+model: qwen-max-0919
+label:
+ en_US: qwen-max-0919
+model_type: llm
+features:
+ - multi-tool-call
+ - agent-thought
+ - stream-tool-call
+model_properties:
+ mode: chat
+ context_size: 32768
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+ en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected, and the generated results will be more certain.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 8192
+ min: 1
+ max: 8192
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+ en_US: The probability threshold of the nucleus sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness of the generated output; the lower the value, the higher its certainty.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+ en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+ - name: seed
+ required: false
+ type: int
+ default: 1234
+ label:
+ zh_Hans: 随机种子
+ en_US: Random seed
+ help:
+ zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。
+ en_US: The random number seed used during generation, which controls the randomness of the content generated by the model. Supports unsigned 64-bit integers; the default value is 1234. When a seed is used, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ zh_Hans: 重复惩罚
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+ en_US: Used to control the degree of repetition in the generated output. Increasing repetition_penalty reduces repetition in the generated text. 1.0 means no penalty.
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.02'
+ output: '0.06'
+ unit: '0.001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-max-1201.yaml b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-max-1201.yaml
new file mode 100644
index 00000000..6fae8a7d
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-max-1201.yaml
@@ -0,0 +1,76 @@
+model: qwen-max-1201
+label:
+ en_US: qwen-max-1201
+model_type: llm
+features:
+ - multi-tool-call
+ - agent-thought
+ - stream-tool-call
+model_properties:
+ mode: chat
+ context_size: 8192
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+ en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected, and the generated results will be more certain.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 2000
+ min: 1
+ max: 2000
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+ en_US: The probability threshold of the nucleus sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness of the generated output; the lower the value, the higher its certainty.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+ en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+ - name: seed
+ required: false
+ type: int
+ default: 1234
+ label:
+ zh_Hans: 随机种子
+ en_US: Random seed
+ help:
+ zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。
+ en_US: The random number seed used during generation, which controls the randomness of the content generated by the model. Supports unsigned 64-bit integers; the default value is 1234. When a seed is used, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+ en_US: Used to control the degree of repetition in the generated output. Increasing repetition_penalty reduces repetition in the generated text. 1.0 means no penalty.
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.04'
+ output: '0.12'
+ unit: '0.001'
+ currency: RMB
+deprecated: true
diff --git a/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-max-latest.yaml b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-max-latest.yaml
new file mode 100644
index 00000000..8e209688
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-max-latest.yaml
@@ -0,0 +1,76 @@
+model: qwen-max-latest
+label:
+ en_US: qwen-max-latest
+model_type: llm
+features:
+ - multi-tool-call
+ - agent-thought
+ - stream-tool-call
+model_properties:
+ mode: chat
+ context_size: 32768
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+ en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected, and the generated results will be more certain.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 8192
+ min: 1
+ max: 8192
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+ en_US: The probability threshold of the nucleus sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness of the generated output; the lower the value, the higher its certainty.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+ en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+ - name: seed
+ required: false
+ type: int
+ default: 1234
+ label:
+ zh_Hans: 随机种子
+ en_US: Random seed
+ help:
+ zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。
+ en_US: The random number seed used during generation, which controls the randomness of the content generated by the model. Supports unsigned 64-bit integers; the default value is 1234. When a seed is used, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ zh_Hans: 重复惩罚
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+ en_US: Used to control the degree of repetition in the generated output. Increasing repetition_penalty reduces repetition in the generated text. 1.0 means no penalty.
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.02'
+ output: '0.06'
+ unit: '0.001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-max-longcontext.yaml b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-max-longcontext.yaml
new file mode 100644
index 00000000..9bc50c73
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-max-longcontext.yaml
@@ -0,0 +1,76 @@
+model: qwen-max-longcontext
+label:
+ en_US: qwen-max-longcontext
+model_type: llm
+features:
+ - multi-tool-call
+ - agent-thought
+ - stream-tool-call
+model_properties:
+ mode: chat
+ context_size: 32000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+ en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected, and the generated results will be more certain.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 8000
+ min: 1
+ max: 8000
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+ en_US: The probability threshold of the nucleus sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness of the generated output; the lower the value, the higher the certainty of the generated output.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+ en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+ - name: seed
+ required: false
+ type: int
+ default: 1234
+ label:
+ zh_Hans: 随机种子
+ en_US: Random seed
+ help:
+ zh_Hans: 生成时使用的随机数种子,用于控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。
+ en_US: The random number seed used during generation, which controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ zh_Hans: 重复惩罚
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+ en_US: Used to control the degree of repetition in model generation. Increasing repetition_penalty reduces repetition in the generated output. 1.0 means no penalty.
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.04'
+ output: '0.12'
+ unit: '0.001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-max.yaml b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-max.yaml
new file mode 100644
index 00000000..c6a64dc5
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-max.yaml
@@ -0,0 +1,85 @@
+model: qwen-max
+label:
+ en_US: qwen-max
+model_type: llm
+features:
+ - multi-tool-call
+ - agent-thought
+ - stream-tool-call
+model_properties:
+ mode: chat
+ context_size: 8000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+ en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected, and the generated results will be more certain.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 2000
+ min: 1
+ max: 2000
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+ en_US: The probability threshold of the nucleus sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness of the generated output; the lower the value, the higher the certainty of the generated output.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+ en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+ - name: seed
+ required: false
+ type: int
+ default: 1234
+ label:
+ zh_Hans: 随机种子
+ en_US: Random seed
+ help:
+ zh_Hans: 生成时使用的随机数种子,用于控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。
+ en_US: The random number seed used during generation, which controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ zh_Hans: 重复惩罚
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+ en_US: Used to control the degree of repetition in model generation. Increasing repetition_penalty reduces repetition in the generated output. 1.0 means no penalty.
+ - name: enable_search
+ type: boolean
+ default: false
+ label:
+ zh_Hans: 联网搜索
+ en_US: Web Search
+ help:
+ zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
+ en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.02'
+ output: '0.06'
+ unit: '0.001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-plus-0206.yaml b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-plus-0206.yaml
new file mode 100644
index 00000000..43059930
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-plus-0206.yaml
@@ -0,0 +1,74 @@
+model: qwen-plus-0206
+label:
+ en_US: qwen-plus-0206
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: completion
+ context_size: 32000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+ en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected, and the generated results will be more certain.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 8000
+ min: 1
+ max: 8000
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+ en_US: The probability threshold of the nucleus sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness of the generated output; the lower the value, the higher the certainty of the generated output.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+ en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+ - name: seed
+ required: false
+ type: int
+ default: 1234
+ label:
+ zh_Hans: 随机种子
+ en_US: Random seed
+ help:
+ zh_Hans: 生成时使用的随机数种子,用于控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。
+ en_US: The random number seed used during generation, which controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ zh_Hans: 重复惩罚
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+ en_US: Used to control the degree of repetition in model generation. Increasing repetition_penalty reduces repetition in the generated output. 1.0 means no penalty.
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.004'
+ output: '0.012'
+ unit: '0.001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-plus-0624.yaml b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-plus-0624.yaml
new file mode 100644
index 00000000..906995d2
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-plus-0624.yaml
@@ -0,0 +1,74 @@
+model: qwen-plus-0624
+label:
+ en_US: qwen-plus-0624
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: completion
+ context_size: 32000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+ en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected, and the generated results will be more certain.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 8000
+ min: 1
+ max: 8000
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+ en_US: The probability threshold of the nucleus sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness of the generated output; the lower the value, the higher the certainty of the generated output.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+ en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+ - name: seed
+ required: false
+ type: int
+ default: 1234
+ label:
+ zh_Hans: 随机种子
+ en_US: Random seed
+ help:
+ zh_Hans: 生成时使用的随机数种子,用于控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。
+ en_US: The random number seed used during generation, which controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ zh_Hans: 重复惩罚
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+ en_US: Used to control the degree of repetition in model generation. Increasing repetition_penalty reduces repetition in the generated output. 1.0 means no penalty.
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.004'
+ output: '0.012'
+ unit: '0.001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-plus-0723.yaml b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-plus-0723.yaml
new file mode 100644
index 00000000..b33e725d
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-plus-0723.yaml
@@ -0,0 +1,74 @@
+model: qwen-plus-0723
+label:
+ en_US: qwen-plus-0723
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: completion
+ context_size: 32000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+ en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected, and the generated results will be more certain.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 8000
+ min: 1
+ max: 8000
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+ en_US: The probability threshold of the nucleus sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness of the generated output; the lower the value, the higher the certainty of the generated output.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+ en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+ - name: seed
+ required: false
+ type: int
+ default: 1234
+ label:
+ zh_Hans: 随机种子
+ en_US: Random seed
+ help:
+ zh_Hans: 生成时使用的随机数种子,用于控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。
+ en_US: The random number seed used during generation, which controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ zh_Hans: 重复惩罚
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+ en_US: Used to control the degree of repetition in model generation. Increasing repetition_penalty reduces repetition in the generated output. 1.0 means no penalty.
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.004'
+ output: '0.012'
+ unit: '0.001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-plus-0806.yaml b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-plus-0806.yaml
new file mode 100644
index 00000000..bb394fad
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-plus-0806.yaml
@@ -0,0 +1,74 @@
+model: qwen-plus-0806
+label:
+ en_US: qwen-plus-0806
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: completion
+ context_size: 131072
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+ en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected, and the generated results will be more certain.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 8192
+ min: 1
+ max: 8192
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+ en_US: The probability threshold of the nucleus sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness of the generated output; the lower the value, the higher the certainty of the generated output.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+ en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+ - name: seed
+ required: false
+ type: int
+ default: 1234
+ label:
+ zh_Hans: 随机种子
+ en_US: Random seed
+ help:
+ zh_Hans: 生成时使用的随机数种子,用于控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。
+ en_US: The random number seed used during generation, which controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ zh_Hans: 重复惩罚
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+ en_US: Used to control the degree of repetition in model generation. Increasing repetition_penalty reduces repetition in the generated output. 1.0 means no penalty.
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.004'
+ output: '0.012'
+ unit: '0.001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-plus-0919.yaml b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-plus-0919.yaml
new file mode 100644
index 00000000..118e304a
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-plus-0919.yaml
@@ -0,0 +1,74 @@
+model: qwen-plus-0919
+label:
+ en_US: qwen-plus-0919
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: completion
+ context_size: 131072
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+ en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected, and the generated results will be more certain.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 8192
+ min: 1
+ max: 8192
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+ en_US: The probability threshold of the nucleus sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness of the generated output; the lower the value, the higher the certainty of the generated output.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+ en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+ - name: seed
+ required: false
+ type: int
+ default: 1234
+ label:
+ zh_Hans: 随机种子
+ en_US: Random seed
+ help:
+ zh_Hans: 生成时使用的随机数种子,用于控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。
+ en_US: The random number seed used during generation, which controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ zh_Hans: 重复惩罚
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+ en_US: Used to control the degree of repetition in model generation. Increasing repetition_penalty reduces repetition in the generated output. 1.0 means no penalty.
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.0008'
+ output: '0.002'
+ unit: '0.001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-plus-chat.yaml b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-plus-chat.yaml
new file mode 100644
index 00000000..761312bc
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-plus-chat.yaml
@@ -0,0 +1,77 @@
+model: qwen-plus-chat
+label:
+ en_US: qwen-plus-chat
+model_type: llm
+features:
+ - multi-tool-call
+ - agent-thought
+ - stream-tool-call
+model_properties:
+ mode: chat
+ context_size: 32768
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+ en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected, and the generated results will be more certain.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 2000
+ min: 1
+ max: 2000
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+ en_US: The probability threshold of the nucleus sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness of the generated output; the lower the value, the higher the certainty of the generated output.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+ en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+ - name: seed
+ required: false
+ type: int
+ default: 1234
+ label:
+ zh_Hans: 随机种子
+ en_US: Random seed
+ help:
+ zh_Hans: 生成时使用的随机数种子,用于控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。
+ en_US: The random number seed used during generation, which controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ zh_Hans: 重复惩罚
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+ en_US: Used to control the degree of repetition in model generation. Increasing repetition_penalty reduces repetition in the generated output. 1.0 means no penalty.
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.004'
+ output: '0.012'
+ unit: '0.001'
+ currency: RMB
+deprecated: true
diff --git a/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-plus-latest.yaml b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-plus-latest.yaml
new file mode 100644
index 00000000..430872fb
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-plus-latest.yaml
@@ -0,0 +1,74 @@
+model: qwen-plus-latest
+label:
+ en_US: qwen-plus-latest
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 131072
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+ en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected, and the generated results will be more certain.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 8192
+ min: 1
+ max: 8192
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+ en_US: The probability threshold of the nucleus sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness of the generated output; the lower the value, the higher its certainty.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+ en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+ - name: seed
+ required: false
+ type: int
+ default: 1234
+ label:
+ zh_Hans: 随机种子
+ en_US: Random seed
+ help:
+ zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。
+ en_US: The random number seed used during generation, which lets the user control the randomness of the content generated by the model. Supports unsigned 64-bit integers; the default value is 1234. When a seed is used, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ zh_Hans: 重复惩罚
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+ en_US: Used to control the degree of repetition in the text generated by the model. Increasing repetition_penalty reduces repetition in the generated text. 1.0 means no penalty.
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.0008'
+ output: '0.002'
+ unit: '0.001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-plus.yaml b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-plus.yaml
new file mode 100644
index 00000000..f3fce302
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-plus.yaml
@@ -0,0 +1,85 @@
+model: qwen-plus
+label:
+ en_US: qwen-plus
+model_type: llm
+features:
+ - multi-tool-call
+ - agent-thought
+ - stream-tool-call
+model_properties:
+ mode: chat
+ context_size: 131072
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+ en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected, and the generated results will be more certain.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 8192
+ min: 1
+ max: 8192
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+ en_US: The probability threshold of the nucleus sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness of the generated output; the lower the value, the higher its certainty.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+ en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+ - name: seed
+ required: false
+ type: int
+ default: 1234
+ label:
+ zh_Hans: 随机种子
+ en_US: Random seed
+ help:
+ zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。
+ en_US: The random number seed used during generation, which lets the user control the randomness of the content generated by the model. Supports unsigned 64-bit integers; the default value is 1234. When a seed is used, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ zh_Hans: 重复惩罚
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+ en_US: Used to control the degree of repetition in the text generated by the model. Increasing repetition_penalty reduces repetition in the generated text. 1.0 means no penalty.
+ - name: enable_search
+ type: boolean
+ default: false
+ label:
+ zh_Hans: 联网搜索
+ en_US: Web Search
+ help:
+ zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
+ en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.0008'
+ output: '0.002'
+ unit: '0.001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-turbo-0206.yaml b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-turbo-0206.yaml
new file mode 100644
index 00000000..2628d824
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-turbo-0206.yaml
@@ -0,0 +1,74 @@
+model: qwen-turbo-0206
+label:
+ en_US: qwen-turbo-0206
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 8000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+ en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected, and the generated results will be more certain.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 2000
+ min: 1
+ max: 2000
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+ en_US: The probability threshold of the nucleus sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness of the generated output; the lower the value, the higher its certainty.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+ en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+ - name: seed
+ required: false
+ type: int
+ default: 1234
+ label:
+ zh_Hans: 随机种子
+ en_US: Random seed
+ help:
+ zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。
+ en_US: The random number seed used during generation, which lets the user control the randomness of the content generated by the model. Supports unsigned 64-bit integers; the default value is 1234. When a seed is used, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ zh_Hans: 重复惩罚
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+ en_US: Used to control the degree of repetition in the text generated by the model. Increasing repetition_penalty reduces repetition in the generated text. 1.0 means no penalty.
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.002'
+ output: '0.006'
+ unit: '0.001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-turbo-0624.yaml b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-turbo-0624.yaml
new file mode 100644
index 00000000..8097459b
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-turbo-0624.yaml
@@ -0,0 +1,74 @@
+model: qwen-turbo-0624
+label:
+ en_US: qwen-turbo-0624
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 8000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+ en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected, and the generated results will be more certain.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 2000
+ min: 1
+ max: 2000
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+ en_US: The probability threshold of the nucleus sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness of the generated output; the lower the value, the higher its certainty.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+ en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+ - name: seed
+ required: false
+ type: int
+ default: 1234
+ label:
+ zh_Hans: 随机种子
+ en_US: Random seed
+ help:
+ zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。
+ en_US: The random number seed used during generation, which lets the user control the randomness of the content generated by the model. Supports unsigned 64-bit integers; the default value is 1234. When a seed is used, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ zh_Hans: 重复惩罚
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+ en_US: Used to control the degree of repetition in the text generated by the model. Increasing repetition_penalty reduces repetition in the generated text. 1.0 means no penalty.
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.002'
+ output: '0.006'
+ unit: '0.001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-turbo-0919.yaml b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-turbo-0919.yaml
new file mode 100644
index 00000000..e43beeb1
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-turbo-0919.yaml
@@ -0,0 +1,74 @@
+model: qwen-turbo-0919
+label:
+ en_US: qwen-turbo-0919
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 131072
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+ en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected, and the generated results will be more certain.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 8192
+ min: 1
+ max: 8192
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+ en_US: The probability threshold of the nucleus sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness of the generated output; the lower the value, the higher its certainty.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+ en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+ - name: seed
+ required: false
+ type: int
+ default: 1234
+ label:
+ zh_Hans: 随机种子
+ en_US: Random seed
+ help:
+ zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。
+ en_US: The random number seed used during generation, which lets the user control the randomness of the content generated by the model. Supports unsigned 64-bit integers; the default value is 1234. When a seed is used, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ zh_Hans: 重复惩罚
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+ en_US: Used to control the degree of repetition in the text generated by the model. Increasing repetition_penalty reduces repetition in the generated text. 1.0 means no penalty.
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.0003'
+ output: '0.0006'
+ unit: '0.001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-turbo-chat.yaml b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-turbo-chat.yaml
new file mode 100644
index 00000000..c30cb7ca
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-turbo-chat.yaml
@@ -0,0 +1,77 @@
+model: qwen-turbo-chat
+label:
+ en_US: qwen-turbo-chat
+model_type: llm
+features:
+ - multi-tool-call
+ - agent-thought
+ - stream-tool-call
+model_properties:
+ mode: chat
+ context_size: 8192
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+ en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected, and the generated results will be more certain.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 1500
+ min: 1
+ max: 1500
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+ en_US: The probability threshold of the nucleus sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness of the generated output; the lower the value, the higher its certainty.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+ en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+ - name: seed
+ required: false
+ type: int
+ default: 1234
+ label:
+ zh_Hans: 随机种子
+ en_US: Random seed
+ help:
+ zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。
+ en_US: The random number seed used during generation, which lets the user control the randomness of the content generated by the model. Supports unsigned 64-bit integers; the default value is 1234. When a seed is used, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ zh_Hans: 重复惩罚
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+ en_US: Used to control the degree of repetition in the text generated by the model. Increasing repetition_penalty reduces repetition in the generated text. 1.0 means no penalty.
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.002'
+ output: '0.006'
+ unit: '0.001'
+ currency: RMB
+deprecated: true
diff --git a/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-turbo-latest.yaml b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-turbo-latest.yaml
new file mode 100644
index 00000000..e443d688
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-turbo-latest.yaml
@@ -0,0 +1,74 @@
+model: qwen-turbo-latest
+label:
+ en_US: qwen-turbo-latest
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 131072
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+ en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected, and the generated results will be more certain.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 8192
+ min: 1
+ max: 8192
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+ en_US: The probability threshold of the nucleus sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness of the generated output; the lower the value, the higher its certainty.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+ en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+ - name: seed
+ required: false
+ type: int
+ default: 1234
+ label:
+ zh_Hans: 随机种子
+ en_US: Random seed
+ help:
+ zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。
+ en_US: The random number seed used during generation, which lets the user control the randomness of the content generated by the model. Supports unsigned 64-bit integers; the default value is 1234. When a seed is used, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ zh_Hans: 重复惩罚
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+ en_US: Used to control the degree of repetition in the text generated by the model. Increasing repetition_penalty reduces repetition in the generated text. 1.0 means no penalty.
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.0006'
+ output: '0.0003'
+ unit: '0.001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-turbo.yaml b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-turbo.yaml
new file mode 100644
index 00000000..33f05967
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-turbo.yaml
@@ -0,0 +1,85 @@
+model: qwen-turbo
+label:
+ en_US: qwen-turbo
+model_type: llm
+features:
+ - multi-tool-call
+ - agent-thought
+ - stream-tool-call
+model_properties:
+ mode: chat
+ context_size: 8000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+ en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected, and the generated results will be more certain.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 2000
+ min: 1
+ max: 2000
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+ en_US: The probability threshold of the nucleus sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness of the generated output; the lower the value, the higher its certainty.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+ en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+ - name: seed
+ required: false
+ type: int
+ default: 1234
+ label:
+ zh_Hans: 随机种子
+ en_US: Random seed
+ help:
+ zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。
+ en_US: The random number seed used during generation, which lets the user control the randomness of the content generated by the model. Supports unsigned 64-bit integers; the default value is 1234. When a seed is used, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ zh_Hans: 重复惩罚
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+ en_US: Used to control the degree of repetition in the text generated by the model. Increasing repetition_penalty reduces repetition in the generated text. 1.0 means no penalty.
+ - name: enable_search
+ type: boolean
+ default: false
+ label:
+ zh_Hans: 联网搜索
+ en_US: Web Search
+ help:
+ zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
+ en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.0006'
+ output: '0.0003'
+ unit: '0.001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-vl-max-0201.yaml b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-vl-max-0201.yaml
new file mode 100644
index 00000000..63b6074d
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-vl-max-0201.yaml
@@ -0,0 +1,48 @@
+model: qwen-vl-max-0201
+label:
+ en_US: qwen-vl-max-0201
+model_type: llm
+features:
+ - vision
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 8192
+parameter_rules:
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+ en_US: The probability threshold of the nucleus sampling (top-p) method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens whose probabilities sum to at least 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the more random the generated output; the lower the value, the more deterministic the generated output.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+ en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+ - name: seed
+ required: false
+ type: int
+ default: 1234
+ label:
+ zh_Hans: 随机种子
+ en_US: Random seed
+ help:
+ zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。
+ en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.02'
+ output: '0.02'
+ unit: '0.001'
+ currency: RMB
+deprecated: true
diff --git a/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-vl-max-0809.yaml b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-vl-max-0809.yaml
new file mode 100644
index 00000000..fd203770
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-vl-max-0809.yaml
@@ -0,0 +1,78 @@
+model: qwen-vl-max-0809
+label:
+ en_US: qwen-vl-max-0809
+model_type: llm
+features:
+ - vision
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 32000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+ en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected, and the generated results will be more certain.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+ en_US: The probability threshold of the nucleus sampling (top-p) method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens whose probabilities sum to at least 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the more random the generated output; the lower the value, the more deterministic the generated output.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+ en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+ - name: max_tokens
+ required: false
+ use_template: max_tokens
+ type: int
+ default: 2000
+ min: 1
+ max: 2000
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+ - name: seed
+ required: false
+ type: int
+ default: 1234
+ label:
+ zh_Hans: 随机种子
+ en_US: Random seed
+ help:
+ zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。
+ en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
+ - name: response_format
+ use_template: response_format
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ zh_Hans: 重复惩罚
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+ en_US: Used to control the degree of repetition in the generated output. Increasing repetition_penalty reduces repetition in the generated text. 1.0 means no penalty.
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.02'
+ output: '0.02'
+ unit: '0.001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-vl-max.yaml b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-vl-max.yaml
new file mode 100644
index 00000000..31a9fb51
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-vl-max.yaml
@@ -0,0 +1,78 @@
+model: qwen-vl-max
+label:
+ en_US: qwen-vl-max
+model_type: llm
+features:
+ - vision
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 32000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+ en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected, and the generated results will be more certain.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+ en_US: The probability threshold of the nucleus sampling (top-p) method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens whose probabilities sum to at least 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the more random the generated output; the lower the value, the more deterministic the generated output.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+ en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+ - name: max_tokens
+ required: false
+ use_template: max_tokens
+ type: int
+ default: 2000
+ min: 1
+ max: 2000
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+ - name: seed
+ required: false
+ type: int
+ default: 1234
+ label:
+ zh_Hans: 随机种子
+ en_US: Random seed
+ help:
+ zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。
+ en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
+ - name: response_format
+ use_template: response_format
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ zh_Hans: 重复惩罚
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+ en_US: Used to control the degree of repetition in the generated output. Increasing repetition_penalty reduces repetition in the generated text. 1.0 means no penalty.
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.02'
+ output: '0.02'
+ unit: '0.001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-vl-plus-0201.yaml b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-vl-plus-0201.yaml
new file mode 100644
index 00000000..5f90cf48
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-vl-plus-0201.yaml
@@ -0,0 +1,78 @@
+model: qwen-vl-plus-0201
+label:
+ en_US: qwen-vl-plus-0201
+model_type: llm
+features:
+ - vision
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 8000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+ en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected, and the generated results will be more certain.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+ en_US: The probability threshold of the nucleus sampling (top-p) method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens whose probabilities sum to at least 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the more random the generated output; the lower the value, the more deterministic the generated output.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+ en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+ - name: max_tokens
+ required: false
+ use_template: max_tokens
+ type: int
+ default: 2000
+ min: 1
+ max: 2000
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+ - name: seed
+ required: false
+ type: int
+ default: 1234
+ label:
+ zh_Hans: 随机种子
+ en_US: Random seed
+ help:
+ zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。
+ en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
+ - name: response_format
+ use_template: response_format
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ zh_Hans: 重复惩罚
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+ en_US: Used to control the degree of repetition in the generated output. Increasing repetition_penalty reduces repetition in the generated text. 1.0 means no penalty.
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.02'
+ output: '0.02'
+ unit: '0.001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-vl-plus-0809.yaml b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-vl-plus-0809.yaml
new file mode 100644
index 00000000..97820c0f
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-vl-plus-0809.yaml
@@ -0,0 +1,78 @@
+model: qwen-vl-plus-0809
+label:
+ en_US: qwen-vl-plus-0809
+model_type: llm
+features:
+ - vision
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 32768
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+ en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected, and the generated results will be more certain.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+ en_US: The probability threshold of the nucleus sampling (top-p) method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens whose probabilities sum to at least 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the more random the generated output; the lower the value, the more deterministic the generated output.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+ en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+ - name: max_tokens
+ required: false
+ use_template: max_tokens
+ type: int
+ default: 2000
+ min: 1
+ max: 2000
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+ - name: seed
+ required: false
+ type: int
+ default: 1234
+ label:
+ zh_Hans: 随机种子
+ en_US: Random seed
+ help:
+ zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。
+ en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
+ - name: response_format
+ use_template: response_format
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ zh_Hans: 重复惩罚
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+ en_US: Used to control the degree of repetition in the generated output. Increasing repetition_penalty reduces repetition in the generated text. 1.0 means no penalty.
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.008'
+ output: '0.008'
+ unit: '0.001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-vl-plus.yaml b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-vl-plus.yaml
new file mode 100644
index 00000000..6af36cd6
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen-vl-plus.yaml
@@ -0,0 +1,78 @@
+model: qwen-vl-plus
+label:
+ en_US: qwen-vl-plus
+model_type: llm
+features:
+ - vision
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 8000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+ en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected, and the generated results will be more certain.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+ en_US: The probability threshold of the nucleus sampling (top-p) method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens whose probabilities sum to at least 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the more random the generated output; the lower the value, the more deterministic the generated output.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+ en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+ - name: max_tokens
+ required: false
+ use_template: max_tokens
+ type: int
+ default: 2000
+ min: 1
+ max: 2000
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+ - name: seed
+ required: false
+ type: int
+ default: 1234
+ label:
+ zh_Hans: 随机种子
+ en_US: Random seed
+ help:
+ zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。
+ en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
+ - name: response_format
+ use_template: response_format
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ zh_Hans: 重复惩罚
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+ en_US: Used to control the degree of repetition in the generated output. Increasing repetition_penalty reduces repetition in the generated text. 1.0 means no penalty.
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.008'
+ output: '0.008'
+ unit: '0.001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/tongyi/llm/qwen2-math-1.5b-instruct.yaml b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen2-math-1.5b-instruct.yaml
new file mode 100644
index 00000000..158e2c7e
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen2-math-1.5b-instruct.yaml
@@ -0,0 +1,74 @@
+model: qwen2-math-1.5b-instruct
+label:
+ en_US: qwen2-math-1.5b-instruct
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 4096
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+ en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected, and the generated results will be more certain.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 2000
+ min: 1
+ max: 2000
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+ en_US: The probability threshold of the nucleus sampling (top-p) method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens whose probabilities sum to at least 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the more random the generated output; the lower the value, the more deterministic the generated output.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+ en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+ - name: seed
+ required: false
+ type: int
+ default: 1234
+ label:
+ zh_Hans: 随机种子
+ en_US: Random seed
+ help:
+ zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。
+ en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ zh_Hans: 重复惩罚
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+ en_US: Used to control the degree of repetition in the generated output. Increasing repetition_penalty reduces repetition in the generated text. 1.0 means no penalty.
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.004'
+ output: '0.012'
+ unit: '0.001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/tongyi/llm/qwen2-math-72b-instruct.yaml b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen2-math-72b-instruct.yaml
new file mode 100644
index 00000000..e26a6923
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen2-math-72b-instruct.yaml
@@ -0,0 +1,74 @@
+model: qwen2-math-72b-instruct
+label:
+ en_US: qwen2-math-72b-instruct
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 4096
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+ en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected, and the generated results will be more certain.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 2000
+ min: 1
+ max: 2000
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+ en_US: The probability threshold of the nucleus sampling (top-p) method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens whose probabilities sum to at least 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the more random the generated output; the lower the value, the more deterministic the generated output.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+ en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+ - name: seed
+ required: false
+ type: int
+ default: 1234
+ label:
+ zh_Hans: 随机种子
+ en_US: Random seed
+ help:
+ zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。
+ en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ zh_Hans: 重复惩罚
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+ en_US: Used to control the degree of repetition in the generated output. Increasing repetition_penalty reduces repetition in the generated text. 1.0 means no penalty.
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.004'
+ output: '0.012'
+ unit: '0.001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/tongyi/llm/qwen2-math-7b-instruct.yaml b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen2-math-7b-instruct.yaml
new file mode 100644
index 00000000..589119b2
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen2-math-7b-instruct.yaml
@@ -0,0 +1,74 @@
+model: qwen2-math-7b-instruct
+label:
+ en_US: qwen2-math-7b-instruct
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 4096
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+ en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected, and the generated results will be more certain.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 2000
+ min: 1
+ max: 2000
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+ en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+ en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+ - name: seed
+ required: false
+ type: int
+ default: 1234
+ label:
+ zh_Hans: 随机种子
+ en_US: Random seed
+ help:
+ zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。
+ en_US: The random number seed used during generation, which controls the randomness of the content generated by the model. Supports unsigned 64-bit integers; the default value is 1234. When a seed is used, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ zh_Hans: 重复惩罚
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+ en_US: Used to control the degree of repetition in the text generated by the model. Increasing repetition_penalty reduces repetition in the generated text. 1.0 means no penalty.
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.004'
+ output: '0.012'
+ unit: '0.001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/tongyi/llm/qwen2.5-0.5b-instruct.yaml b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen2.5-0.5b-instruct.yaml
new file mode 100644
index 00000000..dd608fbf
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen2.5-0.5b-instruct.yaml
@@ -0,0 +1,74 @@
+model: qwen2.5-0.5b-instruct
+label:
+ en_US: qwen2.5-0.5b-instruct
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 32768
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+ en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected, and the generated results will be more certain.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 8192
+ min: 1
+ max: 8192
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+ en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+ en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+ - name: seed
+ required: false
+ type: int
+ default: 1234
+ label:
+ zh_Hans: 随机种子
+ en_US: Random seed
+ help:
+ zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。
+ en_US: The random number seed used during generation, which controls the randomness of the content generated by the model. Supports unsigned 64-bit integers; the default value is 1234. When a seed is used, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ zh_Hans: 重复惩罚
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+ en_US: Used to control the degree of repetition in the text generated by the model. Increasing repetition_penalty reduces repetition in the generated text. 1.0 means no penalty.
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.000'
+ output: '0.000'
+ unit: '0.001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/tongyi/llm/qwen2.5-1.5b-instruct.yaml b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen2.5-1.5b-instruct.yaml
new file mode 100644
index 00000000..08237b39
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen2.5-1.5b-instruct.yaml
@@ -0,0 +1,74 @@
+model: qwen2.5-1.5b-instruct
+label:
+ en_US: qwen2.5-1.5b-instruct
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 32768
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+ en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected, and the generated results will be more certain.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 8192
+ min: 1
+ max: 8192
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+ en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+ en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+ - name: seed
+ required: false
+ type: int
+ default: 1234
+ label:
+ zh_Hans: 随机种子
+ en_US: Random seed
+ help:
+ zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。
+ en_US: The random number seed used during generation, which controls the randomness of the content generated by the model. Supports unsigned 64-bit integers; the default value is 1234. When a seed is used, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ zh_Hans: 重复惩罚
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+ en_US: Used to control the degree of repetition in the text generated by the model. Increasing repetition_penalty reduces repetition in the generated text. 1.0 means no penalty.
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.000'
+ output: '0.000'
+ unit: '0.001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/tongyi/llm/qwen2.5-14b-instruct.yaml b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen2.5-14b-instruct.yaml
new file mode 100644
index 00000000..640b0197
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen2.5-14b-instruct.yaml
@@ -0,0 +1,74 @@
+model: qwen2.5-14b-instruct
+label:
+ en_US: qwen2.5-14b-instruct
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 131072
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+ en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected, and the generated results will be more certain.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 8192
+ min: 1
+ max: 8192
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+ en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+ en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+ - name: seed
+ required: false
+ type: int
+ default: 1234
+ label:
+ zh_Hans: 随机种子
+ en_US: Random seed
+ help:
+ zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。
+ en_US: The random number seed used during generation, which controls the randomness of the content generated by the model. Supports unsigned 64-bit integers; the default value is 1234. When a seed is used, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ zh_Hans: 重复惩罚
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+ en_US: Used to control the degree of repetition in the text generated by the model. Increasing repetition_penalty reduces repetition in the generated text. 1.0 means no penalty.
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.002'
+ output: '0.006'
+ unit: '0.001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/tongyi/llm/qwen2.5-32b-instruct.yaml b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen2.5-32b-instruct.yaml
new file mode 100644
index 00000000..3a90ca75
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen2.5-32b-instruct.yaml
@@ -0,0 +1,74 @@
+model: qwen2.5-32b-instruct
+label:
+ en_US: qwen2.5-32b-instruct
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 131072
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+ en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected, and the generated results will be more certain.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 8192
+ min: 1
+ max: 8192
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+ en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+ en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+ - name: seed
+ required: false
+ type: int
+ default: 1234
+ label:
+ zh_Hans: 随机种子
+ en_US: Random seed
+ help:
+ zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。
+ en_US: The random number seed used during generation, which controls the randomness of the content generated by the model. Supports unsigned 64-bit integers; the default value is 1234. When a seed is used, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ zh_Hans: 重复惩罚
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+ en_US: Used to control the degree of repetition in the text generated by the model. Increasing repetition_penalty reduces repetition in the generated text. 1.0 means no penalty.
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.0035'
+ output: '0.007'
+ unit: '0.001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/tongyi/llm/qwen2.5-3b-instruct.yaml b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen2.5-3b-instruct.yaml
new file mode 100644
index 00000000..b79755eb
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen2.5-3b-instruct.yaml
@@ -0,0 +1,74 @@
+model: qwen2.5-3b-instruct
+label:
+ en_US: qwen2.5-3b-instruct
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 32768
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+ en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected, and the generated results will be more certain.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 8192
+ min: 1
+ max: 8192
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+ en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+ en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+ - name: seed
+ required: false
+ type: int
+ default: 1234
+ label:
+ zh_Hans: 随机种子
+ en_US: Random seed
+ help:
+ zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。
+ en_US: The random number seed used during generation, which controls the randomness of the content generated by the model. Supports unsigned 64-bit integers; the default value is 1234. When a seed is used, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ zh_Hans: 重复惩罚
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+ en_US: Used to control the degree of repetition in the text generated by the model. Increasing repetition_penalty reduces repetition in the generated text. 1.0 means no penalty.
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.000'
+ output: '0.000'
+ unit: '0.001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/tongyi/llm/qwen2.5-72b-instruct.yaml b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen2.5-72b-instruct.yaml
new file mode 100644
index 00000000..e9dd51a3
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen2.5-72b-instruct.yaml
@@ -0,0 +1,74 @@
+model: qwen2.5-72b-instruct
+label:
+ en_US: qwen2.5-72b-instruct
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 131072
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+ en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected, and the generated results will be more certain.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 8192
+ min: 1
+ max: 8192
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+ en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+ en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+ - name: seed
+ required: false
+ type: int
+ default: 1234
+ label:
+ zh_Hans: 随机种子
+ en_US: Random seed
+ help:
+ zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。
+ en_US: The random number seed used during generation, which controls the randomness of the content generated by the model. Supports unsigned 64-bit integers; the default value is 1234. When a seed is used, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ zh_Hans: 重复惩罚
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+ en_US: Used to control the degree of repetition in the text generated by the model. Increasing repetition_penalty reduces repetition in the generated text. 1.0 means no penalty.
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.004'
+ output: '0.012'
+ unit: '0.001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/tongyi/llm/qwen2.5-7b-instruct.yaml b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen2.5-7b-instruct.yaml
new file mode 100644
index 00000000..04f26cf5
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen2.5-7b-instruct.yaml
@@ -0,0 +1,74 @@
+model: qwen2.5-7b-instruct
+label:
+ en_US: qwen2.5-7b-instruct
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 131072
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+ en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected, and the generated results will be more certain.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 8192
+ min: 1
+ max: 8192
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+ en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+ en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+ - name: seed
+ required: false
+ type: int
+ default: 1234
+ label:
+ zh_Hans: 随机种子
+ en_US: Random seed
+ help:
+ zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。
+ en_US: The random number seed used during generation, which controls the randomness of the content generated by the model. Supports unsigned 64-bit integers; the default value is 1234. When a seed is used, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ zh_Hans: 重复惩罚
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+ en_US: Used to control the degree of repetition in the text generated by the model. Increasing repetition_penalty reduces repetition in the generated text. 1.0 means no penalty.
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.001'
+ output: '0.002'
+ unit: '0.001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/tongyi/llm/qwen2.5-coder-7b-instruct.yaml b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen2.5-coder-7b-instruct.yaml
new file mode 100644
index 00000000..04f26cf5
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/tongyi/llm/qwen2.5-coder-7b-instruct.yaml
@@ -0,0 +1,74 @@
+model: qwen2.5-coder-7b-instruct
+label:
+ en_US: qwen2.5-coder-7b-instruct
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 131072
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+ en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected, and the generated results are more certain.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 8192
+ min: 1
+ max: 8192
+ help:
+ zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+ en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.1
+ max: 0.9
+ help:
+ zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+ en_US: The probability threshold for nucleus sampling during generation. For example, when the value is 0.8, only the smallest set of the most likely tokens whose probabilities sum to at least 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the more random the output; the smaller the value, the more deterministic the output.
+ - name: top_k
+ type: int
+ min: 0
+ max: 99
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ help:
+ zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+ en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+ - name: seed
+ required: false
+ type: int
+ default: 1234
+ label:
+ zh_Hans: 随机种子
+ en_US: Random seed
+ help:
+ zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。
+ en_US: The random number seed used during generation, which controls the randomness of the content generated by the model. Supports unsigned 64-bit integers; the default value is 1234. When a seed is set, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ zh_Hans: 重复惩罚
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+ en_US: Controls the degree of repetition in the generated output. Increasing repetition_penalty reduces repetition. 1.0 means no penalty.
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.001'
+ output: '0.002'
+ unit: '0.001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/tongyi/tongyi.yaml b/ai-provider/model-runtime/model-providers/tongyi/tongyi.yaml
new file mode 100644
index 00000000..6b2f9192
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/tongyi/tongyi.yaml
@@ -0,0 +1,51 @@
+provider: tongyi
+label:
+ zh_Hans: 通义千问
+ en_US: TONGYI
+icon_small:
+ en_US: icon_s_en.svg
+icon_large:
+ zh_Hans: icon_l_zh.svg
+ en_US: icon_l_en.svg
+background: "#EFF1FE"
+help:
+ title:
+ en_US: Get your API key from AliCloud
+ zh_Hans: 从阿里云百炼获取 API Key
+ url:
+ en_US: https://bailian.console.aliyun.com/?apiKey=1#/api-key
+supported_model_types:
+ - llm
+ - tts
+ - text-embedding
+configurate_methods:
+ - predefined-model
+ - customizable-model
+provider_credential_schema:
+ credential_form_schemas:
+ - variable: dashscope_api_key
+ label:
+ en_US: API Key
+ type: secret-input
+ required: true
+ placeholder:
+ zh_Hans: 在此输入您的 API Key
+ en_US: Enter your API Key
+model_credential_schema:
+ model:
+ label:
+ en_US: Model Name
+ zh_Hans: 模型名称
+ placeholder:
+ en_US: Enter full model name
+ zh_Hans: 输入模型全称
+ credential_form_schemas:
+ - variable: dashscope_api_key
+ required: true
+ label:
+ en_US: API Key
+ type: secret-input
+ placeholder:
+ zh_Hans: 在此输入您的 API Key
+ en_US: Enter your API Key
+address: https://api.openai.com
\ No newline at end of file
diff --git a/ai-provider/model-runtime/model-providers/upstage/assets/icon_l_en.svg b/ai-provider/model-runtime/model-providers/upstage/assets/icon_l_en.svg
new file mode 100644
index 00000000..0761f85b
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/upstage/assets/icon_l_en.svg
@@ -0,0 +1,14 @@
+
diff --git a/ai-provider/model-runtime/model-providers/upstage/assets/icon_s_en.svg b/ai-provider/model-runtime/model-providers/upstage/assets/icon_s_en.svg
new file mode 100644
index 00000000..44ef12b7
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/upstage/assets/icon_s_en.svg
@@ -0,0 +1,3 @@
+
diff --git a/ai-provider/model-runtime/model-providers/upstage/llm/solar-1-mini-chat.yaml b/ai-provider/model-runtime/model-providers/upstage/llm/solar-1-mini-chat.yaml
new file mode 100644
index 00000000..787ac83f
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/upstage/llm/solar-1-mini-chat.yaml
@@ -0,0 +1,43 @@
+model: solar-1-mini-chat
+label:
+ zh_Hans: solar-1-mini-chat
+ en_US: solar-1-mini-chat
+ ko_KR: solar-1-mini-chat
+model_type: llm
+features:
+ - multi-tool-call
+ - agent-thought
+ - stream-tool-call
+model_properties:
+ mode: chat
+ context_size: 32768
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: max_tokens
+ use_template: max_tokens
+ default: 512
+ min: 1
+ max: 32768
+ - name: seed
+ label:
+ zh_Hans: 种子
+ en_US: Seed
+ type: int
+ help:
+ zh_Hans:
+ 如果指定,模型将尽最大努力进行确定性采样,使得重复的具有相同种子和参数的请求应该返回相同的结果。不能保证确定性,您应该参考 system_fingerprint
+ 响应参数来监视变化。
+ en_US:
+ If specified, model will make a best effort to sample deterministically,
+ such that repeated requests with the same seed and parameters should return
+ the same result. Determinism is not guaranteed, and you should refer to the
+ system_fingerprint response parameter to monitor changes in the backend.
+ required: false
+pricing:
+ input: "0.5"
+ output: "0.5"
+ unit: "0.000001"
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/upstage/upstage.yaml b/ai-provider/model-runtime/model-providers/upstage/upstage.yaml
new file mode 100644
index 00000000..2c46c9d3
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/upstage/upstage.yaml
@@ -0,0 +1,50 @@
+provider: upstage
+label:
+ en_US: Upstage
+description:
+ en_US: Models provided by Upstage, such as Solar-1-mini-chat.
+ zh_Hans: Upstage 提供的模型,例如 Solar-1-mini-chat.
+icon_small:
+ en_US: icon_s_en.svg
+icon_large:
+ en_US: icon_l_en.svg
+background: "#FFFFFF"
+help:
+ title:
+ en_US: Get your API Key from Upstage
+ zh_Hans: 从 Upstage 获取 API Key
+ url:
+ en_US: https://console.upstage.ai/api-keys
+supported_model_types:
+ - llm
+ - text-embedding
+configurate_methods:
+ - predefined-model
+model_credential_schema:
+ model:
+ label:
+ en_US: Model Name
+ zh_Hans: 模型名称
+ placeholder:
+ en_US: Enter your model name
+ zh_Hans: 输入模型名称
+ credential_form_schemas:
+ - variable: upstage_api_key
+ label:
+ en_US: API Key
+ type: secret-input
+ required: true
+ placeholder:
+ zh_Hans: 在此输入您的 API Key
+ en_US: Enter your API Key
+provider_credential_schema:
+ credential_form_schemas:
+ - variable: upstage_api_key
+ label:
+ en_US: API Key
+ type: secret-input
+ required: true
+ placeholder:
+ zh_Hans: 在此输入您的 API Key
+ en_US: Enter your API Key
+address: https://api.openai.com
\ No newline at end of file
diff --git a/ai-provider/model-runtime/model-providers/vertex_ai/assets/icon_l_en.svg b/ai-provider/model-runtime/model-providers/vertex_ai/assets/icon_l_en.svg
new file mode 100644
index 00000000..d7cbf326
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/vertex_ai/assets/icon_l_en.svg
@@ -0,0 +1,2 @@
+
\ No newline at end of file
diff --git a/ai-provider/model-runtime/model-providers/vertex_ai/assets/icon_s_en.svg b/ai-provider/model-runtime/model-providers/vertex_ai/assets/icon_s_en.svg
new file mode 100644
index 00000000..efc3589c
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/vertex_ai/assets/icon_s_en.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/ai-provider/model-runtime/model-providers/vertex_ai/llm/anthropic.claude-3-haiku.yaml b/ai-provider/model-runtime/model-providers/vertex_ai/llm/anthropic.claude-3-haiku.yaml
new file mode 100644
index 00000000..56133486
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/vertex_ai/llm/anthropic.claude-3-haiku.yaml
@@ -0,0 +1,56 @@
+model: claude-3-haiku@20240307
+label:
+ en_US: Claude 3 Haiku
+model_type: llm
+features:
+ - agent-thought
+ - vision
+model_properties:
+ mode: chat
+ context_size: 200000
+parameter_rules:
+ - name: max_tokens
+ use_template: max_tokens
+ required: true
+ type: int
+ default: 4096
+ min: 1
+ max: 4096
+ help:
+ zh_Hans: 停止前生成的最大令牌数。请注意,Anthropic Claude 模型可能会在达到 max_tokens 的值之前停止生成令牌。不同的 Anthropic Claude 模型对此参数具有不同的最大值。
+ en_US: The maximum number of tokens to generate before stopping. Note that Anthropic Claude models might stop generating tokens before reaching the value of max_tokens. Different Anthropic Claude models have different maximum values for this parameter.
+ # docs: https://docs.anthropic.com/claude/docs/system-prompts
+ - name: temperature
+ use_template: temperature
+ required: false
+ type: float
+ default: 1
+ min: 0.0
+ max: 1.0
+ help:
+ zh_Hans: 生成内容的随机性。
+ en_US: The amount of randomness injected into the response.
+ - name: top_p
+ required: false
+ type: float
+ default: 0.999
+ min: 0.000
+ max: 1.000
+ help:
+ zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
+ en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
+ - name: top_k
+ required: false
+ type: int
+ default: 0
+ min: 0
+ # Note: the AWS docs are wrong here; the actual max value is 500.
+ max: 500
+ help:
+ zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
+ en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
+pricing:
+ input: '0.00025'
+ output: '0.00125'
+ unit: '0.001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/vertex_ai/llm/anthropic.claude-3-opus.yaml b/ai-provider/model-runtime/model-providers/vertex_ai/llm/anthropic.claude-3-opus.yaml
new file mode 100644
index 00000000..ab084636
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/vertex_ai/llm/anthropic.claude-3-opus.yaml
@@ -0,0 +1,56 @@
+model: claude-3-opus@20240229
+label:
+ en_US: Claude 3 Opus
+model_type: llm
+features:
+ - agent-thought
+ - vision
+model_properties:
+ mode: chat
+ context_size: 200000
+parameter_rules:
+ - name: max_tokens
+ use_template: max_tokens
+ required: true
+ type: int
+ default: 4096
+ min: 1
+ max: 4096
+ help:
+ zh_Hans: 停止前生成的最大令牌数。请注意,Anthropic Claude 模型可能会在达到 max_tokens 的值之前停止生成令牌。不同的 Anthropic Claude 模型对此参数具有不同的最大值。
+ en_US: The maximum number of tokens to generate before stopping. Note that Anthropic Claude models might stop generating tokens before reaching the value of max_tokens. Different Anthropic Claude models have different maximum values for this parameter.
+ # docs: https://docs.anthropic.com/claude/docs/system-prompts
+ - name: temperature
+ use_template: temperature
+ required: false
+ type: float
+ default: 1
+ min: 0.0
+ max: 1.0
+ help:
+ zh_Hans: 生成内容的随机性。
+ en_US: The amount of randomness injected into the response.
+ - name: top_p
+ required: false
+ type: float
+ default: 0.999
+ min: 0.000
+ max: 1.000
+ help:
+ zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
+ en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
+ - name: top_k
+ required: false
+ type: int
+ default: 0
+ min: 0
+ # Note: the AWS docs are wrong here; the actual max value is 500.
+ max: 500
+ help:
+ zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
+ en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
+pricing:
+ input: '0.015'
+ output: '0.075'
+ unit: '0.001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/vertex_ai/llm/anthropic.claude-3-sonnet.yaml b/ai-provider/model-runtime/model-providers/vertex_ai/llm/anthropic.claude-3-sonnet.yaml
new file mode 100644
index 00000000..0be0113f
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/vertex_ai/llm/anthropic.claude-3-sonnet.yaml
@@ -0,0 +1,55 @@
+model: claude-3-sonnet@20240229
+label:
+ en_US: Claude 3 Sonnet
+model_type: llm
+features:
+ - agent-thought
+ - vision
+model_properties:
+ mode: chat
+ context_size: 200000
+parameter_rules:
+ - name: max_tokens
+ use_template: max_tokens
+ required: true
+ type: int
+ default: 4096
+ min: 1
+ max: 4096
+ help:
+ zh_Hans: 停止前生成的最大令牌数。请注意,Anthropic Claude 模型可能会在达到 max_tokens 的值之前停止生成令牌。不同的 Anthropic Claude 模型对此参数具有不同的最大值。
+ en_US: The maximum number of tokens to generate before stopping. Note that Anthropic Claude models might stop generating tokens before reaching the value of max_tokens. Different Anthropic Claude models have different maximum values for this parameter.
+ - name: temperature
+ use_template: temperature
+ required: false
+ type: float
+ default: 1
+ min: 0.0
+ max: 1.0
+ help:
+ zh_Hans: 生成内容的随机性。
+ en_US: The amount of randomness injected into the response.
+ - name: top_p
+ required: false
+ type: float
+ default: 0.999
+ min: 0.000
+ max: 1.000
+ help:
+ zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
+ en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
+ - name: top_k
+ required: false
+ type: int
+ default: 0
+ min: 0
+ # Note: the AWS docs are wrong here; the actual max value is 500.
+ max: 500
+ help:
+ zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
+ en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
+pricing:
+ input: '0.003'
+ output: '0.015'
+ unit: '0.001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/vertex_ai/llm/anthropic.claude-3.5-sonnet.yaml b/ai-provider/model-runtime/model-providers/vertex_ai/llm/anthropic.claude-3.5-sonnet.yaml
new file mode 100644
index 00000000..c64384e6
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/vertex_ai/llm/anthropic.claude-3.5-sonnet.yaml
@@ -0,0 +1,55 @@
+model: claude-3-5-sonnet@20240620
+label:
+ en_US: Claude 3.5 Sonnet
+model_type: llm
+features:
+ - agent-thought
+ - vision
+model_properties:
+ mode: chat
+ context_size: 200000
+parameter_rules:
+ - name: max_tokens
+ use_template: max_tokens
+ required: true
+ type: int
+ default: 4096
+ min: 1
+ max: 4096
+ help:
+ zh_Hans: 停止前生成的最大令牌数。请注意,Anthropic Claude 模型可能会在达到 max_tokens 的值之前停止生成令牌。不同的 Anthropic Claude 模型对此参数具有不同的最大值。
+ en_US: The maximum number of tokens to generate before stopping. Note that Anthropic Claude models might stop generating tokens before reaching the value of max_tokens. Different Anthropic Claude models have different maximum values for this parameter.
+ - name: temperature
+ use_template: temperature
+ required: false
+ type: float
+ default: 1
+ min: 0.0
+ max: 1.0
+ help:
+ zh_Hans: 生成内容的随机性。
+ en_US: The amount of randomness injected into the response.
+ - name: top_p
+ required: false
+ type: float
+ default: 0.999
+ min: 0.000
+ max: 1.000
+ help:
+ zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
+ en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
+ - name: top_k
+ required: false
+ type: int
+ default: 0
+ min: 0
+ # Note: the AWS docs are wrong here; the actual max value is 500.
+ max: 500
+ help:
+ zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
+ en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
+pricing:
+ input: '0.003'
+ output: '0.015'
+ unit: '0.001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/vertex_ai/llm/gemini-1.0-pro-vision.yaml b/ai-provider/model-runtime/model-providers/vertex_ai/llm/gemini-1.0-pro-vision.yaml
new file mode 100644
index 00000000..ebb276b8
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/vertex_ai/llm/gemini-1.0-pro-vision.yaml
@@ -0,0 +1,37 @@
+model: gemini-1.0-pro-vision-001
+label:
+ en_US: Gemini 1.0 Pro Vision
+model_type: llm
+features:
+ - agent-thought
+ - vision
+model_properties:
+ mode: chat
+ context_size: 16384
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ en_US: Top k
+ type: int
+ help:
+ en_US: Only sample from the top K options for each subsequent token.
+ required: false
+ - name: presence_penalty
+ use_template: presence_penalty
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ - name: max_output_tokens
+ use_template: max_tokens
+ required: true
+ default: 2048
+ min: 1
+ max: 2048
+pricing:
+ input: '0.00'
+ output: '0.00'
+ unit: '0.000001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/vertex_ai/llm/gemini-1.0-pro.yaml b/ai-provider/model-runtime/model-providers/vertex_ai/llm/gemini-1.0-pro.yaml
new file mode 100644
index 00000000..c3259738
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/vertex_ai/llm/gemini-1.0-pro.yaml
@@ -0,0 +1,36 @@
+model: gemini-1.0-pro-002
+label:
+ en_US: Gemini 1.0 Pro
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 32760
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ en_US: Top k
+ type: int
+ help:
+ en_US: Only sample from the top K options for each subsequent token.
+ required: false
+ - name: presence_penalty
+ use_template: presence_penalty
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ - name: max_output_tokens
+ use_template: max_tokens
+ required: true
+ default: 8192
+ min: 1
+ max: 8192
+pricing:
+ input: '0.00'
+ output: '0.00'
+ unit: '0.000001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/vertex_ai/llm/gemini-1.5-flash-001.yaml b/ai-provider/model-runtime/model-providers/vertex_ai/llm/gemini-1.5-flash-001.yaml
new file mode 100644
index 00000000..f5386be0
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/vertex_ai/llm/gemini-1.5-flash-001.yaml
@@ -0,0 +1,37 @@
+model: gemini-1.5-flash-001
+label:
+ en_US: Gemini 1.5 Flash 001
+model_type: llm
+features:
+ - agent-thought
+ - vision
+model_properties:
+ mode: chat
+ context_size: 1048576
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ en_US: Top k
+ type: int
+ help:
+ en_US: Only sample from the top K options for each subsequent token.
+ required: false
+ - name: presence_penalty
+ use_template: presence_penalty
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ - name: max_output_tokens
+ use_template: max_tokens
+ required: true
+ default: 8192
+ min: 1
+ max: 8192
+pricing:
+ input: '0.00'
+ output: '0.00'
+ unit: '0.000001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/vertex_ai/llm/gemini-1.5-flash-002.yaml b/ai-provider/model-runtime/model-providers/vertex_ai/llm/gemini-1.5-flash-002.yaml
new file mode 100644
index 00000000..97bd44f0
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/vertex_ai/llm/gemini-1.5-flash-002.yaml
@@ -0,0 +1,37 @@
+model: gemini-1.5-flash-002
+label:
+ en_US: Gemini 1.5 Flash 002
+model_type: llm
+features:
+ - agent-thought
+ - vision
+model_properties:
+ mode: chat
+ context_size: 1048576
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ en_US: Top k
+ type: int
+ help:
+ en_US: Only sample from the top K options for each subsequent token.
+ required: false
+ - name: presence_penalty
+ use_template: presence_penalty
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ - name: max_output_tokens
+ use_template: max_tokens
+ required: true
+ default: 8192
+ min: 1
+ max: 8192
+pricing:
+ input: '0.00'
+ output: '0.00'
+ unit: '0.000001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/vertex_ai/llm/gemini-1.5-pro-001.yaml b/ai-provider/model-runtime/model-providers/vertex_ai/llm/gemini-1.5-pro-001.yaml
new file mode 100644
index 00000000..5e08f229
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/vertex_ai/llm/gemini-1.5-pro-001.yaml
@@ -0,0 +1,37 @@
+model: gemini-1.5-pro-001
+label:
+ en_US: Gemini 1.5 Pro 001
+model_type: llm
+features:
+ - agent-thought
+ - vision
+model_properties:
+ mode: chat
+ context_size: 1048576
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ en_US: Top k
+ type: int
+ help:
+ en_US: Only sample from the top K options for each subsequent token.
+ required: false
+ - name: presence_penalty
+ use_template: presence_penalty
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ - name: max_output_tokens
+ use_template: max_tokens
+ required: true
+ default: 8192
+ min: 1
+ max: 8192
+pricing:
+ input: '0.00'
+ output: '0.00'
+ unit: '0.000001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/vertex_ai/llm/gemini-1.5-pro-002.yaml b/ai-provider/model-runtime/model-providers/vertex_ai/llm/gemini-1.5-pro-002.yaml
new file mode 100644
index 00000000..8f327ea2
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/vertex_ai/llm/gemini-1.5-pro-002.yaml
@@ -0,0 +1,37 @@
+model: gemini-1.5-pro-002
+label:
+ en_US: Gemini 1.5 Pro 002
+model_type: llm
+features:
+ - agent-thought
+ - vision
+model_properties:
+ mode: chat
+ context_size: 1048576
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ en_US: Top k
+ type: int
+ help:
+ en_US: Only sample from the top K options for each subsequent token.
+ required: false
+ - name: presence_penalty
+ use_template: presence_penalty
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ - name: max_output_tokens
+ use_template: max_tokens
+ required: true
+ default: 8192
+ min: 1
+ max: 8192
+pricing:
+ input: '0.00'
+ output: '0.00'
+ unit: '0.000001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/vertex_ai/llm/gemini-flash-experimental.yaml b/ai-provider/model-runtime/model-providers/vertex_ai/llm/gemini-flash-experimental.yaml
new file mode 100644
index 00000000..0f5eb34c
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/vertex_ai/llm/gemini-flash-experimental.yaml
@@ -0,0 +1,37 @@
+model: gemini-flash-experimental
+label:
+ en_US: Gemini Flash Experimental
+model_type: llm
+features:
+ - agent-thought
+ - vision
+model_properties:
+ mode: chat
+ context_size: 1048576
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ en_US: Top k
+ type: int
+ help:
+ en_US: Only sample from the top K options for each subsequent token.
+ required: false
+ - name: presence_penalty
+ use_template: presence_penalty
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ - name: max_output_tokens
+ use_template: max_tokens
+ required: true
+ default: 8192
+ min: 1
+ max: 8192
+pricing:
+ input: '0.00'
+ output: '0.00'
+ unit: '0.000001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/vertex_ai/llm/gemini-pro-experimental.yaml b/ai-provider/model-runtime/model-providers/vertex_ai/llm/gemini-pro-experimental.yaml
new file mode 100644
index 00000000..fa31cabb
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/vertex_ai/llm/gemini-pro-experimental.yaml
@@ -0,0 +1,37 @@
+model: gemini-pro-experimental
+label:
+ en_US: Gemini Pro Experimental
+model_type: llm
+features:
+ - agent-thought
+ - vision
+model_properties:
+ mode: chat
+ context_size: 1048576
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ en_US: Top k
+ type: int
+ help:
+ en_US: Only sample from the top K options for each subsequent token.
+ required: false
+ - name: presence_penalty
+ use_template: presence_penalty
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ - name: max_output_tokens
+ use_template: max_tokens
+ required: true
+ default: 8192
+ min: 1
+ max: 8192
+pricing:
+ input: '0.00'
+ output: '0.00'
+ unit: '0.000001'
+ currency: USD
diff --git a/ai-provider/model-runtime/model-providers/vertex_ai/vertex_ai.yaml b/ai-provider/model-runtime/model-providers/vertex_ai/vertex_ai.yaml
new file mode 100644
index 00000000..43945e5f
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/vertex_ai/vertex_ai.yaml
@@ -0,0 +1,44 @@
+provider: vertex_ai
+label:
+ en_US: Vertex AI | Google Cloud Platform
+description:
+ en_US: Vertex AI in Google Cloud Platform.
+icon_small:
+ en_US: icon_s_en.svg
+icon_large:
+ en_US: icon_l_en.svg
+background: "#FCFDFF"
+help:
+ title:
+ en_US: Get your Access Details from Google
+ url:
+ en_US: https://cloud.google.com/vertex-ai/
+supported_model_types:
+ - llm
+ - text-embedding
+configurate_methods:
+ - predefined-model
+provider_credential_schema:
+ credential_form_schemas:
+ - variable: vertex_project_id
+ label:
+ en_US: Project ID
+ type: text-input
+ required: true
+ placeholder:
+ en_US: Enter your Google Cloud Project ID
+ - variable: vertex_location
+ label:
+ en_US: Location
+ type: text-input
+ required: true
+ placeholder:
+ en_US: Enter your Google Cloud Location
+ - variable: vertex_service_account_key
+ label:
+ en_US: Service Account Key (Leave blank if you use Application Default Credentials)
+ type: secret-input
+ required: false
+ placeholder:
+ en_US: Enter your Google Cloud Service Account Key in base64 format
+address: https://api.openai.com
\ No newline at end of file
diff --git a/ai-provider/model-runtime/model-providers/wenxin/assets/icon_l_en.png b/ai-provider/model-runtime/model-providers/wenxin/assets/icon_l_en.png
new file mode 100644
index 00000000..fb50487c
Binary files /dev/null and b/ai-provider/model-runtime/model-providers/wenxin/assets/icon_l_en.png differ
diff --git a/ai-provider/model-runtime/model-providers/wenxin/assets/icon_l_en.svg b/ai-provider/model-runtime/model-providers/wenxin/assets/icon_l_en.svg
new file mode 100644
index 00000000..f1f0abe3
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/wenxin/assets/icon_l_en.svg
@@ -0,0 +1,2 @@
+
\ No newline at end of file
diff --git a/ai-provider/model-runtime/model-providers/wenxin/assets/icon_l_zh.png b/ai-provider/model-runtime/model-providers/wenxin/assets/icon_l_zh.png
new file mode 100644
index 00000000..669d3c7a
Binary files /dev/null and b/ai-provider/model-runtime/model-providers/wenxin/assets/icon_l_zh.png differ
diff --git a/ai-provider/model-runtime/model-providers/wenxin/assets/icon_l_zh.svg b/ai-provider/model-runtime/model-providers/wenxin/assets/icon_l_zh.svg
new file mode 100644
index 00000000..52df2f28
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/wenxin/assets/icon_l_zh.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/ai-provider/model-runtime/model-providers/wenxin/assets/icon_s_en.png b/ai-provider/model-runtime/model-providers/wenxin/assets/icon_s_en.png
new file mode 100644
index 00000000..92391995
Binary files /dev/null and b/ai-provider/model-runtime/model-providers/wenxin/assets/icon_s_en.png differ
diff --git a/ai-provider/model-runtime/model-providers/wenxin/assets/icon_s_en.svg b/ai-provider/model-runtime/model-providers/wenxin/assets/icon_s_en.svg
new file mode 100644
index 00000000..2f1fc405
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/wenxin/assets/icon_s_en.svg
@@ -0,0 +1,2 @@
+
\ No newline at end of file
diff --git a/ai-provider/model-runtime/model-providers/wenxin/llm/ernie-3.5-128k.yaml b/ai-provider/model-runtime/model-providers/wenxin/llm/ernie-3.5-128k.yaml
new file mode 100644
index 00000000..b1b1ba1f
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/wenxin/llm/ernie-3.5-128k.yaml
@@ -0,0 +1,37 @@
+model: ernie-3.5-128k
+label:
+ en_US: Ernie-3.5-128K
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 128000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ min: 0.1
+ max: 1.0
+ default: 0.8
+ - name: top_p
+ use_template: top_p
+ - name: max_tokens
+ use_template: max_tokens
+ default: 4096
+ min: 2
+ max: 4096
+ - name: presence_penalty
+ use_template: presence_penalty
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ - name: response_format
+ use_template: response_format
+ - name: disable_search
+ label:
+ zh_Hans: 禁用搜索
+ en_US: Disable Search
+ type: boolean
+ help:
+ zh_Hans: 禁用模型自行进行外部搜索。
+ en_US: Prevent the model from performing external searches on its own.
+ required: false
diff --git a/ai-provider/model-runtime/model-providers/wenxin/llm/ernie-3.5-4k-0205.yaml b/ai-provider/model-runtime/model-providers/wenxin/llm/ernie-3.5-4k-0205.yaml
new file mode 100644
index 00000000..1e8cf964
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/wenxin/llm/ernie-3.5-4k-0205.yaml
@@ -0,0 +1,38 @@
+model: ernie-3.5-4k-0205
+label:
+ en_US: Ernie-3.5-4k-0205
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 4096
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ min: 0.1
+ max: 1.0
+ default: 0.8
+ - name: top_p
+ use_template: top_p
+ - name: max_tokens
+ use_template: max_tokens
+ default: 1024
+ min: 2
+ max: 2048
+ - name: presence_penalty
+ use_template: presence_penalty
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ - name: response_format
+ use_template: response_format
+ - name: disable_search
+ label:
+ zh_Hans: 禁用搜索
+ en_US: Disable Search
+ type: boolean
+ help:
+ zh_Hans: 禁用模型自行进行外部搜索。
+ en_US: Prevent the model from performing external searches on its own.
+ required: false
+deprecated: true
diff --git a/ai-provider/model-runtime/model-providers/wenxin/llm/ernie-3.5-8k-0205.yaml b/ai-provider/model-runtime/model-providers/wenxin/llm/ernie-3.5-8k-0205.yaml
new file mode 100644
index 00000000..b308abcb
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/wenxin/llm/ernie-3.5-8k-0205.yaml
@@ -0,0 +1,38 @@
+model: ernie-3.5-8k-0205
+label:
+ en_US: Ernie-3.5-8K-0205
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 8192
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ min: 0.1
+ max: 1.0
+ default: 0.8
+ - name: top_p
+ use_template: top_p
+ - name: max_tokens
+ use_template: max_tokens
+ default: 1024
+ min: 2
+ max: 2048
+ - name: presence_penalty
+ use_template: presence_penalty
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ - name: response_format
+ use_template: response_format
+ - name: disable_search
+ label:
+ zh_Hans: 禁用搜索
+ en_US: Disable Search
+ type: boolean
+ help:
+ zh_Hans: 禁用模型自行进行外部搜索。
+ en_US: Prevent the model from performing external searches.
+ required: false
+deprecated: true
diff --git a/ai-provider/model-runtime/model-providers/wenxin/llm/ernie-3.5-8k-1222.yaml b/ai-provider/model-runtime/model-providers/wenxin/llm/ernie-3.5-8k-1222.yaml
new file mode 100644
index 00000000..c43588cf
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/wenxin/llm/ernie-3.5-8k-1222.yaml
@@ -0,0 +1,38 @@
+model: ernie-3.5-8k-1222
+label:
+ en_US: Ernie-3.5-8K-1222
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 8192
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ min: 0.1
+ max: 1.0
+ default: 0.8
+ - name: top_p
+ use_template: top_p
+ - name: max_tokens
+ use_template: max_tokens
+ default: 1024
+ min: 2
+ max: 2048
+ - name: presence_penalty
+ use_template: presence_penalty
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ - name: response_format
+ use_template: response_format
+ - name: disable_search
+ label:
+ zh_Hans: 禁用搜索
+ en_US: Disable Search
+ type: boolean
+ help:
+ zh_Hans: 禁用模型自行进行外部搜索。
+ en_US: Prevent the model from performing external searches.
+ required: false
+deprecated: true
diff --git a/ai-provider/model-runtime/model-providers/wenxin/llm/ernie-3.5-8k.yaml b/ai-provider/model-runtime/model-providers/wenxin/llm/ernie-3.5-8k.yaml
new file mode 100644
index 00000000..145844a4
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/wenxin/llm/ernie-3.5-8k.yaml
@@ -0,0 +1,40 @@
+model: ernie-3.5-8k
+label:
+ en_US: Ernie-3.5-8K
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 8192
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ min: 0.1
+ max: 1.0
+ default: 0.8
+ - name: top_p
+ use_template: top_p
+ - name: max_tokens
+ use_template: max_tokens
+ default: 1024
+ min: 2
+ max: 2048
+ - name: presence_penalty
+ use_template: presence_penalty
+ default: 1.0
+ min: 1.0
+ max: 2.0
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ - name: response_format
+ use_template: response_format
+ - name: disable_search
+ label:
+ zh_Hans: 禁用搜索
+ en_US: Disable Search
+ type: boolean
+ help:
+ zh_Hans: 禁用模型自行进行外部搜索。
+ en_US: Prevent the model from performing external searches.
+ required: false
diff --git a/ai-provider/model-runtime/model-providers/wenxin/llm/ernie-4.0-8k-latest.yaml b/ai-provider/model-runtime/model-providers/wenxin/llm/ernie-4.0-8k-latest.yaml
new file mode 100644
index 00000000..d23ae0dc
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/wenxin/llm/ernie-4.0-8k-latest.yaml
@@ -0,0 +1,40 @@
+model: ernie-4.0-8k-latest
+label:
+ en_US: Ernie-4.0-8K-Latest
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 8192
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ min: 0.1
+ max: 1.0
+ default: 0.8
+ - name: top_p
+ use_template: top_p
+ - name: max_tokens
+ use_template: max_tokens
+ default: 1024
+ min: 2
+ max: 2048
+ - name: presence_penalty
+ use_template: presence_penalty
+ default: 1.0
+ min: 1.0
+ max: 2.0
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ - name: response_format
+ use_template: response_format
+ - name: disable_search
+ label:
+ zh_Hans: 禁用搜索
+ en_US: Disable Search
+ type: boolean
+ help:
+ zh_Hans: 禁用模型自行进行外部搜索。
+ en_US: Prevent the model from performing external searches.
+ required: false
diff --git a/ai-provider/model-runtime/model-providers/wenxin/llm/ernie-4.0-8k.yaml b/ai-provider/model-runtime/model-providers/wenxin/llm/ernie-4.0-8k.yaml
new file mode 100644
index 00000000..9ebb5c8c
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/wenxin/llm/ernie-4.0-8k.yaml
@@ -0,0 +1,40 @@
+model: ernie-4.0-8k
+label:
+ en_US: Ernie-4.0-8K
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 8192
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ min: 0.1
+ max: 1.0
+ default: 0.8
+ - name: top_p
+ use_template: top_p
+ - name: max_tokens
+ use_template: max_tokens
+ default: 1024
+ min: 2
+ max: 2048
+ - name: presence_penalty
+ use_template: presence_penalty
+ default: 1.0
+ min: 1.0
+ max: 2.0
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ - name: response_format
+ use_template: response_format
+ - name: disable_search
+ label:
+ zh_Hans: 禁用搜索
+ en_US: Disable Search
+ type: boolean
+ help:
+ zh_Hans: 禁用模型自行进行外部搜索。
+ en_US: Prevent the model from performing external searches.
+ required: false
diff --git a/ai-provider/model-runtime/model-providers/wenxin/llm/ernie-4.0-turbo-8k-preview.yaml b/ai-provider/model-runtime/model-providers/wenxin/llm/ernie-4.0-turbo-8k-preview.yaml
new file mode 100644
index 00000000..16df5402
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/wenxin/llm/ernie-4.0-turbo-8k-preview.yaml
@@ -0,0 +1,40 @@
+model: ernie-4.0-turbo-8k-preview
+label:
+ en_US: Ernie-4.0-turbo-8k-preview
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 8192
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ min: 0.1
+ max: 1.0
+ default: 0.8
+ - name: top_p
+ use_template: top_p
+ - name: max_tokens
+ use_template: max_tokens
+ default: 1024
+ min: 2
+ max: 2048
+ - name: presence_penalty
+ use_template: presence_penalty
+ default: 1.0
+ min: 1.0
+ max: 2.0
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ - name: response_format
+ use_template: response_format
+ - name: disable_search
+ label:
+ zh_Hans: 禁用搜索
+ en_US: Disable Search
+ type: boolean
+ help:
+ zh_Hans: 禁用模型自行进行外部搜索。
+ en_US: Prevent the model from performing external searches.
+ required: false
diff --git a/ai-provider/model-runtime/model-providers/wenxin/llm/ernie-4.0-turbo-8k.yaml b/ai-provider/model-runtime/model-providers/wenxin/llm/ernie-4.0-turbo-8k.yaml
new file mode 100644
index 00000000..2887a510
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/wenxin/llm/ernie-4.0-turbo-8k.yaml
@@ -0,0 +1,40 @@
+model: ernie-4.0-turbo-8k
+label:
+ en_US: Ernie-4.0-turbo-8K
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 8192
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ min: 0.1
+ max: 1.0
+ default: 0.8
+ - name: top_p
+ use_template: top_p
+ - name: max_tokens
+ use_template: max_tokens
+ default: 1024
+ min: 2
+ max: 2048
+ - name: presence_penalty
+ use_template: presence_penalty
+ default: 1.0
+ min: 1.0
+ max: 2.0
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ - name: response_format
+ use_template: response_format
+ - name: disable_search
+ label:
+ zh_Hans: 禁用搜索
+ en_US: Disable Search
+ type: boolean
+ help:
+ zh_Hans: 禁用模型自行进行外部搜索。
+ en_US: Prevent the model from performing external searches.
+ required: false
diff --git a/ai-provider/model-runtime/model-providers/wenxin/llm/ernie-bot-4.yaml b/ai-provider/model-runtime/model-providers/wenxin/llm/ernie-bot-4.yaml
new file mode 100644
index 00000000..f352787a
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/wenxin/llm/ernie-bot-4.yaml
@@ -0,0 +1,39 @@
+model: ernie-bot-4
+label:
+ en_US: Ernie Bot 4
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 4800
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ min: 0.1
+ max: 1.0
+ default: 0.8
+ - name: top_p
+ use_template: top_p
+ - name: max_tokens
+ use_template: max_tokens
+ required: true
+ default: 256
+ min: 1
+ max: 4800
+ - name: presence_penalty
+ use_template: presence_penalty
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ - name: response_format
+ use_template: response_format
+ - name: disable_search
+ label:
+ zh_Hans: 禁用搜索
+ en_US: Disable Search
+ type: boolean
+ help:
+ zh_Hans: 禁用模型自行进行外部搜索。
+ en_US: Prevent the model from performing external searches.
+ required: false
+deprecated: true
diff --git a/ai-provider/model-runtime/model-providers/wenxin/llm/ernie-bot-8k.yaml b/ai-provider/model-runtime/model-providers/wenxin/llm/ernie-bot-8k.yaml
new file mode 100644
index 00000000..fa4b7dd8
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/wenxin/llm/ernie-bot-8k.yaml
@@ -0,0 +1,39 @@
+model: ernie-bot-8k
+label:
+ en_US: Ernie Bot 8k
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 8000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ min: 0.1
+ max: 1.0
+ default: 0.8
+ - name: top_p
+ use_template: top_p
+ - name: max_tokens
+ use_template: max_tokens
+ required: true
+ default: 1024
+ min: 1
+ max: 8000
+ - name: presence_penalty
+ use_template: presence_penalty
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ - name: response_format
+ use_template: response_format
+ - name: disable_search
+ label:
+ zh_Hans: 禁用搜索
+ en_US: Disable Search
+ type: boolean
+ help:
+ zh_Hans: 禁用模型自行进行外部搜索。
+ en_US: Prevent the model from performing external searches.
+ required: false
+deprecated: true
diff --git a/ai-provider/model-runtime/model-providers/wenxin/llm/ernie-bot-turbo.yaml b/ai-provider/model-runtime/model-providers/wenxin/llm/ernie-bot-turbo.yaml
new file mode 100644
index 00000000..c94aa2db
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/wenxin/llm/ernie-bot-turbo.yaml
@@ -0,0 +1,30 @@
+model: ernie-bot-turbo
+label:
+ en_US: Ernie Bot Turbo
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 11200
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ min: 0.1
+ max: 1.0
+ default: 0.8
+ - name: top_p
+ use_template: top_p
+ - name: max_tokens
+ use_template: max_tokens
+ required: true
+ default: 1024
+ min: 1
+ max: 11200
+ - name: presence_penalty
+ use_template: presence_penalty
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ - name: response_format
+ use_template: response_format
+deprecated: true
diff --git a/ai-provider/model-runtime/model-providers/wenxin/llm/ernie-bot.yaml b/ai-provider/model-runtime/model-providers/wenxin/llm/ernie-bot.yaml
new file mode 100644
index 00000000..13985b74
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/wenxin/llm/ernie-bot.yaml
@@ -0,0 +1,39 @@
+model: ernie-bot
+label:
+ en_US: Ernie Bot
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 4800
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ min: 0.1
+ max: 1.0
+ default: 0.8
+ - name: top_p
+ use_template: top_p
+ - name: max_tokens
+ use_template: max_tokens
+ required: true
+ default: 256
+ min: 1
+ max: 4800
+ - name: presence_penalty
+ use_template: presence_penalty
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ - name: disable_search
+ label:
+ zh_Hans: 禁用搜索
+ en_US: Disable Search
+ type: boolean
+ help:
+ zh_Hans: 禁用模型自行进行外部搜索。
+ en_US: Prevent the model from performing external searches.
+ required: false
+ - name: response_format
+ use_template: response_format
+deprecated: true
diff --git a/ai-provider/model-runtime/model-providers/wenxin/llm/ernie-character-8k-0321.yaml b/ai-provider/model-runtime/model-providers/wenxin/llm/ernie-character-8k-0321.yaml
new file mode 100644
index 00000000..74451ff9
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/wenxin/llm/ernie-character-8k-0321.yaml
@@ -0,0 +1,31 @@
+model: ernie-character-8k-0321
+label:
+ en_US: ERNIE-Character-8K-0321
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 8192
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ min: 0.1
+ max: 1.0
+ default: 0.95
+ - name: top_p
+ use_template: top_p
+ min: 0
+ max: 1.0
+ default: 0.7
+ - name: max_tokens
+ use_template: max_tokens
+ default: 1024
+ min: 2
+ max: 1024
+ - name: presence_penalty
+ use_template: presence_penalty
+ default: 1.0
+ min: 1.0
+ max: 2.0
+deprecated: true
diff --git a/ai-provider/model-runtime/model-providers/wenxin/llm/ernie-character-8k.yaml b/ai-provider/model-runtime/model-providers/wenxin/llm/ernie-character-8k.yaml
new file mode 100644
index 00000000..4b11b3e8
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/wenxin/llm/ernie-character-8k.yaml
@@ -0,0 +1,30 @@
+model: ernie-character-8k
+label:
+ en_US: ERNIE-Character-8K
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 8192
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ min: 0.1
+ max: 1.0
+ default: 0.95
+ - name: top_p
+ use_template: top_p
+ min: 0
+ max: 1.0
+ default: 0.7
+ - name: max_tokens
+ use_template: max_tokens
+ default: 1024
+ min: 2
+ max: 1024
+ - name: presence_penalty
+ use_template: presence_penalty
+ default: 1.0
+ min: 1.0
+ max: 2.0
diff --git a/ai-provider/model-runtime/model-providers/wenxin/llm/ernie-lite-8k-0308.yaml b/ai-provider/model-runtime/model-providers/wenxin/llm/ernie-lite-8k-0308.yaml
new file mode 100644
index 00000000..97ecb03f
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/wenxin/llm/ernie-lite-8k-0308.yaml
@@ -0,0 +1,31 @@
+model: ernie-lite-8k-0308
+label:
+ en_US: ERNIE-Lite-8K-0308
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 8192
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ min: 0.1
+ max: 1.0
+ default: 0.95
+ - name: top_p
+ use_template: top_p
+ min: 0
+ max: 1.0
+ default: 0.7
+ - name: max_tokens
+ use_template: max_tokens
+ default: 1024
+ min: 2
+ max: 2048
+ - name: presence_penalty
+ use_template: presence_penalty
+ default: 1.0
+ min: 1.0
+ max: 2.0
+deprecated: true
diff --git a/ai-provider/model-runtime/model-providers/wenxin/llm/ernie-lite-8k-0922.yaml b/ai-provider/model-runtime/model-providers/wenxin/llm/ernie-lite-8k-0922.yaml
new file mode 100644
index 00000000..7410ce51
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/wenxin/llm/ernie-lite-8k-0922.yaml
@@ -0,0 +1,31 @@
+model: ernie-lite-8k-0922
+label:
+ en_US: ERNIE-Lite-8K-0922
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 8192
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ min: 0.1
+ max: 1.0
+ default: 0.95
+ - name: top_p
+ use_template: top_p
+ min: 0
+ max: 1.0
+ default: 0.7
+ - name: max_tokens
+ use_template: max_tokens
+ default: 1024
+ min: 2
+ max: 1024
+ - name: presence_penalty
+ use_template: presence_penalty
+ default: 1.0
+ min: 1.0
+ max: 2.0
+deprecated: true
diff --git a/ai-provider/model-runtime/model-providers/wenxin/llm/ernie-speed-128k.yaml b/ai-provider/model-runtime/model-providers/wenxin/llm/ernie-speed-128k.yaml
new file mode 100644
index 00000000..33163962
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/wenxin/llm/ernie-speed-128k.yaml
@@ -0,0 +1,30 @@
+model: ernie-speed-128k
+label:
+ en_US: ERNIE-Speed-128K
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 128000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ min: 0.1
+ max: 1.0
+ default: 0.95
+ - name: top_p
+ use_template: top_p
+ min: 0
+ max: 1.0
+ default: 0.7
+ - name: max_tokens
+ use_template: max_tokens
+ default: 4096
+ min: 2
+ max: 4096
+ - name: presence_penalty
+ use_template: presence_penalty
+ default: 1.0
+ min: 1.0
+ max: 2.0
diff --git a/ai-provider/model-runtime/model-providers/wenxin/llm/ernie-speed-8k.yaml b/ai-provider/model-runtime/model-providers/wenxin/llm/ernie-speed-8k.yaml
new file mode 100644
index 00000000..304c6d1f
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/wenxin/llm/ernie-speed-8k.yaml
@@ -0,0 +1,30 @@
+model: ernie-speed-8k
+label:
+ en_US: ERNIE-Speed-8K
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 8192
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ min: 0.1
+ max: 1.0
+ default: 0.95
+ - name: top_p
+ use_template: top_p
+ min: 0
+ max: 1.0
+ default: 0.7
+ - name: max_tokens
+ use_template: max_tokens
+ default: 1024
+ min: 2
+ max: 2048
+ - name: presence_penalty
+ use_template: presence_penalty
+ default: 1.0
+ min: 1.0
+ max: 2.0
diff --git a/ai-provider/model-runtime/model-providers/wenxin/llm/ernie-speed-appbuilder.yaml b/ai-provider/model-runtime/model-providers/wenxin/llm/ernie-speed-appbuilder.yaml
new file mode 100644
index 00000000..c254ae02
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/wenxin/llm/ernie-speed-appbuilder.yaml
@@ -0,0 +1,25 @@
+model: ernie-speed-appbuilder
+label:
+ en_US: ERNIE-Speed-AppBuilder
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 8192
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ min: 0.1
+ max: 1.0
+ default: 0.95
+ - name: top_p
+ use_template: top_p
+ min: 0
+ max: 1.0
+ default: 0.7
+ - name: presence_penalty
+ use_template: presence_penalty
+ default: 1.0
+ min: 1.0
+ max: 2.0
diff --git a/ai-provider/model-runtime/model-providers/wenxin/llm/yi_34b_chat.yaml b/ai-provider/model-runtime/model-providers/wenxin/llm/yi_34b_chat.yaml
new file mode 100644
index 00000000..0b247fbd
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/wenxin/llm/yi_34b_chat.yaml
@@ -0,0 +1,30 @@
+model: yi_34b_chat
+label:
+ en_US: yi_34b_chat
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 32000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ min: 0.1
+ max: 1.0
+ default: 0.95
+ - name: top_p
+ use_template: top_p
+ min: 0
+ max: 1.0
+ default: 0.7
+ - name: max_tokens
+ use_template: max_tokens
+ default: 4096
+ min: 2
+ max: 4096
+ - name: presence_penalty
+ use_template: presence_penalty
+ default: 1.0
+ min: 1.0
+ max: 2.0
diff --git a/ai-provider/model-runtime/model-providers/wenxin/wenxin.yaml b/ai-provider/model-runtime/model-providers/wenxin/wenxin.yaml
new file mode 100644
index 00000000..bfcf023b
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/wenxin/wenxin.yaml
@@ -0,0 +1,41 @@
+provider: wenxin
+label:
+ en_US: WenXin
+ zh_Hans: 文心一言
+icon_small:
+ en_US: icon_s_en.svg
+ zh_Hans: icon_s_en.svg
+icon_large:
+ en_US: icon_l_en.svg
+ zh_Hans: icon_l_zh.svg
+background: "#E8F5FE"
+help:
+ title:
+ en_US: Get your API Key from WenXin
+ zh_Hans: 从文心一言获取您的 API Key
+ url:
+ en_US: https://cloud.baidu.com/wenxin.html
+supported_model_types:
+ - llm
+ - text-embedding
+configurate_methods:
+ - predefined-model
+provider_credential_schema:
+ credential_form_schemas:
+ - variable: api_key
+ label:
+ en_US: API Key
+ type: secret-input
+ required: true
+ placeholder:
+ zh_Hans: 在此输入您的 API Key
+ en_US: Enter your API Key
+ - variable: secret_key
+ label:
+ en_US: Secret Key
+ type: secret-input
+ required: true
+ placeholder:
+ zh_Hans: 在此输入您的 Secret Key
+ en_US: Enter your Secret Key
+address: https://api.openai.com
\ No newline at end of file
diff --git a/ai-provider/model-runtime/model-providers/yi/assets/icon_l_en.svg b/ai-provider/model-runtime/model-providers/yi/assets/icon_l_en.svg
new file mode 100644
index 00000000..9ce3badd
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/yi/assets/icon_l_en.svg
@@ -0,0 +1,12 @@
+
+
\ No newline at end of file
diff --git a/ai-provider/model-runtime/model-providers/yi/assets/icon_s_en.svg b/ai-provider/model-runtime/model-providers/yi/assets/icon_s_en.svg
new file mode 100644
index 00000000..eb0395a2
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/yi/assets/icon_s_en.svg
@@ -0,0 +1,8 @@
+
+
\ No newline at end of file
diff --git a/ai-provider/model-runtime/model-providers/yi/llm/yi-34b-chat-0205.yaml b/ai-provider/model-runtime/model-providers/yi/llm/yi-34b-chat-0205.yaml
new file mode 100644
index 00000000..ea3d8f5d
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/yi/llm/yi-34b-chat-0205.yaml
@@ -0,0 +1,43 @@
+model: yi-34b-chat-0205
+label:
+ zh_Hans: yi-34b-chat-0205
+ en_US: yi-34b-chat-0205
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 4096
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 控制生成结果的多样性和随机性。数值越小,越严谨;数值越大,越发散。
+ en_US: Control the diversity and randomness of generated results. The smaller the value, the more rigorous it is; the larger the value, the more divergent it is.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 512
+ min: 1
+ max: 4000
+ help:
+ zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+ en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.01
+ max: 1.00
+ help:
+ zh_Hans: 控制生成结果的随机性。数值越小,随机性越弱;数值越大,随机性越强。一般而言,top_p 和 temperature 两个参数选择一个进行调整即可。
+ en_US: Control the randomness of generated results. The smaller the value, the weaker the randomness; the larger the value, the stronger the randomness. Generally speaking, you can adjust one of the two parameters top_p and temperature.
+pricing:
+ input: '2.5'
+ output: '2.5'
+ unit: '0.000001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/yi/llm/yi-34b-chat-200k.yaml b/ai-provider/model-runtime/model-providers/yi/llm/yi-34b-chat-200k.yaml
new file mode 100644
index 00000000..d91f984d
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/yi/llm/yi-34b-chat-200k.yaml
@@ -0,0 +1,43 @@
+model: yi-34b-chat-200k
+label:
+ zh_Hans: yi-34b-chat-200k
+ en_US: yi-34b-chat-200k
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 200000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.6
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 控制生成结果的多样性和随机性。数值越小,越严谨;数值越大,越发散。
+ en_US: Control the diversity and randomness of generated results. The smaller the value, the more rigorous it is; the larger the value, the more divergent it is.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 4096
+ min: 1
+ max: 199950
+ help:
+ zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+ en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.9
+ min: 0.01
+ max: 1.00
+ help:
+ zh_Hans: 控制生成结果的随机性。数值越小,随机性越弱;数值越大,随机性越强。一般而言,top_p 和 temperature 两个参数选择一个进行调整即可。
+ en_US: Control the randomness of generated results. The smaller the value, the weaker the randomness; the larger the value, the stronger the randomness. Generally speaking, you can adjust one of the two parameters top_p and temperature.
+pricing:
+ input: '12'
+ output: '12'
+ unit: '0.000001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/yi/llm/yi-large-turbo.yaml b/ai-provider/model-runtime/model-providers/yi/llm/yi-large-turbo.yaml
new file mode 100644
index 00000000..1d00eca2
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/yi/llm/yi-large-turbo.yaml
@@ -0,0 +1,43 @@
+model: yi-large-turbo
+label:
+ zh_Hans: yi-large-turbo
+ en_US: yi-large-turbo
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 16384
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 控制生成结果的多样性和随机性。数值越小,越严谨;数值越大,越发散。
+ en_US: Control the diversity and randomness of generated results. The smaller the value, the more rigorous it is; the larger the value, the more divergent it is.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 1024
+ min: 1
+ max: 16384
+ help:
+ zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+ en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.9
+ min: 0.01
+ max: 1.00
+ help:
+ zh_Hans: 控制生成结果的随机性。数值越小,随机性越弱;数值越大,随机性越强。一般而言,top_p 和 temperature 两个参数选择一个进行调整即可。
+ en_US: Control the randomness of generated results. The smaller the value, the weaker the randomness; the larger the value, the stronger the randomness. Generally speaking, you can adjust one of the two parameters top_p and temperature.
+pricing:
+ input: '12'
+ output: '12'
+ unit: '0.000001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/yi/llm/yi-large.yaml b/ai-provider/model-runtime/model-providers/yi/llm/yi-large.yaml
new file mode 100644
index 00000000..347f5112
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/yi/llm/yi-large.yaml
@@ -0,0 +1,43 @@
+model: yi-large
+label:
+ zh_Hans: yi-large
+ en_US: yi-large
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 16384
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 控制生成结果的多样性和随机性。数值越小,越严谨;数值越大,越发散。
+ en_US: Control the diversity and randomness of generated results. The smaller the value, the more rigorous it is; the larger the value, the more divergent it is.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 1024
+ min: 1
+ max: 16384
+ help:
+ zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+ en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.9
+ min: 0.01
+ max: 1.00
+ help:
+ zh_Hans: 控制生成结果的随机性。数值越小,随机性越弱;数值越大,随机性越强。一般而言,top_p 和 temperature 两个参数选择一个进行调整即可。
+ en_US: Control the randomness of generated results. The smaller the value, the weaker the randomness; the larger the value, the stronger the randomness. Generally speaking, you can adjust one of the two parameters top_p and temperature.
+pricing:
+ input: '20'
+ output: '20'
+ unit: '0.000001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/yi/llm/yi-medium-200k.yaml b/ai-provider/model-runtime/model-providers/yi/llm/yi-medium-200k.yaml
new file mode 100644
index 00000000..e8ddbcba
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/yi/llm/yi-medium-200k.yaml
@@ -0,0 +1,43 @@
+model: yi-medium-200k
+label:
+ zh_Hans: yi-medium-200k
+ en_US: yi-medium-200k
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 204800
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 控制生成结果的多样性和随机性。数值越小,越严谨;数值越大,越发散。
+ en_US: Control the diversity and randomness of generated results. The smaller the value, the more rigorous it is; the larger the value, the more divergent it is.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 1024
+ min: 1
+ max: 204800
+ help:
+ zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+ en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.9
+ min: 0.01
+ max: 1.00
+ help:
+ zh_Hans: 控制生成结果的随机性。数值越小,随机性越弱;数值越大,随机性越强。一般而言,top_p 和 temperature 两个参数选择一个进行调整即可。
+ en_US: Control the randomness of generated results. The smaller the value, the weaker the randomness; the larger the value, the stronger the randomness. Generally speaking, you can adjust one of the two parameters top_p and temperature.
+pricing:
+ input: '12'
+ output: '12'
+ unit: '0.000001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/yi/llm/yi-medium.yaml b/ai-provider/model-runtime/model-providers/yi/llm/yi-medium.yaml
new file mode 100644
index 00000000..4f0244d1
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/yi/llm/yi-medium.yaml
@@ -0,0 +1,43 @@
+model: yi-medium
+label:
+ zh_Hans: yi-medium
+ en_US: yi-medium
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 16384
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 控制生成结果的多样性和随机性。数值越小,越严谨;数值越大,越发散。
+ en_US: Control the diversity and randomness of generated results. The smaller the value, the more rigorous it is; the larger the value, the more divergent it is.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 1024
+ min: 1
+ max: 16384
+ help:
+ zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+ en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.9
+ min: 0.01
+ max: 1.00
+ help:
+ zh_Hans: 控制生成结果的随机性。数值越小,随机性越弱;数值越大,随机性越强。一般而言,top_p 和 temperature 两个参数选择一个进行调整即可。
+ en_US: Control the randomness of generated results. The smaller the value, the weaker the randomness; the larger the value, the stronger the randomness. Generally speaking, you can adjust one of the two parameters top_p and temperature.
+pricing:
+ input: '2.5'
+ output: '2.5'
+ unit: '0.000001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/yi/llm/yi-spark.yaml b/ai-provider/model-runtime/model-providers/yi/llm/yi-spark.yaml
new file mode 100644
index 00000000..e28e9fd8
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/yi/llm/yi-spark.yaml
@@ -0,0 +1,43 @@
+model: yi-spark
+label:
+ zh_Hans: yi-spark
+ en_US: yi-spark
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 16384
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 控制生成结果的多样性和随机性。数值越小,越严谨;数值越大,越发散。
+ en_US: Control the diversity and randomness of generated results. The smaller the value, the more rigorous it is; the larger the value, the more divergent it is.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 1024
+ min: 1
+ max: 16384
+ help:
+ zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+ en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.9
+ min: 0.01
+ max: 1.00
+ help:
+ zh_Hans: 控制生成结果的随机性。数值越小,随机性越弱;数值越大,随机性越强。一般而言,top_p 和 temperature 两个参数选择一个进行调整即可。
+ en_US: Control the randomness of generated results. The smaller the value, the weaker the randomness; the larger the value, the stronger the randomness. Generally speaking, you can adjust one of the two parameters top_p and temperature.
+pricing:
+ input: '1'
+ output: '1'
+ unit: '0.000001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/yi/llm/yi-vision.yaml b/ai-provider/model-runtime/model-providers/yi/llm/yi-vision.yaml
new file mode 100644
index 00000000..bce34f58
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/yi/llm/yi-vision.yaml
@@ -0,0 +1,44 @@
+model: yi-vision
+label:
+ zh_Hans: yi-vision
+ en_US: yi-vision
+model_type: llm
+features:
+ - agent-thought
+ - vision
+model_properties:
+ mode: chat
+ context_size: 4096
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 控制生成结果的多样性和随机性。数值越小,越严谨;数值越大,越发散。
+ en_US: Control the diversity and randomness of generated results. The smaller the value, the more rigorous it is; the larger the value, the more divergent it is.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 1024
+ min: 1
+ max: 4096
+ help:
+ zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+ en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.9
+ min: 0.01
+ max: 1.00
+ help:
+ zh_Hans: 控制生成结果的随机性。数值越小,随机性越弱;数值越大,随机性越强。一般而言,top_p 和 temperature 两个参数选择一个进行调整即可。
+ en_US: Control the randomness of generated results. The smaller the value, the weaker the randomness; the larger the value, the stronger the randomness. Generally speaking, you can adjust one of the two parameters top_p and temperature.
+pricing:
+ input: '6'
+ output: '6'
+ unit: '0.000001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/yi/llm/yi-vl-plus.yaml b/ai-provider/model-runtime/model-providers/yi/llm/yi-vl-plus.yaml
new file mode 100644
index 00000000..461c6858
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/yi/llm/yi-vl-plus.yaml
@@ -0,0 +1,43 @@
+model: yi-vl-plus
+label:
+ zh_Hans: yi-vl-plus
+ en_US: yi-vl-plus
+model_type: llm
+features:
+ - vision
+model_properties:
+ mode: chat
+ context_size: 4096
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 控制生成结果的多样性和随机性。数值越小,越严谨;数值越大,越发散。
+ en_US: Control the diversity and randomness of generated results. The smaller the value, the more rigorous it is; the larger the value, the more divergent it is.
+ - name: max_tokens
+ use_template: max_tokens
+ type: int
+ default: 512
+ min: 1
+ max: 4000
+ help:
+ zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+ en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+ - name: top_p
+ use_template: top_p
+ type: float
+ default: 0.8
+ min: 0.01
+ max: 1.00
+ help:
+ zh_Hans: 控制生成结果的随机性。数值越小,随机性越弱;数值越大,随机性越强。一般而言,top_p 和 temperature 两个参数选择一个进行调整即可。
+ en_US: Control the randomness of generated results. The smaller the value, the weaker the randomness; the larger the value, the stronger the randomness. Generally speaking, you can adjust one of the two parameters top_p and temperature.
+pricing:
+ input: '6'
+ output: '6'
+ unit: '0.000001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/yi/yi.yaml b/ai-provider/model-runtime/model-providers/yi/yi.yaml
new file mode 100644
index 00000000..bd5f3e37
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/yi/yi.yaml
@@ -0,0 +1,42 @@
+provider: yi
+label:
+ en_US: 01.AI
+ zh_Hans: 零一万物
+description:
+ en_US: Models provided by 01.AI, such as yi-34b-chat and yi-vl-plus.
+ zh_Hans: 零一万物提供的模型,例如 yi-34b-chat 和 yi-vl-plus。
+icon_small:
+ en_US: icon_s_en.svg
+icon_large:
+ en_US: icon_l_en.svg
+background: "#E9F1EC"
+help:
+ title:
+ en_US: Get your API Key from 01.ai
+ zh_Hans: 从零一万物获取 API Key
+ url:
+ en_US: https://platform.lingyiwanwu.com/apikeys
+supported_model_types:
+ - llm
+configurate_methods:
+ - predefined-model
+provider_credential_schema:
+ credential_form_schemas:
+ - variable: api_key
+ label:
+ en_US: API Key
+ type: secret-input
+ required: true
+ placeholder:
+ zh_Hans: 在此输入您的 API Key
+ en_US: Enter your API Key
+ - variable: endpoint_url
+ label:
+ zh_Hans: 自定义 API endpoint 地址
+ en_US: Custom API endpoint URL
+ type: text-input
+ required: false
+ placeholder:
+ zh_Hans: Base URL, e.g. https://api.lingyiwanwu.com/v1
+ en_US: Base URL, e.g. https://api.lingyiwanwu.com/v1
+address: https://api.openai.com
\ No newline at end of file
diff --git a/ai-provider/model-runtime/model-providers/zhinao/assets/icon_l_en.svg b/ai-provider/model-runtime/model-providers/zhinao/assets/icon_l_en.svg
new file mode 100644
index 00000000..b22b8694
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/zhinao/assets/icon_l_en.svg
@@ -0,0 +1,8 @@
+
+
+
diff --git a/ai-provider/model-runtime/model-providers/zhinao/assets/icon_s_en.svg b/ai-provider/model-runtime/model-providers/zhinao/assets/icon_s_en.svg
new file mode 100644
index 00000000..8fe72b7d
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/zhinao/assets/icon_s_en.svg
@@ -0,0 +1,8 @@
+
+
+
diff --git a/ai-provider/model-runtime/model-providers/zhinao/llm/360gpt-turbo-responsibility-8k.yaml b/ai-provider/model-runtime/model-providers/zhinao/llm/360gpt-turbo-responsibility-8k.yaml
new file mode 100644
index 00000000..f420df00
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/zhinao/llm/360gpt-turbo-responsibility-8k.yaml
@@ -0,0 +1,36 @@
+model: 360gpt-turbo-responsibility-8k
+label:
+ zh_Hans: 360gpt-turbo-responsibility-8k
+ en_US: 360gpt-turbo-responsibility-8k
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 8192
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ min: 0
+ max: 1
+ default: 0.5
+ - name: top_p
+ use_template: top_p
+ min: 0
+ max: 1
+ default: 1
+ - name: max_tokens
+ use_template: max_tokens
+ min: 1
+ max: 8192
+ default: 1024
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ min: -2
+ max: 2
+ default: 0
+ - name: presence_penalty
+ use_template: presence_penalty
+ min: -2
+ max: 2
+ default: 0
diff --git a/ai-provider/model-runtime/model-providers/zhinao/llm/360gpt-turbo.yaml b/ai-provider/model-runtime/model-providers/zhinao/llm/360gpt-turbo.yaml
new file mode 100644
index 00000000..a2658fbe
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/zhinao/llm/360gpt-turbo.yaml
@@ -0,0 +1,36 @@
+model: 360gpt-turbo
+label:
+ zh_Hans: 360gpt-turbo
+ en_US: 360gpt-turbo
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 2048
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ min: 0
+ max: 1
+ default: 0.5
+ - name: top_p
+ use_template: top_p
+ min: 0
+ max: 1
+ default: 1
+ - name: max_tokens
+ use_template: max_tokens
+ min: 1
+ max: 2048
+ default: 1024
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ min: -2
+ max: 2
+ default: 0
+ - name: presence_penalty
+ use_template: presence_penalty
+ min: -2
+ max: 2
+ default: 0
diff --git a/ai-provider/model-runtime/model-providers/zhinao/llm/360gpt2-pro.yaml b/ai-provider/model-runtime/model-providers/zhinao/llm/360gpt2-pro.yaml
new file mode 100644
index 00000000..00c81eb1
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/zhinao/llm/360gpt2-pro.yaml
@@ -0,0 +1,36 @@
+model: 360gpt2-pro
+label:
+ zh_Hans: 360gpt2-pro
+ en_US: 360gpt2-pro
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 2048
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ min: 0
+ max: 1
+ default: 0.5
+ - name: top_p
+ use_template: top_p
+ min: 0
+ max: 1
+ default: 1
+ - name: max_tokens
+ use_template: max_tokens
+ min: 1
+ max: 2048
+ default: 1024
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ min: -2
+ max: 2
+ default: 0
+ - name: presence_penalty
+ use_template: presence_penalty
+ min: -2
+ max: 2
+ default: 0
diff --git a/ai-provider/model-runtime/model-providers/zhinao/zhinao.yaml b/ai-provider/model-runtime/model-providers/zhinao/zhinao.yaml
new file mode 100644
index 00000000..39a2c595
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/zhinao/zhinao.yaml
@@ -0,0 +1,33 @@
+provider: zhinao
+label:
+ en_US: 360 AI
+ zh_Hans: 360 智脑
+description:
+ en_US: Models provided by 360 AI.
+ zh_Hans: 360 智脑提供的模型。
+icon_small:
+ en_US: icon_s_en.svg
+icon_large:
+ en_US: icon_l_en.svg
+background: "#e3f0ff"
+help:
+ title:
+ en_US: Get your API Key from 360 AI.
+ zh_Hans: 从360 智脑获取 API Key
+ url:
+ en_US: https://ai.360.com/platform/keys
+supported_model_types:
+ - llm
+configurate_methods:
+ - predefined-model
+provider_credential_schema:
+ credential_form_schemas:
+ - variable: api_key
+ label:
+ en_US: API Key
+ type: secret-input
+ required: true
+ placeholder:
+ zh_Hans: 在此输入您的 API Key
+ en_US: Enter your API Key
+address: https://api.openai.com
\ No newline at end of file
diff --git a/ai-provider/model-runtime/model-providers/zhipuai/assets/icon_l_en.svg b/ai-provider/model-runtime/model-providers/zhipuai/assets/icon_l_en.svg
new file mode 100644
index 00000000..d3249991
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/zhipuai/assets/icon_l_en.svg
@@ -0,0 +1,6 @@
+
diff --git a/ai-provider/model-runtime/model-providers/zhipuai/assets/icon_l_zh.svg b/ai-provider/model-runtime/model-providers/zhipuai/assets/icon_l_zh.svg
new file mode 100644
index 00000000..067ea2c4
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/zhipuai/assets/icon_l_zh.svg
@@ -0,0 +1,8 @@
+
diff --git a/ai-provider/model-runtime/model-providers/zhipuai/assets/icon_s_en.svg b/ai-provider/model-runtime/model-providers/zhipuai/assets/icon_s_en.svg
new file mode 100644
index 00000000..016f97dd
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/zhipuai/assets/icon_s_en.svg
@@ -0,0 +1,8 @@
+
diff --git a/ai-provider/model-runtime/model-providers/zhipuai/llm/chatglm_lite.yaml b/ai-provider/model-runtime/model-providers/zhipuai/llm/chatglm_lite.yaml
new file mode 100644
index 00000000..9778de1a
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/zhipuai/llm/chatglm_lite.yaml
@@ -0,0 +1,22 @@
+model: chatglm_lite
+label:
+ en_US: chatglm_lite
+model_type: llm
+model_properties:
+ mode: chat
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ default: 0.9
+ min: 0.0
+ max: 1.0
+ help:
+ zh_Hans: 采样温度,控制输出的随机性,必须为正数取值范围是:(0.0,1.0],不能等于 0,默认值为 0.95 值越大,会使输出更随机,更具创造性;值越小,输出会更加稳定或确定建议您根据应用场景调整 top_p 或 temperature 参数,但不要同时调整两个参数。
+ en_US: Sampling temperature, controls the randomness of the output, must be a positive number. The value range is (0.0,1.0], which cannot be equal to 0. The default value is 0.95. The larger the value, the more random and creative the output will be; the smaller the value, The output will be more stable or certain. It is recommended that you adjust the top_p or temperature parameters according to the application scenario, but do not adjust both parameters at the same time.
+ - name: top_p
+ use_template: top_p
+ default: 0.7
+ help:
+ zh_Hans: 用温度取样的另一种方法,称为核取样取值范围是:(0.0, 1.0) 开区间,不能等于 0 或 1,默认值为 0.7 模型考虑具有 top_p 概率质量tokens的结果例如:0.1 意味着模型解码器只考虑从前 10% 的概率的候选集中取 tokens 建议您根据应用场景调整 top_p 或 temperature 参数,但不要同时调整两个参数。
+ en_US: An alternative to temperature sampling, called nucleus sampling. The value range is the open interval (0.0, 1.0), so it cannot be equal to 0 or 1. The default value is 0.7. The model considers only the tokens that make up the top_p probability mass. For example, 0.1 means the model decoder only considers tokens from the candidate set with the top 10% probability mass. It is recommended that you adjust either the top_p or the temperature parameter according to the application scenario, but do not adjust both parameters at the same time.
+deprecated: true
diff --git a/ai-provider/model-runtime/model-providers/zhipuai/llm/chatglm_lite_32k.yaml b/ai-provider/model-runtime/model-providers/zhipuai/llm/chatglm_lite_32k.yaml
new file mode 100644
index 00000000..7836d964
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/zhipuai/llm/chatglm_lite_32k.yaml
@@ -0,0 +1,22 @@
+model: chatglm_lite_32k
+label:
+ en_US: chatglm_lite_32k
+model_type: llm
+model_properties:
+ mode: chat
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ default: 0.9
+ min: 0.0
+ max: 1.0
+ help:
+ zh_Hans: 采样温度,控制输出的随机性,必须为正数取值范围是:(0.0,1.0],不能等于 0,默认值为 0.95 值越大,会使输出更随机,更具创造性;值越小,输出会更加稳定或确定建议您根据应用场景调整 top_p 或 temperature 参数,但不要同时调整两个参数。
+ en_US: Sampling temperature, controls the randomness of the output, must be a positive number. The value range is (0.0,1.0], which cannot be equal to 0. The default value is 0.95. The larger the value, the more random and creative the output will be; the smaller the value, The output will be more stable or certain. It is recommended that you adjust the top_p or temperature parameters according to the application scenario, but do not adjust both parameters at the same time.
+ - name: top_p
+ use_template: top_p
+ default: 0.7
+ help:
+ zh_Hans: 用温度取样的另一种方法,称为核取样取值范围是:(0.0, 1.0) 开区间,不能等于 0 或 1,默认值为 0.7 模型考虑具有 top_p 概率质量tokens的结果例如:0.1 意味着模型解码器只考虑从前 10% 的概率的候选集中取 tokens 建议您根据应用场景调整 top_p 或 temperature 参数,但不要同时调整两个参数。
+ en_US: An alternative to temperature sampling, called nucleus sampling. The value range is the open interval (0.0, 1.0), so it cannot be equal to 0 or 1. The default value is 0.7. The model considers only the tokens that make up the top_p probability mass. For example, 0.1 means the model decoder only considers tokens from the candidate set with the top 10% probability mass. It is recommended that you adjust either the top_p or the temperature parameter according to the application scenario, but do not adjust both parameters at the same time.
+deprecated: true
diff --git a/ai-provider/model-runtime/model-providers/zhipuai/llm/chatglm_pro.yaml b/ai-provider/model-runtime/model-providers/zhipuai/llm/chatglm_pro.yaml
new file mode 100644
index 00000000..b3d53c81
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/zhipuai/llm/chatglm_pro.yaml
@@ -0,0 +1,22 @@
+model: chatglm_pro
+label:
+ en_US: chatglm_pro
+model_type: llm
+model_properties:
+ mode: chat
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ default: 0.9
+ min: 0.0
+ max: 1.0
+ help:
+ zh_Hans: 采样温度,控制输出的随机性,必须为正数取值范围是:(0.0,1.0],不能等于 0,默认值为 0.95 值越大,会使输出更随机,更具创造性;值越小,输出会更加稳定或确定建议您根据应用场景调整 top_p 或 temperature 参数,但不要同时调整两个参数。
+ en_US: Sampling temperature, controls the randomness of the output, must be a positive number. The value range is (0.0,1.0], which cannot be equal to 0. The default value is 0.95. The larger the value, the more random and creative the output will be; the smaller the value, The output will be more stable or certain. It is recommended that you adjust the top_p or temperature parameters according to the application scenario, but do not adjust both parameters at the same time.
+ - name: top_p
+ use_template: top_p
+ default: 0.7
+ help:
+ zh_Hans: 用温度取样的另一种方法,称为核取样取值范围是:(0.0, 1.0) 开区间,不能等于 0 或 1,默认值为 0.7 模型考虑具有 top_p 概率质量tokens的结果例如:0.1 意味着模型解码器只考虑从前 10% 的概率的候选集中取 tokens 建议您根据应用场景调整 top_p 或 temperature 参数,但不要同时调整两个参数。
+ en_US: An alternative to temperature sampling, called nucleus sampling. The value range is the open interval (0.0, 1.0), so it cannot be equal to 0 or 1. The default value is 0.7. The model considers only the tokens that make up the top_p probability mass. For example, 0.1 means the model decoder only considers tokens from the candidate set with the top 10% probability mass. It is recommended that you adjust either the top_p or the temperature parameter according to the application scenario, but do not adjust both parameters at the same time.
+deprecated: true
diff --git a/ai-provider/model-runtime/model-providers/zhipuai/llm/chatglm_std.yaml b/ai-provider/model-runtime/model-providers/zhipuai/llm/chatglm_std.yaml
new file mode 100644
index 00000000..7d8b9520
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/zhipuai/llm/chatglm_std.yaml
@@ -0,0 +1,22 @@
+model: chatglm_std
+label:
+ en_US: chatglm_std
+model_type: llm
+model_properties:
+ mode: chat
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ default: 0.9
+ min: 0.0
+ max: 1.0
+ help:
+ zh_Hans: 采样温度,控制输出的随机性,必须为正数取值范围是:(0.0,1.0],不能等于 0,默认值为 0.95 值越大,会使输出更随机,更具创造性;值越小,输出会更加稳定或确定建议您根据应用场景调整 top_p 或 temperature 参数,但不要同时调整两个参数。
+ en_US: Sampling temperature, controls the randomness of the output, must be a positive number. The value range is (0.0,1.0], which cannot be equal to 0. The default value is 0.95. The larger the value, the more random and creative the output will be; the smaller the value, The output will be more stable or certain. It is recommended that you adjust the top_p or temperature parameters according to the application scenario, but do not adjust both parameters at the same time.
+ - name: top_p
+ use_template: top_p
+ default: 0.7
+ help:
+ zh_Hans: 用温度取样的另一种方法,称为核取样取值范围是:(0.0, 1.0) 开区间,不能等于 0 或 1,默认值为 0.7 模型考虑具有 top_p 概率质量tokens的结果例如:0.1 意味着模型解码器只考虑从前 10% 的概率的候选集中取 tokens 建议您根据应用场景调整 top_p 或 temperature 参数,但不要同时调整两个参数。
+ en_US: An alternative to temperature sampling, called nucleus sampling. The value range is the open interval (0.0, 1.0), so it cannot be equal to 0 or 1. The default value is 0.7. The model considers only the tokens that make up the top_p probability mass. For example, 0.1 means the model decoder only considers tokens from the candidate set with the top 10% probability mass. It is recommended that you adjust either the top_p or the temperature parameter according to the application scenario, but do not adjust both parameters at the same time.
+deprecated: true
diff --git a/ai-provider/model-runtime/model-providers/zhipuai/llm/chatglm_turbo.yaml b/ai-provider/model-runtime/model-providers/zhipuai/llm/chatglm_turbo.yaml
new file mode 100644
index 00000000..fcd5c5ef
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/zhipuai/llm/chatglm_turbo.yaml
@@ -0,0 +1,51 @@
+model: chatglm_turbo
+label:
+ en_US: chatglm_turbo
+model_type: llm
+model_properties:
+ mode: chat
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ default: 0.95
+ min: 0.0
+ max: 1.0
+ help:
+ zh_Hans: 采样温度,控制输出的随机性,必须为正数取值范围是:(0.0,1.0],不能等于 0,默认值为 0.95 值越大,会使输出更随机,更具创造性;值越小,输出会更加稳定或确定建议您根据应用场景调整 top_p 或 temperature 参数,但不要同时调整两个参数。
+ en_US: Sampling temperature, controls the randomness of the output, must be a positive number. The value range is (0.0,1.0], which cannot be equal to 0. The default value is 0.95. The larger the value, the more random and creative the output will be; the smaller the value, The output will be more stable or certain. It is recommended that you adjust the top_p or temperature parameters according to the application scenario, but do not adjust both parameters at the same time.
+ - name: top_p
+ use_template: top_p
+ default: 0.7
+ help:
+ zh_Hans: 用温度取样的另一种方法,称为核取样取值范围是:(0.0, 1.0) 开区间,不能等于 0 或 1,默认值为 0.7 模型考虑具有 top_p 概率质量tokens的结果例如:0.1 意味着模型解码器只考虑从前 10% 的概率的候选集中取 tokens 建议您根据应用场景调整 top_p 或 temperature 参数,但不要同时调整两个参数。
+ en_US: An alternative to temperature sampling, called nucleus sampling. The value range is the open interval (0.0, 1.0), so it cannot be equal to 0 or 1. The default value is 0.7. The model considers only the tokens that make up the top_p probability mass. For example, 0.1 means the model decoder only considers tokens from the candidate set with the top 10% probability mass. It is recommended that you adjust either the top_p or the temperature parameter according to the application scenario, but do not adjust both parameters at the same time.
+ - name: do_sample
+ label:
+ zh_Hans: 采样策略
+ en_US: Sampling strategy
+ type: boolean
+ help:
+ zh_Hans: do_sample 为 true 时启用采样策略,do_sample 为 false 时采样策略 temperature、top_p 将不生效。默认值为 true。
+ en_US: When `do_sample` is set to true, the sampling strategy is enabled. When `do_sample` is set to false, the sampling strategies such as `temperature` and `top_p` will not take effect. The default value is true.
+ default: true
+ - name: stream
+ label:
+ zh_Hans: 流处理
+ en_US: Event Stream
+ type: boolean
+ help:
+ zh_Hans: 使用同步调用时,此参数应当设置为 false 或者省略。表示模型生成完所有内容后一次性返回所有内容。默认值为 false。如果设置为 true,模型将通过标准 Event Stream ,逐块返回模型生成内容。Event Stream 结束时会返回一条data:[DONE]消息。注意:在模型流式输出生成内容的过程中,我们会分批对模型生成内容进行检测,当检测到违法及不良信息时,API会返回错误码(1301)。开发者识别到错误码(1301),应及时采取(清屏、重启对话)等措施删除生成内容,并确保不将含有违法及不良信息的内容传递给模型继续生成,避免其造成负面影响。
+ en_US: When using synchronous invocation, this parameter should be set to false or omitted. It indicates that the model will return all the generated content at once after the generation is complete. The default value is false. If set to true, the model will return the generated content in chunks via the standard Event Stream. A data:[DONE] message will be sent at the end of the Event Stream. Note that during the model's streaming output process, we will batch check the generated content. If illegal or harmful information is detected, the API will return an error code (1301). Developers who identify error code (1301) should promptly take actions such as clearing the screen or restarting the conversation to delete the generated content. They should also ensure that no illegal or harmful content is passed back to the model for continued generation to avoid negative impacts.
+ default: false
+ - name: return_type
+ label:
+ zh_Hans: 回复类型
+ en_US: Return Type
+ type: string
+ help:
+ zh_Hans: 用于控制每次返回内容的类型,空或者没有此字段时默认按照 json_string 返回,json_string 返回标准的 JSON 字符串,text 返回原始的文本内容。
+ en_US: Used to control the type of content returned each time. When it is empty or does not have this field, it will be returned as json_string by default. json_string returns a standard JSON string, and text returns the original text content.
+ required: false
+ options:
+ - text
+ - json_string
diff --git a/ai-provider/model-runtime/model-providers/zhipuai/llm/glm-4-0520.yaml b/ai-provider/model-runtime/model-providers/zhipuai/llm/glm-4-0520.yaml
new file mode 100644
index 00000000..7fcf6922
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/zhipuai/llm/glm-4-0520.yaml
@@ -0,0 +1,62 @@
+model: glm-4-0520
+label:
+ en_US: glm-4-0520
+model_type: llm
+features:
+ - multi-tool-call
+ - agent-thought
+ - stream-tool-call
+model_properties:
+ mode: chat
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ default: 0.95
+ min: 0.0
+ max: 1.0
+ help:
+ zh_Hans: 采样温度,控制输出的随机性,必须为正数取值范围是:(0.0,1.0],不能等于 0,默认值为 0.95 值越大,会使输出更随机,更具创造性;值越小,输出会更加稳定或确定建议您根据应用场景调整 top_p 或 temperature 参数,但不要同时调整两个参数。
+ en_US: Sampling temperature, controls the randomness of the output, must be a positive number. The value range is (0.0,1.0], which cannot be equal to 0. The default value is 0.95. The larger the value, the more random and creative the output will be; the smaller the value, The output will be more stable or certain. It is recommended that you adjust the top_p or temperature parameters according to the application scenario, but do not adjust both parameters at the same time.
+ - name: top_p
+ use_template: top_p
+ default: 0.7
+ help:
+ zh_Hans: 用温度取样的另一种方法,称为核取样取值范围是:(0.0, 1.0) 开区间,不能等于 0 或 1,默认值为 0.7 模型考虑具有 top_p 概率质量tokens的结果例如:0.1 意味着模型解码器只考虑从前 10% 的概率的候选集中取 tokens 建议您根据应用场景调整 top_p 或 temperature 参数,但不要同时调整两个参数。
+ en_US: An alternative to temperature sampling, called nucleus sampling. The value range is the open interval (0.0, 1.0), so it cannot be equal to 0 or 1. The default value is 0.7. The model considers only the tokens that make up the top_p probability mass. For example, 0.1 means the model decoder only considers tokens from the candidate set with the top 10% probability mass. It is recommended that you adjust either the top_p or the temperature parameter according to the application scenario, but do not adjust both parameters at the same time.
+ - name: do_sample
+ label:
+ zh_Hans: 采样策略
+ en_US: Sampling strategy
+ type: boolean
+ help:
+ zh_Hans: do_sample 为 true 时启用采样策略,do_sample 为 false 时采样策略 temperature、top_p 将不生效。默认值为 true。
+ en_US: When `do_sample` is set to true, the sampling strategy is enabled. When `do_sample` is set to false, the sampling strategies such as `temperature` and `top_p` will not take effect. The default value is true.
+ default: true
+ - name: stream
+ label:
+ zh_Hans: 流处理
+ en_US: Event Stream
+ type: boolean
+ help:
+ zh_Hans: 使用同步调用时,此参数应当设置为 false 或者省略。表示模型生成完所有内容后一次性返回所有内容。默认值为 false。如果设置为 true,模型将通过标准 Event Stream ,逐块返回模型生成内容。Event Stream 结束时会返回一条data:[DONE]消息。注意:在模型流式输出生成内容的过程中,我们会分批对模型生成内容进行检测,当检测到违法及不良信息时,API会返回错误码(1301)。开发者识别到错误码(1301),应及时采取(清屏、重启对话)等措施删除生成内容,并确保不将含有违法及不良信息的内容传递给模型继续生成,避免其造成负面影响。
+ en_US: When using synchronous invocation, this parameter should be set to false or omitted. It indicates that the model will return all the generated content at once after the generation is complete. The default value is false. If set to true, the model will return the generated content in chunks via the standard Event Stream. A data:[DONE] message will be sent at the end of the Event Stream. Note that during the model's streaming output process, we will batch check the generated content. If illegal or harmful information is detected, the API will return an error code (1301). Developers who identify error code (1301) should promptly take actions such as clearing the screen or restarting the conversation to delete the generated content. They should also ensure that no illegal or harmful content is passed back to the model for continued generation to avoid negative impacts.
+ default: false
+ - name: max_tokens
+ use_template: max_tokens
+ default: 1024
+ min: 1
+ max: 4095
+ - name: web_search
+ type: boolean
+ label:
+ zh_Hans: 联网搜索
+ en_US: Web Search
+ default: false
+ help:
+ zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
+ en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
+pricing:
+ input: '0.1'
+ output: '0.1'
+ unit: '0.001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/zhipuai/llm/glm-4-air.yaml b/ai-provider/model-runtime/model-providers/zhipuai/llm/glm-4-air.yaml
new file mode 100644
index 00000000..fcd7c776
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/zhipuai/llm/glm-4-air.yaml
@@ -0,0 +1,62 @@
+model: glm-4-air
+label:
+ en_US: glm-4-air
+model_type: llm
+features:
+ - multi-tool-call
+ - agent-thought
+ - stream-tool-call
+model_properties:
+ mode: chat
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ default: 0.95
+ min: 0.0
+ max: 1.0
+ help:
+ zh_Hans: 采样温度,控制输出的随机性,必须为正数取值范围是:(0.0,1.0],不能等于 0,默认值为 0.95 值越大,会使输出更随机,更具创造性;值越小,输出会更加稳定或确定建议您根据应用场景调整 top_p 或 temperature 参数,但不要同时调整两个参数。
+ en_US: Sampling temperature, controls the randomness of the output, must be a positive number. The value range is (0.0,1.0], which cannot be equal to 0. The default value is 0.95. The larger the value, the more random and creative the output will be; the smaller the value, The output will be more stable or certain. It is recommended that you adjust the top_p or temperature parameters according to the application scenario, but do not adjust both parameters at the same time.
+ - name: top_p
+ use_template: top_p
+ default: 0.7
+ help:
+ zh_Hans: 用温度取样的另一种方法,称为核取样取值范围是:(0.0, 1.0) 开区间,不能等于 0 或 1,默认值为 0.7 模型考虑具有 top_p 概率质量tokens的结果例如:0.1 意味着模型解码器只考虑从前 10% 的概率的候选集中取 tokens 建议您根据应用场景调整 top_p 或 temperature 参数,但不要同时调整两个参数。
+ en_US: An alternative to temperature sampling, called nucleus sampling. The value range is the open interval (0.0, 1.0), so it cannot be equal to 0 or 1. The default value is 0.7. The model considers only the tokens that make up the top_p probability mass. For example, 0.1 means the model decoder only considers tokens from the candidate set with the top 10% probability mass. It is recommended that you adjust either the top_p or the temperature parameter according to the application scenario, but do not adjust both parameters at the same time.
+ - name: do_sample
+ label:
+ zh_Hans: 采样策略
+ en_US: Sampling strategy
+ type: boolean
+ help:
+ zh_Hans: do_sample 为 true 时启用采样策略,do_sample 为 false 时采样策略 temperature、top_p 将不生效。默认值为 true。
+ en_US: When `do_sample` is set to true, the sampling strategy is enabled. When `do_sample` is set to false, the sampling strategies such as `temperature` and `top_p` will not take effect. The default value is true.
+ default: true
+ - name: stream
+ label:
+ zh_Hans: 流处理
+ en_US: Event Stream
+ type: boolean
+ help:
+ zh_Hans: 使用同步调用时,此参数应当设置为 false 或者省略。表示模型生成完所有内容后一次性返回所有内容。默认值为 false。如果设置为 true,模型将通过标准 Event Stream ,逐块返回模型生成内容。Event Stream 结束时会返回一条data:[DONE]消息。注意:在模型流式输出生成内容的过程中,我们会分批对模型生成内容进行检测,当检测到违法及不良信息时,API会返回错误码(1301)。开发者识别到错误码(1301),应及时采取(清屏、重启对话)等措施删除生成内容,并确保不将含有违法及不良信息的内容传递给模型继续生成,避免其造成负面影响。
+ en_US: When using synchronous invocation, this parameter should be set to false or omitted. It indicates that the model will return all the generated content at once after the generation is complete. The default value is false. If set to true, the model will return the generated content in chunks via the standard Event Stream. A data:[DONE] message will be sent at the end of the Event Stream. Note that during the model's streaming output process, we will batch check the generated content. If illegal or harmful information is detected, the API will return an error code (1301). Developers who identify error code (1301) should promptly take actions such as clearing the screen or restarting the conversation to delete the generated content. They should also ensure that no illegal or harmful content is passed back to the model for continued generation to avoid negative impacts.
+ default: false
+ - name: max_tokens
+ use_template: max_tokens
+ default: 1024
+ min: 1
+ max: 4095
+ - name: web_search
+ type: boolean
+ label:
+ zh_Hans: 联网搜索
+ en_US: Web Search
+ default: false
+ help:
+ zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
+ en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
+pricing:
+ input: '0.001'
+ output: '0.001'
+ unit: '0.001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/zhipuai/llm/glm-4-airx.yaml b/ai-provider/model-runtime/model-providers/zhipuai/llm/glm-4-airx.yaml
new file mode 100644
index 00000000..c9ae5abf
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/zhipuai/llm/glm-4-airx.yaml
@@ -0,0 +1,62 @@
+model: glm-4-airx
+label:
+ en_US: glm-4-airx
+model_type: llm
+features:
+ - multi-tool-call
+ - agent-thought
+ - stream-tool-call
+model_properties:
+ mode: chat
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ default: 0.95
+ min: 0.0
+ max: 1.0
+ help:
+ zh_Hans: 采样温度,控制输出的随机性,必须为正数取值范围是:(0.0,1.0],不能等于 0,默认值为 0.95 值越大,会使输出更随机,更具创造性;值越小,输出会更加稳定或确定建议您根据应用场景调整 top_p 或 temperature 参数,但不要同时调整两个参数。
+ en_US: Sampling temperature controls the randomness of the output and must be a positive number. The value range is (0.0,1.0], which cannot be equal to 0. The default value is 0.95. The larger the value, the more random and creative the output will be; the smaller the value, the more stable or deterministic the output will be. It is recommended that you adjust the top_p or temperature parameters according to the application scenario, but do not adjust both parameters at the same time.
+ - name: top_p
+ use_template: top_p
+ default: 0.7
+ help:
+ zh_Hans: 用温度取样的另一种方法,称为核取样取值范围是:(0.0, 1.0) 开区间,不能等于 0 或 1,默认值为 0.7 模型考虑具有 top_p 概率质量tokens的结果例如:0.1 意味着模型解码器只考虑从前 10% 的概率的候选集中取 tokens 建议您根据应用场景调整 top_p 或 temperature 参数,但不要同时调整两个参数。
+ en_US: An alternative to temperature sampling, called nucleus sampling. The value range is the open interval (0.0, 1.0), which cannot be equal to 0 or 1. The default value is 0.7. The model considers the tokens that make up the top_p probability mass. For example, 0.1 means the model decoder only considers tokens from the candidate set with the top 10% probability. It is recommended that you adjust the top_p or temperature parameters according to the application scenario, but do not adjust both parameters at the same time.
+ - name: do_sample
+ label:
+ zh_Hans: 采样策略
+ en_US: Sampling strategy
+ type: boolean
+ help:
+ zh_Hans: do_sample 为 true 时启用采样策略,do_sample 为 false 时采样策略 temperature、top_p 将不生效。默认值为 true。
+ en_US: When `do_sample` is set to true, the sampling strategy is enabled. When `do_sample` is set to false, the sampling strategies such as `temperature` and `top_p` will not take effect. The default value is true.
+ default: true
+ - name: stream
+ label:
+ zh_Hans: 流处理
+ en_US: Event Stream
+ type: boolean
+ help:
+ zh_Hans: 使用同步调用时,此参数应当设置为 false 或者省略。表示模型生成完所有内容后一次性返回所有内容。默认值为 false。如果设置为 true,模型将通过标准 Event Stream ,逐块返回模型生成内容。Event Stream 结束时会返回一条data:[DONE]消息。注意:在模型流式输出生成内容的过程中,我们会分批对模型生成内容进行检测,当检测到违法及不良信息时,API会返回错误码(1301)。开发者识别到错误码(1301),应及时采取(清屏、重启对话)等措施删除生成内容,并确保不将含有违法及不良信息的内容传递给模型继续生成,避免其造成负面影响。
+ en_US: When using synchronous invocation, this parameter should be set to false or omitted. It indicates that the model will return all the generated content at once after the generation is complete. The default value is false. If set to true, the model will return the generated content in chunks via the standard Event Stream. A data:[DONE] message will be sent at the end of the Event Stream. Note that during the model's streaming output process, we will batch check the generated content. If illegal or harmful information is detected, the API will return an error code (1301). Developers who identify error code (1301) should promptly take actions such as clearing the screen or restarting the conversation to delete the generated content. They should also ensure that no illegal or harmful content is passed back to the model for continued generation to avoid negative impacts.
+ default: false
+ - name: max_tokens
+ use_template: max_tokens
+ default: 1024
+ min: 1
+ max: 4095
+ - name: web_search
+ type: boolean
+ label:
+ zh_Hans: 联网搜索
+ en_US: Web Search
+ default: false
+ help:
+ zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
+ en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
+pricing:
+ input: '0.01'
+ output: '0.01'
+ unit: '0.001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/zhipuai/llm/glm-4-flash.yaml b/ai-provider/model-runtime/model-providers/zhipuai/llm/glm-4-flash.yaml
new file mode 100644
index 00000000..98c4f72c
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/zhipuai/llm/glm-4-flash.yaml
@@ -0,0 +1,62 @@
+model: glm-4-flash
+label:
+ en_US: glm-4-flash
+model_type: llm
+features:
+ - multi-tool-call
+ - agent-thought
+ - stream-tool-call
+model_properties:
+ mode: chat
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ default: 0.95
+ min: 0.0
+ max: 1.0
+ help:
+ zh_Hans: 采样温度,控制输出的随机性,必须为正数取值范围是:(0.0,1.0],不能等于 0,默认值为 0.95 值越大,会使输出更随机,更具创造性;值越小,输出会更加稳定或确定建议您根据应用场景调整 top_p 或 temperature 参数,但不要同时调整两个参数。
+ en_US: Sampling temperature controls the randomness of the output and must be a positive number. The value range is (0.0,1.0], which cannot be equal to 0. The default value is 0.95. The larger the value, the more random and creative the output will be; the smaller the value, the more stable or deterministic the output will be. It is recommended that you adjust the top_p or temperature parameters according to the application scenario, but do not adjust both parameters at the same time.
+ - name: top_p
+ use_template: top_p
+ default: 0.7
+ help:
+ zh_Hans: 用温度取样的另一种方法,称为核取样取值范围是:(0.0, 1.0) 开区间,不能等于 0 或 1,默认值为 0.7 模型考虑具有 top_p 概率质量tokens的结果例如:0.1 意味着模型解码器只考虑从前 10% 的概率的候选集中取 tokens 建议您根据应用场景调整 top_p 或 temperature 参数,但不要同时调整两个参数。
+ en_US: An alternative to temperature sampling, called nucleus sampling. The value range is the open interval (0.0, 1.0), which cannot be equal to 0 or 1. The default value is 0.7. The model considers the tokens that make up the top_p probability mass. For example, 0.1 means the model decoder only considers tokens from the candidate set with the top 10% probability. It is recommended that you adjust the top_p or temperature parameters according to the application scenario, but do not adjust both parameters at the same time.
+ - name: do_sample
+ label:
+ zh_Hans: 采样策略
+ en_US: Sampling strategy
+ type: boolean
+ help:
+ zh_Hans: do_sample 为 true 时启用采样策略,do_sample 为 false 时采样策略 temperature、top_p 将不生效。默认值为 true。
+ en_US: When `do_sample` is set to true, the sampling strategy is enabled. When `do_sample` is set to false, the sampling strategies such as `temperature` and `top_p` will not take effect. The default value is true.
+ default: true
+ - name: stream
+ label:
+ zh_Hans: 流处理
+ en_US: Event Stream
+ type: boolean
+ help:
+ zh_Hans: 使用同步调用时,此参数应当设置为 false 或者省略。表示模型生成完所有内容后一次性返回所有内容。默认值为 false。如果设置为 true,模型将通过标准 Event Stream ,逐块返回模型生成内容。Event Stream 结束时会返回一条data:[DONE]消息。注意:在模型流式输出生成内容的过程中,我们会分批对模型生成内容进行检测,当检测到违法及不良信息时,API会返回错误码(1301)。开发者识别到错误码(1301),应及时采取(清屏、重启对话)等措施删除生成内容,并确保不将含有违法及不良信息的内容传递给模型继续生成,避免其造成负面影响。
+ en_US: When using synchronous invocation, this parameter should be set to false or omitted. It indicates that the model will return all the generated content at once after the generation is complete. The default value is false. If set to true, the model will return the generated content in chunks via the standard Event Stream. A data:[DONE] message will be sent at the end of the Event Stream. Note that during the model's streaming output process, we will batch check the generated content. If illegal or harmful information is detected, the API will return an error code (1301). Developers who identify error code (1301) should promptly take actions such as clearing the screen or restarting the conversation to delete the generated content. They should also ensure that no illegal or harmful content is passed back to the model for continued generation to avoid negative impacts.
+ default: false
+ - name: max_tokens
+ use_template: max_tokens
+ default: 1024
+ min: 1
+ max: 4095
+ - name: web_search
+ type: boolean
+ label:
+ zh_Hans: 联网搜索
+ en_US: Web Search
+ default: false
+ help:
+ zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
+ en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
+pricing:
+ input: '0'
+ output: '0'
+ unit: '0.001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/zhipuai/llm/glm_3_turbo.yaml b/ai-provider/model-runtime/model-providers/zhipuai/llm/glm_3_turbo.yaml
new file mode 100644
index 00000000..0b5391ce
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/zhipuai/llm/glm_3_turbo.yaml
@@ -0,0 +1,62 @@
+model: glm-3-turbo
+label:
+ en_US: glm-3-turbo
+model_type: llm
+features:
+ - multi-tool-call
+ - agent-thought
+ - stream-tool-call
+model_properties:
+ mode: chat
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ default: 0.95
+ min: 0.0
+ max: 1.0
+ help:
+ zh_Hans: 采样温度,控制输出的随机性,必须为正数取值范围是:(0.0,1.0],不能等于 0,默认值为 0.95 值越大,会使输出更随机,更具创造性;值越小,输出会更加稳定或确定建议您根据应用场景调整 top_p 或 temperature 参数,但不要同时调整两个参数。
+ en_US: Sampling temperature controls the randomness of the output and must be a positive number. The value range is (0.0,1.0], which cannot be equal to 0. The default value is 0.95. The larger the value, the more random and creative the output will be; the smaller the value, the more stable or deterministic the output will be. It is recommended that you adjust the top_p or temperature parameters according to the application scenario, but do not adjust both parameters at the same time.
+ - name: top_p
+ use_template: top_p
+ default: 0.7
+ help:
+ zh_Hans: 用温度取样的另一种方法,称为核取样取值范围是:(0.0, 1.0) 开区间,不能等于 0 或 1,默认值为 0.7 模型考虑具有 top_p 概率质量tokens的结果例如:0.1 意味着模型解码器只考虑从前 10% 的概率的候选集中取 tokens 建议您根据应用场景调整 top_p 或 temperature 参数,但不要同时调整两个参数。
+ en_US: An alternative to temperature sampling, called nucleus sampling. The value range is the open interval (0.0, 1.0), which cannot be equal to 0 or 1. The default value is 0.7. The model considers the tokens that make up the top_p probability mass. For example, 0.1 means the model decoder only considers tokens from the candidate set with the top 10% probability. It is recommended that you adjust the top_p or temperature parameters according to the application scenario, but do not adjust both parameters at the same time.
+ - name: do_sample
+ label:
+ zh_Hans: 采样策略
+ en_US: Sampling strategy
+ type: boolean
+ help:
+ zh_Hans: do_sample 为 true 时启用采样策略,do_sample 为 false 时采样策略 temperature、top_p 将不生效。默认值为 true。
+ en_US: When `do_sample` is set to true, the sampling strategy is enabled. When `do_sample` is set to false, the sampling strategies such as `temperature` and `top_p` will not take effect. The default value is true.
+ default: true
+ - name: stream
+ label:
+ zh_Hans: 流处理
+ en_US: Event Stream
+ type: boolean
+ help:
+ zh_Hans: 使用同步调用时,此参数应当设置为 false 或者省略。表示模型生成完所有内容后一次性返回所有内容。默认值为 false。如果设置为 true,模型将通过标准 Event Stream ,逐块返回模型生成内容。Event Stream 结束时会返回一条data:[DONE]消息。注意:在模型流式输出生成内容的过程中,我们会分批对模型生成内容进行检测,当检测到违法及不良信息时,API会返回错误码(1301)。开发者识别到错误码(1301),应及时采取(清屏、重启对话)等措施删除生成内容,并确保不将含有违法及不良信息的内容传递给模型继续生成,避免其造成负面影响。
+ en_US: When using synchronous invocation, this parameter should be set to false or omitted. It indicates that the model will return all the generated content at once after the generation is complete. The default value is false. If set to true, the model will return the generated content in chunks via the standard Event Stream. A data:[DONE] message will be sent at the end of the Event Stream. Note that during the model's streaming output process, we will batch check the generated content. If illegal or harmful information is detected, the API will return an error code (1301). Developers who identify error code (1301) should promptly take actions such as clearing the screen or restarting the conversation to delete the generated content. They should also ensure that no illegal or harmful content is passed back to the model for continued generation to avoid negative impacts.
+ default: false
+ - name: max_tokens
+ use_template: max_tokens
+ default: 1024
+ min: 1
+ max: 8192
+ - name: web_search
+ type: boolean
+ label:
+ zh_Hans: 联网搜索
+ en_US: Web Search
+ default: false
+ help:
+ zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
+ en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
+pricing:
+ input: '0.001'
+ output: '0.001'
+ unit: '0.001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/zhipuai/llm/glm_4.yaml b/ai-provider/model-runtime/model-providers/zhipuai/llm/glm_4.yaml
new file mode 100644
index 00000000..62f453fb
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/zhipuai/llm/glm_4.yaml
@@ -0,0 +1,62 @@
+model: glm-4
+label:
+ en_US: glm-4
+model_type: llm
+features:
+ - multi-tool-call
+ - agent-thought
+ - stream-tool-call
+model_properties:
+ mode: chat
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ default: 0.95
+ min: 0.0
+ max: 1.0
+ help:
+ zh_Hans: 采样温度,控制输出的随机性,必须为正数取值范围是:(0.0,1.0],不能等于 0,默认值为 0.95 值越大,会使输出更随机,更具创造性;值越小,输出会更加稳定或确定建议您根据应用场景调整 top_p 或 temperature 参数,但不要同时调整两个参数。
+ en_US: Sampling temperature controls the randomness of the output and must be a positive number. The value range is (0.0,1.0], which cannot be equal to 0. The default value is 0.95. The larger the value, the more random and creative the output will be; the smaller the value, the more stable or deterministic the output will be. It is recommended that you adjust the top_p or temperature parameters according to the application scenario, but do not adjust both parameters at the same time.
+ - name: top_p
+ use_template: top_p
+ default: 0.7
+ help:
+ zh_Hans: 用温度取样的另一种方法,称为核取样取值范围是:(0.0, 1.0) 开区间,不能等于 0 或 1,默认值为 0.7 模型考虑具有 top_p 概率质量tokens的结果例如:0.1 意味着模型解码器只考虑从前 10% 的概率的候选集中取 tokens 建议您根据应用场景调整 top_p 或 temperature 参数,但不要同时调整两个参数。
+ en_US: An alternative to temperature sampling, called nucleus sampling. The value range is the open interval (0.0, 1.0), which cannot be equal to 0 or 1. The default value is 0.7. The model considers the tokens that make up the top_p probability mass. For example, 0.1 means the model decoder only considers tokens from the candidate set with the top 10% probability. It is recommended that you adjust the top_p or temperature parameters according to the application scenario, but do not adjust both parameters at the same time.
+ - name: do_sample
+ label:
+ zh_Hans: 采样策略
+ en_US: Sampling strategy
+ type: boolean
+ help:
+ zh_Hans: do_sample 为 true 时启用采样策略,do_sample 为 false 时采样策略 temperature、top_p 将不生效。默认值为 true。
+ en_US: When `do_sample` is set to true, the sampling strategy is enabled. When `do_sample` is set to false, the sampling strategies such as `temperature` and `top_p` will not take effect. The default value is true.
+ default: true
+ - name: stream
+ label:
+ zh_Hans: 流处理
+ en_US: Event Stream
+ type: boolean
+ help:
+ zh_Hans: 使用同步调用时,此参数应当设置为 false 或者省略。表示模型生成完所有内容后一次性返回所有内容。默认值为 false。如果设置为 true,模型将通过标准 Event Stream ,逐块返回模型生成内容。Event Stream 结束时会返回一条data:[DONE]消息。注意:在模型流式输出生成内容的过程中,我们会分批对模型生成内容进行检测,当检测到违法及不良信息时,API会返回错误码(1301)。开发者识别到错误码(1301),应及时采取(清屏、重启对话)等措施删除生成内容,并确保不将含有违法及不良信息的内容传递给模型继续生成,避免其造成负面影响。
+ en_US: When using synchronous invocation, this parameter should be set to false or omitted. It indicates that the model will return all the generated content at once after the generation is complete. The default value is false. If set to true, the model will return the generated content in chunks via the standard Event Stream. A data:[DONE] message will be sent at the end of the Event Stream. Note that during the model's streaming output process, we will batch check the generated content. If illegal or harmful information is detected, the API will return an error code (1301). Developers who identify error code (1301) should promptly take actions such as clearing the screen or restarting the conversation to delete the generated content. They should also ensure that no illegal or harmful content is passed back to the model for continued generation to avoid negative impacts.
+ default: false
+ - name: max_tokens
+ use_template: max_tokens
+ default: 1024
+ min: 1
+ max: 4095
+ - name: web_search
+ type: boolean
+ label:
+ zh_Hans: 联网搜索
+ en_US: Web Search
+ default: false
+ help:
+ zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
+ en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
+pricing:
+ input: '0.1'
+ output: '0.1'
+ unit: '0.001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/zhipuai/llm/glm_4_long.yaml b/ai-provider/model-runtime/model-providers/zhipuai/llm/glm_4_long.yaml
new file mode 100644
index 00000000..350b080c
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/zhipuai/llm/glm_4_long.yaml
@@ -0,0 +1,65 @@
+model: glm-4-long
+label:
+ en_US: glm-4-long
+model_type: llm
+features:
+ - multi-tool-call
+ - agent-thought
+ - stream-tool-call
+model_properties:
+ mode: chat
+ context_size: 10240
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ default: 0.95
+ min: 0.0
+ max: 1.0
+ help:
+ zh_Hans: 采样温度,控制输出的随机性,必须为正数取值范围是:(0.0,1.0],不能等于 0,默认值为 0.95 值越大,会使输出更随机,更具创造性;值越小,输出会更加稳定或确定建议您根据应用场景调整 top_p 或 temperature 参数,但不要同时调整两个参数。
+ en_US: Sampling temperature controls the randomness of the output and must be a positive number. The value range is (0.0,1.0], which cannot be equal to 0. The default value is 0.95. The larger the value, the more random and creative the output will be; the smaller the value, the more stable or deterministic the output will be. It is recommended that you adjust the top_p or temperature parameters according to the application scenario, but do not adjust both parameters at the same time.
+ - name: top_p
+ use_template: top_p
+ default: 0.7
+ min: 0.0
+ max: 1.0
+ help:
+ zh_Hans: 用温度取样的另一种方法,称为核取样取值范围是:(0.0, 1.0) 开区间,不能等于 0 或 1,默认值为 0.7 模型考虑具有 top_p 概率质量tokens的结果例如:0.1 意味着模型解码器只考虑从前 10% 的概率的候选集中取 tokens 建议您根据应用场景调整 top_p 或 temperature 参数,但不要同时调整两个参数。
+ en_US: An alternative to temperature sampling, called nucleus sampling. The value range is the open interval (0.0, 1.0), which cannot be equal to 0 or 1. The default value is 0.7. The model considers the tokens that make up the top_p probability mass. For example, 0.1 means the model decoder only considers tokens from the candidate set with the top 10% probability. It is recommended that you adjust the top_p or temperature parameters according to the application scenario, but do not adjust both parameters at the same time.
+ - name: do_sample
+ label:
+ zh_Hans: 采样策略
+ en_US: Sampling strategy
+ type: boolean
+ help:
+ zh_Hans: do_sample 为 true 时启用采样策略,do_sample 为 false 时采样策略 temperature、top_p 将不生效。默认值为 true。
+ en_US: When `do_sample` is set to true, the sampling strategy is enabled. When `do_sample` is set to false, the sampling strategies such as `temperature` and `top_p` will not take effect. The default value is true.
+ default: true
+ - name: stream
+ label:
+ zh_Hans: 流处理
+ en_US: Event Stream
+ type: boolean
+ help:
+ zh_Hans: 使用同步调用时,此参数应当设置为 false 或者省略。表示模型生成完所有内容后一次性返回所有内容。默认值为 false。如果设置为 true,模型将通过标准 Event Stream ,逐块返回模型生成内容。Event Stream 结束时会返回一条data:[DONE]消息。注意:在模型流式输出生成内容的过程中,我们会分批对模型生成内容进行检测,当检测到违法及不良信息时,API会返回错误码(1301)。开发者识别到错误码(1301),应及时采取(清屏、重启对话)等措施删除生成内容,并确保不将含有违法及不良信息的内容传递给模型继续生成,避免其造成负面影响。
+ en_US: When using synchronous invocation, this parameter should be set to false or omitted. It indicates that the model will return all the generated content at once after the generation is complete. The default value is false. If set to true, the model will return the generated content in chunks via the standard Event Stream. A data:[DONE] message will be sent at the end of the Event Stream. Note that during the model's streaming output process, we will batch check the generated content. If illegal or harmful information is detected, the API will return an error code (1301). Developers who identify error code (1301) should promptly take actions such as clearing the screen or restarting the conversation to delete the generated content. They should also ensure that no illegal or harmful content is passed back to the model for continued generation to avoid negative impacts.
+ default: false
+ - name: max_tokens
+ use_template: max_tokens
+ default: 1024
+ min: 1
+ max: 4095
+ - name: web_search
+ type: boolean
+ label:
+ zh_Hans: 联网搜索
+ en_US: Web Search
+ default: false
+ help:
+ zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
+ en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
+pricing:
+ input: '0.001'
+ output: '0.001'
+ unit: '0.001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/zhipuai/llm/glm_4_plus.yaml b/ai-provider/model-runtime/model-providers/zhipuai/llm/glm_4_plus.yaml
new file mode 100644
index 00000000..2d7ebd71
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/zhipuai/llm/glm_4_plus.yaml
@@ -0,0 +1,62 @@
+model: glm-4-plus
+label:
+ en_US: glm-4-plus
+model_type: llm
+features:
+ - multi-tool-call
+ - agent-thought
+ - stream-tool-call
+model_properties:
+ mode: chat
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ default: 0.95
+ min: 0.0
+ max: 1.0
+ help:
+ zh_Hans: 采样温度,控制输出的随机性,必须为正数取值范围是:(0.0,1.0],不能等于 0,默认值为 0.95 值越大,会使输出更随机,更具创造性;值越小,输出会更加稳定或确定建议您根据应用场景调整 top_p 或 temperature 参数,但不要同时调整两个参数。
+ en_US: Sampling temperature controls the randomness of the output and must be a positive number. The value range is (0.0,1.0], which cannot be equal to 0. The default value is 0.95. The larger the value, the more random and creative the output will be; the smaller the value, the more stable or deterministic the output will be. It is recommended that you adjust the top_p or temperature parameters according to the application scenario, but do not adjust both parameters at the same time.
+ - name: top_p
+ use_template: top_p
+ default: 0.7
+ help:
+ zh_Hans: 用温度取样的另一种方法,称为核取样取值范围是:(0.0, 1.0) 开区间,不能等于 0 或 1,默认值为 0.7 模型考虑具有 top_p 概率质量tokens的结果例如:0.1 意味着模型解码器只考虑从前 10% 的概率的候选集中取 tokens 建议您根据应用场景调整 top_p 或 temperature 参数,但不要同时调整两个参数。
+ en_US: An alternative to temperature sampling, called nucleus sampling. The value range is the open interval (0.0, 1.0), which cannot be equal to 0 or 1. The default value is 0.7. The model considers the tokens that make up the top_p probability mass. For example, 0.1 means the model decoder only considers tokens from the candidate set with the top 10% probability. It is recommended that you adjust the top_p or temperature parameters according to the application scenario, but do not adjust both parameters at the same time.
+ - name: do_sample
+ label:
+ zh_Hans: 采样策略
+ en_US: Sampling strategy
+ type: boolean
+ help:
+ zh_Hans: do_sample 为 true 时启用采样策略,do_sample 为 false 时采样策略 temperature、top_p 将不生效。默认值为 true。
+ en_US: When `do_sample` is set to true, the sampling strategy is enabled. When `do_sample` is set to false, the sampling strategies such as `temperature` and `top_p` will not take effect. The default value is true.
+ default: true
+ - name: stream
+ label:
+ zh_Hans: 流处理
+ en_US: Event Stream
+ type: boolean
+ help:
+ zh_Hans: 使用同步调用时,此参数应当设置为 false 或者省略。表示模型生成完所有内容后一次性返回所有内容。默认值为 false。如果设置为 true,模型将通过标准 Event Stream ,逐块返回模型生成内容。Event Stream 结束时会返回一条data:[DONE]消息。注意:在模型流式输出生成内容的过程中,我们会分批对模型生成内容进行检测,当检测到违法及不良信息时,API会返回错误码(1301)。开发者识别到错误码(1301),应及时采取(清屏、重启对话)等措施删除生成内容,并确保不将含有违法及不良信息的内容传递给模型继续生成,避免其造成负面影响。
+ en_US: When using synchronous invocation, this parameter should be set to false or omitted. It indicates that the model will return all the generated content at once after the generation is complete. The default value is false. If set to true, the model will return the generated content in chunks via the standard Event Stream. A data:[DONE] message will be sent at the end of the Event Stream. Note that during the model's streaming output process, we will batch check the generated content. If illegal or harmful information is detected, the API will return an error code (1301). Developers who identify error code (1301) should promptly take actions such as clearing the screen or restarting the conversation to delete the generated content. They should also ensure that no illegal or harmful content is passed back to the model for continued generation to avoid negative impacts.
+ default: false
+ - name: max_tokens
+ use_template: max_tokens
+ default: 1024
+ min: 1
+ max: 4095
+ - name: web_search
+ type: boolean
+ label:
+ zh_Hans: 联网搜索
+ en_US: Web Search
+ default: false
+ help:
+ zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
+ en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
+pricing:
+ input: '0.05'
+ output: '0.05'
+ unit: '0.001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/zhipuai/llm/glm_4v.yaml b/ai-provider/model-runtime/model-providers/zhipuai/llm/glm_4v.yaml
new file mode 100644
index 00000000..3a1120ff
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/zhipuai/llm/glm_4v.yaml
@@ -0,0 +1,60 @@
+model: glm-4v
+label:
+ en_US: glm-4v
+model_type: llm
+model_properties:
+ mode: chat
+features:
+ - vision
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ default: 0.95
+ min: 0.0
+ max: 1.0
+ help:
+ zh_Hans: 采样温度,控制输出的随机性,必须为正数取值范围是:(0.0,1.0],不能等于 0,默认值为 0.95 值越大,会使输出更随机,更具创造性;值越小,输出会更加稳定或确定建议您根据应用场景调整 top_p 或 temperature 参数,但不要同时调整两个参数。
+ en_US: Sampling temperature controls the randomness of the output and must be a positive number. The value range is (0.0,1.0], which cannot be equal to 0. The default value is 0.95. The larger the value, the more random and creative the output will be; the smaller the value, the more stable or deterministic the output will be. It is recommended that you adjust the top_p or temperature parameters according to the application scenario, but do not adjust both parameters at the same time.
+ - name: top_p
+ use_template: top_p
+ default: 0.6
+ help:
+ zh_Hans: 用温度取样的另一种方法,称为核取样取值范围是:(0.0, 1.0) 开区间,不能等于 0 或 1,默认值为 0.6 模型考虑具有 top_p 概率质量tokens的结果例如:0.1 意味着模型解码器只考虑从前 10% 的概率的候选集中取 tokens 建议您根据应用场景调整 top_p 或 temperature 参数,但不要同时调整两个参数。
+ en_US: An alternative to temperature sampling, called nucleus sampling. The value range is the open interval (0.0, 1.0), which cannot be equal to 0 or 1. The default value is 0.6. The model considers the tokens that make up the top_p probability mass. For example, 0.1 means the model decoder only considers tokens from the candidate set with the top 10% probability. It is recommended that you adjust the top_p or temperature parameters according to the application scenario, but do not adjust both parameters at the same time.
+ - name: do_sample
+ label:
+ zh_Hans: 采样策略
+ en_US: Sampling strategy
+ type: boolean
+ help:
+ zh_Hans: do_sample 为 true 时启用采样策略,do_sample 为 false 时采样策略 temperature、top_p 将不生效。默认值为 true。
+ en_US: When `do_sample` is set to true, the sampling strategy is enabled. When `do_sample` is set to false, the sampling strategies such as `temperature` and `top_p` will not take effect. The default value is true.
+ default: true
+ - name: stream
+ label:
+ zh_Hans: 流处理
+ en_US: Event Stream
+ type: boolean
+ help:
+ zh_Hans: 使用同步调用时,此参数应当设置为 false 或者省略。表示模型生成完所有内容后一次性返回所有内容。默认值为 false。如果设置为 true,模型将通过标准 Event Stream ,逐块返回模型生成内容。Event Stream 结束时会返回一条data:[DONE]消息。注意:在模型流式输出生成内容的过程中,我们会分批对模型生成内容进行检测,当检测到违法及不良信息时,API会返回错误码(1301)。开发者识别到错误码(1301),应及时采取(清屏、重启对话)等措施删除生成内容,并确保不将含有违法及不良信息的内容传递给模型继续生成,避免其造成负面影响。
+ en_US: When using synchronous invocation, this parameter should be set to false or omitted. It indicates that the model will return all the generated content at once after the generation is complete. The default value is false. If set to true, the model will return the generated content in chunks via the standard Event Stream. A data:[DONE] message will be sent at the end of the Event Stream. Note that during the model's streaming output process, we will batch check the generated content. If illegal or harmful information is detected, the API will return an error code (1301). Developers who identify error code (1301) should promptly take actions such as clearing the screen or restarting the conversation to delete the generated content. They should also ensure that no illegal or harmful content is passed back to the model for continued generation to avoid negative impacts.
+ default: false
+ - name: max_tokens
+ use_template: max_tokens
+ default: 1024
+ min: 1
+ max: 1024
+ - name: web_search
+ type: boolean
+ label:
+ zh_Hans: 联网搜索
+ en_US: Web Search
+ default: false
+ help:
+ zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
+ en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
+pricing:
+ input: '0.05'
+ output: '0.05'
+ unit: '0.001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/zhipuai/llm/glm_4v_plus.yaml b/ai-provider/model-runtime/model-providers/zhipuai/llm/glm_4v_plus.yaml
new file mode 100644
index 00000000..14b9623e
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/zhipuai/llm/glm_4v_plus.yaml
@@ -0,0 +1,60 @@
+model: glm-4v-plus
+label:
+ en_US: glm-4v-plus
+model_type: llm
+model_properties:
+ mode: chat
+features:
+ - vision
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ default: 0.95
+ min: 0.0
+ max: 1.0
+ help:
+ zh_Hans: 采样温度,控制输出的随机性,必须为正数,取值范围是:(0.0,1.0],不能等于 0,默认值为 0.95。值越大,会使输出更随机,更具创造性;值越小,输出会更加稳定或确定。建议您根据应用场景调整 top_p 或 temperature 参数,但不要同时调整两个参数。
+ en_US: Sampling temperature, controls the randomness of the output, must be a positive number. The value range is (0.0,1.0], which cannot be equal to 0. The default value is 0.95. The larger the value, the more random and creative the output will be; the smaller the value, the more stable or certain the output will be. It is recommended that you adjust the top_p or temperature parameters according to the application scenario, but do not adjust both parameters at the same time.
+ - name: top_p
+ use_template: top_p
+ default: 0.6
+ help:
+ zh_Hans: 用温度取样的另一种方法,称为核取样。取值范围是:(0.0, 1.0) 开区间,不能等于 0 或 1,默认值为 0.6。模型考虑具有 top_p 概率质量 tokens 的结果。例如:0.1 意味着模型解码器只考虑从前 10% 的概率的候选集中取 tokens。建议您根据应用场景调整 top_p 或 temperature 参数,但不要同时调整两个参数。
+ en_US: Another method of temperature sampling is called nucleus sampling. The value range is the open interval (0.0, 1.0), which cannot be equal to 0 or 1. The default value is 0.6. The model considers the results of the tokens with top_p probability mass. For example, 0.1 means the model decoder only considers tokens from the candidate set with the top 10% probability. It is recommended that you adjust the top_p or temperature parameters according to the application scenario, but do not adjust both parameters at the same time.
+ - name: do_sample
+ label:
+ zh_Hans: 采样策略
+ en_US: Sampling strategy
+ type: boolean
+ help:
+ zh_Hans: do_sample 为 true 时启用采样策略,do_sample 为 false 时采样策略 temperature、top_p 将不生效。默认值为 true。
+ en_US: When `do_sample` is set to true, the sampling strategy is enabled. When `do_sample` is set to false, the sampling strategies such as `temperature` and `top_p` will not take effect. The default value is true.
+ default: true
+ - name: stream
+ label:
+ zh_Hans: 流处理
+ en_US: Event Stream
+ type: boolean
+ help:
+ zh_Hans: 使用同步调用时,此参数应当设置为 false 或者省略。表示模型生成完所有内容后一次性返回所有内容。默认值为 false。如果设置为 true,模型将通过标准 Event Stream ,逐块返回模型生成内容。Event Stream 结束时会返回一条data:[DONE]消息。注意:在模型流式输出生成内容的过程中,我们会分批对模型生成内容进行检测,当检测到违法及不良信息时,API会返回错误码(1301)。开发者识别到错误码(1301),应及时采取(清屏、重启对话)等措施删除生成内容,并确保不将含有违法及不良信息的内容传递给模型继续生成,避免其造成负面影响。
+ en_US: When using synchronous invocation, this parameter should be set to false or omitted. It indicates that the model will return all the generated content at once after the generation is complete. The default value is false. If set to true, the model will return the generated content in chunks via the standard Event Stream. A data:[DONE] message will be sent at the end of the Event Stream. Note that during the model's streaming output process, we will batch check the generated content. If illegal or harmful information is detected, the API will return an error code (1301). Developers who identify error code (1301) should promptly take actions such as clearing the screen or restarting the conversation to delete the generated content. They should also ensure that no illegal or harmful content is passed back to the model for continued generation to avoid negative impacts.
+ default: false
+ - name: max_tokens
+ use_template: max_tokens
+ default: 1024
+ min: 1
+ max: 1024
+ - name: web_search
+ type: boolean
+ label:
+ zh_Hans: 联网搜索
+ en_US: Web Search
+ default: false
+ help:
+ zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
+ en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
+pricing:
+ input: '0.01'
+ output: '0.01'
+ unit: '0.001'
+ currency: RMB
diff --git a/ai-provider/model-runtime/model-providers/zhipuai/zhipuai.yaml b/ai-provider/model-runtime/model-providers/zhipuai/zhipuai.yaml
new file mode 100644
index 00000000..2c3ba4c5
--- /dev/null
+++ b/ai-provider/model-runtime/model-providers/zhipuai/zhipuai.yaml
@@ -0,0 +1,32 @@
+provider: zhipuai
+label:
+ zh_Hans: 智谱 AI
+ en_US: ZHIPU AI
+icon_small:
+ en_US: icon_s_en.svg
+icon_large:
+ zh_Hans: icon_l_zh.svg
+ en_US: icon_l_en.svg
+background: "#EFF1FE"
+help:
+ title:
+ en_US: Get your API key from ZHIPU AI
+ zh_Hans: 从智谱 AI 获取 API Key
+ url:
+ en_US: https://open.bigmodel.cn/usercenter/apikeys
+supported_model_types:
+ - llm
+ - text-embedding
+configurate_methods:
+ - predefined-model
+provider_credential_schema:
+ credential_form_schemas:
+ - variable: api_key
+ label:
+ en_US: APIKey
+ type: secret-input
+ required: true
+ placeholder:
+ zh_Hans: 在此输入您的 APIKey
+ en_US: Enter your APIKey
+address: https://api.openai.com
\ No newline at end of file
diff --git a/ai-provider/model-runtime/provider.go b/ai-provider/model-runtime/provider.go
index c9d9c94c..076f746f 100644
--- a/ai-provider/model-runtime/provider.go
+++ b/ai-provider/model-runtime/provider.go
@@ -3,11 +3,13 @@ package model_runtime
import (
"encoding/json"
"fmt"
- "github.com/APIParkLab/APIPark/ai-provider/model-runtime/entity"
- "github.com/eolinker/eosc"
- "gopkg.in/yaml.v3"
"net/url"
"strings"
+
+ yaml "gopkg.in/yaml.v3"
+
+ "github.com/APIParkLab/APIPark/ai-provider/model-runtime/entity"
+ "github.com/eolinker/eosc"
)
const (
@@ -54,8 +56,14 @@ func NewProvider(providerData string, modelContents map[string]eosc.Untyped[stri
}
delete(modelContents, DirAssets)
- providerLogo, _ := assetsFiles.Get(providerCfg.IconLarge[entity.LanguageEnglish])
- modelLogo, _ := assetsFiles.Get(providerCfg.IconSmall[entity.LanguageEnglish])
+ providerLogo, ok := assetsFiles.Get(providerCfg.IconLarge[entity.LanguageEnglish])
+ if !ok {
+ return nil, fmt.Errorf("provider logo not found:%s", providerCfg.Provider)
+ }
+ modelLogo, ok := assetsFiles.Get(providerCfg.IconSmall[entity.LanguageEnglish])
+ if !ok {
+ return nil, fmt.Errorf("model logo not found:%s", providerCfg.Provider)
+ }
provider := &Provider{
id: providerCfg.Provider,
name: providerCfg.Label[entity.LanguageEnglish],
diff --git a/frontend/frontend.go b/frontend/frontend.go
index d7d91afc..fb85f8ce 100644
--- a/frontend/frontend.go
+++ b/frontend/frontend.go
@@ -81,10 +81,10 @@ func IndexHtml(ginCtx *gin.Context) {
func (f *Frontend) Api() []pm3.Api {
return []pm3.Api{
- pm3.CreateRouterSimple(http.MethodGet, "/favicon.ico", func(ginCtx *gin.Context) {
+ pm3.CreateApiSimple(http.MethodGet, "/favicon.ico", func(ginCtx *gin.Context) {
ginCtx.Data(http.StatusOK, iconType, iconContent)
}),
- pm3.CreateRouterSimple(http.MethodGet, "/vite.svg", func(ginCtx *gin.Context) {
+ pm3.CreateApiSimple(http.MethodGet, "/vite.svg", func(ginCtx *gin.Context) {
ginCtx.Data(http.StatusOK, viteContentType, viteContent)
}),
}
diff --git a/gateway/apinto/profession.go b/gateway/apinto/profession.go
index bc900fd6..9bc21beb 100644
--- a/gateway/apinto/profession.go
+++ b/gateway/apinto/profession.go
@@ -47,6 +47,10 @@ var dynamicResourceMap = map[string]Worker{
Profession: ProfessionAIProvider,
Driver: "openai",
},
+ "google": {
+ Profession: ProfessionAIProvider,
+ Driver: "google",
+ },
"moonshot": {
Profession: ProfessionAIProvider,
Driver: "moonshot",
diff --git a/go.mod b/go.mod
index 684d3ef0..937b7035 100644
--- a/go.mod
+++ b/go.mod
@@ -13,7 +13,6 @@ require (
github.com/gin-gonic/gin v1.10.0
github.com/google/uuid v1.6.0
github.com/influxdata/influxdb-client-go/v2 v2.14.0
- github.com/sashabaranov/go-openai v1.30.3
github.com/urfave/cli/v2 v2.27.2
golang.org/x/crypto v0.24.0
gopkg.in/yaml.v3 v3.0.1