mirror of
https://github.com/APIParkLab/APIPark.git
synced 2026-06-04 10:13:53 +08:00
Merge branch 'main' into feature/dashen/model_mapping
This commit is contained in:
@@ -4,3 +4,4 @@
|
||||
/build/
|
||||
/apipark
|
||||
.gitlab-ci.yml
|
||||
/.vscode/
|
||||
|
||||
@@ -10,6 +10,7 @@ variables:
|
||||
VIEW_ADDR: http://172.18.166.219:8288
|
||||
SAVE_DIR: /opt/${APP}
|
||||
NODE_OPTIONS: --max_old_space_size=8192
|
||||
APIPARK_OLLAMA_BASE: http://127.0.0.1:11434
|
||||
|
||||
stages:
|
||||
- notice
|
||||
|
||||
@@ -0,0 +1,22 @@
|
||||
package ai_provider_local
|
||||
|
||||
import "time"
|
||||
|
||||
type Model struct {
|
||||
Name string `json:"name"`
|
||||
Model string `json:"model"`
|
||||
ModifiedAt time.Time `json:"modified_at"`
|
||||
Size int64 `json:"size"`
|
||||
Digest string `json:"digest"`
|
||||
Details ModelDetails `json:"details,omitempty"`
|
||||
}
|
||||
|
||||
// ModelDetails provides details about a model.
|
||||
type ModelDetails struct {
|
||||
ParentModel string `json:"parent_model"`
|
||||
Format string `json:"format"`
|
||||
Family string `json:"family"`
|
||||
Families []string `json:"families"`
|
||||
ParameterSize string `json:"parameter_size"`
|
||||
QuantizationLevel string `json:"quantization_level"`
|
||||
}
|
||||
@@ -0,0 +1,336 @@
|
||||
package ai_provider_local
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"sync"
|
||||
|
||||
"github.com/ollama/ollama/progress"
|
||||
|
||||
"github.com/eolinker/eosc"
|
||||
"github.com/ollama/ollama/api"
|
||||
)
|
||||
|
||||
var (
|
||||
taskExecutor = NewAsyncExecutor(100)
|
||||
)
|
||||
|
||||
// Pipeline 结构体,表示每个用户的管道
|
||||
type Pipeline struct {
|
||||
id string
|
||||
channel chan PullMessage
|
||||
ctx context.Context
|
||||
cancel context.CancelFunc
|
||||
}
|
||||
|
||||
func (p *Pipeline) Message() <-chan PullMessage {
|
||||
return p.channel
|
||||
}
|
||||
|
||||
// AsyncExecutor 结构体,管理不同模型的管道和任务队列
|
||||
type AsyncExecutor struct {
|
||||
ctx context.Context
|
||||
cancel context.CancelFunc
|
||||
mu sync.Mutex
|
||||
pipelines map[string]*modelPipeline // 以模型为 key,存管道列表
|
||||
msgQueue chan messageTask // 消息队列
|
||||
}
|
||||
|
||||
type modelPipeline struct {
|
||||
ctx context.Context
|
||||
cancel context.CancelFunc
|
||||
pipelines eosc.Untyped[string, *Pipeline]
|
||||
pullFn PullCallback
|
||||
maxSize int
|
||||
}
|
||||
|
||||
func (m *modelPipeline) List() []*Pipeline {
|
||||
return m.pipelines.List()
|
||||
}
|
||||
|
||||
func (m *modelPipeline) Get(id string) (*Pipeline, bool) {
|
||||
return m.pipelines.Get(id)
|
||||
}
|
||||
|
||||
func (m *modelPipeline) Set(id string, p *Pipeline) error {
|
||||
_, ok := m.pipelines.Get(id)
|
||||
if !ok {
|
||||
if m.pipelines.Count() > m.maxSize {
|
||||
return fmt.Errorf("pipeline size exceed %d", m.maxSize)
|
||||
}
|
||||
}
|
||||
m.pipelines.Set(id, p)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *modelPipeline) AddPipeline(id string) (*Pipeline, error) {
|
||||
ctx, cancel := context.WithCancel(m.ctx)
|
||||
pipeline := &Pipeline{
|
||||
ctx: ctx,
|
||||
cancel: cancel,
|
||||
id: id,
|
||||
channel: make(chan PullMessage, 10), // 带缓冲,防止阻塞
|
||||
}
|
||||
err := m.Set(id, pipeline)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return pipeline, nil
|
||||
}
|
||||
|
||||
func (m *modelPipeline) Close() {
|
||||
m.cancel()
|
||||
ids := m.pipelines.Keys()
|
||||
for _, id := range ids {
|
||||
m.ClosePipeline(id)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func (m *modelPipeline) ClosePipeline(id string) {
|
||||
// 关闭管道
|
||||
p, has := m.pipelines.Del(id)
|
||||
if !has {
|
||||
return
|
||||
}
|
||||
p.cancel()
|
||||
close(p.channel)
|
||||
}
|
||||
|
||||
func newModelPipeline(ctx context.Context, maxSize int) *modelPipeline {
|
||||
ctx, cancel := context.WithCancel(ctx)
|
||||
return &modelPipeline{
|
||||
pipelines: eosc.BuildUntyped[string, *Pipeline](),
|
||||
ctx: ctx,
|
||||
cancel: cancel,
|
||||
maxSize: maxSize,
|
||||
}
|
||||
}
|
||||
|
||||
// messageTask 结构体,包含模型名和消息内容
|
||||
type messageTask struct {
|
||||
message PullMessage
|
||||
}
|
||||
|
||||
type PullMessage struct {
|
||||
Model string
|
||||
Status string
|
||||
Digest string
|
||||
Total int64
|
||||
Completed int64
|
||||
Msg string
|
||||
}
|
||||
|
||||
// NewAsyncExecutor 创建一个新的异步任务执行器
|
||||
func NewAsyncExecutor(queueSize int) *AsyncExecutor {
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
executor := &AsyncExecutor{
|
||||
ctx: ctx,
|
||||
cancel: cancel,
|
||||
pipelines: make(map[string]*modelPipeline), // 以模型为 key,存管道列表
|
||||
msgQueue: make(chan messageTask, queueSize),
|
||||
}
|
||||
executor.StartMessageDistributor()
|
||||
|
||||
return executor
|
||||
}
|
||||
|
||||
func (e *AsyncExecutor) GetModelPipeline(model string) (*modelPipeline, bool) {
|
||||
e.mu.Lock()
|
||||
defer e.mu.Unlock()
|
||||
|
||||
mp, ok := e.pipelines[model]
|
||||
return mp, ok
|
||||
}
|
||||
|
||||
func (e *AsyncExecutor) SetModelPipeline(model string, mp *modelPipeline) {
|
||||
e.mu.Lock()
|
||||
defer e.mu.Unlock()
|
||||
e.pipelines[model] = mp
|
||||
}
|
||||
|
||||
// ClosePipeline 关闭管道并移除
|
||||
func (e *AsyncExecutor) ClosePipeline(model string, id string) {
|
||||
e.mu.Lock()
|
||||
defer e.mu.Unlock()
|
||||
mp, ok := e.pipelines[model]
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
mp.ClosePipeline(id)
|
||||
}
|
||||
|
||||
// CloseModelPipeline 关闭当前模型所有管道
|
||||
func (e *AsyncExecutor) CloseModelPipeline(model string) {
|
||||
e.mu.Lock()
|
||||
defer e.mu.Unlock()
|
||||
mp, ok := e.pipelines[model]
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
mp.Close()
|
||||
delete(e.pipelines, model)
|
||||
}
|
||||
|
||||
// StartMessageDistributor 启动消息分发器
|
||||
func (e *AsyncExecutor) StartMessageDistributor() {
|
||||
go func() {
|
||||
for task := range e.msgQueue {
|
||||
msg := task.message
|
||||
e.DistributeToModelPipelines(msg.Model, msg)
|
||||
if msg.Status == "error" || msg.Status == "success" {
|
||||
mp, has := e.GetModelPipeline(msg.Model)
|
||||
if has && mp.pullFn != nil {
|
||||
mp.pullFn(msg)
|
||||
}
|
||||
e.CloseModelPipeline(msg.Model)
|
||||
continue
|
||||
}
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
// DistributeToModelPipelines 仅将消息分发给指定模型的管道
|
||||
func (e *AsyncExecutor) DistributeToModelPipelines(model string, msg PullMessage) {
|
||||
e.mu.Lock()
|
||||
defer e.mu.Unlock()
|
||||
pipelines, ok := e.pipelines[model]
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
for _, pipeline := range pipelines.List() {
|
||||
select {
|
||||
case pipeline.channel <- msg:
|
||||
default:
|
||||
// 如果管道已满,跳过
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
type PullCallback func(msg PullMessage) error
|
||||
|
||||
func PullModel(model string, id string, fn PullCallback) (*Pipeline, error) {
|
||||
if client == nil {
|
||||
return nil, fmt.Errorf("client not initialized")
|
||||
}
|
||||
mp, has := taskExecutor.GetModelPipeline(model)
|
||||
if !has {
|
||||
mp = newModelPipeline(taskExecutor.ctx, 100)
|
||||
mp.pullFn = fn
|
||||
taskExecutor.SetModelPipeline(model, mp)
|
||||
}
|
||||
p, err := mp.AddPipeline(id)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if !has {
|
||||
var status string
|
||||
bars := make(map[string]*progress.Bar)
|
||||
fn := func(resp api.ProgressResponse) error {
|
||||
if resp.Digest != "" {
|
||||
bar, ok := bars[resp.Digest]
|
||||
if !ok {
|
||||
bar = progress.NewBar(fmt.Sprintf("pulling %s...", resp.Digest[7:19]), resp.Total, resp.Completed)
|
||||
bars[resp.Digest] = bar
|
||||
}
|
||||
bar.Set(resp.Completed)
|
||||
|
||||
taskExecutor.msgQueue <- messageTask{
|
||||
message: PullMessage{
|
||||
Model: model,
|
||||
Digest: resp.Digest,
|
||||
Total: resp.Total,
|
||||
Completed: resp.Completed,
|
||||
Msg: bar.String(),
|
||||
Status: resp.Status,
|
||||
},
|
||||
}
|
||||
} else if status != resp.Status {
|
||||
taskExecutor.msgQueue <- messageTask{
|
||||
message: PullMessage{
|
||||
Model: model,
|
||||
Digest: resp.Digest,
|
||||
Total: resp.Total,
|
||||
Completed: resp.Completed,
|
||||
Msg: status,
|
||||
Status: resp.Status,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
go func() {
|
||||
err = client.Pull(mp.ctx, &api.PullRequest{Model: model}, fn)
|
||||
if err != nil {
|
||||
taskExecutor.msgQueue <- messageTask{
|
||||
message: PullMessage{
|
||||
Model: model,
|
||||
Status: "error",
|
||||
Digest: "",
|
||||
Total: 0,
|
||||
Completed: 0,
|
||||
Msg: err.Error(),
|
||||
},
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
}
|
||||
|
||||
return p, nil
|
||||
}
|
||||
|
||||
func StopPull(model string) {
|
||||
if client == nil {
|
||||
return
|
||||
}
|
||||
taskExecutor.CloseModelPipeline(model)
|
||||
}
|
||||
|
||||
func CancelPipeline(model string, id string) {
|
||||
taskExecutor.ClosePipeline(model, id)
|
||||
}
|
||||
|
||||
func RemoveModel(model string) error {
|
||||
if client == nil {
|
||||
return fmt.Errorf("client not initialized")
|
||||
}
|
||||
taskExecutor.CloseModelPipeline(model)
|
||||
err := client.Delete(context.Background(), &api.DeleteRequest{Model: model})
|
||||
if err != nil {
|
||||
if err.Error() == fmt.Sprintf("model '%s' not found", model) {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
func ModelsInstalled() ([]Model, error) {
|
||||
if client == nil {
|
||||
return nil, fmt.Errorf("client not initialized")
|
||||
}
|
||||
result, err := client.List(context.Background())
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
models := make([]Model, 0, len(result.Models))
|
||||
for _, m := range result.Models {
|
||||
models = append(models, Model{
|
||||
Name: m.Name,
|
||||
Model: m.Model,
|
||||
ModifiedAt: m.ModifiedAt,
|
||||
Size: m.Size,
|
||||
Digest: m.Digest,
|
||||
Details: ModelDetails{
|
||||
ParentModel: m.Details.ParentModel,
|
||||
Format: m.Details.Format,
|
||||
Family: m.Details.Family,
|
||||
Families: m.Details.Families,
|
||||
ParameterSize: m.Details.ParameterSize,
|
||||
QuantizationLevel: m.Details.QuantizationLevel,
|
||||
},
|
||||
})
|
||||
}
|
||||
return models, nil
|
||||
}
|
||||
@@ -0,0 +1,80 @@
|
||||
package ai_provider_local
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"testing"
|
||||
|
||||
"github.com/gin-contrib/gzip"
|
||||
|
||||
"github.com/eolinker/eosc/log"
|
||||
|
||||
"github.com/google/uuid"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
)
|
||||
|
||||
func TestPullModel(t *testing.T) {
|
||||
// 创建 Gin 引擎
|
||||
r := gin.Default()
|
||||
r.Use(gzip.Gzip(gzip.DefaultCompression))
|
||||
// 设置路由,监听 "/stream" 路径
|
||||
r.GET("/stream", streamHandler)
|
||||
r.GET("/stop", stopPull)
|
||||
r.GET("/models", models)
|
||||
|
||||
// 启动 HTTP 服务器
|
||||
r.Run(":11180")
|
||||
}
|
||||
|
||||
func streamHandler(c *gin.Context) {
|
||||
// 创建一个通道,用于监测客户端关闭连接的信号
|
||||
model := c.Query("model")
|
||||
p, err := PullModel(model, uuid.NewString(), nil)
|
||||
if err != nil {
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
|
||||
return
|
||||
}
|
||||
done := make(chan struct{})
|
||||
// 启动一个 goroutine 监听客户端关闭连接
|
||||
go func() {
|
||||
select {
|
||||
case <-c.Writer.CloseNotify():
|
||||
log.Info("client closed connection,close pipeline")
|
||||
taskExecutor.ClosePipeline(model, p.id)
|
||||
case <-done:
|
||||
}
|
||||
}()
|
||||
|
||||
c.Stream(func(w io.Writer) bool {
|
||||
select {
|
||||
case msg, ok := <-p.channel:
|
||||
if !ok {
|
||||
return false
|
||||
}
|
||||
_, err := w.Write([]byte(fmt.Sprintf("%s\n", msg.Msg)))
|
||||
if err != nil {
|
||||
log.Error("write message error: %v", err)
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
})
|
||||
done <- struct{}{}
|
||||
}
|
||||
|
||||
func stopPull(c *gin.Context) {
|
||||
model := c.Query("model")
|
||||
StopPull(model)
|
||||
c.JSON(http.StatusOK, gin.H{"message": "stop pull model"})
|
||||
}
|
||||
|
||||
func models(c *gin.Context) {
|
||||
ms, err := ModelsInstalled()
|
||||
if err != nil {
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
|
||||
return
|
||||
}
|
||||
c.JSON(http.StatusOK, gin.H{"models": ms})
|
||||
}
|
||||
@@ -0,0 +1,21 @@
|
||||
package ai_provider_local
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
"net/url"
|
||||
|
||||
"github.com/ollama/ollama/api"
|
||||
)
|
||||
|
||||
var (
|
||||
client *api.Client
|
||||
)
|
||||
|
||||
func ResetOllamaAddress(address string) error {
|
||||
u, err := url.Parse(address)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
client = api.NewClient(u, http.DefaultClient)
|
||||
return nil
|
||||
}
|
||||
@@ -0,0 +1,70 @@
|
||||
package ai_provider_local
|
||||
|
||||
import (
|
||||
"embed"
|
||||
"encoding/json"
|
||||
"strings"
|
||||
|
||||
"github.com/eolinker/eosc/log"
|
||||
)
|
||||
|
||||
var (
|
||||
//go:embed models.json
|
||||
modelsFs embed.FS
|
||||
modelCanInstall []ModelDetail
|
||||
modelVersion string
|
||||
modelTags = make(map[string][]ModelDetail)
|
||||
)
|
||||
|
||||
type ModelConfig struct {
|
||||
Models []ModelDetail `json:"models"`
|
||||
Version string `json:"version"`
|
||||
}
|
||||
|
||||
type ModelDetail struct {
|
||||
Id string `json:"id"`
|
||||
Name string `json:"name"`
|
||||
Description string `json:"description"`
|
||||
Size string `json:"size"`
|
||||
Digest string `json:"digest"`
|
||||
Provider string `json:"provider"`
|
||||
IsPopular bool `json:"is_popular"`
|
||||
Latest bool `json:"latest"`
|
||||
}
|
||||
|
||||
func init() {
|
||||
data, err := modelsFs.ReadFile("models.json")
|
||||
if err != nil {
|
||||
log.Info("read models.json error: ", err)
|
||||
return
|
||||
}
|
||||
var cfg ModelConfig
|
||||
err = json.Unmarshal(data, &cfg)
|
||||
if err != nil {
|
||||
log.Info("unmarshal models.json error: ", err)
|
||||
return
|
||||
}
|
||||
modelVersion = cfg.Version
|
||||
modelCanInstall = make([]ModelDetail, 0, len(cfg.Models))
|
||||
for _, model := range cfg.Models {
|
||||
if _, ok := modelTags[model.Id]; !ok {
|
||||
modelTags[model.Id] = make([]ModelDetail, 0)
|
||||
}
|
||||
names := strings.Split(model.Id, ":")
|
||||
|
||||
modelTags[names[0]] = append(modelTags[names[0]], model)
|
||||
if !model.Latest {
|
||||
continue
|
||||
}
|
||||
modelCanInstall = append(modelCanInstall, model)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func ModelsCanInstall() ([]ModelDetail, string) {
|
||||
return modelCanInstall, modelVersion
|
||||
}
|
||||
|
||||
func ModelsCanInstallById(id string) []ModelDetail {
|
||||
return modelTags[id]
|
||||
}
|
||||
@@ -0,0 +1,7 @@
|
||||
package ai_provider_local
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestModels(t *testing.T) {
|
||||
t.Log(ModelsCanInstall())
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,102 @@
|
||||
package ai_provider_local
|
||||
|
||||
var (
|
||||
OllamaConfig = "{\n \"mirostat\": 0,\n \"mirostat_eta\": 0.1,\n \"mirostat_tau\": 5.0,\n \"num_ctx\": 4096,\n \"repeat_last_n\":64,\n \"repeat_penalty\": 1.1,\n \"temperature\": 0.7,\n \"seed\": 42,\n \"num_predict\": 42,\n \"top_k\": 40,\n \"top_p\": 0.9,\n \"min_p\": 0.5\n}\n"
|
||||
OllamaSvg = `<?xml version="1.0" standalone="no"?>
|
||||
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 20010904//EN"
|
||||
"http://www.w3.org/TR/2001/REC-SVG-20010904/DTD/svg10.dtd">
|
||||
<svg version="1.0" xmlns="http://www.w3.org/2000/svg"
|
||||
width="4096.000000pt" height="4096.000000pt" viewBox="0 0 4096.000000 4096.000000"
|
||||
preserveAspectRatio="xMidYMid meet">
|
||||
|
||||
<g transform="translate(0.000000,4096.000000) scale(0.100000,-0.100000)"
|
||||
fill="#000000" stroke="none">
|
||||
<path d="M13179 36749 c-194 -28 -496 -139 -712 -260 -834 -469 -1516 -1636
|
||||
-1861 -3184 -82 -368 -134 -683 -181 -1100 -155 -1374 -116 -2661 120 -4025
|
||||
19 -107 36 -222 37 -254 l3 -59 -85 -70 c-507 -412 -773 -667 -1123 -1076
|
||||
-593 -691 -1055 -1493 -1378 -2390 -158 -436 -305 -1018 -369 -1458 -50 -343
|
||||
-76 -868 -67 -1348 8 -424 25 -600 93 -1004 160 -947 479 -1747 1023 -2559 67
|
||||
-101 125 -190 128 -198 3 -8 -20 -57 -52 -110 -361 -605 -691 -1540 -840
|
||||
-2379 -130 -732 -145 -929 -145 -1930 0 -984 14 -1180 131 -1840 145 -815 441
|
||||
-1680 760 -2221 72 -122 221 -353 275 -427 61 -84 66 -60 -98 -442 -319 -737
|
||||
-582 -1679 -708 -2530 -76 -520 -92 -755 -88 -1295 l3 -385 170 -128 c368
|
||||
-277 840 -604 1260 -872 192 -123 754 -464 823 -499 l33 -17 -30 53 c-143 256
|
||||
-255 663 -312 1134 -27 227 -37 770 -20 1045 55 860 262 1792 567 2546 95 235
|
||||
188 434 390 830 l194 382 0 238 c0 131 -5 258 -11 282 -15 68 -97 227 -159
|
||||
310 -32 42 -117 137 -189 211 -327 336 -485 590 -670 1079 -420 1109 -532
|
||||
2591 -300 3966 165 982 482 1778 902 2267 179 210 274 395 307 601 16 98 8
|
||||
331 -14 411 -56 201 -125 309 -385 603 -625 705 -989 1568 -1097 2598 -22 211
|
||||
-26 749 -6 960 103 1110 491 2126 1144 3000 768 1028 1809 1699 2963 1909 189
|
||||
35 330 46 581 46 248 0 407 -12 634 -46 272 -41 483 -32 634 28 153 60 312
|
||||
167 389 261 64 79 141 208 197 333 168 375 252 512 529 859 231 289 523 574
|
||||
816 795 633 477 1341 822 2052 999 403 100 458 106 1043 106 513 0 557 -3 780
|
||||
-46 928 -181 1875 -656 2640 -1325 171 -149 358 -355 570 -629 218 -280 285
|
||||
-390 419 -687 170 -378 274 -502 526 -623 235 -113 391 -125 815 -61 337 51
|
||||
628 58 915 21 968 -122 1847 -558 2609 -1295 886 -858 1491 -2062 1670 -3323
|
||||
44 -307 51 -416 51 -782 0 -268 -5 -389 -18 -515 -76 -697 -281 -1344 -601
|
||||
-1898 -170 -295 -327 -507 -576 -777 -258 -280 -368 -570 -331 -875 27 -220
|
||||
129 -428 317 -645 164 -190 270 -354 398 -615 337 -685 559 -1640 617 -2653
|
||||
15 -266 7 -870 -16 -1102 -81 -859 -254 -1586 -510 -2146 -150 -330 -269 -500
|
||||
-556 -799 -76 -80 -164 -178 -194 -219 -61 -80 -138 -232 -154 -302 -6 -24
|
||||
-11 -150 -11 -280 l0 -235 223 -445 c364 -724 509 -1097 672 -1734 121 -470
|
||||
193 -879 241 -1365 22 -222 30 -814 15 -1036 -36 -514 -138 -960 -296 -1291
|
||||
-30 -62 -50 -113 -46 -113 13 0 622 368 836 505 404 258 925 620 1280 887
|
||||
l150 113 3 355 c6 699 -34 1099 -188 1872 -154 772 -365 1445 -654 2091 -35
|
||||
78 -72 168 -82 200 -16 52 -17 61 -4 82 8 12 66 98 128 191 471 700 784 1566
|
||||
971 2689 95 572 105 768 98 1845 -6 789 -9 860 -52 1171 -103 737 -234 1310
|
||||
-420 1834 -125 354 -338 813 -549 1186 l-20 37 146 218 c319 479 532 894 714
|
||||
1396 185 509 320 1129 376 1728 16 179 16 1003 0 1250 -43 628 -153 1174 -366
|
||||
1815 -387 1165 -1000 2160 -1862 3020 -201 200 -331 317 -585 523 -103 83
|
||||
-189 158 -192 167 -9 21 5 122 52 389 197 1106 247 2336 145 3551 -163 1958
|
||||
-769 3565 -1637 4343 -144 130 -205 177 -344 269 -393 260 -825 367 -1298 323
|
||||
-1300 -123 -2362 -1541 -2860 -3819 -98 -448 -201 -1110 -224 -1434 -5 -71
|
||||
-14 -134 -20 -140 -14 -14 -51 4 -269 130 -1321 760 -2750 1079 -4137 922
|
||||
-939 -106 -1883 -422 -2736 -917 -197 -114 -258 -145 -274 -138 -15 5 -19 29
|
||||
-35 236 -23 291 -128 947 -215 1341 -211 957 -522 1775 -912 2399 -176 283
|
||||
-323 469 -543 691 -228 230 -396 360 -642 497 -114 63 -325 151 -422 176 -194
|
||||
51 -651 81 -827 55z m377 -1961 c67 -44 217 -198 286 -294 211 -295 383 -645
|
||||
554 -1123 189 -529 357 -1317 424 -1988 39 -394 59 -905 60 -1498 l0 -290
|
||||
-170 -251 -169 -251 -383 -6 c-600 -9 -872 -46 -1466 -197 -130 -33 -246 -60
|
||||
-258 -60 -33 0 -41 21 -63 176 -72 505 -105 1010 -105 1634 0 690 42 1222 139
|
||||
1793 185 1089 584 2047 976 2341 75 56 106 59 175 14z m14019 -13 c244 -174
|
||||
522 -666 720 -1275 338 -1039 471 -2402 369 -3780 -26 -359 -83 -831 -105
|
||||
-871 -5 -11 -19 -19 -31 -19 -12 0 -136 29 -277 64 -596 150 -851 184 -1454
|
||||
193 l-378 6 -170 251 -170 251 6 535 c9 882 40 1296 141 1890 154 902 418
|
||||
1698 748 2251 180 302 409 549 509 549 19 0 52 -16 92 -45z"/>
|
||||
<path d="M20150 22433 c-389 -29 -834 -89 -1069 -144 -413 -97 -967 -296
|
||||
-1322 -474 -1280 -645 -2169 -1692 -2469 -2910 -74 -300 -75 -307 -75 -820 0
|
||||
-487 2 -515 51 -735 99 -440 295 -889 550 -1256 341 -492 803 -911 1349 -1225
|
||||
621 -356 1384 -585 2131 -641 238 -18 2130 -18 2368 0 1169 87 2281 576 3042
|
||||
1336 374 374 627 760 829 1261 61 152 91 248 139 441 69 281 71 309 71 819 0
|
||||
513 -1 520 -75 822 -182 739 -580 1413 -1176 1993 -406 395 -826 686 -1368
|
||||
950 -661 322 -1372 512 -2091 560 -317 21 -762 32 -885 23z m836 -1333 c270
|
||||
-25 670 -109 961 -201 379 -121 793 -317 1099 -522 344 -229 767 -645 979
|
||||
-962 192 -287 321 -603 394 -970 66 -325 57 -531 -35 -900 -90 -355 -301 -716
|
||||
-603 -1028 -256 -264 -501 -434 -906 -627 -447 -213 -794 -305 -1335 -352
|
||||
-208 -18 -1977 -18 -2180 0 -543 49 -1161 251 -1615 527 -255 155 -563 423
|
||||
-743 647 -257 319 -394 631 -478 1088 -24 132 -26 161 -21 315 7 250 61 509
|
||||
163 782 130 352 389 722 755 1078 297 289 572 480 974 674 558 270 1171 433
|
||||
1740 461 192 10 700 4 851 -10z"/>
|
||||
<path d="M19620 19334 c-30 -8 -73 -26 -95 -39 -62 -36 -166 -152 -202 -225
|
||||
-89 -178 -81 -322 28 -508 56 -93 217 -247 354 -336 66 -43 137 -90 158 -104
|
||||
59 -39 72 -72 70 -177 -1 -49 -11 -142 -22 -205 -75 -411 -74 -408 -56 -476
|
||||
35 -131 161 -296 262 -345 41 -20 72 -24 252 -33 254 -12 330 -3 432 50 89 47
|
||||
169 132 199 210 46 123 43 299 -10 561 -33 165 -39 284 -16 336 18 42 66 86
|
||||
151 137 107 65 204 141 294 231 141 142 198 248 208 394 7 99 -6 164 -54 262
|
||||
-59 122 -193 236 -315 269 -73 19 -286 19 -363 -1 -80 -21 -201 -79 -328 -159
|
||||
-58 -36 -110 -66 -117 -66 -6 0 -60 29 -120 65 -151 89 -260 143 -327 160 -76
|
||||
20 -308 19 -383 -1z"/>
|
||||
<path d="M13766 22188 c-238 -24 -521 -166 -723 -361 -254 -246 -405 -551
|
||||
-458 -927 -19 -140 -19 -328 0 -418 67 -322 293 -638 578 -812 150 -91 358
|
||||
-151 582 -170 381 -31 677 88 965 386 156 162 237 282 330 491 116 261 130
|
||||
333 130 674 l0 227 -69 144 c-39 79 -93 178 -121 221 -66 99 -201 241 -303
|
||||
318 -99 75 -296 172 -402 198 -149 38 -327 48 -509 29z"/>
|
||||
<path d="M26855 22189 c-136 -14 -237 -44 -371 -109 -146 -71 -252 -149 -366
|
||||
-269 -109 -114 -173 -210 -259 -388 l-69 -142 0 -228 c0 -343 14 -414 130
|
||||
-676 96 -216 211 -380 382 -542 271 -259 556 -364 913 -335 140 12 322 51 423
|
||||
91 264 105 523 353 643 615 90 198 114 319 106 531 -12 317 -114 629 -291 888
|
||||
-68 100 -209 246 -304 315 -150 109 -351 203 -502 234 -96 20 -318 27 -435 15z"/>
|
||||
</g>
|
||||
</svg>
|
||||
`
|
||||
)
|
||||
+38
@@ -0,0 +1,38 @@
|
||||
model: claude-3-5-haiku-20241022
|
||||
label:
|
||||
en_US: claude-3-5-haiku-20241022
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- tool-call
|
||||
- stream-tool-call
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 200000
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: top_k
|
||||
label:
|
||||
zh_Hans: 取样数量
|
||||
en_US: Top k
|
||||
type: int
|
||||
help:
|
||||
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
|
||||
en_US: Only sample from the top K options for each subsequent token.
|
||||
required: false
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
required: true
|
||||
default: 8192
|
||||
min: 1
|
||||
max: 8192
|
||||
- name: response_format
|
||||
use_template: response_format
|
||||
pricing:
|
||||
input: '1.00'
|
||||
output: '5.00'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
||||
+40
@@ -0,0 +1,40 @@
|
||||
model: claude-3-5-sonnet-20241022
|
||||
label:
|
||||
en_US: claude-3-5-sonnet-20241022
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- vision
|
||||
- tool-call
|
||||
- stream-tool-call
|
||||
- document
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 200000
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: top_k
|
||||
label:
|
||||
zh_Hans: 取样数量
|
||||
en_US: Top k
|
||||
type: int
|
||||
help:
|
||||
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
|
||||
en_US: Only sample from the top K options for each subsequent token.
|
||||
required: false
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
required: true
|
||||
default: 8192
|
||||
min: 1
|
||||
max: 8192
|
||||
- name: response_format
|
||||
use_template: response_format
|
||||
pricing:
|
||||
input: '3.00'
|
||||
output: '15.00'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
||||
@@ -0,0 +1,26 @@
|
||||
model: ai21.jamba-1-5-large-v1:0
|
||||
label:
|
||||
en_US: Jamba 1.5 Large
|
||||
model_type: llm
|
||||
model_properties:
|
||||
mode: completion
|
||||
context_size: 256000
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
default: 1
|
||||
min: 0.0
|
||||
max: 2.0
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: max_gen_len
|
||||
use_template: max_tokens
|
||||
required: true
|
||||
default: 4096
|
||||
min: 1
|
||||
max: 4096
|
||||
pricing:
|
||||
input: '0.002'
|
||||
output: '0.008'
|
||||
unit: '0.001'
|
||||
currency: USD
|
||||
@@ -0,0 +1,26 @@
|
||||
model: ai21.jamba-1-5-mini-v1:0
|
||||
label:
|
||||
en_US: Jamba 1.5 Mini
|
||||
model_type: llm
|
||||
model_properties:
|
||||
mode: completion
|
||||
context_size: 256000
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
default: 1
|
||||
min: 0.0
|
||||
max: 2.0
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: max_gen_len
|
||||
use_template: max_tokens
|
||||
required: true
|
||||
default: 4096
|
||||
min: 1
|
||||
max: 4096
|
||||
pricing:
|
||||
input: '0.0002'
|
||||
output: '0.0004'
|
||||
unit: '0.001'
|
||||
currency: USD
|
||||
@@ -0,0 +1,53 @@
|
||||
model: amazon.nova-lite-v1:0
|
||||
label:
|
||||
en_US: Nova Lite V1
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- tool-call
|
||||
- stream-tool-call
|
||||
- vision
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 300000
|
||||
parameter_rules:
|
||||
- name: max_new_tokens
|
||||
use_template: max_tokens
|
||||
required: true
|
||||
default: 2048
|
||||
min: 1
|
||||
max: 5000
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
required: false
|
||||
type: float
|
||||
default: 1
|
||||
min: 0.0
|
||||
max: 1.0
|
||||
help:
|
||||
zh_Hans: 生成内容的随机性。
|
||||
en_US: The amount of randomness injected into the response.
|
||||
- name: top_p
|
||||
required: false
|
||||
type: float
|
||||
default: 0.999
|
||||
min: 0.000
|
||||
max: 1.000
|
||||
help:
|
||||
zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
|
||||
en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
|
||||
- name: top_k
|
||||
required: false
|
||||
type: int
|
||||
default: 0
|
||||
min: 0
|
||||
# tip docs from aws has error, max value is 500
|
||||
max: 500
|
||||
help:
|
||||
zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
|
||||
en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
|
||||
pricing:
|
||||
input: '0.00006'
|
||||
output: '0.00024'
|
||||
unit: '0.001'
|
||||
currency: USD
|
||||
@@ -0,0 +1,52 @@
|
||||
model: amazon.nova-micro-v1:0
|
||||
label:
|
||||
en_US: Nova Micro V1
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- tool-call
|
||||
- stream-tool-call
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 128000
|
||||
parameter_rules:
|
||||
- name: max_new_tokens
|
||||
use_template: max_tokens
|
||||
required: true
|
||||
default: 2048
|
||||
min: 1
|
||||
max: 5000
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
required: false
|
||||
type: float
|
||||
default: 1
|
||||
min: 0.0
|
||||
max: 1.0
|
||||
help:
|
||||
zh_Hans: 生成内容的随机性。
|
||||
en_US: The amount of randomness injected into the response.
|
||||
- name: top_p
|
||||
required: false
|
||||
type: float
|
||||
default: 0.999
|
||||
min: 0.000
|
||||
max: 1.000
|
||||
help:
|
||||
zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
|
||||
en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
|
||||
- name: top_k
|
||||
required: false
|
||||
type: int
|
||||
default: 0
|
||||
min: 0
|
||||
# tip docs from aws has error, max value is 500
|
||||
max: 500
|
||||
help:
|
||||
zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
|
||||
en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
|
||||
pricing:
|
||||
input: '0.000035'
|
||||
output: '0.00014'
|
||||
unit: '0.001'
|
||||
currency: USD
|
||||
@@ -0,0 +1,53 @@
|
||||
model: amazon.nova-pro-v1:0
|
||||
label:
|
||||
en_US: Nova Pro V1
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- tool-call
|
||||
- stream-tool-call
|
||||
- vision
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 300000
|
||||
parameter_rules:
|
||||
- name: max_new_tokens
|
||||
use_template: max_tokens
|
||||
required: true
|
||||
default: 2048
|
||||
min: 1
|
||||
max: 5000
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
required: false
|
||||
type: float
|
||||
default: 1
|
||||
min: 0.0
|
||||
max: 1.0
|
||||
help:
|
||||
zh_Hans: 生成内容的随机性。
|
||||
en_US: The amount of randomness injected into the response.
|
||||
- name: top_p
|
||||
required: false
|
||||
type: float
|
||||
default: 0.999
|
||||
min: 0.000
|
||||
max: 1.000
|
||||
help:
|
||||
zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
|
||||
en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
|
||||
- name: top_k
|
||||
required: false
|
||||
type: int
|
||||
default: 0
|
||||
min: 0
|
||||
# tip docs from aws has error, max value is 500
|
||||
max: 500
|
||||
help:
|
||||
zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
|
||||
en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
|
||||
pricing:
|
||||
input: '0.0008'
|
||||
output: '0.0032'
|
||||
unit: '0.001'
|
||||
currency: USD
|
||||
+60
@@ -0,0 +1,60 @@
|
||||
model: anthropic.claude-3-5-haiku-20241022-v1:0
|
||||
label:
|
||||
en_US: Claude 3.5 Haiku
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- tool-call
|
||||
- stream-tool-call
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 200000
|
||||
# docs: https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-anthropic-claude-messages.html
|
||||
parameter_rules:
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
required: true
|
||||
type: int
|
||||
default: 8192
|
||||
min: 1
|
||||
max: 8192
|
||||
help:
|
||||
zh_Hans: 停止前生成的最大令牌数。请注意,Anthropic Claude 模型可能会在达到 max_tokens 的值之前停止生成令牌。不同的 Anthropic Claude 模型对此参数具有不同的最大值。
|
||||
en_US: The maximum number of tokens to generate before stopping. Note that Anthropic Claude models might stop generating tokens before reaching the value of max_tokens. Different Anthropic Claude models have different maximum values for this parameter.
|
||||
# docs: https://docs.anthropic.com/claude/docs/system-prompts
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
required: false
|
||||
type: float
|
||||
default: 1
|
||||
min: 0.0
|
||||
max: 1.0
|
||||
help:
|
||||
zh_Hans: 生成内容的随机性。
|
||||
en_US: The amount of randomness injected into the response.
|
||||
- name: top_p
|
||||
required: false
|
||||
type: float
|
||||
default: 0.999
|
||||
min: 0.000
|
||||
max: 1.000
|
||||
help:
|
||||
zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
|
||||
en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
|
||||
- name: top_k
|
||||
required: false
|
||||
type: int
|
||||
default: 0
|
||||
min: 0
|
||||
# tip docs from aws has error, max value is 500
|
||||
max: 500
|
||||
help:
|
||||
zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
|
||||
en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
|
||||
- name: response_format
|
||||
use_template: response_format
|
||||
pricing:
|
||||
input: '0.001'
|
||||
output: '0.005'
|
||||
unit: '0.001'
|
||||
currency: USD
|
||||
+60
@@ -0,0 +1,60 @@
|
||||
model: anthropic.claude-3-5-sonnet-20241022-v2:0
|
||||
label:
|
||||
en_US: Claude 3.5 Sonnet V2
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- vision
|
||||
- tool-call
|
||||
- stream-tool-call
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 200000
|
||||
# docs: https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-anthropic-claude-messages.html
|
||||
parameter_rules:
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
required: true
|
||||
type: int
|
||||
default: 8192
|
||||
min: 1
|
||||
max: 8192
|
||||
help:
|
||||
zh_Hans: 停止前生成的最大令牌数。请注意,Anthropic Claude 模型可能会在达到 max_tokens 的值之前停止生成令牌。不同的 Anthropic Claude 模型对此参数具有不同的最大值。
|
||||
en_US: The maximum number of tokens to generate before stopping. Note that Anthropic Claude models might stop generating tokens before reaching the value of max_tokens. Different Anthropic Claude models have different maximum values for this parameter.
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
required: false
|
||||
type: float
|
||||
default: 1
|
||||
min: 0.0
|
||||
max: 1.0
|
||||
help:
|
||||
zh_Hans: 生成内容的随机性。
|
||||
en_US: The amount of randomness injected into the response.
|
||||
- name: top_p
|
||||
required: false
|
||||
type: float
|
||||
default: 0.999
|
||||
min: 0.000
|
||||
max: 1.000
|
||||
help:
|
||||
zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
|
||||
en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
|
||||
- name: top_k
|
||||
required: false
|
||||
type: int
|
||||
default: 0
|
||||
min: 0
|
||||
# tip docs from aws has error, max value is 500
|
||||
max: 500
|
||||
help:
|
||||
zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
|
||||
en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
|
||||
- name: response_format
|
||||
use_template: response_format
|
||||
pricing:
|
||||
input: '0.003'
|
||||
output: '0.015'
|
||||
unit: '0.001'
|
||||
currency: USD
|
||||
+60
@@ -0,0 +1,60 @@
|
||||
model: eu.anthropic.claude-3-5-sonnet-20241022-v2:0
|
||||
label:
|
||||
en_US: Claude 3.5 Sonnet V2(EU.Cross Region Inference)
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- vision
|
||||
- tool-call
|
||||
- stream-tool-call
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 200000
|
||||
# docs: https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-anthropic-claude-messages.html
|
||||
parameter_rules:
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
required: true
|
||||
type: int
|
||||
default: 4096
|
||||
min: 1
|
||||
max: 4096
|
||||
help:
|
||||
zh_Hans: 停止前生成的最大令牌数。请注意,Anthropic Claude 模型可能会在达到 max_tokens 的值之前停止生成令牌。不同的 Anthropic Claude 模型对此参数具有不同的最大值。
|
||||
en_US: The maximum number of tokens to generate before stopping. Note that Anthropic Claude models might stop generating tokens before reaching the value of max_tokens. Different Anthropic Claude models have different maximum values for this parameter.
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
required: false
|
||||
type: float
|
||||
default: 1
|
||||
min: 0.0
|
||||
max: 1.0
|
||||
help:
|
||||
zh_Hans: 生成内容的随机性。
|
||||
en_US: The amount of randomness injected into the response.
|
||||
- name: top_p
|
||||
required: false
|
||||
type: float
|
||||
default: 0.999
|
||||
min: 0.000
|
||||
max: 1.000
|
||||
help:
|
||||
zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
|
||||
en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
|
||||
- name: top_k
|
||||
required: false
|
||||
type: int
|
||||
default: 0
|
||||
min: 0
|
||||
# tip docs from aws has error, max value is 500
|
||||
max: 500
|
||||
help:
|
||||
zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
|
||||
en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
|
||||
- name: response_format
|
||||
use_template: response_format
|
||||
pricing:
|
||||
input: '0.003'
|
||||
output: '0.015'
|
||||
unit: '0.001'
|
||||
currency: USD
|
||||
@@ -0,0 +1,53 @@
|
||||
model: us.amazon.nova-lite-v1:0
|
||||
label:
|
||||
en_US: Nova Lite V1 (US.Cross Region Inference)
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- tool-call
|
||||
- stream-tool-call
|
||||
- vision
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 300000
|
||||
parameter_rules:
|
||||
- name: max_new_tokens
|
||||
use_template: max_tokens
|
||||
required: true
|
||||
default: 2048
|
||||
min: 1
|
||||
max: 5000
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
required: false
|
||||
type: float
|
||||
default: 1
|
||||
min: 0.0
|
||||
max: 1.0
|
||||
help:
|
||||
zh_Hans: 生成内容的随机性。
|
||||
en_US: The amount of randomness injected into the response.
|
||||
- name: top_p
|
||||
required: false
|
||||
type: float
|
||||
default: 0.999
|
||||
min: 0.000
|
||||
max: 1.000
|
||||
help:
|
||||
zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
|
||||
en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
|
||||
- name: top_k
|
||||
required: false
|
||||
type: int
|
||||
default: 0
|
||||
min: 0
|
||||
# tip docs from aws has error, max value is 500
|
||||
max: 500
|
||||
help:
|
||||
zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
|
||||
en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
|
||||
pricing:
|
||||
input: '0.00006'
|
||||
output: '0.00024'
|
||||
unit: '0.001'
|
||||
currency: USD
|
||||
@@ -0,0 +1,52 @@
|
||||
model: us.amazon.nova-micro-v1:0
|
||||
label:
|
||||
en_US: Nova Micro V1 (US.Cross Region Inference)
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- tool-call
|
||||
- stream-tool-call
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 128000
|
||||
parameter_rules:
|
||||
- name: max_new_tokens
|
||||
use_template: max_tokens
|
||||
required: true
|
||||
default: 2048
|
||||
min: 1
|
||||
max: 5000
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
required: false
|
||||
type: float
|
||||
default: 1
|
||||
min: 0.0
|
||||
max: 1.0
|
||||
help:
|
||||
zh_Hans: 生成内容的随机性。
|
||||
en_US: The amount of randomness injected into the response.
|
||||
- name: top_p
|
||||
required: false
|
||||
type: float
|
||||
default: 0.999
|
||||
min: 0.000
|
||||
max: 1.000
|
||||
help:
|
||||
zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
|
||||
en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
|
||||
- name: top_k
|
||||
required: false
|
||||
type: int
|
||||
default: 0
|
||||
min: 0
|
||||
# tip docs from aws has error, max value is 500
|
||||
max: 500
|
||||
help:
|
||||
zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
|
||||
en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
|
||||
pricing:
|
||||
input: '0.000035'
|
||||
output: '0.00014'
|
||||
unit: '0.001'
|
||||
currency: USD
|
||||
@@ -0,0 +1,53 @@
|
||||
model: us.amazon.nova-pro-v1:0
|
||||
label:
|
||||
en_US: Nova Pro V1 (US.Cross Region Inference)
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- tool-call
|
||||
- stream-tool-call
|
||||
- vision
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 300000
|
||||
parameter_rules:
|
||||
- name: max_new_tokens
|
||||
use_template: max_tokens
|
||||
required: true
|
||||
default: 2048
|
||||
min: 1
|
||||
max: 5000
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
required: false
|
||||
type: float
|
||||
default: 1
|
||||
min: 0.0
|
||||
max: 1.0
|
||||
help:
|
||||
zh_Hans: 生成内容的随机性。
|
||||
en_US: The amount of randomness injected into the response.
|
||||
- name: top_p
|
||||
required: false
|
||||
type: float
|
||||
default: 0.999
|
||||
min: 0.000
|
||||
max: 1.000
|
||||
help:
|
||||
zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
|
||||
en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
|
||||
- name: top_k
|
||||
required: false
|
||||
type: int
|
||||
default: 0
|
||||
min: 0
|
||||
# tip docs from aws has error, max value is 500
|
||||
max: 500
|
||||
help:
|
||||
zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
|
||||
en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
|
||||
pricing:
|
||||
input: '0.0008'
|
||||
output: '0.0032'
|
||||
unit: '0.001'
|
||||
currency: USD
|
||||
+60
@@ -0,0 +1,60 @@
|
||||
model: us.anthropic.claude-3-5-haiku-20241022-v1:0
|
||||
label:
|
||||
en_US: Claude 3.5 Haiku(US.Cross Region Inference)
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- tool-call
|
||||
- stream-tool-call
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 200000
|
||||
# docs: https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-anthropic-claude-messages.html
|
||||
parameter_rules:
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
required: true
|
||||
type: int
|
||||
default: 8192
|
||||
min: 1
|
||||
max: 8192
|
||||
help:
|
||||
zh_Hans: 停止前生成的最大令牌数。请注意,Anthropic Claude 模型可能会在达到 max_tokens 的值之前停止生成令牌。不同的 Anthropic Claude 模型对此参数具有不同的最大值。
|
||||
en_US: The maximum number of tokens to generate before stopping. Note that Anthropic Claude models might stop generating tokens before reaching the value of max_tokens. Different Anthropic Claude models have different maximum values for this parameter.
|
||||
# docs: https://docs.anthropic.com/claude/docs/system-prompts
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
required: false
|
||||
type: float
|
||||
default: 1
|
||||
min: 0.0
|
||||
max: 1.0
|
||||
help:
|
||||
zh_Hans: 生成内容的随机性。
|
||||
en_US: The amount of randomness injected into the response.
|
||||
- name: top_p
|
||||
required: false
|
||||
type: float
|
||||
default: 0.999
|
||||
min: 0.000
|
||||
max: 1.000
|
||||
help:
|
||||
zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
|
||||
en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
|
||||
- name: top_k
|
||||
required: false
|
||||
type: int
|
||||
default: 0
|
||||
min: 0
|
||||
# tip docs from aws has error, max value is 500
|
||||
max: 500
|
||||
help:
|
||||
zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
|
||||
en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
|
||||
- name: response_format
|
||||
use_template: response_format
|
||||
pricing:
|
||||
input: '0.001'
|
||||
output: '0.005'
|
||||
unit: '0.001'
|
||||
currency: USD
|
||||
+60
@@ -0,0 +1,60 @@
|
||||
model: us.anthropic.claude-3-5-sonnet-20241022-v2:0
|
||||
label:
|
||||
en_US: Claude 3.5 Sonnet V2(US.Cross Region Inference)
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- vision
|
||||
- tool-call
|
||||
- stream-tool-call
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 200000
|
||||
# docs: https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-anthropic-claude-messages.html
|
||||
parameter_rules:
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
required: true
|
||||
type: int
|
||||
default: 8192
|
||||
min: 1
|
||||
max: 8192
|
||||
help:
|
||||
zh_Hans: 停止前生成的最大令牌数。请注意,Anthropic Claude 模型可能会在达到 max_tokens 的值之前停止生成令牌。不同的 Anthropic Claude 模型对此参数具有不同的最大值。
|
||||
en_US: The maximum number of tokens to generate before stopping. Note that Anthropic Claude models might stop generating tokens before reaching the value of max_tokens. Different Anthropic Claude models have different maximum values for this parameter.
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
required: false
|
||||
type: float
|
||||
default: 1
|
||||
min: 0.0
|
||||
max: 1.0
|
||||
help:
|
||||
zh_Hans: 生成内容的随机性。
|
||||
en_US: The amount of randomness injected into the response.
|
||||
- name: top_p
|
||||
required: false
|
||||
type: float
|
||||
default: 0.999
|
||||
min: 0.000
|
||||
max: 1.000
|
||||
help:
|
||||
zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
|
||||
en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
|
||||
- name: top_k
|
||||
required: false
|
||||
type: int
|
||||
default: 0
|
||||
min: 0
|
||||
# tip docs from aws has error, max value is 500
|
||||
max: 500
|
||||
help:
|
||||
zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
|
||||
en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
|
||||
- name: response_format
|
||||
use_template: response_format
|
||||
pricing:
|
||||
input: '0.003'
|
||||
output: '0.015'
|
||||
unit: '0.001'
|
||||
currency: USD
|
||||
+29
@@ -0,0 +1,29 @@
|
||||
model: us.meta.llama3-2-11b-instruct-v1:0
|
||||
label:
|
||||
en_US: US Meta Llama 3.2 11B Instruct
|
||||
model_type: llm
|
||||
features:
|
||||
- vision
|
||||
- tool-call
|
||||
model_properties:
|
||||
mode: completion
|
||||
context_size: 128000
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
default: 0.5
|
||||
min: 0.0
|
||||
max: 1
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: max_gen_len
|
||||
use_template: max_tokens
|
||||
required: true
|
||||
default: 512
|
||||
min: 1
|
||||
max: 2048
|
||||
pricing:
|
||||
input: '0.00035'
|
||||
output: '0.00035'
|
||||
unit: '0.001'
|
||||
currency: USD
|
||||
+26
@@ -0,0 +1,26 @@
|
||||
model: us.meta.llama3-2-1b-instruct-v1:0
|
||||
label:
|
||||
en_US: US Meta Llama 3.2 1B Instruct
|
||||
model_type: llm
|
||||
model_properties:
|
||||
mode: completion
|
||||
context_size: 128000
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
default: 0.5
|
||||
min: 0.0
|
||||
max: 1
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: max_gen_len
|
||||
use_template: max_tokens
|
||||
required: true
|
||||
default: 512
|
||||
min: 1
|
||||
max: 2048
|
||||
pricing:
|
||||
input: '0.0001'
|
||||
output: '0.0001'
|
||||
unit: '0.001'
|
||||
currency: USD
|
||||
+26
@@ -0,0 +1,26 @@
|
||||
model: us.meta.llama3-2-3b-instruct-v1:0
|
||||
label:
|
||||
en_US: US Meta Llama 3.2 3B Instruct
|
||||
model_type: llm
|
||||
model_properties:
|
||||
mode: completion
|
||||
context_size: 128000
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
default: 0.5
|
||||
min: 0.0
|
||||
max: 1
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: max_gen_len
|
||||
use_template: max_tokens
|
||||
required: true
|
||||
default: 512
|
||||
min: 1
|
||||
max: 2048
|
||||
pricing:
|
||||
input: '0.00015'
|
||||
output: '0.00015'
|
||||
unit: '0.001'
|
||||
currency: USD
|
||||
+31
@@ -0,0 +1,31 @@
|
||||
model: us.meta.llama3-2-90b-instruct-v1:0
|
||||
label:
|
||||
en_US: US Meta Llama 3.2 90B Instruct
|
||||
model_type: llm
|
||||
features:
|
||||
- tool-call
|
||||
model_properties:
|
||||
mode: completion
|
||||
context_size: 128000
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
default: 0.5
|
||||
min: 0.0
|
||||
max: 1
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
default: 0.9
|
||||
min: 0
|
||||
max: 1
|
||||
- name: max_gen_len
|
||||
use_template: max_tokens
|
||||
required: true
|
||||
default: 512
|
||||
min: 1
|
||||
max: 2048
|
||||
pricing:
|
||||
input: '0.002'
|
||||
output: '0.002'
|
||||
unit: '0.001'
|
||||
currency: USD
|
||||
@@ -0,0 +1,21 @@
|
||||
model: deepseek-reasoner
|
||||
label:
|
||||
zh_Hans: deepseek-reasoner
|
||||
en_US: deepseek-reasoner
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 64000
|
||||
parameter_rules:
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
min: 1
|
||||
max: 8192
|
||||
default: 4096
|
||||
pricing:
|
||||
input: "4"
|
||||
output: "16"
|
||||
unit: "0.000001"
|
||||
currency: RMB
|
||||
+46
@@ -0,0 +1,46 @@
|
||||
model: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
|
||||
label:
|
||||
zh_Hans: Llama 3.2 11B Vision Instruct
|
||||
en_US: Llama 3.2 11B Vision Instruct
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- tool-call
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 131072
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: top_k
|
||||
label:
|
||||
zh_Hans: 取样数量
|
||||
en_US: Top k
|
||||
type: int
|
||||
help:
|
||||
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
|
||||
en_US: Only sample from the top K options for each subsequent token.
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
- name: context_length_exceeded_behavior
|
||||
default: None
|
||||
label:
|
||||
zh_Hans: 上下文长度超出行为
|
||||
en_US: Context Length Exceeded Behavior
|
||||
help:
|
||||
zh_Hans: 上下文长度超出行为
|
||||
en_US: Context Length Exceeded Behavior
|
||||
type: string
|
||||
options:
|
||||
- None
|
||||
- truncate
|
||||
- error
|
||||
- name: response_format
|
||||
use_template: response_format
|
||||
pricing:
|
||||
input: '0.2'
|
||||
output: '0.2'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
||||
@@ -0,0 +1,46 @@
|
||||
model: accounts/fireworks/models/llama-v3p2-1b-instruct
|
||||
label:
|
||||
zh_Hans: Llama 3.2 1B Instruct
|
||||
en_US: Llama 3.2 1B Instruct
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- tool-call
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 131072
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: top_k
|
||||
label:
|
||||
zh_Hans: 取样数量
|
||||
en_US: Top k
|
||||
type: int
|
||||
help:
|
||||
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
|
||||
en_US: Only sample from the top K options for each subsequent token.
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
- name: context_length_exceeded_behavior
|
||||
default: None
|
||||
label:
|
||||
zh_Hans: 上下文长度超出行为
|
||||
en_US: Context Length Exceeded Behavior
|
||||
help:
|
||||
zh_Hans: 上下文长度超出行为
|
||||
en_US: Context Length Exceeded Behavior
|
||||
type: string
|
||||
options:
|
||||
- None
|
||||
- truncate
|
||||
- error
|
||||
- name: response_format
|
||||
use_template: response_format
|
||||
pricing:
|
||||
input: '0.1'
|
||||
output: '0.1'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
||||
@@ -0,0 +1,46 @@
|
||||
model: accounts/fireworks/models/llama-v3p2-3b-instruct
|
||||
label:
|
||||
zh_Hans: Llama 3.2 3B Instruct
|
||||
en_US: Llama 3.2 3B Instruct
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- tool-call
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 131072
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: top_k
|
||||
label:
|
||||
zh_Hans: 取样数量
|
||||
en_US: Top k
|
||||
type: int
|
||||
help:
|
||||
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
|
||||
en_US: Only sample from the top K options for each subsequent token.
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
- name: context_length_exceeded_behavior
|
||||
default: None
|
||||
label:
|
||||
zh_Hans: 上下文长度超出行为
|
||||
en_US: Context Length Exceeded Behavior
|
||||
help:
|
||||
zh_Hans: 上下文长度超出行为
|
||||
en_US: Context Length Exceeded Behavior
|
||||
type: string
|
||||
options:
|
||||
- None
|
||||
- truncate
|
||||
- error
|
||||
- name: response_format
|
||||
use_template: response_format
|
||||
pricing:
|
||||
input: '0.1'
|
||||
output: '0.1'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
||||
+46
@@ -0,0 +1,46 @@
|
||||
model: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
|
||||
label:
|
||||
zh_Hans: Llama 3.2 90B Vision Instruct
|
||||
en_US: Llama 3.2 90B Vision Instruct
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- tool-call
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 131072
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: top_k
|
||||
label:
|
||||
zh_Hans: 取样数量
|
||||
en_US: Top k
|
||||
type: int
|
||||
help:
|
||||
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
|
||||
en_US: Only sample from the top K options for each subsequent token.
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
- name: context_length_exceeded_behavior
|
||||
default: None
|
||||
label:
|
||||
zh_Hans: 上下文长度超出行为
|
||||
en_US: Context Length Exceeded Behavior
|
||||
help:
|
||||
zh_Hans: 上下文长度超出行为
|
||||
en_US: Context Length Exceeded Behavior
|
||||
type: string
|
||||
options:
|
||||
- None
|
||||
- truncate
|
||||
- error
|
||||
- name: response_format
|
||||
use_template: response_format
|
||||
pricing:
|
||||
input: '0.9'
|
||||
output: '0.9'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
||||
@@ -0,0 +1,46 @@
|
||||
model: accounts/fireworks/models/qwen2p5-72b-instruct
|
||||
label:
|
||||
zh_Hans: Qwen2.5 72B Instruct
|
||||
en_US: Qwen2.5 72B Instruct
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- tool-call
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 32768
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: top_k
|
||||
label:
|
||||
zh_Hans: 取样数量
|
||||
en_US: Top k
|
||||
type: int
|
||||
help:
|
||||
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
|
||||
en_US: Only sample from the top K options for each subsequent token.
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
- name: context_length_exceeded_behavior
|
||||
default: None
|
||||
label:
|
||||
zh_Hans: 上下文长度超出行为
|
||||
en_US: Context Length Exceeded Behavior
|
||||
help:
|
||||
zh_Hans: 上下文长度超出行为
|
||||
en_US: Context Length Exceeded Behavior
|
||||
type: string
|
||||
options:
|
||||
- None
|
||||
- truncate
|
||||
- error
|
||||
- name: response_format
|
||||
use_template: response_format
|
||||
pricing:
|
||||
input: '0.9'
|
||||
output: '0.9'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
||||
@@ -0,0 +1,41 @@
|
||||
model: gemini-1.5-flash-001
|
||||
label:
|
||||
en_US: Gemini 1.5 Flash 001
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- vision
|
||||
- tool-call
|
||||
- stream-tool-call
|
||||
- document
|
||||
- video
|
||||
- audio
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 1048576
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: top_k
|
||||
label:
|
||||
zh_Hans: 取样数量
|
||||
en_US: Top k
|
||||
type: int
|
||||
help:
|
||||
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
|
||||
en_US: Only sample from the top K options for each subsequent token.
|
||||
required: false
|
||||
- name: max_output_tokens
|
||||
use_template: max_tokens
|
||||
default: 8192
|
||||
min: 1
|
||||
max: 8192
|
||||
- name: json_schema
|
||||
use_template: json_schema
|
||||
pricing:
|
||||
input: '0.00'
|
||||
output: '0.00'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
||||
@@ -0,0 +1,41 @@
|
||||
model: gemini-1.5-flash-002
|
||||
label:
|
||||
en_US: Gemini 1.5 Flash 002
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- vision
|
||||
- tool-call
|
||||
- stream-tool-call
|
||||
- document
|
||||
- video
|
||||
- audio
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 1048576
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: top_k
|
||||
label:
|
||||
zh_Hans: 取样数量
|
||||
en_US: Top k
|
||||
type: int
|
||||
help:
|
||||
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
|
||||
en_US: Only sample from the top K options for each subsequent token.
|
||||
required: false
|
||||
- name: max_output_tokens
|
||||
use_template: max_tokens
|
||||
default: 8192
|
||||
min: 1
|
||||
max: 8192
|
||||
- name: json_schema
|
||||
use_template: json_schema
|
||||
pricing:
|
||||
input: '0.00'
|
||||
output: '0.00'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
||||
+41
@@ -0,0 +1,41 @@
|
||||
model: gemini-1.5-flash-8b-exp-0924
|
||||
label:
|
||||
en_US: Gemini 1.5 Flash 8B 0924
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- vision
|
||||
- tool-call
|
||||
- stream-tool-call
|
||||
- document
|
||||
- video
|
||||
- audio
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 1048576
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: top_k
|
||||
label:
|
||||
zh_Hans: 取样数量
|
||||
en_US: Top k
|
||||
type: int
|
||||
help:
|
||||
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
|
||||
en_US: Only sample from the top K options for each subsequent token.
|
||||
required: false
|
||||
- name: max_output_tokens
|
||||
use_template: max_tokens
|
||||
default: 8192
|
||||
min: 1
|
||||
max: 8192
|
||||
- name: json_schema
|
||||
use_template: json_schema
|
||||
pricing:
|
||||
input: '0.00'
|
||||
output: '0.00'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
||||
@@ -0,0 +1,41 @@
|
||||
model: gemini-1.5-flash
|
||||
label:
|
||||
en_US: Gemini 1.5 Flash
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- vision
|
||||
- tool-call
|
||||
- stream-tool-call
|
||||
- document
|
||||
- video
|
||||
- audio
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 1048576
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: top_k
|
||||
label:
|
||||
zh_Hans: 取样数量
|
||||
en_US: Top k
|
||||
type: int
|
||||
help:
|
||||
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
|
||||
en_US: Only sample from the top K options for each subsequent token.
|
||||
required: false
|
||||
- name: max_output_tokens
|
||||
use_template: max_tokens
|
||||
default: 8192
|
||||
min: 1
|
||||
max: 8192
|
||||
- name: json_schema
|
||||
use_template: json_schema
|
||||
pricing:
|
||||
input: '0.00'
|
||||
output: '0.00'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
||||
@@ -0,0 +1,41 @@
|
||||
model: gemini-1.5-pro-001
|
||||
label:
|
||||
en_US: Gemini 1.5 Pro 001
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- vision
|
||||
- tool-call
|
||||
- stream-tool-call
|
||||
- document
|
||||
- video
|
||||
- audio
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 2097152
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: top_k
|
||||
label:
|
||||
zh_Hans: 取样数量
|
||||
en_US: Top k
|
||||
type: int
|
||||
help:
|
||||
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
|
||||
en_US: Only sample from the top K options for each subsequent token.
|
||||
required: false
|
||||
- name: max_output_tokens
|
||||
use_template: max_tokens
|
||||
default: 8192
|
||||
min: 1
|
||||
max: 8192
|
||||
- name: json_schema
|
||||
use_template: json_schema
|
||||
pricing:
|
||||
input: '0.00'
|
||||
output: '0.00'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
||||
@@ -0,0 +1,41 @@
|
||||
model: gemini-1.5-pro-002
|
||||
label:
|
||||
en_US: Gemini 1.5 Pro 002
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- vision
|
||||
- tool-call
|
||||
- stream-tool-call
|
||||
- document
|
||||
- video
|
||||
- audio
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 2097152
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: top_k
|
||||
label:
|
||||
zh_Hans: 取样数量
|
||||
en_US: Top k
|
||||
type: int
|
||||
help:
|
||||
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
|
||||
en_US: Only sample from the top K options for each subsequent token.
|
||||
required: false
|
||||
- name: max_output_tokens
|
||||
use_template: max_tokens
|
||||
default: 8192
|
||||
min: 1
|
||||
max: 8192
|
||||
- name: json_schema
|
||||
use_template: json_schema
|
||||
pricing:
|
||||
input: '0.00'
|
||||
output: '0.00'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
||||
@@ -0,0 +1,41 @@
|
||||
model: gemini-1.5-pro
|
||||
label:
|
||||
en_US: Gemini 1.5 Pro
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- vision
|
||||
- tool-call
|
||||
- stream-tool-call
|
||||
- document
|
||||
- video
|
||||
- audio
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 2097152
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: top_k
|
||||
label:
|
||||
zh_Hans: 取样数量
|
||||
en_US: Top k
|
||||
type: int
|
||||
help:
|
||||
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
|
||||
en_US: Only sample from the top K options for each subsequent token.
|
||||
required: false
|
||||
- name: max_output_tokens
|
||||
use_template: max_tokens
|
||||
default: 8192
|
||||
min: 1
|
||||
max: 8192
|
||||
- name: json_schema
|
||||
use_template: json_schema
|
||||
pricing:
|
||||
input: '0.00'
|
||||
output: '0.00'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
||||
@@ -0,0 +1,41 @@
|
||||
model: gemini-2.0-flash-001
|
||||
label:
|
||||
en_US: Gemini 2.0 Flash 001
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- vision
|
||||
- tool-call
|
||||
- stream-tool-call
|
||||
- document
|
||||
- video
|
||||
- audio
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 1048576
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: top_k
|
||||
label:
|
||||
zh_Hans: 取样数量
|
||||
en_US: Top k
|
||||
type: int
|
||||
help:
|
||||
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
|
||||
en_US: Only sample from the top K options for each subsequent token.
|
||||
required: false
|
||||
- name: max_output_tokens
|
||||
use_template: max_tokens
|
||||
default: 8192
|
||||
min: 1
|
||||
max: 8192
|
||||
- name: json_schema
|
||||
use_template: json_schema
|
||||
pricing:
|
||||
input: '0.00'
|
||||
output: '0.00'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
||||
@@ -0,0 +1,41 @@
|
||||
model: gemini-2.0-flash-exp
|
||||
label:
|
||||
en_US: Gemini 2.0 Flash Exp
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- vision
|
||||
- tool-call
|
||||
- stream-tool-call
|
||||
- document
|
||||
- video
|
||||
- audio
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 1048576
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: top_k
|
||||
label:
|
||||
zh_Hans: 取样数量
|
||||
en_US: Top k
|
||||
type: int
|
||||
help:
|
||||
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
|
||||
en_US: Only sample from the top K options for each subsequent token.
|
||||
required: false
|
||||
- name: max_output_tokens
|
||||
use_template: max_tokens
|
||||
default: 8192
|
||||
min: 1
|
||||
max: 8192
|
||||
- name: json_schema
|
||||
use_template: json_schema
|
||||
pricing:
|
||||
input: '0.00'
|
||||
output: '0.00'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
||||
+41
@@ -0,0 +1,41 @@
|
||||
model: gemini-2.0-flash-lite-preview-02-05
|
||||
label:
|
||||
en_US: Gemini 2.0 Flash Lite Preview 0205
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- vision
|
||||
- tool-call
|
||||
- stream-tool-call
|
||||
- document
|
||||
- video
|
||||
- audio
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 1048576
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: top_k
|
||||
label:
|
||||
zh_Hans: 取样数量
|
||||
en_US: Top k
|
||||
type: int
|
||||
help:
|
||||
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
|
||||
en_US: Only sample from the top K options for each subsequent token.
|
||||
required: false
|
||||
- name: max_output_tokens
|
||||
use_template: max_tokens
|
||||
default: 8192
|
||||
min: 1
|
||||
max: 8192
|
||||
- name: json_schema
|
||||
use_template: json_schema
|
||||
pricing:
|
||||
input: '0.00'
|
||||
output: '0.00'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
||||
+39
@@ -0,0 +1,39 @@
|
||||
model: gemini-2.0-flash-thinking-exp-01-21
|
||||
label:
|
||||
en_US: Gemini 2.0 Flash Thinking Exp 01-21
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- vision
|
||||
- document
|
||||
- video
|
||||
- audio
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 32767
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: top_k
|
||||
label:
|
||||
zh_Hans: 取样数量
|
||||
en_US: Top k
|
||||
type: int
|
||||
help:
|
||||
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
|
||||
en_US: Only sample from the top K options for each subsequent token.
|
||||
required: false
|
||||
- name: max_output_tokens
|
||||
use_template: max_tokens
|
||||
default: 8192
|
||||
min: 1
|
||||
max: 8192
|
||||
- name: json_schema
|
||||
use_template: json_schema
|
||||
pricing:
|
||||
input: '0.00'
|
||||
output: '0.00'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
||||
+39
@@ -0,0 +1,39 @@
|
||||
model: gemini-2.0-flash-thinking-exp-1219
|
||||
label:
|
||||
en_US: Gemini 2.0 Flash Thinking Exp 1219
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- vision
|
||||
- document
|
||||
- video
|
||||
- audio
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 32767
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: top_k
|
||||
label:
|
||||
zh_Hans: 取样数量
|
||||
en_US: Top k
|
||||
type: int
|
||||
help:
|
||||
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
|
||||
en_US: Only sample from the top K options for each subsequent token.
|
||||
required: false
|
||||
- name: max_output_tokens
|
||||
use_template: max_tokens
|
||||
default: 8192
|
||||
min: 1
|
||||
max: 8192
|
||||
- name: json_schema
|
||||
use_template: json_schema
|
||||
pricing:
|
||||
input: '0.00'
|
||||
output: '0.00'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
||||
@@ -0,0 +1,41 @@
|
||||
model: gemini-2.0-pro-exp-02-05
|
||||
label:
|
||||
en_US: Gemini 2.0 pro exp 02-05
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- vision
|
||||
- tool-call
|
||||
- stream-tool-call
|
||||
- document
|
||||
- video
|
||||
- audio
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 1048576
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: top_k
|
||||
label:
|
||||
zh_Hans: 取样数量
|
||||
en_US: Top k
|
||||
type: int
|
||||
help:
|
||||
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
|
||||
en_US: Only sample from the top K options for each subsequent token.
|
||||
required: false
|
||||
- name: max_output_tokens
|
||||
use_template: max_tokens
|
||||
default: 8192
|
||||
min: 1
|
||||
max: 8192
|
||||
- name: json_schema
|
||||
use_template: json_schema
|
||||
pricing:
|
||||
input: '0.00'
|
||||
output: '0.00'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
||||
@@ -0,0 +1,41 @@
|
||||
model: gemini-exp-1114
|
||||
label:
|
||||
en_US: Gemini exp 1114
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- vision
|
||||
- tool-call
|
||||
- stream-tool-call
|
||||
- document
|
||||
- video
|
||||
- audio
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 32767
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: top_k
|
||||
label:
|
||||
zh_Hans: 取样数量
|
||||
en_US: Top k
|
||||
type: int
|
||||
help:
|
||||
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
|
||||
en_US: Only sample from the top K options for each subsequent token.
|
||||
required: false
|
||||
- name: max_output_tokens
|
||||
use_template: max_tokens
|
||||
default: 8192
|
||||
min: 1
|
||||
max: 8192
|
||||
- name: json_schema
|
||||
use_template: json_schema
|
||||
pricing:
|
||||
input: '0.00'
|
||||
output: '0.00'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
||||
@@ -0,0 +1,41 @@
|
||||
model: gemini-exp-1121
|
||||
label:
|
||||
en_US: Gemini exp 1121
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- vision
|
||||
- tool-call
|
||||
- stream-tool-call
|
||||
- document
|
||||
- video
|
||||
- audio
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 32767
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: top_k
|
||||
label:
|
||||
zh_Hans: 取样数量
|
||||
en_US: Top k
|
||||
type: int
|
||||
help:
|
||||
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
|
||||
en_US: Only sample from the top K options for each subsequent token.
|
||||
required: false
|
||||
- name: max_output_tokens
|
||||
use_template: max_tokens
|
||||
default: 8192
|
||||
min: 1
|
||||
max: 8192
|
||||
- name: json_schema
|
||||
use_template: json_schema
|
||||
pricing:
|
||||
input: '0.00'
|
||||
output: '0.00'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
||||
@@ -0,0 +1,41 @@
|
||||
model: gemini-exp-1206
|
||||
label:
|
||||
en_US: Gemini exp 1206
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- vision
|
||||
- tool-call
|
||||
- stream-tool-call
|
||||
- document
|
||||
- video
|
||||
- audio
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 2097152
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: top_k
|
||||
label:
|
||||
zh_Hans: 取样数量
|
||||
en_US: Top k
|
||||
type: int
|
||||
help:
|
||||
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
|
||||
en_US: Only sample from the top K options for each subsequent token.
|
||||
required: false
|
||||
- name: max_output_tokens
|
||||
use_template: max_tokens
|
||||
default: 8192
|
||||
min: 1
|
||||
max: 8192
|
||||
- name: json_schema
|
||||
use_template: json_schema
|
||||
pricing:
|
||||
input: '0.00'
|
||||
output: '0.00'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
||||
+41
@@ -0,0 +1,41 @@
|
||||
model: learnlm-1.5-pro-experimental
|
||||
label:
|
||||
en_US: LearnLM 1.5 Pro Experimental
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- vision
|
||||
- tool-call
|
||||
- stream-tool-call
|
||||
- document
|
||||
- video
|
||||
- audio
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 32767
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: top_k
|
||||
label:
|
||||
zh_Hans: 取样数量
|
||||
en_US: Top k
|
||||
type: int
|
||||
help:
|
||||
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
|
||||
en_US: Only sample from the top K options for each subsequent token.
|
||||
required: false
|
||||
- name: max_output_tokens
|
||||
use_template: max_tokens
|
||||
default: 8192
|
||||
min: 1
|
||||
max: 8192
|
||||
- name: json_schema
|
||||
use_template: json_schema
|
||||
pricing:
|
||||
input: '0.00'
|
||||
output: '0.00'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
||||
@@ -0,0 +1,36 @@
|
||||
model: deepseek-r1-distill-llama-70b
|
||||
label:
|
||||
en_US: DeepSeek R1 Distill Llama 70b
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 128000
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
default: 512
|
||||
min: 1
|
||||
max: 8192
|
||||
- name: response_format
|
||||
label:
|
||||
zh_Hans: 回复格式
|
||||
en_US: Response Format
|
||||
type: string
|
||||
help:
|
||||
zh_Hans: 指定模型必须输出的格式
|
||||
en_US: specifying the format that the model must output
|
||||
required: false
|
||||
options:
|
||||
- text
|
||||
- json_object
|
||||
pricing:
|
||||
input: '3.00'
|
||||
output: '3.00'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
||||
@@ -0,0 +1,37 @@
|
||||
model: gemma-7b-it
|
||||
label:
|
||||
zh_Hans: Gemma 7B Instruction Tuned
|
||||
en_US: Gemma 7B Instruction Tuned
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 8192
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
default: 512
|
||||
min: 1
|
||||
max: 8192
|
||||
- name: response_format
|
||||
label:
|
||||
zh_Hans: 回复格式
|
||||
en_US: Response Format
|
||||
type: string
|
||||
help:
|
||||
zh_Hans: 指定模型必须输出的格式
|
||||
en_US: specifying the format that the model must output
|
||||
required: false
|
||||
options:
|
||||
- text
|
||||
- json_object
|
||||
pricing:
|
||||
input: '0.05'
|
||||
output: '0.1'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
||||
@@ -0,0 +1,37 @@
|
||||
model: gemma2-9b-it
|
||||
label:
|
||||
zh_Hans: Gemma 2 9B Instruction Tuned
|
||||
en_US: Gemma 2 9B Instruction Tuned
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 8192
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
default: 512
|
||||
min: 1
|
||||
max: 8192
|
||||
- name: response_format
|
||||
label:
|
||||
zh_Hans: 回复格式
|
||||
en_US: Response Format
|
||||
type: string
|
||||
help:
|
||||
zh_Hans: 指定模型必须输出的格式
|
||||
en_US: specifying the format that the model must output
|
||||
required: false
|
||||
options:
|
||||
- text
|
||||
- json_object
|
||||
pricing:
|
||||
input: '0.05'
|
||||
output: '0.1'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
||||
@@ -0,0 +1,38 @@
|
||||
model: llama-3.2-11b-text-preview
|
||||
deprecated: true
|
||||
label:
|
||||
zh_Hans: Llama 3.2 11B Text (Preview)
|
||||
en_US: Llama 3.2 11B Text (Preview)
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 131072
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
default: 512
|
||||
min: 1
|
||||
max: 8192
|
||||
- name: response_format
|
||||
label:
|
||||
zh_Hans: 回复格式
|
||||
en_US: Response Format
|
||||
type: string
|
||||
help:
|
||||
zh_Hans: 指定模型必须输出的格式
|
||||
en_US: specifying the format that the model must output
|
||||
required: false
|
||||
options:
|
||||
- text
|
||||
- json_object
|
||||
pricing:
|
||||
input: '0.05'
|
||||
output: '0.1'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
||||
@@ -0,0 +1,38 @@
|
||||
model: llama-3.2-11b-vision-preview
|
||||
label:
|
||||
zh_Hans: Llama 3.2 11B Vision (Preview)
|
||||
en_US: Llama 3.2 11B Vision (Preview)
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- vision
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 131072
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
default: 512
|
||||
min: 1
|
||||
max: 8192
|
||||
- name: response_format
|
||||
label:
|
||||
zh_Hans: 回复格式
|
||||
en_US: Response Format
|
||||
type: string
|
||||
help:
|
||||
zh_Hans: 指定模型必须输出的格式
|
||||
en_US: specifying the format that the model must output
|
||||
required: false
|
||||
options:
|
||||
- text
|
||||
- json_object
|
||||
pricing:
|
||||
input: '0.05'
|
||||
output: '0.1'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
||||
@@ -0,0 +1,37 @@
|
||||
model: llama-3.2-1b-preview
|
||||
label:
|
||||
zh_Hans: Llama 3.2 1B Text (Preview)
|
||||
en_US: Llama 3.2 1B Text (Preview)
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 131072
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
default: 512
|
||||
min: 1
|
||||
max: 8192
|
||||
- name: response_format
|
||||
label:
|
||||
zh_Hans: 回复格式
|
||||
en_US: Response Format
|
||||
type: string
|
||||
help:
|
||||
zh_Hans: 指定模型必须输出的格式
|
||||
en_US: specifying the format that the model must output
|
||||
required: false
|
||||
options:
|
||||
- text
|
||||
- json_object
|
||||
pricing:
|
||||
input: '0.05'
|
||||
output: '0.1'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
||||
@@ -0,0 +1,37 @@
|
||||
model: llama-3.2-3b-preview
|
||||
label:
|
||||
zh_Hans: Llama 3.2 3B Text (Preview)
|
||||
en_US: Llama 3.2 3B Text (Preview)
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 131072
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
default: 512
|
||||
min: 1
|
||||
max: 8192
|
||||
- name: response_format
|
||||
label:
|
||||
zh_Hans: 回复格式
|
||||
en_US: Response Format
|
||||
type: string
|
||||
help:
|
||||
zh_Hans: 指定模型必须输出的格式
|
||||
en_US: specifying the format that the model must output
|
||||
required: false
|
||||
options:
|
||||
- text
|
||||
- json_object
|
||||
pricing:
|
||||
input: '0.05'
|
||||
output: '0.1'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
||||
@@ -0,0 +1,38 @@
|
||||
model: llama-3.2-90b-text-preview
|
||||
depraceted: true
|
||||
label:
|
||||
zh_Hans: Llama 3.2 90B Text (Preview)
|
||||
en_US: Llama 3.2 90B Text (Preview)
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 131072
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
default: 512
|
||||
min: 1
|
||||
max: 8192
|
||||
- name: response_format
|
||||
label:
|
||||
zh_Hans: 回复格式
|
||||
en_US: Response Format
|
||||
type: string
|
||||
help:
|
||||
zh_Hans: 指定模型必须输出的格式
|
||||
en_US: specifying the format that the model must output
|
||||
required: false
|
||||
options:
|
||||
- text
|
||||
- json_object
|
||||
pricing:
|
||||
input: '0.05'
|
||||
output: '0.1'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
||||
@@ -0,0 +1,38 @@
|
||||
model: llama-3.2-90b-vision-preview
|
||||
label:
|
||||
zh_Hans: Llama 3.2 90B Vision (Preview)
|
||||
en_US: Llama 3.2 90B Vision (Preview)
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- vision
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 131072
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
default: 512
|
||||
min: 1
|
||||
max: 8192
|
||||
- name: response_format
|
||||
label:
|
||||
zh_Hans: 回复格式
|
||||
en_US: Response Format
|
||||
type: string
|
||||
help:
|
||||
zh_Hans: 指定模型必须输出的格式
|
||||
en_US: specifying the format that the model must output
|
||||
required: false
|
||||
options:
|
||||
- text
|
||||
- json_object
|
||||
pricing:
|
||||
input: '0.05'
|
||||
output: '0.1'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
||||
@@ -0,0 +1,38 @@
|
||||
model: llama-3.3-70b-specdec
|
||||
label:
|
||||
zh_Hans: Llama 3.3 70B Specdec
|
||||
en_US: Llama 3.3 70B Specdec
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- multi-tool-call
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 131072
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
default: 1024
|
||||
min: 1
|
||||
max: 32768
|
||||
- name: response_format
|
||||
label:
|
||||
zh_Hans: 回复格式
|
||||
en_US: Response Format
|
||||
type: string
|
||||
help:
|
||||
zh_Hans: 指定模型必须输出的格式
|
||||
en_US: specifying the format that the model must output
|
||||
required: false
|
||||
options:
|
||||
- text
|
||||
- json_object
|
||||
pricing:
|
||||
input: "0.05"
|
||||
output: "0.1"
|
||||
unit: "0.000001"
|
||||
currency: USD
|
||||
@@ -0,0 +1,38 @@
|
||||
model: llama-3.3-70b-versatile
|
||||
label:
|
||||
zh_Hans: Llama 3.3 70B Versatile
|
||||
en_US: Llama 3.3 70B Versatile
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- multi-tool-call
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 131072
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
default: 1024
|
||||
min: 1
|
||||
max: 32768
|
||||
- name: response_format
|
||||
label:
|
||||
zh_Hans: 回复格式
|
||||
en_US: Response Format
|
||||
type: string
|
||||
help:
|
||||
zh_Hans: 指定模型必须输出的格式
|
||||
en_US: specifying the format that the model must output
|
||||
required: false
|
||||
options:
|
||||
- text
|
||||
- json_object
|
||||
pricing:
|
||||
input: "0.05"
|
||||
output: "0.1"
|
||||
unit: "0.000001"
|
||||
currency: USD
|
||||
@@ -0,0 +1,37 @@
|
||||
model: llama-guard-3-8b
|
||||
label:
|
||||
zh_Hans: Llama-Guard-3-8B
|
||||
en_US: Llama-Guard-3-8B
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 8192
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
default: 512
|
||||
min: 1
|
||||
max: 8192
|
||||
- name: response_format
|
||||
label:
|
||||
zh_Hans: 回复格式
|
||||
en_US: Response Format
|
||||
type: string
|
||||
help:
|
||||
zh_Hans: 指定模型必须输出的格式
|
||||
en_US: specifying the format that the model must output
|
||||
required: false
|
||||
options:
|
||||
- text
|
||||
- json_object
|
||||
pricing:
|
||||
input: '0.20'
|
||||
output: '0.20'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
||||
+38
@@ -0,0 +1,38 @@
|
||||
model: llama3-groq-70b-8192-tool-use-preview
|
||||
label:
|
||||
zh_Hans: Llama3-groq-70b-8192-tool-use (PREVIEW)
|
||||
en_US: Llama3-groq-70b-8192-tool-use (PREVIEW)
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- multi-tool-call
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 8192
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
default: 512
|
||||
min: 1
|
||||
max: 8192
|
||||
- name: response_format
|
||||
label:
|
||||
zh_Hans: 回复格式
|
||||
en_US: Response Format
|
||||
type: string
|
||||
help:
|
||||
zh_Hans: 指定模型必须输出的格式
|
||||
en_US: specifying the format that the model must output
|
||||
required: false
|
||||
options:
|
||||
- text
|
||||
- json_object
|
||||
pricing:
|
||||
input: '0.05'
|
||||
output: '0.08'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
||||
@@ -0,0 +1,38 @@
|
||||
model: hunyuan-functioncall
|
||||
label:
|
||||
zh_Hans: hunyuan-functioncall
|
||||
en_US: hunyuan-functioncall
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- tool-call
|
||||
- multi-tool-call
|
||||
- stream-tool-call
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 32000
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
default: 1024
|
||||
min: 1
|
||||
max: 32000
|
||||
- name: enable_enhance
|
||||
label:
|
||||
zh_Hans: 功能增强
|
||||
en_US: Enable Enhancement
|
||||
type: boolean
|
||||
help:
|
||||
zh_Hans: 功能增强(如搜索)开关,关闭时将直接由主模型生成回复内容,可以降低响应时延(对于流式输出时的首字时延尤为明显)。但在少数场景里,回复效果可能会下降。
|
||||
en_US: Allow the model to perform external search to enhance the generation results.
|
||||
required: false
|
||||
default: true
|
||||
pricing:
|
||||
input: '0.004'
|
||||
output: '0.008'
|
||||
unit: '0.001'
|
||||
currency: RMB
|
||||
@@ -0,0 +1,38 @@
|
||||
model: hunyuan-large-longcontext
|
||||
label:
|
||||
zh_Hans: hunyuan-large-longcontext
|
||||
en_US: hunyuan-large-longcontext
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- tool-call
|
||||
- multi-tool-call
|
||||
- stream-tool-call
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 134000
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
default: 1024
|
||||
min: 1
|
||||
max: 134000
|
||||
- name: enable_enhance
|
||||
label:
|
||||
zh_Hans: 功能增强
|
||||
en_US: Enable Enhancement
|
||||
type: boolean
|
||||
help:
|
||||
zh_Hans: 功能增强(如搜索)开关,关闭时将直接由主模型生成回复内容,可以降低响应时延(对于流式输出时的首字时延尤为明显)。但在少数场景里,回复效果可能会下降。
|
||||
en_US: Allow the model to perform external search to enhance the generation results.
|
||||
required: false
|
||||
default: true
|
||||
pricing:
|
||||
input: '0.006'
|
||||
output: '0.018'
|
||||
unit: '0.001'
|
||||
currency: RMB
|
||||
@@ -0,0 +1,38 @@
|
||||
model: hunyuan-large-role
|
||||
label:
|
||||
zh_Hans: hunyuan-large-role
|
||||
en_US: hunyuan-large-role
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- tool-call
|
||||
- multi-tool-call
|
||||
- stream-tool-call
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 32000
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
default: 1024
|
||||
min: 1
|
||||
max: 32000
|
||||
- name: enable_enhance
|
||||
label:
|
||||
zh_Hans: 功能增强
|
||||
en_US: Enable Enhancement
|
||||
type: boolean
|
||||
help:
|
||||
zh_Hans: 功能增强(如搜索)开关,关闭时将直接由主模型生成回复内容,可以降低响应时延(对于流式输出时的首字时延尤为明显)。但在少数场景里,回复效果可能会下降。
|
||||
en_US: Allow the model to perform external search to enhance the generation results.
|
||||
required: false
|
||||
default: true
|
||||
pricing:
|
||||
input: '0.004'
|
||||
output: '0.008'
|
||||
unit: '0.001'
|
||||
currency: RMB
|
||||
@@ -0,0 +1,38 @@
|
||||
model: hunyuan-large
|
||||
label:
|
||||
zh_Hans: hunyuan-large
|
||||
en_US: hunyuan-large
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- tool-call
|
||||
- multi-tool-call
|
||||
- stream-tool-call
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 32000
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
default: 1024
|
||||
min: 1
|
||||
max: 32000
|
||||
- name: enable_enhance
|
||||
label:
|
||||
zh_Hans: 功能增强
|
||||
en_US: Enable Enhancement
|
||||
type: boolean
|
||||
help:
|
||||
zh_Hans: 功能增强(如搜索)开关,关闭时将直接由主模型生成回复内容,可以降低响应时延(对于流式输出时的首字时延尤为明显)。但在少数场景里,回复效果可能会下降。
|
||||
en_US: Allow the model to perform external search to enhance the generation results.
|
||||
required: false
|
||||
default: true
|
||||
pricing:
|
||||
input: '0.004'
|
||||
output: '0.012'
|
||||
unit: '0.001'
|
||||
currency: RMB
|
||||
@@ -0,0 +1,38 @@
|
||||
model: hunyuan-role
|
||||
label:
|
||||
zh_Hans: hunyuan-role
|
||||
en_US: hunyuan-role
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- tool-call
|
||||
- multi-tool-call
|
||||
- stream-tool-call
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 32000
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
default: 1024
|
||||
min: 1
|
||||
max: 32000
|
||||
- name: enable_enhance
|
||||
label:
|
||||
zh_Hans: 功能增强
|
||||
en_US: Enable Enhancement
|
||||
type: boolean
|
||||
help:
|
||||
zh_Hans: 功能增强(如搜索)开关,关闭时将直接由主模型生成回复内容,可以降低响应时延(对于流式输出时的首字时延尤为明显)。但在少数场景里,回复效果可能会下降。
|
||||
en_US: Allow the model to perform external search to enhance the generation results.
|
||||
required: false
|
||||
default: true
|
||||
pricing:
|
||||
input: '0.004'
|
||||
output: '0.008'
|
||||
unit: '0.001'
|
||||
currency: RMB
|
||||
@@ -0,0 +1,38 @@
|
||||
model: hunyuan-turbo-latest
|
||||
label:
|
||||
zh_Hans: hunyuan-turbo-latest
|
||||
en_US: hunyuan-turbo-latest
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- tool-call
|
||||
- multi-tool-call
|
||||
- stream-tool-call
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 32000
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
default: 1024
|
||||
min: 1
|
||||
max: 32000
|
||||
- name: enable_enhance
|
||||
label:
|
||||
zh_Hans: 功能增强
|
||||
en_US: Enable Enhancement
|
||||
type: boolean
|
||||
help:
|
||||
zh_Hans: 功能增强(如搜索)开关,关闭时将直接由主模型生成回复内容,可以降低响应时延(对于流式输出时的首字时延尤为明显)。但在少数场景里,回复效果可能会下降。
|
||||
en_US: Allow the model to perform external search to enhance the generation results.
|
||||
required: false
|
||||
default: true
|
||||
pricing:
|
||||
input: '0.015'
|
||||
output: '0.05'
|
||||
unit: '0.001'
|
||||
currency: RMB
|
||||
@@ -0,0 +1,39 @@
|
||||
model: hunyuan-turbo-vision
|
||||
label:
|
||||
zh_Hans: hunyuan-turbo-vision
|
||||
en_US: hunyuan-turbo-vision
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- tool-call
|
||||
- multi-tool-call
|
||||
- stream-tool-call
|
||||
- vision
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 8000
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
default: 1024
|
||||
min: 1
|
||||
max: 8000
|
||||
- name: enable_enhance
|
||||
label:
|
||||
zh_Hans: 功能增强
|
||||
en_US: Enable Enhancement
|
||||
type: boolean
|
||||
help:
|
||||
zh_Hans: 功能增强(如搜索)开关,关闭时将直接由主模型生成回复内容,可以降低响应时延(对于流式输出时的首字时延尤为明显)。但在少数场景里,回复效果可能会下降。
|
||||
en_US: Allow the model to perform external search to enhance the generation results.
|
||||
required: false
|
||||
default: true
|
||||
pricing:
|
||||
input: '0.08'
|
||||
output: '0.08'
|
||||
unit: '0.001'
|
||||
currency: RMB
|
||||
@@ -0,0 +1,44 @@
|
||||
model: abab6.5t-chat
|
||||
label:
|
||||
en_US: Abab6.5t-Chat
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 8192
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
min: 0.01
|
||||
max: 1
|
||||
default: 0.9
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
min: 0.01
|
||||
max: 1
|
||||
default: 0.95
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
required: true
|
||||
default: 3072
|
||||
min: 1
|
||||
max: 8192
|
||||
- name: mask_sensitive_info
|
||||
type: boolean
|
||||
default: true
|
||||
label:
|
||||
zh_Hans: 隐私保护
|
||||
en_US: Moderate
|
||||
help:
|
||||
zh_Hans: 对输出中易涉及隐私问题的文本信息进行打码,目前包括但不限于邮箱、域名、链接、证件号、家庭住址等,默认true,即开启打码
|
||||
en_US: Mask the sensitive info of the generated content, such as email/domain/link/address/phone/id..
|
||||
- name: presence_penalty
|
||||
use_template: presence_penalty
|
||||
- name: frequency_penalty
|
||||
use_template: frequency_penalty
|
||||
pricing:
|
||||
input: '0.005'
|
||||
output: '0.005'
|
||||
unit: '0.001'
|
||||
currency: RMB
|
||||
@@ -0,0 +1,46 @@
|
||||
model: abab7-chat-preview
|
||||
label:
|
||||
en_US: Abab7-chat-preview
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- tool-call
|
||||
- stream-tool-call
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 245760
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
min: 0.01
|
||||
max: 1
|
||||
default: 0.1
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
min: 0.01
|
||||
max: 1
|
||||
default: 0.95
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
required: true
|
||||
default: 2048
|
||||
min: 1
|
||||
max: 245760
|
||||
- name: mask_sensitive_info
|
||||
type: boolean
|
||||
default: true
|
||||
label:
|
||||
zh_Hans: 隐私保护
|
||||
en_US: Moderate
|
||||
help:
|
||||
zh_Hans: 对输出中易涉及隐私问题的文本信息进行打码,目前包括但不限于邮箱、域名、链接、证件号、家庭住址等,默认true,即开启打码
|
||||
en_US: Mask the sensitive info of the generated content, such as email/domain/link/address/phone/id..
|
||||
- name: presence_penalty
|
||||
use_template: presence_penalty
|
||||
- name: frequency_penalty
|
||||
use_template: frequency_penalty
|
||||
pricing:
|
||||
input: '0.1'
|
||||
output: '0.1'
|
||||
unit: '0.001'
|
||||
currency: RMB
|
||||
@@ -0,0 +1,46 @@
|
||||
model: minimax-text-01
|
||||
label:
|
||||
en_US: Minimax-Text-01
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- tool-call
|
||||
- stream-tool-call
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 1000192
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
min: 0.01
|
||||
max: 1
|
||||
default: 0.1
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
min: 0.01
|
||||
max: 1
|
||||
default: 0.95
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
required: true
|
||||
default: 2048
|
||||
min: 1
|
||||
max: 1000192
|
||||
- name: mask_sensitive_info
|
||||
type: boolean
|
||||
default: true
|
||||
label:
|
||||
zh_Hans: 隐私保护
|
||||
en_US: Moderate
|
||||
help:
|
||||
zh_Hans: 对输出中易涉及隐私问题的文本信息进行打码,目前包括但不限于邮箱、域名、链接、证件号、家庭住址等,默认true,即开启打码
|
||||
en_US: Mask the sensitive info of the generated content, such as email/domain/link/address/phone/id..
|
||||
- name: presence_penalty
|
||||
use_template: presence_penalty
|
||||
- name: frequency_penalty
|
||||
use_template: frequency_penalty
|
||||
pricing:
|
||||
input: '0.001'
|
||||
output: '0.008'
|
||||
unit: '0.001'
|
||||
currency: RMB
|
||||
@@ -0,0 +1,52 @@
|
||||
model: pixtral-large-2411
|
||||
label:
|
||||
zh_Hans: pixtral-large-2411
|
||||
en_US: pixtral-large-2411
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- vision
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 128000
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
default: 0.7
|
||||
min: 0
|
||||
max: 1
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
default: 1
|
||||
min: 0
|
||||
max: 1
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
default: 8192
|
||||
min: 1
|
||||
max: 8192
|
||||
- name: safe_prompt
|
||||
default: false
|
||||
type: boolean
|
||||
help:
|
||||
en_US: Whether to inject a safety prompt before all conversations.
|
||||
zh_Hans: 是否开启提示词审查
|
||||
label:
|
||||
en_US: SafePrompt
|
||||
zh_Hans: 提示词审查
|
||||
- name: random_seed
|
||||
type: int
|
||||
help:
|
||||
en_US: The seed to use for random sampling. If set, different calls will generate deterministic results.
|
||||
zh_Hans: 当开启随机数种子以后,你可以通过指定一个固定的种子来使得回答结果更加稳定
|
||||
label:
|
||||
en_US: RandomSeed
|
||||
zh_Hans: 随机数种子
|
||||
default: 0
|
||||
min: 0
|
||||
max: 2147483647
|
||||
pricing:
|
||||
input: '0.008'
|
||||
output: '0.024'
|
||||
unit: '0.001'
|
||||
currency: USD
|
||||
@@ -0,0 +1,52 @@
|
||||
model: pixtral-large-latest
|
||||
label:
|
||||
zh_Hans: pixtral-large-latest
|
||||
en_US: pixtral-large-latest
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- vision
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 128000
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
default: 0.7
|
||||
min: 0
|
||||
max: 1
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
default: 1
|
||||
min: 0
|
||||
max: 1
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
default: 8192
|
||||
min: 1
|
||||
max: 8192
|
||||
- name: safe_prompt
|
||||
default: false
|
||||
type: boolean
|
||||
help:
|
||||
en_US: Whether to inject a safety prompt before all conversations.
|
||||
zh_Hans: 是否开启提示词审查
|
||||
label:
|
||||
en_US: SafePrompt
|
||||
zh_Hans: 提示词审查
|
||||
- name: random_seed
|
||||
type: int
|
||||
help:
|
||||
en_US: The seed to use for random sampling. If set, different calls will generate deterministic results.
|
||||
zh_Hans: 当开启随机数种子以后,你可以通过指定一个固定的种子来使得回答结果更加稳定
|
||||
label:
|
||||
en_US: RandomSeed
|
||||
zh_Hans: 随机数种子
|
||||
default: 0
|
||||
min: 0
|
||||
max: 2147483647
|
||||
pricing:
|
||||
input: '0.008'
|
||||
output: '0.024'
|
||||
unit: '0.001'
|
||||
currency: USD
|
||||
@@ -0,0 +1,41 @@
|
||||
model: Sao10K/L3-8B-Stheno-v3.2
|
||||
label:
|
||||
zh_Hans: L3 8B Stheno V3.2
|
||||
en_US: L3 8B Stheno V3.2
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 8192
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
min: 0
|
||||
max: 2
|
||||
default: 1
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
min: 0
|
||||
max: 1
|
||||
default: 1
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
min: 1
|
||||
max: 2048
|
||||
default: 512
|
||||
- name: frequency_penalty
|
||||
use_template: frequency_penalty
|
||||
min: -2
|
||||
max: 2
|
||||
default: 0
|
||||
- name: presence_penalty
|
||||
use_template: presence_penalty
|
||||
min: -2
|
||||
max: 2
|
||||
default: 0
|
||||
pricing:
|
||||
input: '0.0005'
|
||||
output: '0.0005'
|
||||
unit: '0.0001'
|
||||
currency: USD
|
||||
@@ -0,0 +1,41 @@
|
||||
model: deepseek/deepseek-r1
|
||||
label:
|
||||
zh_Hans: DeepSeek R1
|
||||
en_US: DeepSeek R1
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 64000
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
min: 0
|
||||
max: 2
|
||||
default: 1
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
min: 0
|
||||
max: 1
|
||||
default: 1
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
min: 1
|
||||
max: 2048
|
||||
default: 512
|
||||
- name: frequency_penalty
|
||||
use_template: frequency_penalty
|
||||
min: -2
|
||||
max: 2
|
||||
default: 0
|
||||
- name: presence_penalty
|
||||
use_template: presence_penalty
|
||||
min: -2
|
||||
max: 2
|
||||
default: 0
|
||||
pricing:
|
||||
input: '0.04'
|
||||
output: '0.04'
|
||||
unit: '0.0001'
|
||||
currency: USD
|
||||
@@ -0,0 +1,41 @@
|
||||
model: deepseek/deepseek_v3
|
||||
label:
|
||||
zh_Hans: DeepSeek V3
|
||||
en_US: DeepSeek V3
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 64000
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
min: 0
|
||||
max: 2
|
||||
default: 1
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
min: 0
|
||||
max: 1
|
||||
default: 1
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
min: 1
|
||||
max: 2048
|
||||
default: 512
|
||||
- name: frequency_penalty
|
||||
use_template: frequency_penalty
|
||||
min: -2
|
||||
max: 2
|
||||
default: 0
|
||||
- name: presence_penalty
|
||||
use_template: presence_penalty
|
||||
min: -2
|
||||
max: 2
|
||||
default: 0
|
||||
pricing:
|
||||
input: '0.0089'
|
||||
output: '0.0089'
|
||||
unit: '0.0001'
|
||||
currency: USD
|
||||
@@ -0,0 +1,41 @@
|
||||
model: sao10k/l3-8b-lunaris
|
||||
label:
|
||||
zh_Hans: "Sao10k L3 8B Lunaris"
|
||||
en_US: "Sao10k L3 8B Lunaris"
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 8192
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
min: 0
|
||||
max: 2
|
||||
default: 1
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
min: 0
|
||||
max: 1
|
||||
default: 1
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
min: 1
|
||||
max: 2048
|
||||
default: 512
|
||||
- name: frequency_penalty
|
||||
use_template: frequency_penalty
|
||||
min: -2
|
||||
max: 2
|
||||
default: 0
|
||||
- name: presence_penalty
|
||||
use_template: presence_penalty
|
||||
min: -2
|
||||
max: 2
|
||||
default: 0
|
||||
pricing:
|
||||
input: '0.0005'
|
||||
output: '0.0005'
|
||||
unit: '0.0001'
|
||||
currency: USD
|
||||
@@ -0,0 +1,41 @@
|
||||
model: sao10k/l31-70b-euryale-v2.2
|
||||
label:
|
||||
zh_Hans: L31 70B Euryale V2.2
|
||||
en_US: L31 70B Euryale V2.2
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 16000
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
min: 0
|
||||
max: 2
|
||||
default: 1
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
min: 0
|
||||
max: 1
|
||||
default: 1
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
min: 1
|
||||
max: 2048
|
||||
default: 512
|
||||
- name: frequency_penalty
|
||||
use_template: frequency_penalty
|
||||
min: -2
|
||||
max: 2
|
||||
default: 0
|
||||
- name: presence_penalty
|
||||
use_template: presence_penalty
|
||||
min: -2
|
||||
max: 2
|
||||
default: 0
|
||||
pricing:
|
||||
input: '0.0148'
|
||||
output: '0.0148'
|
||||
unit: '0.0001'
|
||||
currency: USD
|
||||
@@ -0,0 +1,41 @@
|
||||
model: meta-llama/llama-3.1-8b-instruct-bf16
|
||||
label:
|
||||
zh_Hans: Llama 3.1 8B Instruct BF16
|
||||
en_US: Llama 3.1 8B Instruct BF16
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 8192
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
min: 0
|
||||
max: 2
|
||||
default: 1
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
min: 0
|
||||
max: 1
|
||||
default: 1
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
min: 1
|
||||
max: 2048
|
||||
default: 512
|
||||
- name: frequency_penalty
|
||||
use_template: frequency_penalty
|
||||
min: -2
|
||||
max: 2
|
||||
default: 0
|
||||
- name: presence_penalty
|
||||
use_template: presence_penalty
|
||||
min: -2
|
||||
max: 2
|
||||
default: 0
|
||||
pricing:
|
||||
input: '0.0006'
|
||||
output: '0.0006'
|
||||
unit: '0.0001'
|
||||
currency: USD
|
||||
@@ -0,0 +1,41 @@
|
||||
model: meta-llama/llama-3.1-8b-instruct-max
|
||||
label:
|
||||
zh_Hans: "Llama3.1 8B Instruct Max\t"
|
||||
en_US: "Llama3.1 8B Instruct Max\t"
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 16384
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
min: 0
|
||||
max: 2
|
||||
default: 1
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
min: 0
|
||||
max: 1
|
||||
default: 1
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
min: 1
|
||||
max: 2048
|
||||
default: 512
|
||||
- name: frequency_penalty
|
||||
use_template: frequency_penalty
|
||||
min: -2
|
||||
max: 2
|
||||
default: 0
|
||||
- name: presence_penalty
|
||||
use_template: presence_penalty
|
||||
min: -2
|
||||
max: 2
|
||||
default: 0
|
||||
pricing:
|
||||
input: '0.0005'
|
||||
output: '0.0005'
|
||||
unit: '0.0001'
|
||||
currency: USD
|
||||
+41
@@ -0,0 +1,41 @@
|
||||
model: meta-llama/llama-3.2-11b-vision-instruct
|
||||
label:
|
||||
zh_Hans: "Llama 3.2 11B Vision Instruct\t"
|
||||
en_US: "Llama 3.2 11B Vision Instruct\t"
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 32768
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
min: 0
|
||||
max: 2
|
||||
default: 1
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
min: 0
|
||||
max: 1
|
||||
default: 1
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
min: 1
|
||||
max: 2048
|
||||
default: 512
|
||||
- name: frequency_penalty
|
||||
use_template: frequency_penalty
|
||||
min: -2
|
||||
max: 2
|
||||
default: 0
|
||||
- name: presence_penalty
|
||||
use_template: presence_penalty
|
||||
min: -2
|
||||
max: 2
|
||||
default: 0
|
||||
pricing:
|
||||
input: '0.0006'
|
||||
output: '0.0006'
|
||||
unit: '0.0001'
|
||||
currency: USD
|
||||
@@ -0,0 +1,41 @@
|
||||
model: meta-llama/llama-3.2-1b-instruct
|
||||
label:
|
||||
zh_Hans: "Llama 3.2 1B Instruct\t"
|
||||
en_US: "Llama 3.2 1B Instruct\t"
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 131000
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
min: 0
|
||||
max: 2
|
||||
default: 1
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
min: 0
|
||||
max: 1
|
||||
default: 1
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
min: 1
|
||||
max: 2048
|
||||
default: 512
|
||||
- name: frequency_penalty
|
||||
use_template: frequency_penalty
|
||||
min: -2
|
||||
max: 2
|
||||
default: 0
|
||||
- name: presence_penalty
|
||||
use_template: presence_penalty
|
||||
min: -2
|
||||
max: 2
|
||||
default: 0
|
||||
pricing:
|
||||
input: '0.0002'
|
||||
output: '0.0002'
|
||||
unit: '0.0001'
|
||||
currency: USD
|
||||
@@ -0,0 +1,41 @@
|
||||
model: meta-llama/llama-3.2-3b-instruct
|
||||
label:
|
||||
zh_Hans: Llama 3.2 3B Instruct
|
||||
en_US: Llama 3.2 3B Instruct
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 32768
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
min: 0
|
||||
max: 2
|
||||
default: 1
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
min: 0
|
||||
max: 1
|
||||
default: 1
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
min: 1
|
||||
max: 2048
|
||||
default: 512
|
||||
- name: frequency_penalty
|
||||
use_template: frequency_penalty
|
||||
min: -2
|
||||
max: 2
|
||||
default: 0
|
||||
- name: presence_penalty
|
||||
use_template: presence_penalty
|
||||
min: -2
|
||||
max: 2
|
||||
default: 0
|
||||
pricing:
|
||||
input: '0.0003'
|
||||
output: '0.0005'
|
||||
unit: '0.0001'
|
||||
currency: USD
|
||||
@@ -0,0 +1,41 @@
|
||||
model: meta-llama/llama-3.3-70b-instruct
|
||||
label:
|
||||
zh_Hans: Llama 3.3 70B Instruct
|
||||
en_US: Llama 3.3 70B Instruct
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 131072
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
min: 0
|
||||
max: 2
|
||||
default: 1
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
min: 0
|
||||
max: 1
|
||||
default: 1
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
min: 1
|
||||
max: 2048
|
||||
default: 512
|
||||
- name: frequency_penalty
|
||||
use_template: frequency_penalty
|
||||
min: -2
|
||||
max: 2
|
||||
default: 0
|
||||
- name: presence_penalty
|
||||
use_template: presence_penalty
|
||||
min: -2
|
||||
max: 2
|
||||
default: 0
|
||||
pricing:
|
||||
input: '0.0039'
|
||||
output: '0.0039'
|
||||
unit: '0.0001'
|
||||
currency: USD
|
||||
@@ -1,69 +0,0 @@
|
||||
from collections.abc import Generator
|
||||
from typing import Optional, Union
|
||||
|
||||
from core.model_runtime.entities.llm_entities import LLMResult
|
||||
from core.model_runtime.entities.message_entities import PromptMessage, PromptMessageTool
|
||||
from core.model_runtime.entities.model_entities import AIModelEntity
|
||||
from core.model_runtime.model_providers.openai_api_compatible.llm.llm import OAIAPICompatLargeLanguageModel
|
||||
|
||||
|
||||
class NovitaLargeLanguageModel(OAIAPICompatLargeLanguageModel):
|
||||
def _update_endpoint_url(self, credentials: dict):
|
||||
credentials["endpoint_url"] = "https://api.novita.ai/v3/openai"
|
||||
credentials["extra_headers"] = {"X-Novita-Source": "dify.ai"}
|
||||
return credentials
|
||||
|
||||
def _invoke(
|
||||
self,
|
||||
model: str,
|
||||
credentials: dict,
|
||||
prompt_messages: list[PromptMessage],
|
||||
model_parameters: dict,
|
||||
tools: Optional[list[PromptMessageTool]] = None,
|
||||
stop: Optional[list[str]] = None,
|
||||
stream: bool = True,
|
||||
user: Optional[str] = None,
|
||||
) -> Union[LLMResult, Generator]:
|
||||
cred_with_endpoint = self._update_endpoint_url(credentials=credentials)
|
||||
return super()._invoke(model, cred_with_endpoint, prompt_messages, model_parameters, tools, stop, stream, user)
|
||||
|
||||
def validate_credentials(self, model: str, credentials: dict) -> None:
|
||||
cred_with_endpoint = self._update_endpoint_url(credentials=credentials)
|
||||
self._add_custom_parameters(credentials, model)
|
||||
return super().validate_credentials(model, cred_with_endpoint)
|
||||
|
||||
@classmethod
|
||||
def _add_custom_parameters(cls, credentials: dict, model: str) -> None:
|
||||
credentials["mode"] = "chat"
|
||||
|
||||
def _generate(
|
||||
self,
|
||||
model: str,
|
||||
credentials: dict,
|
||||
prompt_messages: list[PromptMessage],
|
||||
model_parameters: dict,
|
||||
tools: Optional[list[PromptMessageTool]] = None,
|
||||
stop: Optional[list[str]] = None,
|
||||
stream: bool = True,
|
||||
user: Optional[str] = None,
|
||||
) -> Union[LLMResult, Generator]:
|
||||
cred_with_endpoint = self._update_endpoint_url(credentials=credentials)
|
||||
return super()._generate(
|
||||
model, cred_with_endpoint, prompt_messages, model_parameters, tools, stop, stream, user
|
||||
)
|
||||
|
||||
def get_customizable_model_schema(self, model: str, credentials: dict) -> AIModelEntity:
|
||||
cred_with_endpoint = self._update_endpoint_url(credentials=credentials)
|
||||
|
||||
return super().get_customizable_model_schema(model, cred_with_endpoint)
|
||||
|
||||
def get_num_tokens(
|
||||
self,
|
||||
model: str,
|
||||
credentials: dict,
|
||||
prompt_messages: list[PromptMessage],
|
||||
tools: Optional[list[PromptMessageTool]] = None,
|
||||
) -> int:
|
||||
cred_with_endpoint = self._update_endpoint_url(credentials=credentials)
|
||||
|
||||
return super().get_num_tokens(model, cred_with_endpoint, prompt_messages, tools)
|
||||
@@ -0,0 +1,41 @@
|
||||
model: mistralai/mistral-nemo
|
||||
label:
|
||||
zh_Hans: Mistral Nemo
|
||||
en_US: Mistral Nemo
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 131072
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
min: 0
|
||||
max: 2
|
||||
default: 1
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
min: 0
|
||||
max: 1
|
||||
default: 1
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
min: 1
|
||||
max: 2048
|
||||
default: 512
|
||||
- name: frequency_penalty
|
||||
use_template: frequency_penalty
|
||||
min: -2
|
||||
max: 2
|
||||
default: 0
|
||||
- name: presence_penalty
|
||||
use_template: presence_penalty
|
||||
min: -2
|
||||
max: 2
|
||||
default: 0
|
||||
pricing:
|
||||
input: '0.0017'
|
||||
output: '0.0017'
|
||||
unit: '0.0001'
|
||||
currency: USD
|
||||
@@ -0,0 +1,41 @@
|
||||
model: openchat/openchat-7b
|
||||
label:
|
||||
zh_Hans: OpenChat 7B
|
||||
en_US: OpenChat 7B
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 4096
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
min: 0
|
||||
max: 2
|
||||
default: 1
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
min: 0
|
||||
max: 1
|
||||
default: 1
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
min: 1
|
||||
max: 2048
|
||||
default: 512
|
||||
- name: frequency_penalty
|
||||
use_template: frequency_penalty
|
||||
min: -2
|
||||
max: 2
|
||||
default: 0
|
||||
- name: presence_penalty
|
||||
use_template: presence_penalty
|
||||
min: -2
|
||||
max: 2
|
||||
default: 0
|
||||
pricing:
|
||||
input: '0.0006'
|
||||
output: '0.0006'
|
||||
unit: '0.0001'
|
||||
currency: USD
|
||||
@@ -0,0 +1,41 @@
|
||||
model: qwen/qwen-2-72b-instruct
|
||||
label:
|
||||
zh_Hans: Qwen2 72B Instruct
|
||||
en_US: Qwen2 72B Instruct
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 32768
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
min: 0
|
||||
max: 2
|
||||
default: 1
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
min: 0
|
||||
max: 1
|
||||
default: 1
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
min: 1
|
||||
max: 2048
|
||||
default: 512
|
||||
- name: frequency_penalty
|
||||
use_template: frequency_penalty
|
||||
min: -2
|
||||
max: 2
|
||||
default: 0
|
||||
- name: presence_penalty
|
||||
use_template: presence_penalty
|
||||
min: -2
|
||||
max: 2
|
||||
default: 0
|
||||
pricing:
|
||||
input: '0.0034'
|
||||
output: '0.0039'
|
||||
unit: '0.0001'
|
||||
currency: USD
|
||||
@@ -0,0 +1,41 @@
|
||||
model: qwen/qwen-2-7b-instruct
|
||||
label:
|
||||
zh_Hans: Qwen 2 7B Instruct
|
||||
en_US: Qwen 2 7B Instruct
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 32768
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
min: 0
|
||||
max: 2
|
||||
default: 1
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
min: 0
|
||||
max: 1
|
||||
default: 1
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
min: 1
|
||||
max: 2048
|
||||
default: 512
|
||||
- name: frequency_penalty
|
||||
use_template: frequency_penalty
|
||||
min: -2
|
||||
max: 2
|
||||
default: 0
|
||||
- name: presence_penalty
|
||||
use_template: presence_penalty
|
||||
min: -2
|
||||
max: 2
|
||||
default: 0
|
||||
pricing:
|
||||
input: '0.00054'
|
||||
output: '0.00054'
|
||||
unit: '0.0001'
|
||||
currency: USD
|
||||
@@ -0,0 +1,41 @@
|
||||
model: qwen/qwen-2-vl-72b-instruct
|
||||
label:
|
||||
zh_Hans: Qwen 2 VL 72B Instruct
|
||||
en_US: Qwen 2 VL 72B Instruct
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 32768
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
min: 0
|
||||
max: 2
|
||||
default: 1
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
min: 0
|
||||
max: 1
|
||||
default: 1
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
min: 1
|
||||
max: 2048
|
||||
default: 512
|
||||
- name: frequency_penalty
|
||||
use_template: frequency_penalty
|
||||
min: -2
|
||||
max: 2
|
||||
default: 0
|
||||
- name: presence_penalty
|
||||
use_template: presence_penalty
|
||||
min: -2
|
||||
max: 2
|
||||
default: 0
|
||||
pricing:
|
||||
input: '0.0045'
|
||||
output: '0.0045'
|
||||
unit: '0.0001'
|
||||
currency: USD
|
||||
@@ -0,0 +1,41 @@
|
||||
model: qwen/qwen-2.5-72b-instruct
|
||||
label:
|
||||
zh_Hans: Qwen 2.5 72B Instruct
|
||||
en_US: Qwen 2.5 72B Instruct
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 32000
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
min: 0
|
||||
max: 2
|
||||
default: 1
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
min: 0
|
||||
max: 1
|
||||
default: 1
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
min: 1
|
||||
max: 2048
|
||||
default: 512
|
||||
- name: frequency_penalty
|
||||
use_template: frequency_penalty
|
||||
min: -2
|
||||
max: 2
|
||||
default: 0
|
||||
- name: presence_penalty
|
||||
use_template: presence_penalty
|
||||
min: -2
|
||||
max: 2
|
||||
default: 0
|
||||
pricing:
|
||||
input: '0.0038'
|
||||
output: '0.004'
|
||||
unit: '0.0001'
|
||||
currency: USD
|
||||
@@ -0,0 +1,35 @@
|
||||
model: deepseek-ai/deepseek-r1
|
||||
label:
|
||||
en_US: deepseek-ai/deepseek-r1
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 128000
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
min: 0
|
||||
max: 1
|
||||
default: 0.5
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
min: 0
|
||||
max: 1
|
||||
default: 1
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
min: 1
|
||||
max: 1024
|
||||
default: 1024
|
||||
- name: frequency_penalty
|
||||
use_template: frequency_penalty
|
||||
min: -2
|
||||
max: 2
|
||||
default: 0
|
||||
- name: presence_penalty
|
||||
use_template: presence_penalty
|
||||
min: -2
|
||||
max: 2
|
||||
default: 0
|
||||
@@ -0,0 +1,47 @@
|
||||
model: gpt-4o-2024-11-20
|
||||
label:
|
||||
zh_Hans: gpt-4o-2024-11-20
|
||||
en_US: gpt-4o-2024-11-20
|
||||
model_type: llm
|
||||
features:
|
||||
- multi-tool-call
|
||||
- agent-thought
|
||||
- stream-tool-call
|
||||
- vision
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 128000
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: presence_penalty
|
||||
use_template: presence_penalty
|
||||
- name: frequency_penalty
|
||||
use_template: frequency_penalty
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
default: 16384
|
||||
min: 1
|
||||
max: 16384
|
||||
- name: response_format
|
||||
label:
|
||||
zh_Hans: 回复格式
|
||||
en_US: Response Format
|
||||
type: string
|
||||
help:
|
||||
zh_Hans: 指定模型必须输出的格式
|
||||
en_US: specifying the format that the model must output
|
||||
required: false
|
||||
options:
|
||||
- text
|
||||
- json_object
|
||||
- json_schema
|
||||
- name: json_schema
|
||||
use_template: json_schema
|
||||
pricing:
|
||||
input: '2.50'
|
||||
output: '10.00'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
||||
@@ -0,0 +1,44 @@
|
||||
model: gpt-4o-audio-preview
|
||||
label:
|
||||
zh_Hans: gpt-4o-audio-preview
|
||||
en_US: gpt-4o-audio-preview
|
||||
model_type: llm
|
||||
features:
|
||||
- multi-tool-call
|
||||
- agent-thought
|
||||
- stream-tool-call
|
||||
- audio
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 128000
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: presence_penalty
|
||||
use_template: presence_penalty
|
||||
- name: frequency_penalty
|
||||
use_template: frequency_penalty
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
default: 16384
|
||||
min: 1
|
||||
max: 16384
|
||||
- name: response_format
|
||||
label:
|
||||
zh_Hans: 回复格式
|
||||
en_US: Response Format
|
||||
type: string
|
||||
help:
|
||||
zh_Hans: 指定模型必须输出的格式
|
||||
en_US: specifying the format that the model must output
|
||||
required: false
|
||||
options:
|
||||
- text
|
||||
- json_object
|
||||
pricing:
|
||||
input: '5.00'
|
||||
output: '15.00'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
||||
@@ -0,0 +1,48 @@
|
||||
model: o1-2024-12-17
|
||||
label:
|
||||
en_US: o1-2024-12-17
|
||||
model_type: llm
|
||||
features:
|
||||
- multi-tool-call
|
||||
- agent-thought
|
||||
- stream-tool-call
|
||||
- vision
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 200000
|
||||
parameter_rules:
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
default: 50000
|
||||
min: 1
|
||||
max: 50000
|
||||
- name: reasoning_effort
|
||||
label:
|
||||
zh_Hans: 推理工作
|
||||
en_US: reasoning_effort
|
||||
type: string
|
||||
help:
|
||||
zh_Hans: 限制推理模型的推理工作
|
||||
en_US: constrains effort on reasoning for reasoning models
|
||||
required: false
|
||||
options:
|
||||
- low
|
||||
- medium
|
||||
- high
|
||||
- name: response_format
|
||||
label:
|
||||
zh_Hans: 回复格式
|
||||
en_US: response_format
|
||||
type: string
|
||||
help:
|
||||
zh_Hans: 指定模型必须输出的格式
|
||||
en_US: specifying the format that the model must output
|
||||
required: false
|
||||
options:
|
||||
- text
|
||||
- json_object
|
||||
pricing:
|
||||
input: '15.00'
|
||||
output: '60.00'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
||||
@@ -0,0 +1,49 @@
|
||||
model: o1
|
||||
label:
|
||||
zh_Hans: o1
|
||||
en_US: o1
|
||||
model_type: llm
|
||||
features:
|
||||
- multi-tool-call
|
||||
- agent-thought
|
||||
- stream-tool-call
|
||||
- vision
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 200000
|
||||
parameter_rules:
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
default: 50000
|
||||
min: 1
|
||||
max: 50000
|
||||
- name: reasoning_effort
|
||||
label:
|
||||
zh_Hans: 推理工作
|
||||
en_US: reasoning_effort
|
||||
type: string
|
||||
help:
|
||||
zh_Hans: 限制推理模型的推理工作
|
||||
en_US: constrains effort on reasoning for reasoning models
|
||||
required: false
|
||||
options:
|
||||
- low
|
||||
- medium
|
||||
- high
|
||||
- name: response_format
|
||||
label:
|
||||
zh_Hans: 回复格式
|
||||
en_US: response_format
|
||||
type: string
|
||||
help:
|
||||
zh_Hans: 指定模型必须输出的格式
|
||||
en_US: specifying the format that the model must output
|
||||
required: false
|
||||
options:
|
||||
- text
|
||||
- json_object
|
||||
pricing:
|
||||
input: '15.00'
|
||||
output: '60.00'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user