Merge branch 'main' into feature/dashen/model_mapping

This commit is contained in:
2944321442@qq.com
2025-03-05 10:15:00 +08:00
289 changed files with 79473 additions and 1783 deletions
+1
View File
@@ -4,3 +4,4 @@
/build/
/apipark
.gitlab-ci.yml
/.vscode/
+1
View File
@@ -10,6 +10,7 @@ variables:
VIEW_ADDR: http://172.18.166.219:8288
SAVE_DIR: /opt/${APP}
NODE_OPTIONS: --max_old_space_size=8192
APIPARK_OLLAMA_BASE: http://127.0.0.1:11434
stages:
- notice
+22
View File
@@ -0,0 +1,22 @@
package ai_provider_local
import "time"
type Model struct {
Name string `json:"name"`
Model string `json:"model"`
ModifiedAt time.Time `json:"modified_at"`
Size int64 `json:"size"`
Digest string `json:"digest"`
Details ModelDetails `json:"details,omitempty"`
}
// ModelDetails provides details about a model.
type ModelDetails struct {
ParentModel string `json:"parent_model"`
Format string `json:"format"`
Family string `json:"family"`
Families []string `json:"families"`
ParameterSize string `json:"parameter_size"`
QuantizationLevel string `json:"quantization_level"`
}
+336
View File
@@ -0,0 +1,336 @@
package ai_provider_local
import (
"context"
"fmt"
"sync"
"github.com/ollama/ollama/progress"
"github.com/eolinker/eosc"
"github.com/ollama/ollama/api"
)
var (
taskExecutor = NewAsyncExecutor(100)
)
// Pipeline 结构体,表示每个用户的管道
type Pipeline struct {
id string
channel chan PullMessage
ctx context.Context
cancel context.CancelFunc
}
func (p *Pipeline) Message() <-chan PullMessage {
return p.channel
}
// AsyncExecutor 结构体,管理不同模型的管道和任务队列
type AsyncExecutor struct {
ctx context.Context
cancel context.CancelFunc
mu sync.Mutex
pipelines map[string]*modelPipeline // 以模型为 key,存管道列表
msgQueue chan messageTask // 消息队列
}
type modelPipeline struct {
ctx context.Context
cancel context.CancelFunc
pipelines eosc.Untyped[string, *Pipeline]
pullFn PullCallback
maxSize int
}
func (m *modelPipeline) List() []*Pipeline {
return m.pipelines.List()
}
func (m *modelPipeline) Get(id string) (*Pipeline, bool) {
return m.pipelines.Get(id)
}
func (m *modelPipeline) Set(id string, p *Pipeline) error {
_, ok := m.pipelines.Get(id)
if !ok {
if m.pipelines.Count() > m.maxSize {
return fmt.Errorf("pipeline size exceed %d", m.maxSize)
}
}
m.pipelines.Set(id, p)
return nil
}
func (m *modelPipeline) AddPipeline(id string) (*Pipeline, error) {
ctx, cancel := context.WithCancel(m.ctx)
pipeline := &Pipeline{
ctx: ctx,
cancel: cancel,
id: id,
channel: make(chan PullMessage, 10), // 带缓冲,防止阻塞
}
err := m.Set(id, pipeline)
if err != nil {
return nil, err
}
return pipeline, nil
}
func (m *modelPipeline) Close() {
m.cancel()
ids := m.pipelines.Keys()
for _, id := range ids {
m.ClosePipeline(id)
}
return
}
func (m *modelPipeline) ClosePipeline(id string) {
// 关闭管道
p, has := m.pipelines.Del(id)
if !has {
return
}
p.cancel()
close(p.channel)
}
func newModelPipeline(ctx context.Context, maxSize int) *modelPipeline {
ctx, cancel := context.WithCancel(ctx)
return &modelPipeline{
pipelines: eosc.BuildUntyped[string, *Pipeline](),
ctx: ctx,
cancel: cancel,
maxSize: maxSize,
}
}
// messageTask 结构体,包含模型名和消息内容
type messageTask struct {
message PullMessage
}
type PullMessage struct {
Model string
Status string
Digest string
Total int64
Completed int64
Msg string
}
// NewAsyncExecutor 创建一个新的异步任务执行器
func NewAsyncExecutor(queueSize int) *AsyncExecutor {
ctx, cancel := context.WithCancel(context.Background())
executor := &AsyncExecutor{
ctx: ctx,
cancel: cancel,
pipelines: make(map[string]*modelPipeline), // 以模型为 key,存管道列表
msgQueue: make(chan messageTask, queueSize),
}
executor.StartMessageDistributor()
return executor
}
func (e *AsyncExecutor) GetModelPipeline(model string) (*modelPipeline, bool) {
e.mu.Lock()
defer e.mu.Unlock()
mp, ok := e.pipelines[model]
return mp, ok
}
func (e *AsyncExecutor) SetModelPipeline(model string, mp *modelPipeline) {
e.mu.Lock()
defer e.mu.Unlock()
e.pipelines[model] = mp
}
// ClosePipeline 关闭管道并移除
func (e *AsyncExecutor) ClosePipeline(model string, id string) {
e.mu.Lock()
defer e.mu.Unlock()
mp, ok := e.pipelines[model]
if !ok {
return
}
mp.ClosePipeline(id)
}
// CloseModelPipeline 关闭当前模型所有管道
func (e *AsyncExecutor) CloseModelPipeline(model string) {
e.mu.Lock()
defer e.mu.Unlock()
mp, ok := e.pipelines[model]
if !ok {
return
}
mp.Close()
delete(e.pipelines, model)
}
// StartMessageDistributor 启动消息分发器
func (e *AsyncExecutor) StartMessageDistributor() {
go func() {
for task := range e.msgQueue {
msg := task.message
e.DistributeToModelPipelines(msg.Model, msg)
if msg.Status == "error" || msg.Status == "success" {
mp, has := e.GetModelPipeline(msg.Model)
if has && mp.pullFn != nil {
mp.pullFn(msg)
}
e.CloseModelPipeline(msg.Model)
continue
}
}
}()
}
// DistributeToModelPipelines 仅将消息分发给指定模型的管道
func (e *AsyncExecutor) DistributeToModelPipelines(model string, msg PullMessage) {
e.mu.Lock()
defer e.mu.Unlock()
pipelines, ok := e.pipelines[model]
if !ok {
return
}
for _, pipeline := range pipelines.List() {
select {
case pipeline.channel <- msg:
default:
// 如果管道已满,跳过
}
}
}
type PullCallback func(msg PullMessage) error
func PullModel(model string, id string, fn PullCallback) (*Pipeline, error) {
if client == nil {
return nil, fmt.Errorf("client not initialized")
}
mp, has := taskExecutor.GetModelPipeline(model)
if !has {
mp = newModelPipeline(taskExecutor.ctx, 100)
mp.pullFn = fn
taskExecutor.SetModelPipeline(model, mp)
}
p, err := mp.AddPipeline(id)
if err != nil {
return nil, err
}
if !has {
var status string
bars := make(map[string]*progress.Bar)
fn := func(resp api.ProgressResponse) error {
if resp.Digest != "" {
bar, ok := bars[resp.Digest]
if !ok {
bar = progress.NewBar(fmt.Sprintf("pulling %s...", resp.Digest[7:19]), resp.Total, resp.Completed)
bars[resp.Digest] = bar
}
bar.Set(resp.Completed)
taskExecutor.msgQueue <- messageTask{
message: PullMessage{
Model: model,
Digest: resp.Digest,
Total: resp.Total,
Completed: resp.Completed,
Msg: bar.String(),
Status: resp.Status,
},
}
} else if status != resp.Status {
taskExecutor.msgQueue <- messageTask{
message: PullMessage{
Model: model,
Digest: resp.Digest,
Total: resp.Total,
Completed: resp.Completed,
Msg: status,
Status: resp.Status,
},
}
}
return nil
}
go func() {
err = client.Pull(mp.ctx, &api.PullRequest{Model: model}, fn)
if err != nil {
taskExecutor.msgQueue <- messageTask{
message: PullMessage{
Model: model,
Status: "error",
Digest: "",
Total: 0,
Completed: 0,
Msg: err.Error(),
},
}
}
}()
}
return p, nil
}
func StopPull(model string) {
if client == nil {
return
}
taskExecutor.CloseModelPipeline(model)
}
func CancelPipeline(model string, id string) {
taskExecutor.ClosePipeline(model, id)
}
func RemoveModel(model string) error {
if client == nil {
return fmt.Errorf("client not initialized")
}
taskExecutor.CloseModelPipeline(model)
err := client.Delete(context.Background(), &api.DeleteRequest{Model: model})
if err != nil {
if err.Error() == fmt.Sprintf("model '%s' not found", model) {
return nil
}
}
return err
}
func ModelsInstalled() ([]Model, error) {
if client == nil {
return nil, fmt.Errorf("client not initialized")
}
result, err := client.List(context.Background())
if err != nil {
return nil, err
}
models := make([]Model, 0, len(result.Models))
for _, m := range result.Models {
models = append(models, Model{
Name: m.Name,
Model: m.Model,
ModifiedAt: m.ModifiedAt,
Size: m.Size,
Digest: m.Digest,
Details: ModelDetails{
ParentModel: m.Details.ParentModel,
Format: m.Details.Format,
Family: m.Details.Family,
Families: m.Details.Families,
ParameterSize: m.Details.ParameterSize,
QuantizationLevel: m.Details.QuantizationLevel,
},
})
}
return models, nil
}
+80
View File
@@ -0,0 +1,80 @@
package ai_provider_local
import (
"fmt"
"io"
"net/http"
"testing"
"github.com/gin-contrib/gzip"
"github.com/eolinker/eosc/log"
"github.com/google/uuid"
"github.com/gin-gonic/gin"
)
func TestPullModel(t *testing.T) {
// 创建 Gin 引擎
r := gin.Default()
r.Use(gzip.Gzip(gzip.DefaultCompression))
// 设置路由,监听 "/stream" 路径
r.GET("/stream", streamHandler)
r.GET("/stop", stopPull)
r.GET("/models", models)
// 启动 HTTP 服务器
r.Run(":11180")
}
func streamHandler(c *gin.Context) {
// 创建一个通道,用于监测客户端关闭连接的信号
model := c.Query("model")
p, err := PullModel(model, uuid.NewString(), nil)
if err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
return
}
done := make(chan struct{})
// 启动一个 goroutine 监听客户端关闭连接
go func() {
select {
case <-c.Writer.CloseNotify():
log.Info("client closed connection,close pipeline")
taskExecutor.ClosePipeline(model, p.id)
case <-done:
}
}()
c.Stream(func(w io.Writer) bool {
select {
case msg, ok := <-p.channel:
if !ok {
return false
}
_, err := w.Write([]byte(fmt.Sprintf("%s\n", msg.Msg)))
if err != nil {
log.Error("write message error: %v", err)
return false
}
return true
}
})
done <- struct{}{}
}
func stopPull(c *gin.Context) {
model := c.Query("model")
StopPull(model)
c.JSON(http.StatusOK, gin.H{"message": "stop pull model"})
}
func models(c *gin.Context) {
ms, err := ModelsInstalled()
if err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
return
}
c.JSON(http.StatusOK, gin.H{"models": ms})
}
+21
View File
@@ -0,0 +1,21 @@
package ai_provider_local
import (
"net/http"
"net/url"
"github.com/ollama/ollama/api"
)
var (
client *api.Client
)
func ResetOllamaAddress(address string) error {
u, err := url.Parse(address)
if err != nil {
return err
}
client = api.NewClient(u, http.DefaultClient)
return nil
}
+70
View File
@@ -0,0 +1,70 @@
package ai_provider_local
import (
"embed"
"encoding/json"
"strings"
"github.com/eolinker/eosc/log"
)
var (
//go:embed models.json
modelsFs embed.FS
modelCanInstall []ModelDetail
modelVersion string
modelTags = make(map[string][]ModelDetail)
)
type ModelConfig struct {
Models []ModelDetail `json:"models"`
Version string `json:"version"`
}
type ModelDetail struct {
Id string `json:"id"`
Name string `json:"name"`
Description string `json:"description"`
Size string `json:"size"`
Digest string `json:"digest"`
Provider string `json:"provider"`
IsPopular bool `json:"is_popular"`
Latest bool `json:"latest"`
}
func init() {
data, err := modelsFs.ReadFile("models.json")
if err != nil {
log.Info("read models.json error: ", err)
return
}
var cfg ModelConfig
err = json.Unmarshal(data, &cfg)
if err != nil {
log.Info("unmarshal models.json error: ", err)
return
}
modelVersion = cfg.Version
modelCanInstall = make([]ModelDetail, 0, len(cfg.Models))
for _, model := range cfg.Models {
if _, ok := modelTags[model.Id]; !ok {
modelTags[model.Id] = make([]ModelDetail, 0)
}
names := strings.Split(model.Id, ":")
modelTags[names[0]] = append(modelTags[names[0]], model)
if !model.Latest {
continue
}
modelCanInstall = append(modelCanInstall, model)
}
}
func ModelsCanInstall() ([]ModelDetail, string) {
return modelCanInstall, modelVersion
}
func ModelsCanInstallById(id string) []ModelDetail {
return modelTags[id]
}
+7
View File
@@ -0,0 +1,7 @@
package ai_provider_local
import "testing"
func TestModels(t *testing.T) {
t.Log(ModelsCanInstall())
}
File diff suppressed because it is too large Load Diff
+102
View File
@@ -0,0 +1,102 @@
package ai_provider_local
var (
OllamaConfig = "{\n \"mirostat\": 0,\n \"mirostat_eta\": 0.1,\n \"mirostat_tau\": 5.0,\n \"num_ctx\": 4096,\n \"repeat_last_n\":64,\n \"repeat_penalty\": 1.1,\n \"temperature\": 0.7,\n \"seed\": 42,\n \"num_predict\": 42,\n \"top_k\": 40,\n \"top_p\": 0.9,\n \"min_p\": 0.5\n}\n"
OllamaSvg = `<?xml version="1.0" standalone="no"?>
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 20010904//EN"
"http://www.w3.org/TR/2001/REC-SVG-20010904/DTD/svg10.dtd">
<svg version="1.0" xmlns="http://www.w3.org/2000/svg"
width="4096.000000pt" height="4096.000000pt" viewBox="0 0 4096.000000 4096.000000"
preserveAspectRatio="xMidYMid meet">
<g transform="translate(0.000000,4096.000000) scale(0.100000,-0.100000)"
fill="#000000" stroke="none">
<path d="M13179 36749 c-194 -28 -496 -139 -712 -260 -834 -469 -1516 -1636
-1861 -3184 -82 -368 -134 -683 -181 -1100 -155 -1374 -116 -2661 120 -4025
19 -107 36 -222 37 -254 l3 -59 -85 -70 c-507 -412 -773 -667 -1123 -1076
-593 -691 -1055 -1493 -1378 -2390 -158 -436 -305 -1018 -369 -1458 -50 -343
-76 -868 -67 -1348 8 -424 25 -600 93 -1004 160 -947 479 -1747 1023 -2559 67
-101 125 -190 128 -198 3 -8 -20 -57 -52 -110 -361 -605 -691 -1540 -840
-2379 -130 -732 -145 -929 -145 -1930 0 -984 14 -1180 131 -1840 145 -815 441
-1680 760 -2221 72 -122 221 -353 275 -427 61 -84 66 -60 -98 -442 -319 -737
-582 -1679 -708 -2530 -76 -520 -92 -755 -88 -1295 l3 -385 170 -128 c368
-277 840 -604 1260 -872 192 -123 754 -464 823 -499 l33 -17 -30 53 c-143 256
-255 663 -312 1134 -27 227 -37 770 -20 1045 55 860 262 1792 567 2546 95 235
188 434 390 830 l194 382 0 238 c0 131 -5 258 -11 282 -15 68 -97 227 -159
310 -32 42 -117 137 -189 211 -327 336 -485 590 -670 1079 -420 1109 -532
2591 -300 3966 165 982 482 1778 902 2267 179 210 274 395 307 601 16 98 8
331 -14 411 -56 201 -125 309 -385 603 -625 705 -989 1568 -1097 2598 -22 211
-26 749 -6 960 103 1110 491 2126 1144 3000 768 1028 1809 1699 2963 1909 189
35 330 46 581 46 248 0 407 -12 634 -46 272 -41 483 -32 634 28 153 60 312
167 389 261 64 79 141 208 197 333 168 375 252 512 529 859 231 289 523 574
816 795 633 477 1341 822 2052 999 403 100 458 106 1043 106 513 0 557 -3 780
-46 928 -181 1875 -656 2640 -1325 171 -149 358 -355 570 -629 218 -280 285
-390 419 -687 170 -378 274 -502 526 -623 235 -113 391 -125 815 -61 337 51
628 58 915 21 968 -122 1847 -558 2609 -1295 886 -858 1491 -2062 1670 -3323
44 -307 51 -416 51 -782 0 -268 -5 -389 -18 -515 -76 -697 -281 -1344 -601
-1898 -170 -295 -327 -507 -576 -777 -258 -280 -368 -570 -331 -875 27 -220
129 -428 317 -645 164 -190 270 -354 398 -615 337 -685 559 -1640 617 -2653
15 -266 7 -870 -16 -1102 -81 -859 -254 -1586 -510 -2146 -150 -330 -269 -500
-556 -799 -76 -80 -164 -178 -194 -219 -61 -80 -138 -232 -154 -302 -6 -24
-11 -150 -11 -280 l0 -235 223 -445 c364 -724 509 -1097 672 -1734 121 -470
193 -879 241 -1365 22 -222 30 -814 15 -1036 -36 -514 -138 -960 -296 -1291
-30 -62 -50 -113 -46 -113 13 0 622 368 836 505 404 258 925 620 1280 887
l150 113 3 355 c6 699 -34 1099 -188 1872 -154 772 -365 1445 -654 2091 -35
78 -72 168 -82 200 -16 52 -17 61 -4 82 8 12 66 98 128 191 471 700 784 1566
971 2689 95 572 105 768 98 1845 -6 789 -9 860 -52 1171 -103 737 -234 1310
-420 1834 -125 354 -338 813 -549 1186 l-20 37 146 218 c319 479 532 894 714
1396 185 509 320 1129 376 1728 16 179 16 1003 0 1250 -43 628 -153 1174 -366
1815 -387 1165 -1000 2160 -1862 3020 -201 200 -331 317 -585 523 -103 83
-189 158 -192 167 -9 21 5 122 52 389 197 1106 247 2336 145 3551 -163 1958
-769 3565 -1637 4343 -144 130 -205 177 -344 269 -393 260 -825 367 -1298 323
-1300 -123 -2362 -1541 -2860 -3819 -98 -448 -201 -1110 -224 -1434 -5 -71
-14 -134 -20 -140 -14 -14 -51 4 -269 130 -1321 760 -2750 1079 -4137 922
-939 -106 -1883 -422 -2736 -917 -197 -114 -258 -145 -274 -138 -15 5 -19 29
-35 236 -23 291 -128 947 -215 1341 -211 957 -522 1775 -912 2399 -176 283
-323 469 -543 691 -228 230 -396 360 -642 497 -114 63 -325 151 -422 176 -194
51 -651 81 -827 55z m377 -1961 c67 -44 217 -198 286 -294 211 -295 383 -645
554 -1123 189 -529 357 -1317 424 -1988 39 -394 59 -905 60 -1498 l0 -290
-170 -251 -169 -251 -383 -6 c-600 -9 -872 -46 -1466 -197 -130 -33 -246 -60
-258 -60 -33 0 -41 21 -63 176 -72 505 -105 1010 -105 1634 0 690 42 1222 139
1793 185 1089 584 2047 976 2341 75 56 106 59 175 14z m14019 -13 c244 -174
522 -666 720 -1275 338 -1039 471 -2402 369 -3780 -26 -359 -83 -831 -105
-871 -5 -11 -19 -19 -31 -19 -12 0 -136 29 -277 64 -596 150 -851 184 -1454
193 l-378 6 -170 251 -170 251 6 535 c9 882 40 1296 141 1890 154 902 418
1698 748 2251 180 302 409 549 509 549 19 0 52 -16 92 -45z"/>
<path d="M20150 22433 c-389 -29 -834 -89 -1069 -144 -413 -97 -967 -296
-1322 -474 -1280 -645 -2169 -1692 -2469 -2910 -74 -300 -75 -307 -75 -820 0
-487 2 -515 51 -735 99 -440 295 -889 550 -1256 341 -492 803 -911 1349 -1225
621 -356 1384 -585 2131 -641 238 -18 2130 -18 2368 0 1169 87 2281 576 3042
1336 374 374 627 760 829 1261 61 152 91 248 139 441 69 281 71 309 71 819 0
513 -1 520 -75 822 -182 739 -580 1413 -1176 1993 -406 395 -826 686 -1368
950 -661 322 -1372 512 -2091 560 -317 21 -762 32 -885 23z m836 -1333 c270
-25 670 -109 961 -201 379 -121 793 -317 1099 -522 344 -229 767 -645 979
-962 192 -287 321 -603 394 -970 66 -325 57 -531 -35 -900 -90 -355 -301 -716
-603 -1028 -256 -264 -501 -434 -906 -627 -447 -213 -794 -305 -1335 -352
-208 -18 -1977 -18 -2180 0 -543 49 -1161 251 -1615 527 -255 155 -563 423
-743 647 -257 319 -394 631 -478 1088 -24 132 -26 161 -21 315 7 250 61 509
163 782 130 352 389 722 755 1078 297 289 572 480 974 674 558 270 1171 433
1740 461 192 10 700 4 851 -10z"/>
<path d="M19620 19334 c-30 -8 -73 -26 -95 -39 -62 -36 -166 -152 -202 -225
-89 -178 -81 -322 28 -508 56 -93 217 -247 354 -336 66 -43 137 -90 158 -104
59 -39 72 -72 70 -177 -1 -49 -11 -142 -22 -205 -75 -411 -74 -408 -56 -476
35 -131 161 -296 262 -345 41 -20 72 -24 252 -33 254 -12 330 -3 432 50 89 47
169 132 199 210 46 123 43 299 -10 561 -33 165 -39 284 -16 336 18 42 66 86
151 137 107 65 204 141 294 231 141 142 198 248 208 394 7 99 -6 164 -54 262
-59 122 -193 236 -315 269 -73 19 -286 19 -363 -1 -80 -21 -201 -79 -328 -159
-58 -36 -110 -66 -117 -66 -6 0 -60 29 -120 65 -151 89 -260 143 -327 160 -76
20 -308 19 -383 -1z"/>
<path d="M13766 22188 c-238 -24 -521 -166 -723 -361 -254 -246 -405 -551
-458 -927 -19 -140 -19 -328 0 -418 67 -322 293 -638 578 -812 150 -91 358
-151 582 -170 381 -31 677 88 965 386 156 162 237 282 330 491 116 261 130
333 130 674 l0 227 -69 144 c-39 79 -93 178 -121 221 -66 99 -201 241 -303
318 -99 75 -296 172 -402 198 -149 38 -327 48 -509 29z"/>
<path d="M26855 22189 c-136 -14 -237 -44 -371 -109 -146 -71 -252 -149 -366
-269 -109 -114 -173 -210 -259 -388 l-69 -142 0 -228 c0 -343 14 -414 130
-676 96 -216 211 -380 382 -542 271 -259 556 -364 913 -335 140 12 322 51 423
91 264 105 523 353 643 615 90 198 114 319 106 531 -12 317 -114 629 -291 888
-68 100 -209 246 -304 315 -150 109 -351 203 -502 234 -96 20 -318 27 -435 15z"/>
</g>
</svg>
`
)
@@ -0,0 +1,38 @@
model: claude-3-5-haiku-20241022
label:
en_US: claude-3-5-haiku-20241022
model_type: llm
features:
- agent-thought
- tool-call
- stream-tool-call
model_properties:
mode: chat
context_size: 200000
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_tokens
use_template: max_tokens
required: true
default: 8192
min: 1
max: 8192
- name: response_format
use_template: response_format
pricing:
input: '1.00'
output: '5.00'
unit: '0.000001'
currency: USD
@@ -0,0 +1,40 @@
model: claude-3-5-sonnet-20241022
label:
en_US: claude-3-5-sonnet-20241022
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
- document
model_properties:
mode: chat
context_size: 200000
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_tokens
use_template: max_tokens
required: true
default: 8192
min: 1
max: 8192
- name: response_format
use_template: response_format
pricing:
input: '3.00'
output: '15.00'
unit: '0.000001'
currency: USD
@@ -0,0 +1,26 @@
model: ai21.jamba-1-5-large-v1:0
label:
en_US: Jamba 1.5 Large
model_type: llm
model_properties:
mode: completion
context_size: 256000
parameter_rules:
- name: temperature
use_template: temperature
default: 1
min: 0.0
max: 2.0
- name: top_p
use_template: top_p
- name: max_gen_len
use_template: max_tokens
required: true
default: 4096
min: 1
max: 4096
pricing:
input: '0.002'
output: '0.008'
unit: '0.001'
currency: USD
@@ -0,0 +1,26 @@
model: ai21.jamba-1-5-mini-v1:0
label:
en_US: Jamba 1.5 Mini
model_type: llm
model_properties:
mode: completion
context_size: 256000
parameter_rules:
- name: temperature
use_template: temperature
default: 1
min: 0.0
max: 2.0
- name: top_p
use_template: top_p
- name: max_gen_len
use_template: max_tokens
required: true
default: 4096
min: 1
max: 4096
pricing:
input: '0.0002'
output: '0.0004'
unit: '0.001'
currency: USD
@@ -0,0 +1,53 @@
model: amazon.nova-lite-v1:0
label:
en_US: Nova Lite V1
model_type: llm
features:
- agent-thought
- tool-call
- stream-tool-call
- vision
model_properties:
mode: chat
context_size: 300000
parameter_rules:
- name: max_new_tokens
use_template: max_tokens
required: true
default: 2048
min: 1
max: 5000
- name: temperature
use_template: temperature
required: false
type: float
default: 1
min: 0.0
max: 1.0
help:
zh_Hans: 生成内容的随机性。
en_US: The amount of randomness injected into the response.
- name: top_p
required: false
type: float
default: 0.999
min: 0.000
max: 1.000
help:
zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
- name: top_k
required: false
type: int
default: 0
min: 0
# tip docs from aws has error, max value is 500
max: 500
help:
zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
pricing:
input: '0.00006'
output: '0.00024'
unit: '0.001'
currency: USD
@@ -0,0 +1,52 @@
model: amazon.nova-micro-v1:0
label:
en_US: Nova Micro V1
model_type: llm
features:
- agent-thought
- tool-call
- stream-tool-call
model_properties:
mode: chat
context_size: 128000
parameter_rules:
- name: max_new_tokens
use_template: max_tokens
required: true
default: 2048
min: 1
max: 5000
- name: temperature
use_template: temperature
required: false
type: float
default: 1
min: 0.0
max: 1.0
help:
zh_Hans: 生成内容的随机性。
en_US: The amount of randomness injected into the response.
- name: top_p
required: false
type: float
default: 0.999
min: 0.000
max: 1.000
help:
zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
- name: top_k
required: false
type: int
default: 0
min: 0
# tip docs from aws has error, max value is 500
max: 500
help:
zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
pricing:
input: '0.000035'
output: '0.00014'
unit: '0.001'
currency: USD
@@ -0,0 +1,53 @@
model: amazon.nova-pro-v1:0
label:
en_US: Nova Pro V1
model_type: llm
features:
- agent-thought
- tool-call
- stream-tool-call
- vision
model_properties:
mode: chat
context_size: 300000
parameter_rules:
- name: max_new_tokens
use_template: max_tokens
required: true
default: 2048
min: 1
max: 5000
- name: temperature
use_template: temperature
required: false
type: float
default: 1
min: 0.0
max: 1.0
help:
zh_Hans: 生成内容的随机性。
en_US: The amount of randomness injected into the response.
- name: top_p
required: false
type: float
default: 0.999
min: 0.000
max: 1.000
help:
zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
- name: top_k
required: false
type: int
default: 0
min: 0
# tip docs from aws has error, max value is 500
max: 500
help:
zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
pricing:
input: '0.0008'
output: '0.0032'
unit: '0.001'
currency: USD
@@ -0,0 +1,60 @@
model: anthropic.claude-3-5-haiku-20241022-v1:0
label:
en_US: Claude 3.5 Haiku
model_type: llm
features:
- agent-thought
- tool-call
- stream-tool-call
model_properties:
mode: chat
context_size: 200000
# docs: https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-anthropic-claude-messages.html
parameter_rules:
- name: max_tokens
use_template: max_tokens
required: true
type: int
default: 8192
min: 1
max: 8192
help:
zh_Hans: 停止前生成的最大令牌数。请注意,Anthropic Claude 模型可能会在达到 max_tokens 的值之前停止生成令牌。不同的 Anthropic Claude 模型对此参数具有不同的最大值。
en_US: The maximum number of tokens to generate before stopping. Note that Anthropic Claude models might stop generating tokens before reaching the value of max_tokens. Different Anthropic Claude models have different maximum values for this parameter.
# docs: https://docs.anthropic.com/claude/docs/system-prompts
- name: temperature
use_template: temperature
required: false
type: float
default: 1
min: 0.0
max: 1.0
help:
zh_Hans: 生成内容的随机性。
en_US: The amount of randomness injected into the response.
- name: top_p
required: false
type: float
default: 0.999
min: 0.000
max: 1.000
help:
zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
- name: top_k
required: false
type: int
default: 0
min: 0
# tip docs from aws has error, max value is 500
max: 500
help:
zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
- name: response_format
use_template: response_format
pricing:
input: '0.001'
output: '0.005'
unit: '0.001'
currency: USD
@@ -0,0 +1,60 @@
model: anthropic.claude-3-5-sonnet-20241022-v2:0
label:
en_US: Claude 3.5 Sonnet V2
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
model_properties:
mode: chat
context_size: 200000
# docs: https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-anthropic-claude-messages.html
parameter_rules:
- name: max_tokens
use_template: max_tokens
required: true
type: int
default: 8192
min: 1
max: 8192
help:
zh_Hans: 停止前生成的最大令牌数。请注意,Anthropic Claude 模型可能会在达到 max_tokens 的值之前停止生成令牌。不同的 Anthropic Claude 模型对此参数具有不同的最大值。
en_US: The maximum number of tokens to generate before stopping. Note that Anthropic Claude models might stop generating tokens before reaching the value of max_tokens. Different Anthropic Claude models have different maximum values for this parameter.
- name: temperature
use_template: temperature
required: false
type: float
default: 1
min: 0.0
max: 1.0
help:
zh_Hans: 生成内容的随机性。
en_US: The amount of randomness injected into the response.
- name: top_p
required: false
type: float
default: 0.999
min: 0.000
max: 1.000
help:
zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
- name: top_k
required: false
type: int
default: 0
min: 0
# tip docs from aws has error, max value is 500
max: 500
help:
zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
- name: response_format
use_template: response_format
pricing:
input: '0.003'
output: '0.015'
unit: '0.001'
currency: USD
@@ -0,0 +1,60 @@
model: eu.anthropic.claude-3-5-sonnet-20241022-v2:0
label:
en_US: Claude 3.5 Sonnet V2(EU.Cross Region Inference)
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
model_properties:
mode: chat
context_size: 200000
# docs: https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-anthropic-claude-messages.html
parameter_rules:
- name: max_tokens
use_template: max_tokens
required: true
type: int
default: 4096
min: 1
max: 4096
help:
zh_Hans: 停止前生成的最大令牌数。请注意,Anthropic Claude 模型可能会在达到 max_tokens 的值之前停止生成令牌。不同的 Anthropic Claude 模型对此参数具有不同的最大值。
en_US: The maximum number of tokens to generate before stopping. Note that Anthropic Claude models might stop generating tokens before reaching the value of max_tokens. Different Anthropic Claude models have different maximum values for this parameter.
- name: temperature
use_template: temperature
required: false
type: float
default: 1
min: 0.0
max: 1.0
help:
zh_Hans: 生成内容的随机性。
en_US: The amount of randomness injected into the response.
- name: top_p
required: false
type: float
default: 0.999
min: 0.000
max: 1.000
help:
zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
- name: top_k
required: false
type: int
default: 0
min: 0
# tip docs from aws has error, max value is 500
max: 500
help:
zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
- name: response_format
use_template: response_format
pricing:
input: '0.003'
output: '0.015'
unit: '0.001'
currency: USD
@@ -0,0 +1,53 @@
model: us.amazon.nova-lite-v1:0
label:
en_US: Nova Lite V1 (US.Cross Region Inference)
model_type: llm
features:
- agent-thought
- tool-call
- stream-tool-call
- vision
model_properties:
mode: chat
context_size: 300000
parameter_rules:
- name: max_new_tokens
use_template: max_tokens
required: true
default: 2048
min: 1
max: 5000
- name: temperature
use_template: temperature
required: false
type: float
default: 1
min: 0.0
max: 1.0
help:
zh_Hans: 生成内容的随机性。
en_US: The amount of randomness injected into the response.
- name: top_p
required: false
type: float
default: 0.999
min: 0.000
max: 1.000
help:
zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
- name: top_k
required: false
type: int
default: 0
min: 0
# tip docs from aws has error, max value is 500
max: 500
help:
zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
pricing:
input: '0.00006'
output: '0.00024'
unit: '0.001'
currency: USD
@@ -0,0 +1,52 @@
model: us.amazon.nova-micro-v1:0
label:
en_US: Nova Micro V1 (US.Cross Region Inference)
model_type: llm
features:
- agent-thought
- tool-call
- stream-tool-call
model_properties:
mode: chat
context_size: 128000
parameter_rules:
- name: max_new_tokens
use_template: max_tokens
required: true
default: 2048
min: 1
max: 5000
- name: temperature
use_template: temperature
required: false
type: float
default: 1
min: 0.0
max: 1.0
help:
zh_Hans: 生成内容的随机性。
en_US: The amount of randomness injected into the response.
- name: top_p
required: false
type: float
default: 0.999
min: 0.000
max: 1.000
help:
zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
- name: top_k
required: false
type: int
default: 0
min: 0
# tip docs from aws has error, max value is 500
max: 500
help:
zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
pricing:
input: '0.000035'
output: '0.00014'
unit: '0.001'
currency: USD
@@ -0,0 +1,53 @@
model: us.amazon.nova-pro-v1:0
label:
en_US: Nova Pro V1 (US.Cross Region Inference)
model_type: llm
features:
- agent-thought
- tool-call
- stream-tool-call
- vision
model_properties:
mode: chat
context_size: 300000
parameter_rules:
- name: max_new_tokens
use_template: max_tokens
required: true
default: 2048
min: 1
max: 5000
- name: temperature
use_template: temperature
required: false
type: float
default: 1
min: 0.0
max: 1.0
help:
zh_Hans: 生成内容的随机性。
en_US: The amount of randomness injected into the response.
- name: top_p
required: false
type: float
default: 0.999
min: 0.000
max: 1.000
help:
zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
- name: top_k
required: false
type: int
default: 0
min: 0
# tip docs from aws has error, max value is 500
max: 500
help:
zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
pricing:
input: '0.0008'
output: '0.0032'
unit: '0.001'
currency: USD
@@ -0,0 +1,60 @@
model: us.anthropic.claude-3-5-haiku-20241022-v1:0
label:
en_US: Claude 3.5 Haiku(US.Cross Region Inference)
model_type: llm
features:
- agent-thought
- tool-call
- stream-tool-call
model_properties:
mode: chat
context_size: 200000
# docs: https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-anthropic-claude-messages.html
parameter_rules:
- name: max_tokens
use_template: max_tokens
required: true
type: int
default: 8192
min: 1
max: 8192
help:
zh_Hans: 停止前生成的最大令牌数。请注意,Anthropic Claude 模型可能会在达到 max_tokens 的值之前停止生成令牌。不同的 Anthropic Claude 模型对此参数具有不同的最大值。
en_US: The maximum number of tokens to generate before stopping. Note that Anthropic Claude models might stop generating tokens before reaching the value of max_tokens. Different Anthropic Claude models have different maximum values for this parameter.
# docs: https://docs.anthropic.com/claude/docs/system-prompts
- name: temperature
use_template: temperature
required: false
type: float
default: 1
min: 0.0
max: 1.0
help:
zh_Hans: 生成内容的随机性。
en_US: The amount of randomness injected into the response.
- name: top_p
required: false
type: float
default: 0.999
min: 0.000
max: 1.000
help:
zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
- name: top_k
required: false
type: int
default: 0
min: 0
# tip docs from aws has error, max value is 500
max: 500
help:
zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
- name: response_format
use_template: response_format
pricing:
input: '0.001'
output: '0.005'
unit: '0.001'
currency: USD
@@ -0,0 +1,60 @@
model: us.anthropic.claude-3-5-sonnet-20241022-v2:0
label:
en_US: Claude 3.5 Sonnet V2(US.Cross Region Inference)
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
model_properties:
mode: chat
context_size: 200000
# docs: https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-anthropic-claude-messages.html
parameter_rules:
- name: max_tokens
use_template: max_tokens
required: true
type: int
default: 8192
min: 1
max: 8192
help:
zh_Hans: 停止前生成的最大令牌数。请注意,Anthropic Claude 模型可能会在达到 max_tokens 的值之前停止生成令牌。不同的 Anthropic Claude 模型对此参数具有不同的最大值。
en_US: The maximum number of tokens to generate before stopping. Note that Anthropic Claude models might stop generating tokens before reaching the value of max_tokens. Different Anthropic Claude models have different maximum values for this parameter.
- name: temperature
use_template: temperature
required: false
type: float
default: 1
min: 0.0
max: 1.0
help:
zh_Hans: 生成内容的随机性。
en_US: The amount of randomness injected into the response.
- name: top_p
required: false
type: float
default: 0.999
min: 0.000
max: 1.000
help:
zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
- name: top_k
required: false
type: int
default: 0
min: 0
# tip docs from aws has error, max value is 500
max: 500
help:
zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
- name: response_format
use_template: response_format
pricing:
input: '0.003'
output: '0.015'
unit: '0.001'
currency: USD
@@ -0,0 +1,29 @@
model: us.meta.llama3-2-11b-instruct-v1:0
label:
en_US: US Meta Llama 3.2 11B Instruct
model_type: llm
features:
- vision
- tool-call
model_properties:
mode: completion
context_size: 128000
parameter_rules:
- name: temperature
use_template: temperature
default: 0.5
min: 0.0
max: 1
- name: top_p
use_template: top_p
- name: max_gen_len
use_template: max_tokens
required: true
default: 512
min: 1
max: 2048
pricing:
input: '0.00035'
output: '0.00035'
unit: '0.001'
currency: USD
@@ -0,0 +1,26 @@
model: us.meta.llama3-2-1b-instruct-v1:0
label:
en_US: US Meta Llama 3.2 1B Instruct
model_type: llm
model_properties:
mode: completion
context_size: 128000
parameter_rules:
- name: temperature
use_template: temperature
default: 0.5
min: 0.0
max: 1
- name: top_p
use_template: top_p
- name: max_gen_len
use_template: max_tokens
required: true
default: 512
min: 1
max: 2048
pricing:
input: '0.0001'
output: '0.0001'
unit: '0.001'
currency: USD
@@ -0,0 +1,26 @@
model: us.meta.llama3-2-3b-instruct-v1:0
label:
en_US: US Meta Llama 3.2 3B Instruct
model_type: llm
model_properties:
mode: completion
context_size: 128000
parameter_rules:
- name: temperature
use_template: temperature
default: 0.5
min: 0.0
max: 1
- name: top_p
use_template: top_p
- name: max_gen_len
use_template: max_tokens
required: true
default: 512
min: 1
max: 2048
pricing:
input: '0.00015'
output: '0.00015'
unit: '0.001'
currency: USD
@@ -0,0 +1,31 @@
model: us.meta.llama3-2-90b-instruct-v1:0
label:
en_US: US Meta Llama 3.2 90B Instruct
model_type: llm
features:
- tool-call
model_properties:
mode: completion
context_size: 128000
parameter_rules:
- name: temperature
use_template: temperature
default: 0.5
min: 0.0
max: 1
- name: top_p
use_template: top_p
default: 0.9
min: 0
max: 1
- name: max_gen_len
use_template: max_tokens
required: true
default: 512
min: 1
max: 2048
pricing:
input: '0.002'
output: '0.002'
unit: '0.001'
currency: USD
@@ -0,0 +1,21 @@
model: deepseek-reasoner
label:
zh_Hans: deepseek-reasoner
en_US: deepseek-reasoner
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 64000
parameter_rules:
- name: max_tokens
use_template: max_tokens
min: 1
max: 8192
default: 4096
pricing:
input: "4"
output: "16"
unit: "0.000001"
currency: RMB
@@ -0,0 +1,46 @@
model: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
label:
zh_Hans: Llama 3.2 11B Vision Instruct
en_US: Llama 3.2 11B Vision Instruct
model_type: llm
features:
- agent-thought
- tool-call
model_properties:
mode: chat
context_size: 131072
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
- name: max_tokens
use_template: max_tokens
- name: context_length_exceeded_behavior
default: None
label:
zh_Hans: 上下文长度超出行为
en_US: Context Length Exceeded Behavior
help:
zh_Hans: 上下文长度超出行为
en_US: Context Length Exceeded Behavior
type: string
options:
- None
- truncate
- error
- name: response_format
use_template: response_format
pricing:
input: '0.2'
output: '0.2'
unit: '0.000001'
currency: USD
@@ -0,0 +1,46 @@
model: accounts/fireworks/models/llama-v3p2-1b-instruct
label:
zh_Hans: Llama 3.2 1B Instruct
en_US: Llama 3.2 1B Instruct
model_type: llm
features:
- agent-thought
- tool-call
model_properties:
mode: chat
context_size: 131072
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
- name: max_tokens
use_template: max_tokens
- name: context_length_exceeded_behavior
default: None
label:
zh_Hans: 上下文长度超出行为
en_US: Context Length Exceeded Behavior
help:
zh_Hans: 上下文长度超出行为
en_US: Context Length Exceeded Behavior
type: string
options:
- None
- truncate
- error
- name: response_format
use_template: response_format
pricing:
input: '0.1'
output: '0.1'
unit: '0.000001'
currency: USD
@@ -0,0 +1,46 @@
model: accounts/fireworks/models/llama-v3p2-3b-instruct
label:
zh_Hans: Llama 3.2 3B Instruct
en_US: Llama 3.2 3B Instruct
model_type: llm
features:
- agent-thought
- tool-call
model_properties:
mode: chat
context_size: 131072
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
- name: max_tokens
use_template: max_tokens
- name: context_length_exceeded_behavior
default: None
label:
zh_Hans: 上下文长度超出行为
en_US: Context Length Exceeded Behavior
help:
zh_Hans: 上下文长度超出行为
en_US: Context Length Exceeded Behavior
type: string
options:
- None
- truncate
- error
- name: response_format
use_template: response_format
pricing:
input: '0.1'
output: '0.1'
unit: '0.000001'
currency: USD
@@ -0,0 +1,46 @@
model: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
label:
zh_Hans: Llama 3.2 90B Vision Instruct
en_US: Llama 3.2 90B Vision Instruct
model_type: llm
features:
- agent-thought
- tool-call
model_properties:
mode: chat
context_size: 131072
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
- name: max_tokens
use_template: max_tokens
- name: context_length_exceeded_behavior
default: None
label:
zh_Hans: 上下文长度超出行为
en_US: Context Length Exceeded Behavior
help:
zh_Hans: 上下文长度超出行为
en_US: Context Length Exceeded Behavior
type: string
options:
- None
- truncate
- error
- name: response_format
use_template: response_format
pricing:
input: '0.9'
output: '0.9'
unit: '0.000001'
currency: USD
@@ -0,0 +1,46 @@
model: accounts/fireworks/models/qwen2p5-72b-instruct
label:
zh_Hans: Qwen2.5 72B Instruct
en_US: Qwen2.5 72B Instruct
model_type: llm
features:
- agent-thought
- tool-call
model_properties:
mode: chat
context_size: 32768
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
- name: max_tokens
use_template: max_tokens
- name: context_length_exceeded_behavior
default: None
label:
zh_Hans: 上下文长度超出行为
en_US: Context Length Exceeded Behavior
help:
zh_Hans: 上下文长度超出行为
en_US: Context Length Exceeded Behavior
type: string
options:
- None
- truncate
- error
- name: response_format
use_template: response_format
pricing:
input: '0.9'
output: '0.9'
unit: '0.000001'
currency: USD
@@ -0,0 +1,41 @@
model: gemini-1.5-flash-001
label:
en_US: Gemini 1.5 Flash 001
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
- document
- video
- audio
model_properties:
mode: chat
context_size: 1048576
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_output_tokens
use_template: max_tokens
default: 8192
min: 1
max: 8192
- name: json_schema
use_template: json_schema
pricing:
input: '0.00'
output: '0.00'
unit: '0.000001'
currency: USD
@@ -0,0 +1,41 @@
model: gemini-1.5-flash-002
label:
en_US: Gemini 1.5 Flash 002
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
- document
- video
- audio
model_properties:
mode: chat
context_size: 1048576
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_output_tokens
use_template: max_tokens
default: 8192
min: 1
max: 8192
- name: json_schema
use_template: json_schema
pricing:
input: '0.00'
output: '0.00'
unit: '0.000001'
currency: USD
@@ -0,0 +1,41 @@
model: gemini-1.5-flash-8b-exp-0924
label:
en_US: Gemini 1.5 Flash 8B 0924
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
- document
- video
- audio
model_properties:
mode: chat
context_size: 1048576
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_output_tokens
use_template: max_tokens
default: 8192
min: 1
max: 8192
- name: json_schema
use_template: json_schema
pricing:
input: '0.00'
output: '0.00'
unit: '0.000001'
currency: USD
@@ -0,0 +1,41 @@
model: gemini-1.5-flash
label:
en_US: Gemini 1.5 Flash
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
- document
- video
- audio
model_properties:
mode: chat
context_size: 1048576
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_output_tokens
use_template: max_tokens
default: 8192
min: 1
max: 8192
- name: json_schema
use_template: json_schema
pricing:
input: '0.00'
output: '0.00'
unit: '0.000001'
currency: USD
@@ -0,0 +1,41 @@
model: gemini-1.5-pro-001
label:
en_US: Gemini 1.5 Pro 001
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
- document
- video
- audio
model_properties:
mode: chat
context_size: 2097152
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_output_tokens
use_template: max_tokens
default: 8192
min: 1
max: 8192
- name: json_schema
use_template: json_schema
pricing:
input: '0.00'
output: '0.00'
unit: '0.000001'
currency: USD
@@ -0,0 +1,41 @@
model: gemini-1.5-pro-002
label:
en_US: Gemini 1.5 Pro 002
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
- document
- video
- audio
model_properties:
mode: chat
context_size: 2097152
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_output_tokens
use_template: max_tokens
default: 8192
min: 1
max: 8192
- name: json_schema
use_template: json_schema
pricing:
input: '0.00'
output: '0.00'
unit: '0.000001'
currency: USD
@@ -0,0 +1,41 @@
model: gemini-1.5-pro
label:
en_US: Gemini 1.5 Pro
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
- document
- video
- audio
model_properties:
mode: chat
context_size: 2097152
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_output_tokens
use_template: max_tokens
default: 8192
min: 1
max: 8192
- name: json_schema
use_template: json_schema
pricing:
input: '0.00'
output: '0.00'
unit: '0.000001'
currency: USD
@@ -0,0 +1,41 @@
model: gemini-2.0-flash-001
label:
en_US: Gemini 2.0 Flash 001
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
- document
- video
- audio
model_properties:
mode: chat
context_size: 1048576
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_output_tokens
use_template: max_tokens
default: 8192
min: 1
max: 8192
- name: json_schema
use_template: json_schema
pricing:
input: '0.00'
output: '0.00'
unit: '0.000001'
currency: USD
@@ -0,0 +1,41 @@
model: gemini-2.0-flash-exp
label:
en_US: Gemini 2.0 Flash Exp
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
- document
- video
- audio
model_properties:
mode: chat
context_size: 1048576
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_output_tokens
use_template: max_tokens
default: 8192
min: 1
max: 8192
- name: json_schema
use_template: json_schema
pricing:
input: '0.00'
output: '0.00'
unit: '0.000001'
currency: USD
@@ -0,0 +1,41 @@
model: gemini-2.0-flash-lite-preview-02-05
label:
en_US: Gemini 2.0 Flash Lite Preview 0205
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
- document
- video
- audio
model_properties:
mode: chat
context_size: 1048576
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_output_tokens
use_template: max_tokens
default: 8192
min: 1
max: 8192
- name: json_schema
use_template: json_schema
pricing:
input: '0.00'
output: '0.00'
unit: '0.000001'
currency: USD
@@ -0,0 +1,39 @@
model: gemini-2.0-flash-thinking-exp-01-21
label:
en_US: Gemini 2.0 Flash Thinking Exp 01-21
model_type: llm
features:
- agent-thought
- vision
- document
- video
- audio
model_properties:
mode: chat
context_size: 32767
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_output_tokens
use_template: max_tokens
default: 8192
min: 1
max: 8192
- name: json_schema
use_template: json_schema
pricing:
input: '0.00'
output: '0.00'
unit: '0.000001'
currency: USD
@@ -0,0 +1,39 @@
model: gemini-2.0-flash-thinking-exp-1219
label:
en_US: Gemini 2.0 Flash Thinking Exp 1219
model_type: llm
features:
- agent-thought
- vision
- document
- video
- audio
model_properties:
mode: chat
context_size: 32767
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_output_tokens
use_template: max_tokens
default: 8192
min: 1
max: 8192
- name: json_schema
use_template: json_schema
pricing:
input: '0.00'
output: '0.00'
unit: '0.000001'
currency: USD
@@ -0,0 +1,41 @@
model: gemini-2.0-pro-exp-02-05
label:
en_US: Gemini 2.0 pro exp 02-05
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
- document
- video
- audio
model_properties:
mode: chat
context_size: 1048576
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_output_tokens
use_template: max_tokens
default: 8192
min: 1
max: 8192
- name: json_schema
use_template: json_schema
pricing:
input: '0.00'
output: '0.00'
unit: '0.000001'
currency: USD
@@ -0,0 +1,41 @@
model: gemini-exp-1114
label:
en_US: Gemini exp 1114
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
- document
- video
- audio
model_properties:
mode: chat
context_size: 32767
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_output_tokens
use_template: max_tokens
default: 8192
min: 1
max: 8192
- name: json_schema
use_template: json_schema
pricing:
input: '0.00'
output: '0.00'
unit: '0.000001'
currency: USD
@@ -0,0 +1,41 @@
model: gemini-exp-1121
label:
en_US: Gemini exp 1121
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
- document
- video
- audio
model_properties:
mode: chat
context_size: 32767
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_output_tokens
use_template: max_tokens
default: 8192
min: 1
max: 8192
- name: json_schema
use_template: json_schema
pricing:
input: '0.00'
output: '0.00'
unit: '0.000001'
currency: USD
@@ -0,0 +1,41 @@
model: gemini-exp-1206
label:
en_US: Gemini exp 1206
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
- document
- video
- audio
model_properties:
mode: chat
context_size: 2097152
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_output_tokens
use_template: max_tokens
default: 8192
min: 1
max: 8192
- name: json_schema
use_template: json_schema
pricing:
input: '0.00'
output: '0.00'
unit: '0.000001'
currency: USD
@@ -0,0 +1,41 @@
model: learnlm-1.5-pro-experimental
label:
en_US: LearnLM 1.5 Pro Experimental
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
- document
- video
- audio
model_properties:
mode: chat
context_size: 32767
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_output_tokens
use_template: max_tokens
default: 8192
min: 1
max: 8192
- name: json_schema
use_template: json_schema
pricing:
input: '0.00'
output: '0.00'
unit: '0.000001'
currency: USD
@@ -0,0 +1,36 @@
model: deepseek-r1-distill-llama-70b
label:
en_US: DeepSeek R1 Distill Llama 70b
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 128000
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: max_tokens
use_template: max_tokens
default: 512
min: 1
max: 8192
- name: response_format
label:
zh_Hans: 回复格式
en_US: Response Format
type: string
help:
zh_Hans: 指定模型必须输出的格式
en_US: specifying the format that the model must output
required: false
options:
- text
- json_object
pricing:
input: '3.00'
output: '3.00'
unit: '0.000001'
currency: USD
@@ -0,0 +1,37 @@
model: gemma-7b-it
label:
zh_Hans: Gemma 7B Instruction Tuned
en_US: Gemma 7B Instruction Tuned
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 8192
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: max_tokens
use_template: max_tokens
default: 512
min: 1
max: 8192
- name: response_format
label:
zh_Hans: 回复格式
en_US: Response Format
type: string
help:
zh_Hans: 指定模型必须输出的格式
en_US: specifying the format that the model must output
required: false
options:
- text
- json_object
pricing:
input: '0.05'
output: '0.1'
unit: '0.000001'
currency: USD
@@ -0,0 +1,37 @@
model: gemma2-9b-it
label:
zh_Hans: Gemma 2 9B Instruction Tuned
en_US: Gemma 2 9B Instruction Tuned
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 8192
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: max_tokens
use_template: max_tokens
default: 512
min: 1
max: 8192
- name: response_format
label:
zh_Hans: 回复格式
en_US: Response Format
type: string
help:
zh_Hans: 指定模型必须输出的格式
en_US: specifying the format that the model must output
required: false
options:
- text
- json_object
pricing:
input: '0.05'
output: '0.1'
unit: '0.000001'
currency: USD
@@ -0,0 +1,38 @@
model: llama-3.2-11b-text-preview
deprecated: true
label:
zh_Hans: Llama 3.2 11B Text (Preview)
en_US: Llama 3.2 11B Text (Preview)
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 131072
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: max_tokens
use_template: max_tokens
default: 512
min: 1
max: 8192
- name: response_format
label:
zh_Hans: 回复格式
en_US: Response Format
type: string
help:
zh_Hans: 指定模型必须输出的格式
en_US: specifying the format that the model must output
required: false
options:
- text
- json_object
pricing:
input: '0.05'
output: '0.1'
unit: '0.000001'
currency: USD
@@ -0,0 +1,38 @@
model: llama-3.2-11b-vision-preview
label:
zh_Hans: Llama 3.2 11B Vision (Preview)
en_US: Llama 3.2 11B Vision (Preview)
model_type: llm
features:
- agent-thought
- vision
model_properties:
mode: chat
context_size: 131072
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: max_tokens
use_template: max_tokens
default: 512
min: 1
max: 8192
- name: response_format
label:
zh_Hans: 回复格式
en_US: Response Format
type: string
help:
zh_Hans: 指定模型必须输出的格式
en_US: specifying the format that the model must output
required: false
options:
- text
- json_object
pricing:
input: '0.05'
output: '0.1'
unit: '0.000001'
currency: USD
@@ -0,0 +1,37 @@
model: llama-3.2-1b-preview
label:
zh_Hans: Llama 3.2 1B Text (Preview)
en_US: Llama 3.2 1B Text (Preview)
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 131072
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: max_tokens
use_template: max_tokens
default: 512
min: 1
max: 8192
- name: response_format
label:
zh_Hans: 回复格式
en_US: Response Format
type: string
help:
zh_Hans: 指定模型必须输出的格式
en_US: specifying the format that the model must output
required: false
options:
- text
- json_object
pricing:
input: '0.05'
output: '0.1'
unit: '0.000001'
currency: USD
@@ -0,0 +1,37 @@
model: llama-3.2-3b-preview
label:
zh_Hans: Llama 3.2 3B Text (Preview)
en_US: Llama 3.2 3B Text (Preview)
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 131072
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: max_tokens
use_template: max_tokens
default: 512
min: 1
max: 8192
- name: response_format
label:
zh_Hans: 回复格式
en_US: Response Format
type: string
help:
zh_Hans: 指定模型必须输出的格式
en_US: specifying the format that the model must output
required: false
options:
- text
- json_object
pricing:
input: '0.05'
output: '0.1'
unit: '0.000001'
currency: USD
@@ -0,0 +1,38 @@
model: llama-3.2-90b-text-preview
depraceted: true
label:
zh_Hans: Llama 3.2 90B Text (Preview)
en_US: Llama 3.2 90B Text (Preview)
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 131072
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: max_tokens
use_template: max_tokens
default: 512
min: 1
max: 8192
- name: response_format
label:
zh_Hans: 回复格式
en_US: Response Format
type: string
help:
zh_Hans: 指定模型必须输出的格式
en_US: specifying the format that the model must output
required: false
options:
- text
- json_object
pricing:
input: '0.05'
output: '0.1'
unit: '0.000001'
currency: USD
@@ -0,0 +1,38 @@
model: llama-3.2-90b-vision-preview
label:
zh_Hans: Llama 3.2 90B Vision (Preview)
en_US: Llama 3.2 90B Vision (Preview)
model_type: llm
features:
- agent-thought
- vision
model_properties:
mode: chat
context_size: 131072
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: max_tokens
use_template: max_tokens
default: 512
min: 1
max: 8192
- name: response_format
label:
zh_Hans: 回复格式
en_US: Response Format
type: string
help:
zh_Hans: 指定模型必须输出的格式
en_US: specifying the format that the model must output
required: false
options:
- text
- json_object
pricing:
input: '0.05'
output: '0.1'
unit: '0.000001'
currency: USD
@@ -0,0 +1,38 @@
model: llama-3.3-70b-specdec
label:
zh_Hans: Llama 3.3 70B Specdec
en_US: Llama 3.3 70B Specdec
model_type: llm
features:
- agent-thought
- multi-tool-call
model_properties:
mode: chat
context_size: 131072
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: max_tokens
use_template: max_tokens
default: 1024
min: 1
max: 32768
- name: response_format
label:
zh_Hans: 回复格式
en_US: Response Format
type: string
help:
zh_Hans: 指定模型必须输出的格式
en_US: specifying the format that the model must output
required: false
options:
- text
- json_object
pricing:
input: "0.05"
output: "0.1"
unit: "0.000001"
currency: USD
@@ -0,0 +1,38 @@
model: llama-3.3-70b-versatile
label:
zh_Hans: Llama 3.3 70B Versatile
en_US: Llama 3.3 70B Versatile
model_type: llm
features:
- agent-thought
- multi-tool-call
model_properties:
mode: chat
context_size: 131072
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: max_tokens
use_template: max_tokens
default: 1024
min: 1
max: 32768
- name: response_format
label:
zh_Hans: 回复格式
en_US: Response Format
type: string
help:
zh_Hans: 指定模型必须输出的格式
en_US: specifying the format that the model must output
required: false
options:
- text
- json_object
pricing:
input: "0.05"
output: "0.1"
unit: "0.000001"
currency: USD
@@ -0,0 +1,37 @@
model: llama-guard-3-8b
label:
zh_Hans: Llama-Guard-3-8B
en_US: Llama-Guard-3-8B
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 8192
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: max_tokens
use_template: max_tokens
default: 512
min: 1
max: 8192
- name: response_format
label:
zh_Hans: 回复格式
en_US: Response Format
type: string
help:
zh_Hans: 指定模型必须输出的格式
en_US: specifying the format that the model must output
required: false
options:
- text
- json_object
pricing:
input: '0.20'
output: '0.20'
unit: '0.000001'
currency: USD
@@ -0,0 +1,38 @@
model: llama3-groq-70b-8192-tool-use-preview
label:
zh_Hans: Llama3-groq-70b-8192-tool-use (PREVIEW)
en_US: Llama3-groq-70b-8192-tool-use (PREVIEW)
model_type: llm
features:
- agent-thought
- multi-tool-call
model_properties:
mode: chat
context_size: 8192
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: max_tokens
use_template: max_tokens
default: 512
min: 1
max: 8192
- name: response_format
label:
zh_Hans: 回复格式
en_US: Response Format
type: string
help:
zh_Hans: 指定模型必须输出的格式
en_US: specifying the format that the model must output
required: false
options:
- text
- json_object
pricing:
input: '0.05'
output: '0.08'
unit: '0.000001'
currency: USD
@@ -0,0 +1,38 @@
model: hunyuan-functioncall
label:
zh_Hans: hunyuan-functioncall
en_US: hunyuan-functioncall
model_type: llm
features:
- agent-thought
- tool-call
- multi-tool-call
- stream-tool-call
model_properties:
mode: chat
context_size: 32000
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: max_tokens
use_template: max_tokens
default: 1024
min: 1
max: 32000
- name: enable_enhance
label:
zh_Hans: 功能增强
en_US: Enable Enhancement
type: boolean
help:
zh_Hans: 功能增强(如搜索)开关,关闭时将直接由主模型生成回复内容,可以降低响应时延(对于流式输出时的首字时延尤为明显)。但在少数场景里,回复效果可能会下降。
en_US: Allow the model to perform external search to enhance the generation results.
required: false
default: true
pricing:
input: '0.004'
output: '0.008'
unit: '0.001'
currency: RMB
@@ -0,0 +1,38 @@
model: hunyuan-large-longcontext
label:
zh_Hans: hunyuan-large-longcontext
en_US: hunyuan-large-longcontext
model_type: llm
features:
- agent-thought
- tool-call
- multi-tool-call
- stream-tool-call
model_properties:
mode: chat
context_size: 134000
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: max_tokens
use_template: max_tokens
default: 1024
min: 1
max: 134000
- name: enable_enhance
label:
zh_Hans: 功能增强
en_US: Enable Enhancement
type: boolean
help:
zh_Hans: 功能增强(如搜索)开关,关闭时将直接由主模型生成回复内容,可以降低响应时延(对于流式输出时的首字时延尤为明显)。但在少数场景里,回复效果可能会下降。
en_US: Allow the model to perform external search to enhance the generation results.
required: false
default: true
pricing:
input: '0.006'
output: '0.018'
unit: '0.001'
currency: RMB
@@ -0,0 +1,38 @@
model: hunyuan-large-role
label:
zh_Hans: hunyuan-large-role
en_US: hunyuan-large-role
model_type: llm
features:
- agent-thought
- tool-call
- multi-tool-call
- stream-tool-call
model_properties:
mode: chat
context_size: 32000
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: max_tokens
use_template: max_tokens
default: 1024
min: 1
max: 32000
- name: enable_enhance
label:
zh_Hans: 功能增强
en_US: Enable Enhancement
type: boolean
help:
zh_Hans: 功能增强(如搜索)开关,关闭时将直接由主模型生成回复内容,可以降低响应时延(对于流式输出时的首字时延尤为明显)。但在少数场景里,回复效果可能会下降。
en_US: Allow the model to perform external search to enhance the generation results.
required: false
default: true
pricing:
input: '0.004'
output: '0.008'
unit: '0.001'
currency: RMB
@@ -0,0 +1,38 @@
model: hunyuan-large
label:
zh_Hans: hunyuan-large
en_US: hunyuan-large
model_type: llm
features:
- agent-thought
- tool-call
- multi-tool-call
- stream-tool-call
model_properties:
mode: chat
context_size: 32000
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: max_tokens
use_template: max_tokens
default: 1024
min: 1
max: 32000
- name: enable_enhance
label:
zh_Hans: 功能增强
en_US: Enable Enhancement
type: boolean
help:
zh_Hans: 功能增强(如搜索)开关,关闭时将直接由主模型生成回复内容,可以降低响应时延(对于流式输出时的首字时延尤为明显)。但在少数场景里,回复效果可能会下降。
en_US: Allow the model to perform external search to enhance the generation results.
required: false
default: true
pricing:
input: '0.004'
output: '0.012'
unit: '0.001'
currency: RMB
@@ -0,0 +1,38 @@
model: hunyuan-role
label:
zh_Hans: hunyuan-role
en_US: hunyuan-role
model_type: llm
features:
- agent-thought
- tool-call
- multi-tool-call
- stream-tool-call
model_properties:
mode: chat
context_size: 32000
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: max_tokens
use_template: max_tokens
default: 1024
min: 1
max: 32000
- name: enable_enhance
label:
zh_Hans: 功能增强
en_US: Enable Enhancement
type: boolean
help:
zh_Hans: 功能增强(如搜索)开关,关闭时将直接由主模型生成回复内容,可以降低响应时延(对于流式输出时的首字时延尤为明显)。但在少数场景里,回复效果可能会下降。
en_US: Allow the model to perform external search to enhance the generation results.
required: false
default: true
pricing:
input: '0.004'
output: '0.008'
unit: '0.001'
currency: RMB
@@ -0,0 +1,38 @@
model: hunyuan-turbo-latest
label:
zh_Hans: hunyuan-turbo-latest
en_US: hunyuan-turbo-latest
model_type: llm
features:
- agent-thought
- tool-call
- multi-tool-call
- stream-tool-call
model_properties:
mode: chat
context_size: 32000
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: max_tokens
use_template: max_tokens
default: 1024
min: 1
max: 32000
- name: enable_enhance
label:
zh_Hans: 功能增强
en_US: Enable Enhancement
type: boolean
help:
zh_Hans: 功能增强(如搜索)开关,关闭时将直接由主模型生成回复内容,可以降低响应时延(对于流式输出时的首字时延尤为明显)。但在少数场景里,回复效果可能会下降。
en_US: Allow the model to perform external search to enhance the generation results.
required: false
default: true
pricing:
input: '0.015'
output: '0.05'
unit: '0.001'
currency: RMB
@@ -0,0 +1,39 @@
model: hunyuan-turbo-vision
label:
zh_Hans: hunyuan-turbo-vision
en_US: hunyuan-turbo-vision
model_type: llm
features:
- agent-thought
- tool-call
- multi-tool-call
- stream-tool-call
- vision
model_properties:
mode: chat
context_size: 8000
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: max_tokens
use_template: max_tokens
default: 1024
min: 1
max: 8000
- name: enable_enhance
label:
zh_Hans: 功能增强
en_US: Enable Enhancement
type: boolean
help:
zh_Hans: 功能增强(如搜索)开关,关闭时将直接由主模型生成回复内容,可以降低响应时延(对于流式输出时的首字时延尤为明显)。但在少数场景里,回复效果可能会下降。
en_US: Allow the model to perform external search to enhance the generation results.
required: false
default: true
pricing:
input: '0.08'
output: '0.08'
unit: '0.001'
currency: RMB
@@ -0,0 +1,44 @@
model: abab6.5t-chat
label:
en_US: Abab6.5t-Chat
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 8192
parameter_rules:
- name: temperature
use_template: temperature
min: 0.01
max: 1
default: 0.9
- name: top_p
use_template: top_p
min: 0.01
max: 1
default: 0.95
- name: max_tokens
use_template: max_tokens
required: true
default: 3072
min: 1
max: 8192
- name: mask_sensitive_info
type: boolean
default: true
label:
zh_Hans: 隐私保护
en_US: Moderate
help:
zh_Hans: 对输出中易涉及隐私问题的文本信息进行打码,目前包括但不限于邮箱、域名、链接、证件号、家庭住址等,默认true,即开启打码
en_US: Mask the sensitive info of the generated content, such as email/domain/link/address/phone/id..
- name: presence_penalty
use_template: presence_penalty
- name: frequency_penalty
use_template: frequency_penalty
pricing:
input: '0.005'
output: '0.005'
unit: '0.001'
currency: RMB
@@ -0,0 +1,46 @@
model: abab7-chat-preview
label:
en_US: Abab7-chat-preview
model_type: llm
features:
- agent-thought
- tool-call
- stream-tool-call
model_properties:
mode: chat
context_size: 245760
parameter_rules:
- name: temperature
use_template: temperature
min: 0.01
max: 1
default: 0.1
- name: top_p
use_template: top_p
min: 0.01
max: 1
default: 0.95
- name: max_tokens
use_template: max_tokens
required: true
default: 2048
min: 1
max: 245760
- name: mask_sensitive_info
type: boolean
default: true
label:
zh_Hans: 隐私保护
en_US: Moderate
help:
zh_Hans: 对输出中易涉及隐私问题的文本信息进行打码,目前包括但不限于邮箱、域名、链接、证件号、家庭住址等,默认true,即开启打码
en_US: Mask the sensitive info of the generated content, such as email/domain/link/address/phone/id..
- name: presence_penalty
use_template: presence_penalty
- name: frequency_penalty
use_template: frequency_penalty
pricing:
input: '0.1'
output: '0.1'
unit: '0.001'
currency: RMB
@@ -0,0 +1,46 @@
model: minimax-text-01
label:
en_US: Minimax-Text-01
model_type: llm
features:
- agent-thought
- tool-call
- stream-tool-call
model_properties:
mode: chat
context_size: 1000192
parameter_rules:
- name: temperature
use_template: temperature
min: 0.01
max: 1
default: 0.1
- name: top_p
use_template: top_p
min: 0.01
max: 1
default: 0.95
- name: max_tokens
use_template: max_tokens
required: true
default: 2048
min: 1
max: 1000192
- name: mask_sensitive_info
type: boolean
default: true
label:
zh_Hans: 隐私保护
en_US: Moderate
help:
zh_Hans: 对输出中易涉及隐私问题的文本信息进行打码,目前包括但不限于邮箱、域名、链接、证件号、家庭住址等,默认true,即开启打码
en_US: Mask the sensitive info of the generated content, such as email/domain/link/address/phone/id..
- name: presence_penalty
use_template: presence_penalty
- name: frequency_penalty
use_template: frequency_penalty
pricing:
input: '0.001'
output: '0.008'
unit: '0.001'
currency: RMB
@@ -0,0 +1,52 @@
model: pixtral-large-2411
label:
zh_Hans: pixtral-large-2411
en_US: pixtral-large-2411
model_type: llm
features:
- agent-thought
- vision
model_properties:
mode: chat
context_size: 128000
parameter_rules:
- name: temperature
use_template: temperature
default: 0.7
min: 0
max: 1
- name: top_p
use_template: top_p
default: 1
min: 0
max: 1
- name: max_tokens
use_template: max_tokens
default: 8192
min: 1
max: 8192
- name: safe_prompt
default: false
type: boolean
help:
en_US: Whether to inject a safety prompt before all conversations.
zh_Hans: 是否开启提示词审查
label:
en_US: SafePrompt
zh_Hans: 提示词审查
- name: random_seed
type: int
help:
en_US: The seed to use for random sampling. If set, different calls will generate deterministic results.
zh_Hans: 当开启随机数种子以后,你可以通过指定一个固定的种子来使得回答结果更加稳定
label:
en_US: RandomSeed
zh_Hans: 随机数种子
default: 0
min: 0
max: 2147483647
pricing:
input: '0.008'
output: '0.024'
unit: '0.001'
currency: USD
@@ -0,0 +1,52 @@
model: pixtral-large-latest
label:
zh_Hans: pixtral-large-latest
en_US: pixtral-large-latest
model_type: llm
features:
- agent-thought
- vision
model_properties:
mode: chat
context_size: 128000
parameter_rules:
- name: temperature
use_template: temperature
default: 0.7
min: 0
max: 1
- name: top_p
use_template: top_p
default: 1
min: 0
max: 1
- name: max_tokens
use_template: max_tokens
default: 8192
min: 1
max: 8192
- name: safe_prompt
default: false
type: boolean
help:
en_US: Whether to inject a safety prompt before all conversations.
zh_Hans: 是否开启提示词审查
label:
en_US: SafePrompt
zh_Hans: 提示词审查
- name: random_seed
type: int
help:
en_US: The seed to use for random sampling. If set, different calls will generate deterministic results.
zh_Hans: 当开启随机数种子以后,你可以通过指定一个固定的种子来使得回答结果更加稳定
label:
en_US: RandomSeed
zh_Hans: 随机数种子
default: 0
min: 0
max: 2147483647
pricing:
input: '0.008'
output: '0.024'
unit: '0.001'
currency: USD
@@ -0,0 +1,41 @@
model: Sao10K/L3-8B-Stheno-v3.2
label:
zh_Hans: L3 8B Stheno V3.2
en_US: L3 8B Stheno V3.2
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 8192
parameter_rules:
- name: temperature
use_template: temperature
min: 0
max: 2
default: 1
- name: top_p
use_template: top_p
min: 0
max: 1
default: 1
- name: max_tokens
use_template: max_tokens
min: 1
max: 2048
default: 512
- name: frequency_penalty
use_template: frequency_penalty
min: -2
max: 2
default: 0
- name: presence_penalty
use_template: presence_penalty
min: -2
max: 2
default: 0
pricing:
input: '0.0005'
output: '0.0005'
unit: '0.0001'
currency: USD
@@ -0,0 +1,41 @@
model: deepseek/deepseek-r1
label:
zh_Hans: DeepSeek R1
en_US: DeepSeek R1
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 64000
parameter_rules:
- name: temperature
use_template: temperature
min: 0
max: 2
default: 1
- name: top_p
use_template: top_p
min: 0
max: 1
default: 1
- name: max_tokens
use_template: max_tokens
min: 1
max: 2048
default: 512
- name: frequency_penalty
use_template: frequency_penalty
min: -2
max: 2
default: 0
- name: presence_penalty
use_template: presence_penalty
min: -2
max: 2
default: 0
pricing:
input: '0.04'
output: '0.04'
unit: '0.0001'
currency: USD
@@ -0,0 +1,41 @@
model: deepseek/deepseek_v3
label:
zh_Hans: DeepSeek V3
en_US: DeepSeek V3
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 64000
parameter_rules:
- name: temperature
use_template: temperature
min: 0
max: 2
default: 1
- name: top_p
use_template: top_p
min: 0
max: 1
default: 1
- name: max_tokens
use_template: max_tokens
min: 1
max: 2048
default: 512
- name: frequency_penalty
use_template: frequency_penalty
min: -2
max: 2
default: 0
- name: presence_penalty
use_template: presence_penalty
min: -2
max: 2
default: 0
pricing:
input: '0.0089'
output: '0.0089'
unit: '0.0001'
currency: USD
@@ -0,0 +1,41 @@
model: sao10k/l3-8b-lunaris
label:
zh_Hans: "Sao10k L3 8B Lunaris"
en_US: "Sao10k L3 8B Lunaris"
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 8192
parameter_rules:
- name: temperature
use_template: temperature
min: 0
max: 2
default: 1
- name: top_p
use_template: top_p
min: 0
max: 1
default: 1
- name: max_tokens
use_template: max_tokens
min: 1
max: 2048
default: 512
- name: frequency_penalty
use_template: frequency_penalty
min: -2
max: 2
default: 0
- name: presence_penalty
use_template: presence_penalty
min: -2
max: 2
default: 0
pricing:
input: '0.0005'
output: '0.0005'
unit: '0.0001'
currency: USD
@@ -0,0 +1,41 @@
model: sao10k/l31-70b-euryale-v2.2
label:
zh_Hans: L31 70B Euryale V2.2
en_US: L31 70B Euryale V2.2
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 16000
parameter_rules:
- name: temperature
use_template: temperature
min: 0
max: 2
default: 1
- name: top_p
use_template: top_p
min: 0
max: 1
default: 1
- name: max_tokens
use_template: max_tokens
min: 1
max: 2048
default: 512
- name: frequency_penalty
use_template: frequency_penalty
min: -2
max: 2
default: 0
- name: presence_penalty
use_template: presence_penalty
min: -2
max: 2
default: 0
pricing:
input: '0.0148'
output: '0.0148'
unit: '0.0001'
currency: USD
@@ -0,0 +1,41 @@
model: meta-llama/llama-3.1-8b-instruct-bf16
label:
zh_Hans: Llama 3.1 8B Instruct BF16
en_US: Llama 3.1 8B Instruct BF16
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 8192
parameter_rules:
- name: temperature
use_template: temperature
min: 0
max: 2
default: 1
- name: top_p
use_template: top_p
min: 0
max: 1
default: 1
- name: max_tokens
use_template: max_tokens
min: 1
max: 2048
default: 512
- name: frequency_penalty
use_template: frequency_penalty
min: -2
max: 2
default: 0
- name: presence_penalty
use_template: presence_penalty
min: -2
max: 2
default: 0
pricing:
input: '0.0006'
output: '0.0006'
unit: '0.0001'
currency: USD
@@ -0,0 +1,41 @@
model: meta-llama/llama-3.1-8b-instruct-max
label:
zh_Hans: "Llama3.1 8B Instruct Max\t"
en_US: "Llama3.1 8B Instruct Max\t"
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 16384
parameter_rules:
- name: temperature
use_template: temperature
min: 0
max: 2
default: 1
- name: top_p
use_template: top_p
min: 0
max: 1
default: 1
- name: max_tokens
use_template: max_tokens
min: 1
max: 2048
default: 512
- name: frequency_penalty
use_template: frequency_penalty
min: -2
max: 2
default: 0
- name: presence_penalty
use_template: presence_penalty
min: -2
max: 2
default: 0
pricing:
input: '0.0005'
output: '0.0005'
unit: '0.0001'
currency: USD
@@ -0,0 +1,41 @@
model: meta-llama/llama-3.2-11b-vision-instruct
label:
zh_Hans: "Llama 3.2 11B Vision Instruct\t"
en_US: "Llama 3.2 11B Vision Instruct\t"
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 32768
parameter_rules:
- name: temperature
use_template: temperature
min: 0
max: 2
default: 1
- name: top_p
use_template: top_p
min: 0
max: 1
default: 1
- name: max_tokens
use_template: max_tokens
min: 1
max: 2048
default: 512
- name: frequency_penalty
use_template: frequency_penalty
min: -2
max: 2
default: 0
- name: presence_penalty
use_template: presence_penalty
min: -2
max: 2
default: 0
pricing:
input: '0.0006'
output: '0.0006'
unit: '0.0001'
currency: USD
@@ -0,0 +1,41 @@
model: meta-llama/llama-3.2-1b-instruct
label:
zh_Hans: "Llama 3.2 1B Instruct\t"
en_US: "Llama 3.2 1B Instruct\t"
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 131000
parameter_rules:
- name: temperature
use_template: temperature
min: 0
max: 2
default: 1
- name: top_p
use_template: top_p
min: 0
max: 1
default: 1
- name: max_tokens
use_template: max_tokens
min: 1
max: 2048
default: 512
- name: frequency_penalty
use_template: frequency_penalty
min: -2
max: 2
default: 0
- name: presence_penalty
use_template: presence_penalty
min: -2
max: 2
default: 0
pricing:
input: '0.0002'
output: '0.0002'
unit: '0.0001'
currency: USD
@@ -0,0 +1,41 @@
model: meta-llama/llama-3.2-3b-instruct
label:
zh_Hans: Llama 3.2 3B Instruct
en_US: Llama 3.2 3B Instruct
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 32768
parameter_rules:
- name: temperature
use_template: temperature
min: 0
max: 2
default: 1
- name: top_p
use_template: top_p
min: 0
max: 1
default: 1
- name: max_tokens
use_template: max_tokens
min: 1
max: 2048
default: 512
- name: frequency_penalty
use_template: frequency_penalty
min: -2
max: 2
default: 0
- name: presence_penalty
use_template: presence_penalty
min: -2
max: 2
default: 0
pricing:
input: '0.0003'
output: '0.0005'
unit: '0.0001'
currency: USD
@@ -0,0 +1,41 @@
model: meta-llama/llama-3.3-70b-instruct
label:
zh_Hans: Llama 3.3 70B Instruct
en_US: Llama 3.3 70B Instruct
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 131072
parameter_rules:
- name: temperature
use_template: temperature
min: 0
max: 2
default: 1
- name: top_p
use_template: top_p
min: 0
max: 1
default: 1
- name: max_tokens
use_template: max_tokens
min: 1
max: 2048
default: 512
- name: frequency_penalty
use_template: frequency_penalty
min: -2
max: 2
default: 0
- name: presence_penalty
use_template: presence_penalty
min: -2
max: 2
default: 0
pricing:
input: '0.0039'
output: '0.0039'
unit: '0.0001'
currency: USD
@@ -1,69 +0,0 @@
from collections.abc import Generator
from typing import Optional, Union
from core.model_runtime.entities.llm_entities import LLMResult
from core.model_runtime.entities.message_entities import PromptMessage, PromptMessageTool
from core.model_runtime.entities.model_entities import AIModelEntity
from core.model_runtime.model_providers.openai_api_compatible.llm.llm import OAIAPICompatLargeLanguageModel
class NovitaLargeLanguageModel(OAIAPICompatLargeLanguageModel):
def _update_endpoint_url(self, credentials: dict):
credentials["endpoint_url"] = "https://api.novita.ai/v3/openai"
credentials["extra_headers"] = {"X-Novita-Source": "dify.ai"}
return credentials
def _invoke(
self,
model: str,
credentials: dict,
prompt_messages: list[PromptMessage],
model_parameters: dict,
tools: Optional[list[PromptMessageTool]] = None,
stop: Optional[list[str]] = None,
stream: bool = True,
user: Optional[str] = None,
) -> Union[LLMResult, Generator]:
cred_with_endpoint = self._update_endpoint_url(credentials=credentials)
return super()._invoke(model, cred_with_endpoint, prompt_messages, model_parameters, tools, stop, stream, user)
def validate_credentials(self, model: str, credentials: dict) -> None:
cred_with_endpoint = self._update_endpoint_url(credentials=credentials)
self._add_custom_parameters(credentials, model)
return super().validate_credentials(model, cred_with_endpoint)
@classmethod
def _add_custom_parameters(cls, credentials: dict, model: str) -> None:
credentials["mode"] = "chat"
def _generate(
self,
model: str,
credentials: dict,
prompt_messages: list[PromptMessage],
model_parameters: dict,
tools: Optional[list[PromptMessageTool]] = None,
stop: Optional[list[str]] = None,
stream: bool = True,
user: Optional[str] = None,
) -> Union[LLMResult, Generator]:
cred_with_endpoint = self._update_endpoint_url(credentials=credentials)
return super()._generate(
model, cred_with_endpoint, prompt_messages, model_parameters, tools, stop, stream, user
)
def get_customizable_model_schema(self, model: str, credentials: dict) -> AIModelEntity:
cred_with_endpoint = self._update_endpoint_url(credentials=credentials)
return super().get_customizable_model_schema(model, cred_with_endpoint)
def get_num_tokens(
self,
model: str,
credentials: dict,
prompt_messages: list[PromptMessage],
tools: Optional[list[PromptMessageTool]] = None,
) -> int:
cred_with_endpoint = self._update_endpoint_url(credentials=credentials)
return super().get_num_tokens(model, cred_with_endpoint, prompt_messages, tools)
@@ -0,0 +1,41 @@
model: mistralai/mistral-nemo
label:
zh_Hans: Mistral Nemo
en_US: Mistral Nemo
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 131072
parameter_rules:
- name: temperature
use_template: temperature
min: 0
max: 2
default: 1
- name: top_p
use_template: top_p
min: 0
max: 1
default: 1
- name: max_tokens
use_template: max_tokens
min: 1
max: 2048
default: 512
- name: frequency_penalty
use_template: frequency_penalty
min: -2
max: 2
default: 0
- name: presence_penalty
use_template: presence_penalty
min: -2
max: 2
default: 0
pricing:
input: '0.0017'
output: '0.0017'
unit: '0.0001'
currency: USD
@@ -0,0 +1,41 @@
model: openchat/openchat-7b
label:
zh_Hans: OpenChat 7B
en_US: OpenChat 7B
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 4096
parameter_rules:
- name: temperature
use_template: temperature
min: 0
max: 2
default: 1
- name: top_p
use_template: top_p
min: 0
max: 1
default: 1
- name: max_tokens
use_template: max_tokens
min: 1
max: 2048
default: 512
- name: frequency_penalty
use_template: frequency_penalty
min: -2
max: 2
default: 0
- name: presence_penalty
use_template: presence_penalty
min: -2
max: 2
default: 0
pricing:
input: '0.0006'
output: '0.0006'
unit: '0.0001'
currency: USD
@@ -0,0 +1,41 @@
model: qwen/qwen-2-72b-instruct
label:
zh_Hans: Qwen2 72B Instruct
en_US: Qwen2 72B Instruct
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 32768
parameter_rules:
- name: temperature
use_template: temperature
min: 0
max: 2
default: 1
- name: top_p
use_template: top_p
min: 0
max: 1
default: 1
- name: max_tokens
use_template: max_tokens
min: 1
max: 2048
default: 512
- name: frequency_penalty
use_template: frequency_penalty
min: -2
max: 2
default: 0
- name: presence_penalty
use_template: presence_penalty
min: -2
max: 2
default: 0
pricing:
input: '0.0034'
output: '0.0039'
unit: '0.0001'
currency: USD
@@ -0,0 +1,41 @@
model: qwen/qwen-2-7b-instruct
label:
zh_Hans: Qwen 2 7B Instruct
en_US: Qwen 2 7B Instruct
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 32768
parameter_rules:
- name: temperature
use_template: temperature
min: 0
max: 2
default: 1
- name: top_p
use_template: top_p
min: 0
max: 1
default: 1
- name: max_tokens
use_template: max_tokens
min: 1
max: 2048
default: 512
- name: frequency_penalty
use_template: frequency_penalty
min: -2
max: 2
default: 0
- name: presence_penalty
use_template: presence_penalty
min: -2
max: 2
default: 0
pricing:
input: '0.00054'
output: '0.00054'
unit: '0.0001'
currency: USD
@@ -0,0 +1,41 @@
model: qwen/qwen-2-vl-72b-instruct
label:
zh_Hans: Qwen 2 VL 72B Instruct
en_US: Qwen 2 VL 72B Instruct
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 32768
parameter_rules:
- name: temperature
use_template: temperature
min: 0
max: 2
default: 1
- name: top_p
use_template: top_p
min: 0
max: 1
default: 1
- name: max_tokens
use_template: max_tokens
min: 1
max: 2048
default: 512
- name: frequency_penalty
use_template: frequency_penalty
min: -2
max: 2
default: 0
- name: presence_penalty
use_template: presence_penalty
min: -2
max: 2
default: 0
pricing:
input: '0.0045'
output: '0.0045'
unit: '0.0001'
currency: USD
@@ -0,0 +1,41 @@
model: qwen/qwen-2.5-72b-instruct
label:
zh_Hans: Qwen 2.5 72B Instruct
en_US: Qwen 2.5 72B Instruct
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 32000
parameter_rules:
- name: temperature
use_template: temperature
min: 0
max: 2
default: 1
- name: top_p
use_template: top_p
min: 0
max: 1
default: 1
- name: max_tokens
use_template: max_tokens
min: 1
max: 2048
default: 512
- name: frequency_penalty
use_template: frequency_penalty
min: -2
max: 2
default: 0
- name: presence_penalty
use_template: presence_penalty
min: -2
max: 2
default: 0
pricing:
input: '0.0038'
output: '0.004'
unit: '0.0001'
currency: USD
@@ -0,0 +1,35 @@
model: deepseek-ai/deepseek-r1
label:
en_US: deepseek-ai/deepseek-r1
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 128000
parameter_rules:
- name: temperature
use_template: temperature
min: 0
max: 1
default: 0.5
- name: top_p
use_template: top_p
min: 0
max: 1
default: 1
- name: max_tokens
use_template: max_tokens
min: 1
max: 1024
default: 1024
- name: frequency_penalty
use_template: frequency_penalty
min: -2
max: 2
default: 0
- name: presence_penalty
use_template: presence_penalty
min: -2
max: 2
default: 0
@@ -0,0 +1,47 @@
model: gpt-4o-2024-11-20
label:
zh_Hans: gpt-4o-2024-11-20
en_US: gpt-4o-2024-11-20
model_type: llm
features:
- multi-tool-call
- agent-thought
- stream-tool-call
- vision
model_properties:
mode: chat
context_size: 128000
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: presence_penalty
use_template: presence_penalty
- name: frequency_penalty
use_template: frequency_penalty
- name: max_tokens
use_template: max_tokens
default: 16384
min: 1
max: 16384
- name: response_format
label:
zh_Hans: 回复格式
en_US: Response Format
type: string
help:
zh_Hans: 指定模型必须输出的格式
en_US: specifying the format that the model must output
required: false
options:
- text
- json_object
- json_schema
- name: json_schema
use_template: json_schema
pricing:
input: '2.50'
output: '10.00'
unit: '0.000001'
currency: USD
@@ -0,0 +1,44 @@
model: gpt-4o-audio-preview
label:
zh_Hans: gpt-4o-audio-preview
en_US: gpt-4o-audio-preview
model_type: llm
features:
- multi-tool-call
- agent-thought
- stream-tool-call
- audio
model_properties:
mode: chat
context_size: 128000
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: presence_penalty
use_template: presence_penalty
- name: frequency_penalty
use_template: frequency_penalty
- name: max_tokens
use_template: max_tokens
default: 16384
min: 1
max: 16384
- name: response_format
label:
zh_Hans: 回复格式
en_US: Response Format
type: string
help:
zh_Hans: 指定模型必须输出的格式
en_US: specifying the format that the model must output
required: false
options:
- text
- json_object
pricing:
input: '5.00'
output: '15.00'
unit: '0.000001'
currency: USD
@@ -0,0 +1,48 @@
model: o1-2024-12-17
label:
en_US: o1-2024-12-17
model_type: llm
features:
- multi-tool-call
- agent-thought
- stream-tool-call
- vision
model_properties:
mode: chat
context_size: 200000
parameter_rules:
- name: max_tokens
use_template: max_tokens
default: 50000
min: 1
max: 50000
- name: reasoning_effort
label:
zh_Hans: 推理工作
en_US: reasoning_effort
type: string
help:
zh_Hans: 限制推理模型的推理工作
en_US: constrains effort on reasoning for reasoning models
required: false
options:
- low
- medium
- high
- name: response_format
label:
zh_Hans: 回复格式
en_US: response_format
type: string
help:
zh_Hans: 指定模型必须输出的格式
en_US: specifying the format that the model must output
required: false
options:
- text
- json_object
pricing:
input: '15.00'
output: '60.00'
unit: '0.000001'
currency: USD
@@ -0,0 +1,49 @@
model: o1
label:
zh_Hans: o1
en_US: o1
model_type: llm
features:
- multi-tool-call
- agent-thought
- stream-tool-call
- vision
model_properties:
mode: chat
context_size: 200000
parameter_rules:
- name: max_tokens
use_template: max_tokens
default: 50000
min: 1
max: 50000
- name: reasoning_effort
label:
zh_Hans: 推理工作
en_US: reasoning_effort
type: string
help:
zh_Hans: 限制推理模型的推理工作
en_US: constrains effort on reasoning for reasoning models
required: false
options:
- low
- medium
- high
- name: response_format
label:
zh_Hans: 回复格式
en_US: response_format
type: string
help:
zh_Hans: 指定模型必须输出的格式
en_US: specifying the format that the model must output
required: false
options:
- text
- json_object
pricing:
input: '15.00'
output: '60.00'
unit: '0.000001'
currency: USD

Some files were not shown because too many files have changed in this diff Show More