Skip to content

Commit d6c85d3

Browse files
committed
ck draft
1 parent 4904c2f commit d6c85d3

File tree

3 files changed

+16
-145
lines changed

3 files changed

+16
-145
lines changed

backend/modules/evaluation/infra/repo/experiment/ck/expt_turn_result_filter.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -376,7 +376,7 @@ func (d *exptTurnResultFilterDAOImpl) buildKeywordSearchConditions(ctx context.C
376376
// buildBaseSQL builds the base SQL statement.
377377
func (d *exptTurnResultFilterDAOImpl) buildBaseSQL(ctx context.Context, whereSQL, keywordCond string, args *[]interface{}) string {
378378
sql := "SELECT etrf.item_id, etrf.status FROM " + getClickHouseDatabaseName() + ".expt_turn_result_filter etrf"
379-
sql += " WHERE 1=1"
379+
sql += " FINAL WHERE 1=1"
380380
if keywordCond != "" {
381381
// Insert evalSetSyncCkDate at the first position of the args slice.
382382
newArgs := make([]interface{}, 0, len(*args))
Lines changed: 9 additions & 137 deletions
Original file line numberDiff line numberDiff line change
@@ -1,159 +1,31 @@
11
models:
2-
# reasoning model
32
- id: 1
4-
workspace_id: 0 # In the future, there will be the concept of public/private workspaces. Public models are managed by the public workspace, private models by the private workspace. Currently, all models belong to the public workspace, and the public workspace id is temporarily set to 0.
5-
name: "deepseek-r1-distill-qwen-32b-250120"
3+
name: "doubao"
64
frame: "eino"
75
protocol: "ark"
86
protocol_config:
9-
api_key: "a715a14b-3b93-47da-8bc9-844c12fecff7"
10-
model: "ep-20250304143659-5bcjt"
7+
api_key: "***"
8+
model: "***"
119
param_config:
1210
param_schemas:
1311
- name: "temperature"
1412
label: "temperature"
15-
desc: "Increasing temperature will make the model output more diverse and creative. Conversely, lowering the temperature will make the output more compliant with instructions but reduce diversity. It is recommended not to adjust together with 'Top p'."
13+
desc: "Increasing temperature makes model output more diverse and creative, while decreasing it makes output more focused on instructions but less diverse. It's recommended not to adjust this simultaneously with 'Top p'."
1614
type: "float"
1715
min: "0"
1816
max: "1.0"
1917
default_val: "0.7"
2018
- name: "max_tokens"
2119
label: "max_tokens"
22-
desc: "Controls the maximum length of model output tokens. Typically, 100 tokens are about 150 Chinese characters."
20+
desc: "Controls the maximum number of tokens in model output. Typically, 100 tokens equals about 150 Chinese characters."
2321
type: "int"
2422
min: "1"
25-
max: "8192"
23+
max: "4096"
2624
default_val: "2048"
27-
# multimodal model
28-
- id: 2
29-
workspace_id: 0 # In the future, there will be the concept of public/private workspaces. Public models are managed by the public workspace, private models by the private workspace. Currently, all models belong to the public workspace, and the public workspace id is temporarily set to 0.
30-
name: "doubao-1.5-vision-pro-32k"
31-
desc: ""
32-
ability:
33-
max_context_tokens: 65536
34-
max_input_tokens: 65536
35-
max_output_tokens: 8192
36-
function_call: false
37-
json_mode: false
38-
multi_modal: true
39-
ability_multi_modal:
40-
image: true
41-
ability_image:
42-
url_enabled: true
43-
binary_enabled: true
44-
max_image_size: 20 # unit MB
45-
max_image_count: 20
46-
frame: "eino"
47-
protocol: "ark"
48-
protocol_config:
49-
base_url: "https://ark.cn-beijing.volces.com/api/v3"
50-
api_key: "a715a14b-3b93-47da-8bc9-844c12fecff7"
51-
model: "ep-20250304145131-ndcct"
52-
protocol_config_ark:
53-
region: "cn-beijing"
54-
scenario_configs:
55-
default:
56-
scenario: "default"
57-
quota:
58-
qpm: 0
59-
tpm: 0
60-
unavailable: false
61-
evaluator:
62-
scenario: "evaluator"
63-
quota:
64-
qpm: 0
65-
tpm: 0
66-
unavailable: false
67-
param_config:
68-
param_schemas:
69-
- name: "temperature"
70-
label: "temperature"
71-
desc: "Increasing temperature will make the model output more diverse and creative. Conversely, lowering the temperature will make the output more compliant with instructions but reduce diversity. It is recommended not to adjust together with 'Top p'."
72-
type: "float"
73-
min: "0"
74-
max: "1.0"
75-
default_val: "0.7"
76-
- name: "max_tokens"
77-
label: "max_tokens"
78-
desc: "Controls the maximum length of model output tokens. Typically, 100 tokens are about 150 Chinese characters."
79-
type: "int"
80-
min: "1"
81-
max: "8192"
82-
default_val: "2048"
83-
# fc model
84-
- id: 3
85-
workspace_id: 0 # In the future, there will be the concept of public/private workspaces. Public models are managed by the public workspace, private models by the private workspace. Currently, all models belong to the public workspace, and the public workspace id is temporarily set to 0.
86-
name: "doubao-1.5-lite-32k"
87-
desc: ""
88-
ability:
89-
max_context_tokens: 32000
90-
max_input_tokens: 32000
91-
max_output_tokens: 12000
92-
function_call: true
93-
json_mode: false
94-
multi_modal: false
95-
frame: "eino"
96-
protocol: "ark"
97-
protocol_config:
98-
base_url: "https://ark.cn-beijing.volces.com/api/v3"
99-
api_key: "a715a14b-3b93-47da-8bc9-844c12fecff7"
100-
model: "ep-20250227201314-frn9m"
101-
protocol_config_ark:
102-
region: "cn-beijing"
103-
scenario_configs:
104-
default:
105-
scenario: "default"
106-
quota:
107-
qpm: 0
108-
tpm: 0
109-
unavailable: false
110-
evaluator:
111-
scenario: "evaluator"
112-
quota:
113-
qpm: 0
114-
tpm: 0
115-
unavailable: false
116-
param_config:
117-
param_schemas:
118-
- name: "temperature"
119-
label: "temperature"
120-
desc: "Increasing temperature will make the model output more diverse and creative. Conversely, lowering the temperature will make the output more compliant with instructions but reduce diversity. It is recommended not to adjust together with 'Top p'."
121-
type: "float"
122-
min: "0"
123-
max: "1.0"
124-
default_val: "0.1"
12525
- name: "top_p"
12626
label: "top_p"
127-
desc: "The model will consider token results within top_p probability mass."
27+
desc: "Selects the minimum token set with cumulative probability reaching top_p during generation, excluding tokens outside the set, balancing diversity and reasonableness."
12828
type: "float"
129-
min: "0"
29+
min: "0.001"
13030
max: "1.0"
131-
default_val: "0.1"
132-
- name: "max_tokens"
133-
label: "max_tokens"
134-
desc: "Controls the maximum length of model output tokens. Typically, 100 tokens are about 150 Chinese characters."
135-
type: "int"
136-
min: "1"
137-
max: "8192"
138-
default_val: "2048"
139-
- name: "top_k"
140-
label: "top_k" # Displayed as a name on the front end
141-
desc: "Only sample from the top k tokens with the highest probability to limit the candidate range and improve generation stability." # Displayed as a description on the front end
142-
type: "int" # Required. Must be float, int, bool, string
143-
min: "1"
144-
max: "100"
145-
default_val: "50"
146-
- name: "frequency_penalty"
147-
label: "frequency_penalty" # Displayed as a name on the front end
148-
desc: "Penalizes generated tokens, with higher frequency resulting in higher penalties, suppressing repetitive content." # Displayed as a description on the front end
149-
type: "float" # Required. Must be float, int, bool, string
150-
min: "0"
151-
max: "2.0"
152-
default_val: "0"
153-
- name: "presence_penalty"
154-
label: "presence_penalty" # Displayed as a name on the front end
155-
desc: "Penalizes all tokens that have appeared, preventing the same content from appearing repeatedly, increasing content diversity." # Displayed as a description on the front end
156-
type: "float" # Required. Must be float, int, bool, string
157-
min: "0"
158-
max: "2.0"
159-
default_val: "0"
31+
default_val: "0.7"

release/deployment/helm-chart/charts/app/bootstrap/init/clickhouse/init-sql/evaluation.sql

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
-- Copyright (c) 2025 coze-dev Authors
22
-- SPDX-License-Identifier: Apache-2.0
33

4-
-- Create expt_turn_result_filter table for kubernetes environment
4+
-- Create expt_turn_result_filter table for docker environment
55
CREATE TABLE IF NOT EXISTS expt_turn_result_filter
66
(
77
`space_id` String,
@@ -24,9 +24,8 @@ CREATE TABLE IF NOT EXISTS expt_turn_result_filter
2424
INDEX idx_expt_id expt_id TYPE bloom_filter() GRANULARITY 1,
2525
INDEX idx_item_id item_id TYPE bloom_filter() GRANULARITY 1,
2626
INDEX idx_turn_id turn_id TYPE bloom_filter() GRANULARITY 1
27-
)
28-
ENGINE = ReplicatedReplacingMergeTree('/clickhouse/stone_dataengine_commercial/cozeloop_evaluation/{shard}', '{replica}')
29-
PARTITION BY created_date
30-
ORDER BY (expt_id, cityHash64(item_id), turn_id)
31-
SAMPLE BY cityHash64(item_id)
32-
SETTINGS index_granularity = 8192;
27+
)
28+
ENGINE = ReplacingMergeTree(updated_at)
29+
PARTITION BY created_date
30+
ORDER BY (expt_id, item_id, turn_id)
31+
SETTINGS index_granularity = 8192;

0 commit comments

Comments
 (0)