Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 28ec67c

Browse files
committed
Migrate searchEvaluationRuns
1 parent e25f9b2 commit 28ec67c

File tree

14 files changed

+552
-108
lines changed

14 files changed

+552
-108
lines changed

‎gateway/src/routes/internal.rs‎

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -67,4 +67,8 @@ pub fn build_internal_non_otel_enabled_routes() -> Router<AppStateData> {
6767
"/internal/evaluations/run-stats",
6868
get(endpoints::internal::evaluations::get_evaluation_run_stats_handler),
6969
)
70+
.route(
71+
"/internal/evaluations/runs",
72+
get(endpoints::internal::evaluations::list_evaluation_runs_handler),
73+
)
7074
}
Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,13 @@
1+
// This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually.
2+
3+
/**
4+
* Information about a single evaluation run.
5+
*/
6+
export type EvaluationRunInfo = {
7+
evaluation_run_id: string;
8+
evaluation_name: string;
9+
dataset_name: string;
10+
function_name: string;
11+
variant_name: string;
12+
last_inference_timestamp: string;
13+
};
Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,7 @@
1+
// This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually.
2+
import type { EvaluationRunInfo } from "./EvaluationRunInfo";
3+
4+
/**
5+
* Response containing a list of evaluation runs.
6+
*/
7+
export type ListEvaluationRunsResponse = { runs: Array<EvaluationRunInfo> };

‎internal/tensorzero-node/lib/bindings/index.ts‎

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -76,6 +76,7 @@ export * from "./EvaluationRunCompleteEvent";
7676
export*from"./EvaluationRunErrorEvent";
7777
export*from"./EvaluationRunEvent";
7878
export*from"./EvaluationRunFatalErrorEvent";
79+
export*from"./EvaluationRunInfo";
7980
export*from"./EvaluationRunStartEvent";
8081
export*from"./EvaluationRunStatsResponse";
8182
export*from"./EvaluationRunSuccessEvent";
@@ -152,6 +153,7 @@ export * from "./LaunchOptimizationParams";
152153
export*from"./LaunchOptimizationWorkflowParams";
153154
export*from"./ListDatapointsRequest";
154155
export*from"./ListDatasetsResponse";
156+
export*from"./ListEvaluationRunsResponse";
155157
export*from"./ListInferencesRequest";
156158
export*from"./MetricConfig";
157159
export*from"./MetricConfigLevel";

‎tensorzero-core/src/db/clickhouse/evaluation_queries.rs‎

Lines changed: 221 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5,8 +5,9 @@ use std::collections::HashMap;
55
use async_trait::async_trait;
66

77
use super::ClickHouseConnectionInfo;
8-
use super::select_queries::parse_count;
8+
use super::select_queries::{parse_count, parse_json_rows};
99
use crate::db::evaluation_queries::EvaluationQueries;
10+
use crate::db::evaluation_queries::EvaluationRunInfoRow;
1011
use crate::error::Error;
1112

1213
#[async_trait]
@@ -21,4 +22,223 @@ impl EvaluationQueries for ClickHouseConnectionInfo {
2122
let response =self.run_query_synchronous(query,&HashMap::new()).await?;
2223
parse_count(&response.response)
2324
}
25+
26+
async fn list_evaluation_runs(
27+
&self,
28+
limit: u32,
29+
offset: u32,
30+
) -> Result<Vec<EvaluationRunInfoRow>, Error> {
31+
let query =r"
32+
SELECT
33+
evaluation_run_id,
34+
any(evaluation_name) AS evaluation_name,
35+
any(inference_function_name) AS function_name,
36+
any(variant_name) AS variant_name,
37+
any(dataset_name) AS dataset_name,
38+
formatDateTime(UUIDv7ToDateTime(uint_to_uuid(max(max_inference_id))), '%Y-%m-%dT%H:%i:%SZ') AS last_inference_timestamp
39+
FROM (
40+
SELECT
41+
maxIf(value, key = 'tensorzero::evaluation_run_id') AS evaluation_run_id,
42+
maxIf(value, key = 'tensorzero::evaluation_name') AS evaluation_name,
43+
maxIf(value, key = 'tensorzero::dataset_name') AS dataset_name,
44+
any(function_name) AS inference_function_name,
45+
any(variant_name) AS variant_name,
46+
max(toUInt128(inference_id)) AS max_inference_id
47+
FROM TagInference FINAL
48+
WHERE key IN ('tensorzero::evaluation_run_id', 'tensorzero::evaluation_name', 'tensorzero::dataset_name')
49+
GROUP BY inference_id
50+
)
51+
WHERE NOT startsWith(inference_function_name, 'tensorzero::')
52+
GROUP BY evaluation_run_id
53+
ORDER BY toUInt128(toUUID(evaluation_run_id)) DESC
54+
LIMIT {limit:UInt32}
55+
OFFSET {offset:UInt32}
56+
FORMAT JSONEachRow
57+
"
58+
.to_string();
59+
60+
let limit_str = limit.to_string();
61+
let offset_str = offset.to_string();
62+
letmut params =HashMap::new();
63+
params.insert("limit", limit_str.as_str());
64+
params.insert("offset", offset_str.as_str());
65+
66+
let response =self.run_query_synchronous(query,&params).await?;
67+
68+
parse_json_rows(response.response.as_str())
69+
}
70+
}
71+
72+
#[cfg(test)]
73+
mod tests{
74+
use std::sync::Arc;
75+
76+
usecrate::db::{
77+
clickhouse::{
78+
ClickHouseConnectionInfo,ClickHouseResponse,ClickHouseResponseMetadata,
79+
clickhouse_client::MockClickHouseClient,
80+
query_builder::test_util::assert_query_contains,
81+
},
82+
evaluation_queries::EvaluationQueries,
83+
};
84+
85+
#[tokio::test]
86+
asyncfntest_count_total_evaluation_runs(){
87+
letmut mock_clickhouse_client =MockClickHouseClient::new();
88+
89+
mock_clickhouse_client
90+
.expect_run_query_synchronous()
91+
.withf(|query, params|{
92+
assert_query_contains(
93+
query,
94+
"SELECT toUInt32(uniqExact(value)) as count
95+
FROM TagInference
96+
WHERE key = 'tensorzero::evaluation_run_id'
97+
FORMAT JSONEachRow",
98+
);
99+
assert_eq!(params.len(),0,"Should have no parameters");
100+
true
101+
})
102+
.returning(|_, _|{
103+
Ok(ClickHouseResponse{
104+
response:r#"{"count":42}"#.to_string(),
105+
metadata:ClickHouseResponseMetadata{
106+
read_rows:1,
107+
written_rows:0,
108+
},
109+
})
110+
});
111+
112+
let conn =ClickHouseConnectionInfo::new_mock(Arc::new(mock_clickhouse_client));
113+
114+
let result = conn.count_total_evaluation_runs().await.unwrap();
115+
116+
assert_eq!(result,42,"Should return count of 42");
117+
}
118+
119+
#[tokio::test]
120+
asyncfntest_list_evaluation_runs_with_defaults(){
121+
letmut mock_clickhouse_client =MockClickHouseClient::new();
122+
123+
mock_clickhouse_client
124+
.expect_run_query_synchronous()
125+
.withf(|query, params|{
126+
// Verify the query contains the expected structure
127+
assert_query_contains(query,"SELECT");
128+
assert_query_contains(query,"evaluation_run_id");
129+
assert_query_contains(query,"FROM TagInference FINAL");
130+
assert_query_contains(query,"LIMIT {limit:UInt32}");
131+
assert_query_contains(query,"OFFSET {offset:UInt32}");
132+
133+
// Verify parameters
134+
assert_eq!(params.get("limit"),Some(&"100"));
135+
assert_eq!(params.get("offset"),Some(&"0"));
136+
true
137+
})
138+
.returning(|_, _|{
139+
Ok(ClickHouseResponse{
140+
response:r#"{"evaluation_run_id":"0196ee9c-d808-74f3-8000-02ec7409b95d","evaluation_name":"test_eval","function_name":"test_func","variant_name":"test_variant","dataset_name":"test_dataset","last_inference_timestamp":"2025-05-20T16:52:58Z"}"#.to_string(),
141+
metadata:ClickHouseResponseMetadata{
142+
read_rows:1,
143+
written_rows:0,
144+
},
145+
})
146+
});
147+
148+
let conn =ClickHouseConnectionInfo::new_mock(Arc::new(mock_clickhouse_client));
149+
150+
let result = conn.list_evaluation_runs(100,0).await.unwrap();
151+
152+
assert_eq!(result.len(),1,"Should return one evaluation run");
153+
assert_eq!(result[0].evaluation_name,"test_eval");
154+
assert_eq!(result[0].function_name,"test_func");
155+
assert_eq!(result[0].variant_name,"test_variant");
156+
assert_eq!(result[0].dataset_name,"test_dataset");
157+
}
158+
159+
#[tokio::test]
160+
asyncfntest_list_evaluation_runs_with_custom_pagination(){
161+
letmut mock_clickhouse_client =MockClickHouseClient::new();
162+
163+
mock_clickhouse_client
164+
.expect_run_query_synchronous()
165+
.withf(|_query, params|{
166+
// Verify custom pagination parameters
167+
assert_eq!(params.get("limit"),Some(&"50"));
168+
assert_eq!(params.get("offset"),Some(&"100"));
169+
true
170+
})
171+
.returning(|_, _|{
172+
Ok(ClickHouseResponse{
173+
response:String::new(),
174+
metadata:ClickHouseResponseMetadata{
175+
read_rows:0,
176+
written_rows:0,
177+
},
178+
})
179+
});
180+
181+
let conn =ClickHouseConnectionInfo::new_mock(Arc::new(mock_clickhouse_client));
182+
183+
let result = conn.list_evaluation_runs(50,100).await.unwrap();
184+
185+
assert_eq!(result.len(),0,"Should return empty results");
186+
}
187+
188+
#[tokio::test]
189+
asyncfntest_list_evaluation_runs_multiple_results(){
190+
letmut mock_clickhouse_client =MockClickHouseClient::new();
191+
192+
mock_clickhouse_client
193+
.expect_run_query_synchronous()
194+
.returning(|_, _|{
195+
Ok(ClickHouseResponse{
196+
response:r#"{"evaluation_run_id":"0196ee9c-d808-74f3-8000-02ec7409b95d","evaluation_name":"eval1","function_name":"func1","variant_name":"variant1","dataset_name":"dataset1","last_inference_timestamp":"2025-05-20T16:52:58Z"}
197+
{"evaluation_run_id":"0196ee9c-d808-74f3-8000-02ec7409b95e","evaluation_name":"eval2","function_name":"func2","variant_name":"variant2","dataset_name":"dataset2","last_inference_timestamp":"2025-05-20T17:52:58Z"}
198+
{"evaluation_run_id":"0196ee9c-d808-74f3-8000-02ec7409b95f","evaluation_name":"eval3","function_name":"func3","variant_name":"variant3","dataset_name":"dataset3","last_inference_timestamp":"2025-05-20T18:52:58Z"}"#.to_string(),
199+
metadata:ClickHouseResponseMetadata{
200+
read_rows:3,
201+
written_rows:0,
202+
},
203+
})
204+
});
205+
206+
let conn =ClickHouseConnectionInfo::new_mock(Arc::new(mock_clickhouse_client));
207+
208+
let result = conn.list_evaluation_runs(100,0).await.unwrap();
209+
210+
assert_eq!(result.len(),3,"Should return three evaluation runs");
211+
assert_eq!(result[0].evaluation_name,"eval1");
212+
assert_eq!(result[1].evaluation_name,"eval2");
213+
assert_eq!(result[2].evaluation_name,"eval3");
214+
}
215+
216+
#[tokio::test]
217+
asyncfntest_list_evaluation_runs_filters_out_tensorzero_functions(){
218+
letmut mock_clickhouse_client =MockClickHouseClient::new();
219+
220+
mock_clickhouse_client
221+
.expect_run_query_synchronous()
222+
.withf(|query, _params|{
223+
// Verify the query filters out tensorzero:: functions
224+
assert_query_contains(
225+
query,
226+
"NOT startsWith(inference_function_name, 'tensorzero::')",
227+
);
228+
true
229+
})
230+
.returning(|_, _|{
231+
Ok(ClickHouseResponse{
232+
response:String::new(),
233+
metadata:ClickHouseResponseMetadata{
234+
read_rows:0,
235+
written_rows:0,
236+
},
237+
})
238+
});
239+
240+
let conn =ClickHouseConnectionInfo::new_mock(Arc::new(mock_clickhouse_client));
241+
242+
let _result = conn.list_evaluation_runs(100,0).await.unwrap();
243+
}
24244
}

‎tensorzero-core/src/db/evaluation_queries.rs‎

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,36 @@
22
33
use async_trait::async_trait;
44

5+
use chrono::{DateTime,Utc};
6+
#[cfg(test)]
7+
use mockall::automock;
8+
use serde::Deserialize;
9+
use uuid::Uuid;
10+
511
usecrate::error::Error;
612

13+
/// Database struct for deserializing evaluation run info from ClickHouse.
14+
#[derive(Debug,Deserialize)]
15+
pub struct EvaluationRunInfoRow {
16+
pub evaluation_run_id: Uuid,
17+
pub evaluation_name: String,
18+
pub function_name: String,
19+
pub variant_name: String,
20+
pub dataset_name: String,
21+
pub last_inference_timestamp: DateTime<Utc>,
22+
}
23+
724
/// Trait for evaluation-related queries.
825
#[async_trait]
26+
#[cfg_attr(test, automock)]
927
pub trait EvaluationQueries {
1028
/// Counts the total number of unique evaluation runs across all functions.
1129
async fn count_total_evaluation_runs(&self) -> Result<u64, Error>;
30+
31+
/// Lists evaluation runs with pagination.
32+
async fn list_evaluation_runs(
33+
&self,
34+
limit: u32,
35+
offset: u32,
36+
) -> Result<Vec<EvaluationRunInfoRow>, Error>;
1237
}

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp