Notifications: You must be signed in to change notification settings
Fork 746
Star 10.7k

Commit 28ec67c

committed

Migrate searchEvaluationRuns

1 parent e25f9b2 · commit 28ec67c · Copy full SHA for 28ec67c

File tree

14 files changed

+552

-108

lines changed

gateway/src/routes
- internal.rs
internal/tensorzero-node/lib/bindings
tensorzero-core/src
- db
  - clickhouse
    - evaluation_queries.rs
  - evaluation_queries.rs
- endpoints/internal/evaluations
ui/app
- routes/evaluations
  - EvaluationRunsTable.tsx
  - route.tsx
- utils
  - clickhouse
    - evaluations.server.ts
    - evaluations.test.ts
  - tensorzero
    - tensorzero.ts

14 files changed

+552

-108

lines changed

`‎gateway/src/routes/internal.rs‎`

Lines changed: 4 additions & 0 deletions

Original file line number	Diff line number	Diff line change
`@@ -67,4 +67,8 @@ pub fn build_internal_non_otel_enabled_routes() -> Router<AppStateData> {`
`67`	`67`	`"/internal/evaluations/run-stats",`
`68`	`68`	`get(endpoints::internal::evaluations::get_evaluation_run_stats_handler),`
`69`	`69`	`)`
	`70`	`+.route(`
	`71`	`+"/internal/evaluations/runs",`
	`72`	`+get(endpoints::internal::evaluations::list_evaluation_runs_handler),`
	`73`	`+)`
`70`	`74`	`}`

`‎internal/tensorzero-node/lib/bindings/EvaluationRunInfo.ts‎`

Lines changed: 13 additions & 0 deletions

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,13 @@`
	`1`	`+// This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually.`
	`2`	`+`
	`3`	`+/**`
	`4`	`+ * Information about a single evaluation run.`
	`5`	`+ */`
	`6`	`+export type EvaluationRunInfo = {`
	`7`	`+evaluation_run_id: string;`
	`8`	`+evaluation_name: string;`
	`9`	`+dataset_name: string;`
	`10`	`+function_name: string;`
	`11`	`+variant_name: string;`
	`12`	`+last_inference_timestamp: string;`
	`13`	`+};`

`‎internal/tensorzero-node/lib/bindings/ListEvaluationRunsResponse.ts‎`

Lines changed: 7 additions & 0 deletions

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,7 @@`
	`1`	`+// This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually.`
	`2`	`+import type { EvaluationRunInfo } from "./EvaluationRunInfo";`
	`3`	`+`
	`4`	`+/**`
	`5`	`+ * Response containing a list of evaluation runs.`
	`6`	`+ */`
	`7`	`+export type ListEvaluationRunsResponse = { runs: Array<EvaluationRunInfo> };`

`‎internal/tensorzero-node/lib/bindings/index.ts‎`

Lines changed: 2 additions & 0 deletions

Original file line number	Diff line number	Diff line change
`@@ -76,6 +76,7 @@ export * from "./EvaluationRunCompleteEvent";`
`76`	`76`	`export * from "./EvaluationRunErrorEvent";`
`77`	`77`	`export * from "./EvaluationRunEvent";`
`78`	`78`	`export * from "./EvaluationRunFatalErrorEvent";`
	`79`	`+export * from "./EvaluationRunInfo";`
`79`	`80`	`export * from "./EvaluationRunStartEvent";`
`80`	`81`	`export * from "./EvaluationRunStatsResponse";`
`81`	`82`	`export * from "./EvaluationRunSuccessEvent";`
`@@ -152,6 +153,7 @@ export * from "./LaunchOptimizationParams";`
`152`	`153`	`export * from "./LaunchOptimizationWorkflowParams";`
`153`	`154`	`export * from "./ListDatapointsRequest";`
`154`	`155`	`export * from "./ListDatasetsResponse";`
	`156`	`+export * from "./ListEvaluationRunsResponse";`
`155`	`157`	`export * from "./ListInferencesRequest";`
`156`	`158`	`export * from "./MetricConfig";`
`157`	`159`	`export * from "./MetricConfigLevel";`

`‎tensorzero-core/src/db/clickhouse/evaluation_queries.rs‎`

Lines changed: 221 additions & 1 deletion

Original file line number	Diff line number	Diff line change
`@@ -5,8 +5,9 @@ use std::collections::HashMap;`
`5`	`5`	`use async_trait::async_trait;`
`6`	`6`
`7`	`7`	`use super::ClickHouseConnectionInfo;`
`8`		`-use super::select_queries::parse_count;`
	`8`	`+use super::select_queries::{parse_count, parse_json_rows};`
`9`	`9`	`use crate::db::evaluation_queries::EvaluationQueries;`
	`10`	`+use crate::db::evaluation_queries::EvaluationRunInfoRow;`
`10`	`11`	`use crate::error::Error;`
`11`	`12`
`12`	`13`	`#[async_trait]`
`@@ -21,4 +22,223 @@ impl EvaluationQueries for ClickHouseConnectionInfo {`
`21`	`22`	`let response = self.run_query_synchronous(query, &HashMap::new()).await?;`
`22`	`23`	`parse_count(&response.response)`
`23`	`24`	`}`
	`25`	`+`
	`26`	`+async fn list_evaluation_runs(`
	`27`	`+&self,`
	`28`	`+limit: u32,`
	`29`	`+offset: u32,`
	`30`	`+) -> Result<Vec<EvaluationRunInfoRow>, Error> {`
	`31`	`+let query = r"`
	`32`	`+ SELECT`
	`33`	`+ evaluation_run_id,`
	`34`	`+ any(evaluation_name) AS evaluation_name,`
	`35`	`+ any(inference_function_name) AS function_name,`
	`36`	`+ any(variant_name) AS variant_name,`
	`37`	`+ any(dataset_name) AS dataset_name,`
	`38`	`+ formatDateTime(UUIDv7ToDateTime(uint_to_uuid(max(max_inference_id))), '%Y-%m-%dT%H:%i:%SZ') AS last_inference_timestamp`
	`39`	`+ FROM (`
	`40`	`+ SELECT`
	`41`	`+ maxIf(value, key = 'tensorzero::evaluation_run_id') AS evaluation_run_id,`
	`42`	`+ maxIf(value, key = 'tensorzero::evaluation_name') AS evaluation_name,`
	`43`	`+ maxIf(value, key = 'tensorzero::dataset_name') AS dataset_name,`
	`44`	`+ any(function_name) AS inference_function_name,`
	`45`	`+ any(variant_name) AS variant_name,`
	`46`	`+ max(toUInt128(inference_id)) AS max_inference_id`
	`47`	`+ FROM TagInference FINAL`
	`48`	`+ WHERE key IN ('tensorzero::evaluation_run_id', 'tensorzero::evaluation_name', 'tensorzero::dataset_name')`
	`49`	`+ GROUP BY inference_id`
	`50`	`+ )`
	`51`	`+ WHERE NOT startsWith(inference_function_name, 'tensorzero::')`
	`52`	`+ GROUP BY evaluation_run_id`
	`53`	`+ ORDER BY toUInt128(toUUID(evaluation_run_id)) DESC`
	`54`	`+ LIMIT {limit:UInt32}`
	`55`	`+ OFFSET {offset:UInt32}`
	`56`	`+ FORMAT JSONEachRow`
	`57`	`+ "`
	`58`	`+.to_string();`
	`59`	`+`
	`60`	`+let limit_str = limit.to_string();`
	`61`	`+let offset_str = offset.to_string();`
	`62`	`+let mut params = HashMap::new();`
	`63`	`+ params.insert("limit", limit_str.as_str());`
	`64`	`+ params.insert("offset", offset_str.as_str());`
	`65`	`+`
	`66`	`+let response = self.run_query_synchronous(query, &params).await?;`
	`67`	`+`
	`68`	`+parse_json_rows(response.response.as_str())`
	`69`	`+}`
	`70`	`+}`
	`71`	`+`
	`72`	`+#[cfg(test)]`
	`73`	`+mod tests {`
	`74`	`+use std::sync::Arc;`
	`75`	`+`
	`76`	`+use crate::db::{`
	`77`	`+ clickhouse::{`
	`78`	`+ClickHouseConnectionInfo, ClickHouseResponse, ClickHouseResponseMetadata,`
	`79`	`+ clickhouse_client::MockClickHouseClient,`
	`80`	`+ query_builder::test_util::assert_query_contains,`
	`81`	`+},`
	`82`	`+ evaluation_queries::EvaluationQueries,`
	`83`	`+};`
	`84`	`+`
	`85`	`+#[tokio::test]`
	`86`	`+async fn test_count_total_evaluation_runs() {`
	`87`	`+let mut mock_clickhouse_client = MockClickHouseClient::new();`
	`88`	`+`
	`89`	`+ mock_clickhouse_client`
	`90`	`+.expect_run_query_synchronous()`
	`91`	`+.withf(|query, params| {`
	`92`	`+assert_query_contains(`
	`93`	`+ query,`
	`94`	`+"SELECT toUInt32(uniqExact(value)) as count`
	`95`	`+ FROM TagInference`
	`96`	`+ WHERE key = 'tensorzero::evaluation_run_id'`
	`97`	`+ FORMAT JSONEachRow",`
	`98`	`+);`
	`99`	`+assert_eq!(params.len(), 0, "Should have no parameters");`
	`100`	`+true`
	`101`	`+})`
	`102`	`+.returning(|_, _| {`
	`103`	`+Ok(ClickHouseResponse {`
	`104`	`+response: r#"{"count":42}"#.to_string(),`
	`105`	`+metadata: ClickHouseResponseMetadata {`
	`106`	`+read_rows: 1,`
	`107`	`+written_rows: 0,`
	`108`	`+},`
	`109`	`+})`
	`110`	`+});`
	`111`	`+`
	`112`	`+let conn = ClickHouseConnectionInfo::new_mock(Arc::new(mock_clickhouse_client));`
	`113`	`+`
	`114`	`+let result = conn.count_total_evaluation_runs().await.unwrap();`
	`115`	`+`
	`116`	`+assert_eq!(result, 42, "Should return count of 42");`
	`117`	`+}`
	`118`	`+`
	`119`	`+#[tokio::test]`
	`120`	`+async fn test_list_evaluation_runs_with_defaults() {`
	`121`	`+let mut mock_clickhouse_client = MockClickHouseClient::new();`
	`122`	`+`
	`123`	`+ mock_clickhouse_client`
	`124`	`+.expect_run_query_synchronous()`
	`125`	`+.withf(|query, params| {`
	`126`	`+// Verify the query contains the expected structure`
	`127`	`+assert_query_contains(query, "SELECT");`
	`128`	`+assert_query_contains(query, "evaluation_run_id");`
	`129`	`+assert_query_contains(query, "FROM TagInference FINAL");`
	`130`	`+assert_query_contains(query, "LIMIT {limit:UInt32}");`
	`131`	`+assert_query_contains(query, "OFFSET {offset:UInt32}");`
	`132`	`+`
	`133`	`+// Verify parameters`
	`134`	`+assert_eq!(params.get("limit"), Some(&"100"));`
	`135`	`+assert_eq!(params.get("offset"), Some(&"0"));`
	`136`	`+true`
	`137`	`+})`
	`138`	`+.returning(|_, _| {`
	`139`	`+Ok(ClickHouseResponse {`
	`140`	`+response: r#"{"evaluation_run_id":"0196ee9c-d808-74f3-8000-02ec7409b95d","evaluation_name":"test_eval","function_name":"test_func","variant_name":"test_variant","dataset_name":"test_dataset","last_inference_timestamp":"2025-05-20T16:52:58Z"}"#.to_string(),`
	`141`	`+metadata: ClickHouseResponseMetadata {`
	`142`	`+read_rows: 1,`
	`143`	`+written_rows: 0,`
	`144`	`+},`
	`145`	`+})`
	`146`	`+});`
	`147`	`+`
	`148`	`+let conn = ClickHouseConnectionInfo::new_mock(Arc::new(mock_clickhouse_client));`
	`149`	`+`
	`150`	`+let result = conn.list_evaluation_runs(100, 0).await.unwrap();`
	`151`	`+`
	`152`	`+assert_eq!(result.len(), 1, "Should return one evaluation run");`
	`153`	`+assert_eq!(result[0].evaluation_name, "test_eval");`
	`154`	`+assert_eq!(result[0].function_name, "test_func");`
	`155`	`+assert_eq!(result[0].variant_name, "test_variant");`
	`156`	`+assert_eq!(result[0].dataset_name, "test_dataset");`
	`157`	`+}`
	`158`	`+`
	`159`	`+#[tokio::test]`
	`160`	`+async fn test_list_evaluation_runs_with_custom_pagination() {`
	`161`	`+let mut mock_clickhouse_client = MockClickHouseClient::new();`
	`162`	`+`
	`163`	`+ mock_clickhouse_client`
	`164`	`+.expect_run_query_synchronous()`
	`165`	`+.withf(|_query, params| {`
	`166`	`+// Verify custom pagination parameters`
	`167`	`+assert_eq!(params.get("limit"), Some(&"50"));`
	`168`	`+assert_eq!(params.get("offset"), Some(&"100"));`
	`169`	`+true`
	`170`	`+})`
	`171`	`+.returning(|_, _| {`
	`172`	`+Ok(ClickHouseResponse {`
	`173`	`+response: String::new(),`
	`174`	`+metadata: ClickHouseResponseMetadata {`
	`175`	`+read_rows: 0,`
	`176`	`+written_rows: 0,`
	`177`	`+},`
	`178`	`+})`
	`179`	`+});`
	`180`	`+`
	`181`	`+let conn = ClickHouseConnectionInfo::new_mock(Arc::new(mock_clickhouse_client));`
	`182`	`+`
	`183`	`+let result = conn.list_evaluation_runs(50, 100).await.unwrap();`
	`184`	`+`
	`185`	`+assert_eq!(result.len(), 0, "Should return empty results");`
	`186`	`+}`
	`187`	`+`
	`188`	`+#[tokio::test]`
	`189`	`+async fn test_list_evaluation_runs_multiple_results() {`
	`190`	`+let mut mock_clickhouse_client = MockClickHouseClient::new();`
	`191`	`+`
	`192`	`+ mock_clickhouse_client`
	`193`	`+.expect_run_query_synchronous()`
	`194`	`+.returning(|_, _| {`
	`195`	`+Ok(ClickHouseResponse {`
	`196`	`+response: r#"{"evaluation_run_id":"0196ee9c-d808-74f3-8000-02ec7409b95d","evaluation_name":"eval1","function_name":"func1","variant_name":"variant1","dataset_name":"dataset1","last_inference_timestamp":"2025-05-20T16:52:58Z"}`
	`197`	`+{"evaluation_run_id":"0196ee9c-d808-74f3-8000-02ec7409b95e","evaluation_name":"eval2","function_name":"func2","variant_name":"variant2","dataset_name":"dataset2","last_inference_timestamp":"2025-05-20T17:52:58Z"}`
	`198`	`+{"evaluation_run_id":"0196ee9c-d808-74f3-8000-02ec7409b95f","evaluation_name":"eval3","function_name":"func3","variant_name":"variant3","dataset_name":"dataset3","last_inference_timestamp":"2025-05-20T18:52:58Z"}"#.to_string(),`
	`199`	`+metadata: ClickHouseResponseMetadata {`
	`200`	`+read_rows: 3,`
	`201`	`+written_rows: 0,`
	`202`	`+},`
	`203`	`+})`
	`204`	`+});`
	`205`	`+`
	`206`	`+let conn = ClickHouseConnectionInfo::new_mock(Arc::new(mock_clickhouse_client));`
	`207`	`+`
	`208`	`+let result = conn.list_evaluation_runs(100, 0).await.unwrap();`
	`209`	`+`
	`210`	`+assert_eq!(result.len(), 3, "Should return three evaluation runs");`
	`211`	`+assert_eq!(result[0].evaluation_name, "eval1");`
	`212`	`+assert_eq!(result[1].evaluation_name, "eval2");`
	`213`	`+assert_eq!(result[2].evaluation_name, "eval3");`
	`214`	`+}`
	`215`	`+`
	`216`	`+#[tokio::test]`
	`217`	`+async fn test_list_evaluation_runs_filters_out_tensorzero_functions() {`
	`218`	`+let mut mock_clickhouse_client = MockClickHouseClient::new();`
	`219`	`+`
	`220`	`+ mock_clickhouse_client`
	`221`	`+.expect_run_query_synchronous()`
	`222`	`+.withf(|query, _params| {`
	`223`	`+// Verify the query filters out tensorzero:: functions`
	`224`	`+assert_query_contains(`
	`225`	`+ query,`
	`226`	`+"NOT startsWith(inference_function_name, 'tensorzero::')",`
	`227`	`+);`
	`228`	`+true`
	`229`	`+})`
	`230`	`+.returning(|_, _| {`
	`231`	`+Ok(ClickHouseResponse {`
	`232`	`+response: String::new(),`
	`233`	`+metadata: ClickHouseResponseMetadata {`
	`234`	`+read_rows: 0,`
	`235`	`+written_rows: 0,`
	`236`	`+},`
	`237`	`+})`
	`238`	`+});`
	`239`	`+`
	`240`	`+let conn = ClickHouseConnectionInfo::new_mock(Arc::new(mock_clickhouse_client));`
	`241`	`+`
	`242`	`+let _result = conn.list_evaluation_runs(100, 0).await.unwrap();`
	`243`	`+}`
`24`	`244`	`}`

`‎tensorzero-core/src/db/evaluation_queries.rs‎`

Lines changed: 25 additions & 0 deletions

Original file line number	Diff line number	Diff line change
`@@ -2,11 +2,36 @@`
`2`	`2`
`3`	`3`	`use async_trait::async_trait;`
`4`	`4`
	`5`	`+use chrono::{DateTime, Utc};`
	`6`	`+#[cfg(test)]`
	`7`	`+use mockall::automock;`
	`8`	`+use serde::Deserialize;`
	`9`	`+use uuid::Uuid;`
	`10`	`+`
`5`	`11`	`use crate::error::Error;`
`6`	`12`
	`13`	`+/// Database struct for deserializing evaluation run info from ClickHouse.`
	`14`	`+#[derive(Debug, Deserialize)]`
	`15`	`+pub struct EvaluationRunInfoRow {`
	`16`	`+pub evaluation_run_id: Uuid,`
	`17`	`+pub evaluation_name: String,`
	`18`	`+pub function_name: String,`
	`19`	`+pub variant_name: String,`
	`20`	`+pub dataset_name: String,`
	`21`	`+pub last_inference_timestamp: DateTime<Utc>,`
	`22`	`+}`
	`23`	`+`
`7`	`24`	`/// Trait for evaluation-related queries.`
`8`	`25`	`#[async_trait]`
	`26`	`+#[cfg_attr(test, automock)]`
`9`	`27`	`pub trait EvaluationQueries {`
`10`	`28`	`/// Counts the total number of unique evaluation runs across all functions.`
`11`	`29`	`async fn count_total_evaluation_runs(&self) -> Result<u64, Error>;`
	`30`	`+`
	`31`	`+/// Lists evaluation runs with pagination.`
	`32`	`+async fn list_evaluation_runs(`
	`33`	`+&self,`
	`34`	`+limit: u32,`
	`35`	`+offset: u32,`
	`36`	`+) -> Result<Vec<EvaluationRunInfoRow>, Error>;`
`12`	`37`	`}`

0 commit comments

Comments

(0)

Movatterモバイル変換

Navigation Menu

Search code, repositories, users, issues, pull requests...

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Commit 28ec67c

File tree

14 files changed

14 files changed

`‎gateway/src/routes/internal.rs‎`

`‎internal/tensorzero-node/lib/bindings/EvaluationRunInfo.ts‎`

`‎internal/tensorzero-node/lib/bindings/ListEvaluationRunsResponse.ts‎`

`‎internal/tensorzero-node/lib/bindings/index.ts‎`

`‎tensorzero-core/src/db/clickhouse/evaluation_queries.rs‎`

`‎tensorzero-core/src/db/evaluation_queries.rs‎`

0 commit comments