@@ -151,6 +151,7 @@ pub async fn build_search_query(
151
151
score_cte_non_recursive
152
152
. from_as ( embeddings_table. to_table_tuple ( ) , Alias :: new ( "embeddings" ) )
153
153
. column ( ( SIden :: Str ( "documents" ) , SIden :: Str ( "id" ) ) )
154
+ . column ( ( SIden :: Str ( "chunks" ) , SIden :: Str ( "chunk" ) ) )
154
155
. join_as (
155
156
JoinType :: InnerJoin ,
156
157
chunks_table. to_table_tuple ( ) ,
@@ -177,6 +178,7 @@ pub async fn build_search_query(
177
178
score_cte_recurisive
178
179
. from_as ( embeddings_table. to_table_tuple ( ) , Alias :: new ( "embeddings" ) )
179
180
. column ( ( SIden :: Str ( "documents" ) , SIden :: Str ( "id" ) ) )
181
+ . column ( ( SIden :: Str ( "chunks" ) , SIden :: Str ( "chunk" ) ) )
180
182
. expr ( Expr :: cust ( format ! ( r#""{cte_name}".previous_document_ids || documents.id"# ) ) )
181
183
. expr ( Expr :: cust ( format ! (
182
184
r#"(1 - (embeddings.embedding <=> (SELECT embedding FROM "{key}_embedding")::vector)) * {boost} AS score"#
@@ -233,6 +235,7 @@ pub async fn build_search_query(
233
235
score_cte_non_recursive
234
236
. from_as ( embeddings_table. to_table_tuple ( ) , Alias :: new ( "embeddings" ) )
235
237
. column ( ( SIden :: Str ( "documents" ) , SIden :: Str ( "id" ) ) )
238
+ . column ( ( SIden :: Str ( "chunks" ) , SIden :: Str ( "chunk" ) ) )
236
239
. expr ( Expr :: cust ( "ARRAY[documents.id] as previous_document_ids" ) )
237
240
. expr ( Expr :: cust_with_values (
238
241
format ! ( "(1 - (embeddings.embedding <=> $1::vector)) * {boost} AS score" ) ,
@@ -269,6 +272,7 @@ pub async fn build_search_query(
269
272
Expr :: cust ( "1 = 1" ) ,
270
273
)
271
274
. column ( ( SIden :: Str ( "documents" ) , SIden :: Str ( "id" ) ) )
275
+ . column ( ( SIden :: Str ( "chunks" ) , SIden :: Str ( "chunk" ) ) )
272
276
. expr ( Expr :: cust ( format ! (
273
277
r#""{cte_name}".previous_document_ids || documents.id"#
274
278
) ) )
@@ -324,6 +328,7 @@ pub async fn build_search_query(
324
328
let mut row_number_pre_rerank =Query :: select ( ) ;
325
329
row_number_pre_rerank
326
330
. column ( SIden :: Str ( "id" ) )
331
+ . column ( SIden :: Str ( "chunk" ) )
327
332
. from ( SIden :: String ( cte_name. clone ( ) ) )
328
333
. expr_as ( Expr :: cust ( "ROW_NUMBER() OVER ()" ) , Alias :: new ( "row_number" ) )
329
334
. limit ( rerank. num_documents_to_rerank ) ;
@@ -335,7 +340,10 @@ pub async fn build_search_query(
335
340
// Our actual CTE
336
341
let mut query =Query :: select ( ) ;
337
342
query. column ( SIden :: Str ( "id" ) ) ;
338
- query. expr_as ( Expr :: cust ( "(rank).score" ) , Alias :: new ( "score" ) ) ;
343
+ query. expr_as (
344
+ Expr :: cust ( format ! ( "(rank).score * {boost}" ) ) ,
345
+ Alias :: new ( "score" ) ,
346
+ ) ;
339
347
340
348
// Build the actual CTE
341
349
let mut sub_query_rank_call =Query :: select ( ) ;
@@ -347,14 +355,7 @@ pub async fn build_search_query(
347
355
format ! ( r#"pgml.rank($1, $2, array_agg("chunk"), '{{"return_documents": false, "top_k": {}}}'::jsonb || $3)"# , valid_query. limit) ,
348
356
[ model_expr, query_expr, parameters_expr] ,
349
357
) , Alias :: new ( "rank" ) )
350
- . from ( SIden :: String ( format ! ( "row_number_{cte_name}" ) ) )
351
- . join_as (
352
- JoinType :: InnerJoin ,
353
- chunks_table. to_table_tuple ( ) ,
354
- Alias :: new ( "chunks" ) ,
355
- Expr :: col ( ( SIden :: Str ( "chunks" ) , SIden :: Str ( "id" ) ) )
356
- . equals ( ( SIden :: String ( format ! ( "row_number_{cte_name}" ) ) , SIden :: Str ( "id" ) ) ) ,
357
- ) ;
358
+ . from ( SIden :: String ( format ! ( "row_number_{cte_name}" ) ) ) ;
358
359
359
360
let mut sub_query =Query :: select ( ) ;
360
361
sub_query
@@ -403,6 +404,7 @@ pub async fn build_search_query(
403
404
404
405
let mut score_cte_non_recursive =Query :: select ( )
405
406
. column ( ( SIden :: Str ( "documents" ) , SIden :: Str ( "id" ) ) )
407
+ . column ( ( SIden :: Str ( "chunks" ) , SIden :: Str ( "chunk" ) ) )
406
408
. expr_as (
407
409
Expr :: cust_with_values (
408
410
format ! (
@@ -445,6 +447,7 @@ pub async fn build_search_query(
445
447
446
448
let mut score_cte_recursive =Query :: select ( )
447
449
. column ( ( SIden :: Str ( "documents" ) , SIden :: Str ( "id" ) ) )
450
+ . column ( ( SIden :: Str ( "chunks" ) , SIden :: Str ( "chunk" ) ) )
448
451
. expr_as (
449
452
Expr :: cust_with_values (
450
453
format ! (
@@ -514,6 +517,7 @@ pub async fn build_search_query(
514
517
let mut row_number_pre_rerank =Query :: select ( ) ;
515
518
row_number_pre_rerank
516
519
. column ( SIden :: Str ( "id" ) )
520
+ . column ( SIden :: Str ( "chunk" ) )
517
521
. from ( SIden :: String ( cte_name. clone ( ) ) )
518
522
. expr_as ( Expr :: cust ( "ROW_NUMBER() OVER ()" ) , Alias :: new ( "row_number" ) )
519
523
. limit ( rerank. num_documents_to_rerank ) ;
@@ -525,7 +529,10 @@ pub async fn build_search_query(
525
529
// Our actual CTE
526
530
let mut query =Query :: select ( ) ;
527
531
query. column ( SIden :: Str ( "id" ) ) ;
528
- query. expr_as ( Expr :: cust ( "(rank).score" ) , Alias :: new ( "score" ) ) ;
532
+ query. expr_as (
533
+ Expr :: cust ( format ! ( "(rank).score * {boost}" ) ) ,
534
+ Alias :: new ( "score" ) ,
535
+ ) ;
529
536
530
537
// Build the actual CTE
531
538
let mut sub_query_rank_call =Query :: select ( ) ;
@@ -537,14 +544,7 @@ pub async fn build_search_query(
537
544
format ! ( r#"pgml.rank($1, $2, array_agg("chunk"), '{{"return_documents": false, "top_k": {}}}'::jsonb || $3)"# , valid_query. limit) ,
538
545
[ model_expr, query_expr, parameters_expr] ,
539
546
) , Alias :: new ( "rank" ) )
540
- . from ( SIden :: String ( format ! ( "row_number_{cte_name}" ) ) )
541
- . join_as (
542
- JoinType :: InnerJoin ,
543
- chunks_table. to_table_tuple ( ) ,
544
- Alias :: new ( "chunks" ) ,
545
- Expr :: col ( ( SIden :: Str ( "chunks" ) , SIden :: Str ( "id" ) ) )
546
- . equals ( ( SIden :: String ( format ! ( "row_number_{cte_name}" ) ) , SIden :: Str ( "id" ) ) ) ,
547
- ) ;
547
+ . from ( SIden :: String ( format ! ( "row_number_{cte_name}" ) ) ) ;
548
548
549
549
let mut sub_query =Query :: select ( ) ;
550
550
sub_query