- Notifications
You must be signed in to change notification settings - Fork 328
SDK - Added re-ranking into vector search #1516
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Uh oh!
There was an error while loading. Please reload this page.
Changes from all commits
File filter
Filter by extension
Conversations
Uh oh!
There was an error while loading. Please reload this page.
Jump to
Uh oh!
There was an error while loading. Please reload this page.
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Uh oh!
There was an error while loading. Please reload this page.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1553,6 +1553,88 @@ mod tests { | ||
Ok(()) | ||
} | ||
// Test: runs a vector search over the `title` and `body` fields with a
// `rerank` stage, then checks that the first result carries a numeric
// `rerank_score` and that the returned document ids arrive in a fixed order.
#[tokio::test] | ||
async fn can_vector_search_with_local_embeddings_and_rerank() -> anyhow::Result<()> { | ||
// `.ok()` discards the returned Result, so the outcome of logger
// initialization never fails the test.
internal_init_logger(None, None).ok(); | ||
let collection_name = "test r_c_cvswlear_1"; | ||
let mut collection = Collection::new(collection_name, None)?; | ||
// Seed the collection with 10 generated documents.
let documents = generate_dummy_documents(10); | ||
collection.upsert_documents(documents.clone(), None).await?; | ||
let pipeline_name = "0"; | ||
// Pipeline schema: `title` gets semantic search plus English full-text
// search; `body` is split with `recursive_character` and then embedded with
// the same `intfloat/e5-small-v2` model and "passage: " prompt.
let mut pipeline = Pipeline::new( | ||
pipeline_name, | ||
Some( | ||
json!({ | ||
"title": { | ||
"semantic_search": { | ||
"model": "intfloat/e5-small-v2", | ||
"parameters": { | ||
"prompt": "passage: " | ||
} | ||
}, | ||
"full_text_search": { | ||
"configuration": "english" | ||
} | ||
}, | ||
"body": { | ||
"splitter": { | ||
"model": "recursive_character" | ||
}, | ||
"semantic_search": { | ||
"model": "intfloat/e5-small-v2", | ||
"parameters": { | ||
"prompt": "passage: " | ||
} | ||
}, | ||
}, | ||
}) | ||
.into(), | ||
), | ||
)?; | ||
collection.add_pipeline(&mut pipeline).await?; | ||
// Search both fields for "Test document: 2": `title` is boosted by 1.2 and
// carries a full-text filter of "test"; `body` uses boost 1.0. The `rerank`
// section names its own query, a reranker model, and
// `num_documents_to_rerank`; `limit` is a separate key set to 5.
// NOTE(review): the rerank query is "Test document 2" (no colon) while the
// field queries are "Test document: 2" -- confirm the difference is intended.
let results = collection | ||
.vector_search( | ||
json!({ | ||
"query": { | ||
"fields": { | ||
"title": { | ||
"query": "Test document: 2", | ||
"parameters": { | ||
"prompt": "passage: " | ||
}, | ||
"full_text_filter": "test", | ||
"boost": 1.2 | ||
}, | ||
"body": { | ||
"query": "Test document: 2", | ||
"parameters": { | ||
"prompt": "passage: " | ||
}, | ||
"boost": 1.0 | ||
}, | ||
} | ||
}, | ||
ContributorAuthor
| ||
"rerank": { | ||
"query": "Test document 2", | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others.Learn more. Seems like There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others.Learn more. Got it, I will think more on making that optional and reusing it, but will merge this and get it out in the meantime. | ||
"model": "mixedbread-ai/mxbai-rerank-base-v1", | ||
"num_documents_to_rerank": 100 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others.Learn more. What about calling this just There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others.Learn more. Oh sorry missed this before merging. I think it might be a little confusing if we make it limit as we already have a limit key, and this isn't actually the limit. We already defined limit with llama index to mean the final number of items returned, but I'm not sure if they or langchain use it elsewhere. | ||
}, | ||
"limit": 5 | ||
}) | ||
.into(), | ||
&mut pipeline, | ||
) | ||
.await?; | ||
// The first result must expose `rerank_score` as a number.
assert!(results[0]["rerank_score"].as_f64().is_some()); | ||
// Collect the document ids in result order; `unwrap` panics if an id is
// missing or is not an unsigned integer.
let ids: Vec<u64> = results | ||
.into_iter() | ||
.map(|r| r["document"]["id"].as_u64().unwrap()) | ||
.collect(); | ||
// Hard-coded expected ordering of the five returned ids.
// NOTE(review): presumably depends on the dummy document contents and the
// reranker model output -- confirm it is stable across model versions.
assert_eq!(ids, vec![2, 1, 3, 8, 6]); | ||
// Archive the collection once the assertions have passed.
collection.archive().await?; | ||
Ok(()) | ||
} | ||
/////////////////////////////// | ||
// Working With Documents ///// | ||
/////////////////////////////// | ||
@@ -2207,6 +2289,11 @@ mod tests { | ||
"id" | ||
] | ||
}, | ||
"rerank": { | ||
"query": "Test document 2", | ||
"model": "mixedbread-ai/mxbai-rerank-base-v1", | ||
"num_documents_to_rerank": 100 | ||
}, | ||
"limit": 5 | ||
}, | ||
"aggregate": { | ||