Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

HNSW and Migrations Done #988

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
SilasMarvin merged 11 commits into master from silas-hnsw-and-migrations
Sep 6, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
PrevPrevious commit
NextNext commit
Updated queries to use hnsw indices
  • Loading branch information
@SilasMarvin
SilasMarvin committed Sep 6, 2023
commit bbfdcb674ad8eca7b9fa521afc417bc005bfb2cb
2 changes: 0 additions & 2 deletions pgml-sdks/pgml/src/collection.rs
View file
Open in desktop
Original file line number Diff line number Diff line change
Expand Up @@ -926,7 +926,6 @@ impl Collection {
queries::EMBED_AND_VECTOR_SEARCH,
self.pipelines_table_name,
embeddings_table_name,
embeddings_table_name,
self.chunks_table_name,
self.documents_table_name
))
Expand Down Expand Up @@ -1012,7 +1011,6 @@ impl Collection {
sqlx::query_as(&query_builder!(
queries::VECTOR_SEARCH,
embeddings_table_name,
embeddings_table_name,
self.chunks_table_name,
self.documents_table_name
))
Expand Down
66 changes: 60 additions & 6 deletions pgml-sdks/pgml/src/lib.rs
View file
Open in desktop
Original file line number Diff line number Diff line change
Expand Up @@ -467,7 +467,7 @@ mod tests {
.into(),
),
);
let mut collection = Collection::new("test_r_c_cvswre_20", None);
let mut collection = Collection::new("test_r_c_cvswre_21", None);
collection.add_pipeline(&mut pipeline).await?;

// Recreate the pipeline to replicate a more accurate example
Expand All @@ -476,7 +476,7 @@ mod tests {
.upsert_documents(generate_dummy_documents(3))
.await?;
let results = collection
.vector_search("Here is some query", &mut pipeline, None, None)
.vector_search("Here is some query", &mut pipeline, None, Some(10))
.await?;
assert!(results.len() == 3);
collection.archive().await?;
Expand All @@ -502,17 +502,70 @@ mod tests {
.into(),
),
);
let mut collection = Collection::new("test_r_c_cvswqb_3", None);
let mut collection = Collection::new("test_r_c_cvswqb_4", None);
collection.add_pipeline(&mut pipeline).await?;

// Recreate the pipeline to replicate a more accurate example
let mut pipeline = Pipeline::new("test_r_p_cvswqb_1", None, None, None);
collection
.upsert_documents(generate_dummy_documents(30000))
.upsert_documents(generate_dummy_documents(4))
.await?;
let results = collection
.query()
.vector_recall("Here is some query", &mut pipeline, None)
.limit(3)
.fetch_all()
.await?;
assert!(results.len() == 3);
collection.archive().await?;
Ok(())
}

#[sqlx::test]
async fn can_vector_search_with_query_builder_and_pass_model_parameters_in_search(
) -> anyhow::Result<()> {
internal_init_logger(None, None).ok();
let model = Model::new(
Some("hkunlp/instructor-base".to_string()),
Some("python".to_string()),
Some(json!({"instruction": "Represent the Wikipedia document for retrieval: "}).into()),
);
let splitter = Splitter::default();
let mut pipeline = Pipeline::new(
"test_r_p_cvswqbapmpis_1",
Some(model),
Some(splitter),
Some(
serde_json::json!({
"full_text_search": {
"active": true,
"configuration": "english"
}
})
.into(),
),
);
let mut collection = Collection::new("test_r_c_cvswqbapmpis_4", None);
collection.add_pipeline(&mut pipeline).await?;

// Recreate the pipeline to replicate a more accurate example
let mut pipeline = Pipeline::new("test_r_p_cvswqbapmpis_1", None, None, None);
collection
.upsert_documents(generate_dummy_documents(3))
.await?;
let results = collection
.query()
.vector_recall(
"Here is some query",
&mut pipeline,
Some(
json!({
"instruction": "Represent the Wikipedia document for retrieval: "
})
.into(),
),
)
.limit(10)
.fetch_all()
.await?;
assert!(results.len() == 3);
Expand Down Expand Up @@ -543,17 +596,18 @@ mod tests {
.into(),
),
);
let mut collection = Collection::new("test_r_c_cvswqbwre_3", None);
let mut collection = Collection::new("test_r_c_cvswqbwre_5", None);
collection.add_pipeline(&mut pipeline).await?;

// Recreate the pipeline to replicate a more accurate example
let mut pipeline = Pipeline::new("test_r_p_cvswqbwre_1", None, None, None);
collection
.upsert_documents(generate_dummy_documents(3))
.upsert_documents(generate_dummy_documents(4))
.await?;
let results = collection
.query()
.vector_recall("Here is some query", &mut pipeline, None)
.limit(3)
.fetch_all()
.await?;
assert!(results.len() == 3);
Expand Down
34 changes: 8 additions & 26 deletions pgml-sdks/pgml/src/queries.rs
View file
Open in desktop
Original file line number Diff line number Diff line change
Expand Up @@ -188,50 +188,32 @@ embedding AS (
text => $2,
kwargs => $3
)::vector AS embedding
),
comparison AS (
SELECT
chunk_id,
1 - (
%s.embedding <=> (SELECT embedding FROM embedding)
) AS score
FROM
%s
)
SELECT
comparison.score,
embeddings.embedding <=> (SELECT embedding FROM embedding) score,
chunks.chunk,
documents.metadata
FROM
comparison
INNER JOIN %s chunks ON chunks.id = comparison.chunk_id
%s embeddings
INNER JOIN %s chunks ON chunks.id = embeddings.chunk_id
INNER JOIN %s documents ON documents.id = chunks.document_id
ORDER BY
comparison.score DESC
score ASC
LIMIT
$4;
"#;

pub const VECTOR_SEARCH: &str = r#"
WITH comparison AS (
SELECT
chunk_id,
1 - (
%s.embedding <=> $1::vector
) AS score
FROM
%s
)
SELECT
comparison.score,
embeddings.embedding <=> $1::vector score,
chunks.chunk,
documents.metadata
FROM
comparison
INNER JOIN %s chunks ON chunks.id = comparison.chunk_id
%s embeddings
INNER JOIN %s chunks ON chunks.id = embeddings.chunk_id
INNER JOIN %s documents ON documents.id = chunks.document_id
ORDER BY
comparison.score DESC
score ASC
LIMIT
$2;
"#;
Expand Down
53 changes: 15 additions & 38 deletions pgml-sdks/pgml/src/query_builder.rs
View file
Open in desktop
Original file line number Diff line number Diff line change
Expand Up @@ -178,43 +178,33 @@ impl QueryBuilder {
let mut embedding_cte = CommonTableExpression::from_select(embedding_cte);
embedding_cte.table_name(Alias::new("embedding"));

// Build the comparison CTE
let mut comparison_cte = Query::select();
comparison_cte
.from_as(
embeddings_table_name.to_table_tuple(),
SIden::Str("embeddings"),
)
.columns([models::EmbeddingIden::ChunkId])
.expr(Expr::cust(
"1 - (embeddings.embedding <=> (select embedding from embedding)) as score",
));
let mut comparison_cte = CommonTableExpression::from_select(comparison_cte);
comparison_cte.table_name(Alias::new("comparison"));

// Build the where clause
let mut with_clause = WithClause::new();
self.with = with_clause
.cte(pipeline_cte)
.cte(model_cte)
.cte(embedding_cte)
.cte(comparison_cte)
.to_owned();

// Build the query
self.query
.expr(Expr::cust(
"(embeddings.embedding <=> (SELECT embedding from embedding)) score",
))
.columns([
(SIden::Str("comparison"), SIden::Str("score")),
(SIden::Str("chunks"), SIden::Str("chunk")),
(SIden::Str("documents"), SIden::Str("metadata")),
])
.from(SIden::Str("comparison"))
.from_as(
embeddings_table_name.to_table_tuple(),
SIden::Str("embeddings"),
)
.join_as(
JoinType::InnerJoin,
self.collection.chunks_table_name.to_table_tuple(),
Alias::new("chunks"),
Expr::col((SIden::Str("chunks"), SIden::Str("id")))
.equals((SIden::Str("comparison"), SIden::Str("chunk_id"))),
.equals((SIden::Str("embeddings"), SIden::Str("chunk_id"))),
)
.join_as(
JoinType::InnerJoin,
Expand All @@ -223,7 +213,7 @@ impl QueryBuilder {
Expr::col((SIden::Str("documents"), SIden::Str("id")))
.equals((SIden::Str("chunks"), SIden::Str("document_id"))),
)
.order_by((SIden::Str("comparison"), SIden::Str("score")), Order::Desc);
.order_by(SIden::Str("score"), Order::Asc);

self
}
Expand Down Expand Up @@ -296,27 +286,14 @@ impl QueryBuilder {
.await?;
let embedding = std::mem::take(&mut embeddings[0]);

// Explicit drop required here or we can't borrow the pipeline immutably
drop(remote_embeddings);
let embeddings_table_name =
format!("{}.{}_embeddings", self.collection.name, pipeline.name);

let mut comparison_cte = Query::select();
comparison_cte
.from_as(
embeddings_table_name.to_table_tuple(),
SIden::Str("embeddings"),
)
.columns([models::EmbeddingIden::ChunkId])
.expr(Expr::cust_with_values(
"1 - (embeddings.embedding <=> $1::vector) as score",
[embedding],
));
let mut embedding_cte = Query::select();
embedding_cte
.expr(Expr::cust_with_values("$1::vector embedding", [embedding]));

let mut comparison_cte = CommonTableExpression::from_select(comparison_cte);
comparison_cte.table_name(Alias::new("comparison"));
let mut embedding_cte = CommonTableExpression::from_select(embedding_cte);
embedding_cte.table_name(Alias::new("embedding"));
let mut with_clause = WithClause::new();
with_clause.cte(comparison_cte);
with_clause.cte(embedding_cte);

let (sql, values) = self
.query
Expand Down

[8]ページ先頭

©2009-2025 Movatter.jp