HNSW and Migrations Done #988


Merged
SilasMarvin merged 11 commits into master from silas-hnsw-and-migrations on Sep 6, 2023
1 change: 1 addition & 0 deletions pgml-sdks/pgml/Cargo.lock

Some generated files are not rendered by default.

2 changes: 1 addition & 1 deletion pgml-sdks/pgml/Cargo.toml
@@ -20,7 +20,7 @@ serde_json = "1.0.9"
anyhow = "1.0.9"
tokio = { version = "1.28.2", features = [ "macros" ] }
chrono = "0.4.9"
- pyo3 = { version = "0.18.3", optional = true, features = ["extension-module"] }
+ pyo3 = { version = "0.18.3", optional = true, features = ["extension-module", "anyhow"] }
pyo3-asyncio = { version = "0.18", features = ["attributes", "tokio-runtime"], optional = true }
neon = { version = "0.10", optional = true, default-features = false, features = ["napi-6", "promise-api", "channel-api"] }
itertools = "0.10.5"
6 changes: 4 additions & 2 deletions pgml-sdks/pgml/build.rs
@@ -3,14 +3,16 @@ use std::fs::OpenOptions;
use std::io::Write;

const ADDITIONAL_DEFAULTS_FOR_PYTHON: &[u8] = br#"
- def py_init_logger(level: Optional[str] = "", format: Optional[str] = "") -> None
+ def init_logger(level: Optional[str] = "", format: Optional[str] = "") -> None
+ async def migrate() -> None

Json = Any
DateTime = int
"#;

const ADDITIONAL_DEFAULTS_FOR_JAVASCRIPT: &[u8] = br#"
- export function js_init_logger(level?: string, format?: string): void;
+ export function init_logger(level?: string, format?: string): void;
+ export function migrate(): Promise<void>;

export type Json = { [key: string]: any };
export type DateTime = Date;
18 changes: 18 additions & 0 deletions pgml-sdks/pgml/javascript/README.md
@@ -519,6 +519,24 @@ const pipeline = pgml.newPipeline("test_pipeline", model, splitter, {
await collection.add_pipeline(pipeline)
```

### Configuring HNSW Indexing Parameters

Our SDK uses [pgvector](https://github.com/pgvector/pgvector) for storing vectors and performing recall. We use HNSW indexing because it offers the best trade-off between speed and recall.

Our SDK allows configuring `m` (the maximum number of connections per layer; 16 by default) and `ef_construction` (the size of the dynamic candidate list used when constructing the graph; 64 by default) per pipeline.

```javascript
const model = pgml.newModel()
const splitter = pgml.newSplitter()
const pipeline = pgml.newPipeline("test_pipeline", model, splitter, {
hnsw: {
m: 100,
ef_construction: 200
}
})
await collection.add_pipeline(pipeline)
```
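
In addition to the index-build parameters above, `ef_search` (the size of the dynamic candidate list used during search) can be overridden per query through the query builder's `filter` method, as the tests added in this PR demonstrate. A minimal sketch, reusing the `collection` and `pipeline` from above (the value 100 is illustrative; larger values trade query speed for recall):

```javascript
const results = await collection
  .query()
  .vector_recall("Here is some query", pipeline)
  .filter({
    hnsw: {
      ef_search: 100, // applies to this search only
    },
  })
  .limit(10)
  .fetch_all();
```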

### Searching with Pipelines

Pipelines are a required argument when performing vector search. After a Pipeline has been added to a Collection, the Model and Splitter can be omitted when instantiating it.
@@ -1,7 +1,6 @@
const pgml = require("pgml");
require("dotenv").config();

- pgml.js_init_logger();

const main = async () => {
// Initialize the collection
@@ -1,8 +1,6 @@
const pgml = require("pgml");
require("dotenv").config();

- pgml.js_init_logger();

const main = async () => {
// Initialize the collection
const collection = pgml.newCollection("my_javascript_sqa_collection");
56 changes: 55 additions & 1 deletion pgml-sdks/pgml/javascript/tests/typescript-tests/test.ts
@@ -10,7 +10,7 @@ import pgml from "../../index.js";
////////////////////////////////////////////////////////////////////////////////////

const LOG_LEVEL = process.env.LOG_LEVEL ? process.env.LOG_LEVEL : "ERROR";
- pgml.js_init_logger(LOG_LEVEL);
+ pgml.init_logger(LOG_LEVEL);

const generate_dummy_documents = (count: number) => {
let docs = [];
@@ -143,6 +143,52 @@ it("can vector search with query builder and metadata filtering", async () => {
await collection.archive();
});

it("can vector search with query builder and custom hnsfw ef_search value", async () => {
let model = pgml.newModel();
let splitter = pgml.newSplitter();
let pipeline = pgml.newPipeline("test_j_p_cvswqbachesv_0", model, splitter);
let collection = pgml.newCollection("test_j_c_cvswqbachesv_0");
await collection.upsert_documents(generate_dummy_documents(3));
await collection.add_pipeline(pipeline);
let results = await collection
.query()
.vector_recall("Here is some query", pipeline)
.filter({
hnsw: {
ef_search: 2,
},
})
.limit(10)
.fetch_all();
expect(results).toHaveLength(3);
await collection.archive();
});

it("can vector search with query builder and custom hnsfw ef_search value and remote embeddings", async () => {
let model = pgml.newModel("text-embedding-ada-002", "openai");
let splitter = pgml.newSplitter();
let pipeline = pgml.newPipeline(
"test_j_p_cvswqbachesvare_0",
model,
splitter,
);
let collection = pgml.newCollection("test_j_c_cvswqbachesvare_0");
await collection.upsert_documents(generate_dummy_documents(3));
await collection.add_pipeline(pipeline);
let results = await collection
.query()
.vector_recall("Here is some query", pipeline)
.filter({
hnsw: {
ef_search: 2,
},
})
.limit(10)
.fetch_all();
expect(results).toHaveLength(3);
await collection.archive();
});

///////////////////////////////////////////////////
// Test user output facing functions //////////////
///////////////////////////////////////////////////
@@ -220,3 +266,11 @@ it("can delete documents", async () => {

await collection.archive();
});

///////////////////////////////////////////////////
// Test migrations ////////////////////////////////
///////////////////////////////////////////////////

it("can migrate", async () => {
await pgml.migrate();
});
18 changes: 18 additions & 0 deletions pgml-sdks/pgml/python/README.md
@@ -530,6 +530,24 @@ pipeline = Pipeline("test_pipeline", model, splitter, {
await collection.add_pipeline(pipeline)
```

### Configuring HNSW Indexing Parameters

Our SDK uses [pgvector](https://github.com/pgvector/pgvector) for storing vectors and performing recall. We use HNSW indexing because it offers the best trade-off between speed and recall.

Our SDK allows configuring `m` (the maximum number of connections per layer; 16 by default) and `ef_construction` (the size of the dynamic candidate list used when constructing the graph; 64 by default) per pipeline.

```python
model = Model()
splitter = Splitter()
pipeline = Pipeline("test_pipeline", model, splitter, {
"hnsw": {
"m": 100,
"ef_construction": 200
}
})
await collection.add_pipeline(pipeline)
```
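
The `ef_search` parameter (the size of the dynamic candidate list used during search) can likewise be overridden per query through the query builder's `filter` method, as the Python tests added in this PR demonstrate. A minimal sketch, reusing the `collection` and `pipeline` from above (the value 100 is illustrative; larger values trade query speed for recall):

```python
results = (
    await collection.query()
    .vector_recall("Here is some query", pipeline)
    .filter({"hnsw": {"ef_search": 100}})  # applies to this search only
    .limit(10)
    .fetch_all()
)
```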

### Searching with Pipelines

Pipelines are a required argument when performing vector search. After a Pipeline has been added to a Collection, the Model and Splitter can be omitted when instantiating it.
@@ -1,4 +1,4 @@
- from pgml import Collection, Model, Splitter, Pipeline, Builtins, py_init_logger
+ from pgml import Collection, Model, Splitter, Pipeline, Builtins
import json
from datasets import load_dataset
from time import time
@@ -7,9 +7,6 @@
import asyncio


- py_init_logger()


async def main():
load_dotenv()
console = Console()
89 changes: 2 additions & 87 deletions pgml-sdks/pgml/python/pgml/pgml.pyi
@@ -1,91 +1,6 @@

- def py_init_logger(level: Optional[str] = "", format: Optional[str] = "") -> None
+ def init_logger(level: Optional[str] = "", format: Optional[str] = "") -> None
+ async def migrate() -> None

Json = Any
DateTime = int

- # Top of file key: A12BECOD!
- from typing import List, Dict, Optional, Self, Any
-
-
- class Builtins:
- def __init__(self, database_url: Optional[str] = "Default set in Rust. Please check the documentation.") -> Self
- ...
- def query(self, query: str) -> QueryRunner
- ...
- async def transform(self, task: Json, inputs: List[str], args: Optional[Json] = Any) -> Json
- ...
-
- class Collection:
- def __init__(self, name: str, database_url: Optional[str] = "Default set in Rust. Please check the documentation.") -> Self
- ...
- async def add_pipeline(self, pipeline: Pipeline) -> None
- ...
- async def remove_pipeline(self, pipeline: Pipeline) -> None
- ...
- async def enable_pipeline(self, pipeline: Pipeline) -> None
- ...
- async def disable_pipeline(self, pipeline: Pipeline) -> None
- ...
- async def upsert_documents(self, documents: List[Json]) -> None
- ...
- async def get_documents(self, args: Optional[Json] = Any) -> List[Json]
- ...
- async def delete_documents(self, filter: Json) -> None
- ...
- async def vector_search(self, query: str, pipeline: Pipeline, query_parameters: Optional[Json] = Any, top_k: Optional[int] = 1) -> List[tuple[float, str, Json]]
- ...
- async def archive(self) -> None
- ...
- def query(self) -> QueryBuilder
- ...
- async def get_pipelines(self) -> List[Pipeline]
- ...
- async def get_pipeline(self, name: str) -> Pipeline
- ...
- async def exists(self) -> bool
- ...
-
- class Model:
- def __init__(self, name: Optional[str] = "Default set in Rust. Please check the documentation.", source: Optional[str] = "Default set in Rust. Please check the documentation.", parameters: Optional[Json] = Any) -> Self
- ...
-
- class Pipeline:
- def __init__(self, name: str, model: Optional[Model] = Any, splitter: Optional[Splitter] = Any, parameters: Optional[Json] = Any) -> Self
- ...
- async def get_status(self) -> PipelineSyncData
- ...
- async def to_dict(self) -> Json
- ...
-
- class QueryBuilder:
- def limit(self, limit: int) -> Self
- ...
- def filter(self, filter: Json) -> Self
- ...
- def vector_recall(self, query: str, pipeline: Pipeline, query_parameters: Optional[Json] = Any) -> Self
- ...
- async def fetch_all(self) -> List[tuple[float, str, Json]]
- ...
- def to_full_string(self) -> str
- ...
-
- class QueryRunner:
- async def fetch_all(self) -> Json
- ...
- async def execute(self) -> None
- ...
- def bind_string(self, bind_value: str) -> Self
- ...
- def bind_int(self, bind_value: int) -> Self
- ...
- def bind_float(self, bind_value: float) -> Self
- ...
- def bind_bool(self, bind_value: bool) -> Self
- ...
- def bind_json(self, bind_value: Json) -> Self
- ...
-
- class Splitter:
- def __init__(self, name: Optional[str] = "Default set in Rust. Please check the documentation.", parameters: Optional[Json] = Any) -> Self
- ...
50 changes: 49 additions & 1 deletion pgml-sdks/pgml/python/tests/test.py
@@ -19,7 +19,7 @@
print("No DATABASE_URL environment variable found. Please set one")
exit(1)

- pgml.py_init_logger()
+ pgml.init_logger()


def generate_dummy_documents(count: int) -> List[Dict[str, Any]]:
@@ -164,6 +164,44 @@ async def test_can_vector_search_with_query_builder_and_metadata_filtering():
await collection.archive()


@pytest.mark.asyncio
async def test_can_vector_search_with_query_builder_and_custom_hnsw_ef_search_value():
model = pgml.Model()
splitter = pgml.Splitter()
pipeline = pgml.Pipeline("test_p_p_tcvswqbachesv_0", model, splitter)
collection = pgml.Collection(name="test_p_c_tcvswqbachesv_0")
await collection.upsert_documents(generate_dummy_documents(3))
await collection.add_pipeline(pipeline)
results = (
await collection.query()
.vector_recall("Here is some query", pipeline)
.filter({"hnsw": {"ef_search": 2}})
.limit(10)
.fetch_all()
)
assert len(results) == 3
await collection.archive()


@pytest.mark.asyncio
async def test_can_vector_search_with_query_builder_and_custom_hnsw_ef_search_value_and_remote_embeddings():
model = pgml.Model(name="text-embedding-ada-002", source="openai")
splitter = pgml.Splitter()
pipeline = pgml.Pipeline("test_p_p_tcvswqbachesvare_0", model, splitter)
collection = pgml.Collection(name="test_p_c_tcvswqbachesvare_0")
await collection.upsert_documents(generate_dummy_documents(3))
await collection.add_pipeline(pipeline)
results = (
await collection.query()
.vector_recall("Here is some query", pipeline)
.filter({"hnsw": {"ef_search": 2}})
.limit(10)
.fetch_all()
)
assert len(results) == 3
await collection.archive()


###################################################
## Test user output facing functions ##############
###################################################
@@ -250,6 +288,16 @@ async def test_delete_documents():
await collection.archive()


###################################################
## Migration tests ################################
###################################################


@pytest.mark.asyncio
async def test_migrate():
await pgml.migrate()


###################################################
## Test with multiprocessing ######################
###################################################
8 changes: 4 additions & 4 deletions pgml-sdks/pgml/src/builtins.rs
@@ -92,21 +92,21 @@ impl Builtins {
#[cfg(test)]
mod tests {
use super::*;
- use crate::init_logger;
+ use crate::internal_init_logger;

#[sqlx::test]
async fn can_query() -> anyhow::Result<()> {
- init_logger(None, None).ok();
+ internal_init_logger(None, None).ok();
let builtins = Builtins::new(None);
let query = "SELECT10";
let query = "SELECT* from pgml.collections";
let results = builtins.query(query).fetch_all().await?;
assert!(results.as_array().is_some());
Ok(())
}

#[sqlx::test]
async fn can_transform() -> anyhow::Result<()> {
- init_logger(None, None).ok();
+ internal_init_logger(None, None).ok();
let builtins = Builtins::new(None);
let task = Json::from(serde_json::json!("translation_en_to_fr"));
let inputs = vec!["test1".to_string(), "test2".to_string()];