Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit620847c

Browse files
authored
HNSW and Migrations Done (#988)
1 parent40c9b9c commit620847c

21 files changed

+706
-236
lines changed

‎pgml-sdks/pgml/Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

‎pgml-sdks/pgml/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ serde_json = "1.0.9"
2020
anyhow ="1.0.9"
2121
tokio = {version ="1.28.2",features = ["macros" ] }
2222
chrono ="0.4.9"
23-
pyo3 = {version ="0.18.3",optional =true,features = ["extension-module"] }
23+
pyo3 = {version ="0.18.3",optional =true,features = ["extension-module","anyhow"] }
2424
pyo3-asyncio = {version ="0.18",features = ["attributes","tokio-runtime"],optional =true }
2525
neon = {version ="0.10",optional =true,default-features =false,features = ["napi-6","promise-api","channel-api"] }
2626
itertools ="0.10.5"

‎pgml-sdks/pgml/build.rs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,16 @@ use std::fs::OpenOptions;
33
use std::io::Write;
44

55
constADDITIONAL_DEFAULTS_FOR_PYTHON:&[u8] =br#"
6-
def py_init_logger(level: Optional[str] = "", format: Optional[str] = "") -> None
6+
def init_logger(level: Optional[str] = "", format: Optional[str] = "") -> None
7+
async def migrate() -> None
78
89
Json = Any
910
DateTime = int
1011
"#;
1112

1213
constADDITIONAL_DEFAULTS_FOR_JAVASCRIPT:&[u8] =br#"
13-
export function js_init_logger(level?: string, format?: string): void;
14+
export function init_logger(level?: string, format?: string): void;
15+
export function migrate(): Promise<void>;
1416
1517
export type Json = { [key: string]: any };
1618
export type DateTime = Date;

‎pgml-sdks/pgml/javascript/README.md

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -519,6 +519,24 @@ const pipeline = pgml.newPipeline("test_pipeline", model, splitter, {
519519
awaitcollection.add_pipeline(pipeline)
520520
```
521521
522+
### Configuring HNSW Indexing Parameters
523+
524+
Our SDK utilizes [pgvector](https://github.com/pgvector/pgvector) for storing vectors and performing recall. We use HNSW indexing because it offers the best trade-off between performance and recall.
525+
526+
Our SDK allows for configuration of `m` (the maximum number of connections per layer, 16 by default) and `ef_construction` (the size of the dynamic candidate list when constructing the graph, 64 by default) per pipeline.
527+
528+
```javascript
529+
constmodel=pgml.newModel()
530+
constsplitter=pgml.newSplitter()
531+
constpipeline=pgml.newPipeline("test_pipeline", model, splitter, {
532+
hnsw: {
533+
m:100,
534+
ef_construction:200
535+
}
536+
})
537+
awaitcollection.add_pipeline(pipeline)
538+
```
539+
522540
### Searching with Pipelines
523541
524542
Pipelines are a required argument when performing vector search. After a Pipeline has been added to a Collection, the Model and Splitter can be omitted when instantiating it.

‎pgml-sdks/pgml/javascript/examples/extractive_question_answering.js

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
constpgml=require("pgml");
22
require("dotenv").config();
33

4-
pgml.js_init_logger();
54

65
constmain=async()=>{
76
// Initialize the collection

‎pgml-sdks/pgml/javascript/examples/summarizing_question_answering.js

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
constpgml=require("pgml");
22
require("dotenv").config();
33

4-
pgml.js_init_logger();
5-
64
constmain=async()=>{
75
// Initialize the collection
86
constcollection=pgml.newCollection("my_javascript_sqa_collection");

‎pgml-sdks/pgml/javascript/tests/typescript-tests/test.ts

Lines changed: 55 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ import pgml from "../../index.js";
1010
////////////////////////////////////////////////////////////////////////////////////
1111

1212
constLOG_LEVEL=process.env.LOG_LEVEL ?process.env.LOG_LEVEL :"ERROR";
13-
pgml.js_init_logger(LOG_LEVEL);
13+
pgml.init_logger(LOG_LEVEL);
1414

1515
constgenerate_dummy_documents=(count:number)=>{
1616
letdocs=[];
@@ -143,6 +143,52 @@ it("can vector search with query builder and metadata filtering", async () => {
143143
awaitcollection.archive();
144144
});
145145

146+
it("can vector search with query builder and custom hnsfw ef_search value",async()=>{
147+
letmodel=pgml.newModel();
148+
letsplitter=pgml.newSplitter();
149+
letpipeline=pgml.newPipeline("test_j_p_cvswqbachesv_0",model,splitter);
150+
letcollection=pgml.newCollection("test_j_c_cvswqbachesv_0");
151+
awaitcollection.upsert_documents(generate_dummy_documents(3));
152+
awaitcollection.add_pipeline(pipeline);
153+
letresults=awaitcollection
154+
.query()
155+
.vector_recall("Here is some query",pipeline)
156+
.filter({
157+
hnsw:{
158+
ef_search:2,
159+
},
160+
})
161+
.limit(10)
162+
.fetch_all();
163+
expect(results).toHaveLength(3);
164+
awaitcollection.archive();
165+
});
166+
167+
it("can vector search with query builder and custom hnsfw ef_search value and remote embeddings",async()=>{
168+
letmodel=pgml.newModel("text-embedding-ada-002","openai");
169+
letsplitter=pgml.newSplitter();
170+
letpipeline=pgml.newPipeline(
171+
"test_j_p_cvswqbachesvare_0",
172+
model,
173+
splitter,
174+
);
175+
letcollection=pgml.newCollection("test_j_c_cvswqbachesvare_0");
176+
awaitcollection.upsert_documents(generate_dummy_documents(3));
177+
awaitcollection.add_pipeline(pipeline);
178+
letresults=awaitcollection
179+
.query()
180+
.vector_recall("Here is some query",pipeline)
181+
.filter({
182+
hnsw:{
183+
ef_search:2,
184+
},
185+
})
186+
.limit(10)
187+
.fetch_all();
188+
expect(results).toHaveLength(3);
189+
awaitcollection.archive();
190+
});
191+
146192
///////////////////////////////////////////////////
147193
// Test user output facing functions //////////////
148194
///////////////////////////////////////////////////
@@ -220,3 +266,11 @@ it("can delete documents", async () => {
220266

221267
awaitcollection.archive();
222268
});
269+
270+
///////////////////////////////////////////////////
271+
// Test migrations ////////////////////////////////
272+
///////////////////////////////////////////////////
273+
274+
it("can migrate",async()=>{
275+
awaitpgml.migrate();
276+
});

‎pgml-sdks/pgml/python/README.md

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -530,6 +530,24 @@ pipeline = Pipeline("test_pipeline", model, splitter, {
530530
await collection.add_pipeline(pipeline)
531531
```
532532

533+
### Configuring HNSW Indexing Parameters
534+
535+
Our SDK utilizes [pgvector](https://github.com/pgvector/pgvector) for storing vectors and performing recall. We use HNSW indexing because it offers the best trade-off between performance and recall.
536+
537+
Our SDK allows for configuration of `m` (the maximum number of connections per layer, 16 by default) and `ef_construction` (the size of the dynamic candidate list when constructing the graph, 64 by default) per pipeline.
538+
539+
```python
540+
model= Model()
541+
splitter= Splitter()
542+
pipeline= Pipeline("test_pipeline", model, splitter, {
543+
"hnsw": {
544+
"m":100,
545+
"ef_construction":200
546+
}
547+
})
548+
await collection.add_pipeline(pipeline)
549+
```
550+
533551
### Searching with Pipelines
534552

535553
Pipelines are a required argument when performing vector search. After a Pipeline has been added to a Collection, the Model and Splitter can be omitted when instantiating it.

‎pgml-sdks/pgml/python/examples/summarizing_question_answering.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
frompgmlimportCollection,Model,Splitter,Pipeline,Builtins,py_init_logger
1+
frompgmlimportCollection,Model,Splitter,Pipeline,Builtins
22
importjson
33
fromdatasetsimportload_dataset
44
fromtimeimporttime
@@ -7,9 +7,6 @@
77
importasyncio
88

99

10-
py_init_logger()
11-
12-
1310
asyncdefmain():
1411
load_dotenv()
1512
console=Console()

‎pgml-sdks/pgml/python/pgml/pgml.pyi

Lines changed: 2 additions & 87 deletions
Original file line numberDiff line numberDiff line change
@@ -1,91 +1,6 @@
11

2-
defpy_init_logger(level:Optional[str]="",format:Optional[str]="")->None
2+
definit_logger(level:Optional[str]="",format:Optional[str]="")->None
3+
asyncdefmigrate()->None
34

45
Json=Any
56
DateTime=int
6-
7-
# Top of file key: A12BECOD!
8-
fromtypingimportList,Dict,Optional,Self,Any
9-
10-
11-
classBuiltins:
12-
def__init__(self,database_url:Optional[str]="Default set in Rust. Please check the documentation.")->Self
13-
...
14-
defquery(self,query:str)->QueryRunner
15-
...
16-
asyncdeftransform(self,task:Json,inputs:List[str],args:Optional[Json]=Any)->Json
17-
...
18-
19-
classCollection:
20-
def__init__(self,name:str,database_url:Optional[str]="Default set in Rust. Please check the documentation.")->Self
21-
...
22-
asyncdefadd_pipeline(self,pipeline:Pipeline)->None
23-
...
24-
asyncdefremove_pipeline(self,pipeline:Pipeline)->None
25-
...
26-
asyncdefenable_pipeline(self,pipeline:Pipeline)->None
27-
...
28-
asyncdefdisable_pipeline(self,pipeline:Pipeline)->None
29-
...
30-
asyncdefupsert_documents(self,documents:List[Json])->None
31-
...
32-
asyncdefget_documents(self,args:Optional[Json]=Any)->List[Json]
33-
...
34-
asyncdefdelete_documents(self,filter:Json)->None
35-
...
36-
asyncdefvector_search(self,query:str,pipeline:Pipeline,query_parameters:Optional[Json]=Any,top_k:Optional[int]=1)->List[tuple[float,str,Json]]
37-
...
38-
asyncdefarchive(self)->None
39-
...
40-
defquery(self)->QueryBuilder
41-
...
42-
asyncdefget_pipelines(self)->List[Pipeline]
43-
...
44-
asyncdefget_pipeline(self,name:str)->Pipeline
45-
...
46-
asyncdefexists(self)->bool
47-
...
48-
49-
classModel:
50-
def__init__(self,name:Optional[str]="Default set in Rust. Please check the documentation.",source:Optional[str]="Default set in Rust. Please check the documentation.",parameters:Optional[Json]=Any)->Self
51-
...
52-
53-
classPipeline:
54-
def__init__(self,name:str,model:Optional[Model]=Any,splitter:Optional[Splitter]=Any,parameters:Optional[Json]=Any)->Self
55-
...
56-
asyncdefget_status(self)->PipelineSyncData
57-
...
58-
asyncdefto_dict(self)->Json
59-
...
60-
61-
classQueryBuilder:
62-
deflimit(self,limit:int)->Self
63-
...
64-
deffilter(self,filter:Json)->Self
65-
...
66-
defvector_recall(self,query:str,pipeline:Pipeline,query_parameters:Optional[Json]=Any)->Self
67-
...
68-
asyncdeffetch_all(self)->List[tuple[float,str,Json]]
69-
...
70-
defto_full_string(self)->str
71-
...
72-
73-
classQueryRunner:
74-
asyncdeffetch_all(self)->Json
75-
...
76-
asyncdefexecute(self)->None
77-
...
78-
defbind_string(self,bind_value:str)->Self
79-
...
80-
defbind_int(self,bind_value:int)->Self
81-
...
82-
defbind_float(self,bind_value:float)->Self
83-
...
84-
defbind_bool(self,bind_value:bool)->Self
85-
...
86-
defbind_json(self,bind_value:Json)->Self
87-
...
88-
89-
classSplitter:
90-
def__init__(self,name:Optional[str]="Default set in Rust. Please check the documentation.",parameters:Optional[Json]=Any)->Self
91-
...

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp