Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit961cb17

Browse files
committed
Added iterative search for HNSW [skip ci]
1 parentc91ed7b commit961cb17

File tree

9 files changed

+457
-30
lines changed

9 files changed

+457
-30
lines changed

‎CHANGELOG.md‎

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
##0.8.0 (unreleased)
22

3+
- Added support for iterative index scans
34
- Added casts for arrays to`sparsevec`
45
- Improved cost estimation
56
- Improved performance of HNSW inserts and on-disk index builds

‎src/hnsw.c‎

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,16 @@
1818
#defineMarkGUCPrefixReserved(x) EmitWarningsOnPlaceholders(x)
1919
#endif
2020

21+
staticconststructconfig_enum_entryhnsw_iterative_search_options[]= {
22+
{"off",HNSW_ITERATIVE_SEARCH_OFF, false},
23+
{"on",HNSW_ITERATIVE_SEARCH_RELAXED, false},
24+
{"strict",HNSW_ITERATIVE_SEARCH_STRICT, false},
25+
{NULL,0, false}
26+
};
27+
2128
inthnsw_ef_search;
29+
inthnsw_iterative_search_max_tuples;
30+
inthnsw_iterative_search;
2231
inthnsw_lock_tranche_id;
2332
staticrelopt_kindhnsw_relopt_kind;
2433

@@ -69,6 +78,15 @@ HnswInit(void)
6978
"Valid range is 1..1000.",&hnsw_ef_search,
7079
HNSW_DEFAULT_EF_SEARCH,HNSW_MIN_EF_SEARCH,HNSW_MAX_EF_SEARCH,PGC_USERSET,0,NULL,NULL,NULL);
7180

81+
DefineCustomEnumVariable("hnsw.iterative_search","Sets iterative search",
82+
NULL,&hnsw_iterative_search,
83+
HNSW_ITERATIVE_SEARCH_OFF,hnsw_iterative_search_options,PGC_USERSET,0,NULL,NULL,NULL);
84+
85+
/* TODO Ensure ivfflat.max_probes uses same value for "all" */
86+
DefineCustomIntVariable("hnsw.iterative_search_max_tuples","Sets the max number of candidates to visit for iterative search",
87+
"-1 means all",&hnsw_iterative_search_max_tuples,
88+
-1,-1,INT_MAX,PGC_USERSET,0,NULL,NULL,NULL);
89+
7290
MarkGUCPrefixReserved("hnsw");
7391
}
7492

‎src/hnsw.h‎

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -109,8 +109,17 @@
109109

110110
/* Variables */
111111
externinthnsw_ef_search;
112+
externinthnsw_iterative_search;
113+
externinthnsw_iterative_search_max_tuples;
112114
externinthnsw_lock_tranche_id;
113115

116+
typedefenumHnswIterativeSearchType
117+
{
118+
HNSW_ITERATIVE_SEARCH_OFF,
119+
HNSW_ITERATIVE_SEARCH_RELAXED,
120+
HNSW_ITERATIVE_SEARCH_STRICT
121+
}HnswIterativeSearchType;
122+
114123
typedefstructHnswElementDataHnswElementData;
115124
typedefstructHnswNeighborArrayHnswNeighborArray;
116125

@@ -132,6 +141,7 @@ struct HnswElementData
132141
uint8heaptidsLength;
133142
uint8level;
134143
uint8deleted;
144+
uint8version;
135145
uint32hash;
136146
HnswNeighborsPtrneighbors;
137147
BlockNumberblkno;
@@ -319,10 +329,10 @@ typedef struct HnswElementTupleData
319329
uint8type;
320330
uint8level;
321331
uint8deleted;
322-
uint8unused;
332+
uint8version;
323333
ItemPointerDataheaptids[HNSW_HEAPTIDS];
324334
ItemPointerDataneighbortid;
325-
uint16unused2;
335+
uint16unused;
326336
Vectordata;
327337
}HnswElementTupleData;
328338

@@ -331,7 +341,7 @@ typedef HnswElementTupleData * HnswElementTuple;
331341
typedefstructHnswNeighborTupleData
332342
{
333343
uint8type;
334-
uint8unused;
344+
uint8version;
335345
uint16count;
336346
ItemPointerDataindextids[FLEXIBLE_ARRAY_MEMBER];
337347
}HnswNeighborTupleData;
@@ -356,6 +366,12 @@ typedef struct HnswScanOpaqueData
356366
constHnswTypeInfo*typeInfo;
357367
boolfirst;
358368
List*w;
369+
visited_hashv;
370+
pairingheap*discarded;
371+
HnswQueryq;
372+
intm;
373+
int64tuples;
374+
doublepreviousDistance;
359375
MemoryContexttmpCtx;
360376

361377
/* Support functions */
@@ -399,7 +415,7 @@ boolHnswCheckNorm(HnswSupport * support, Datum value);
399415
BufferHnswNewBuffer(Relationindex,ForkNumberforkNum);
400416
voidHnswInitPage(Bufferbuf,Pagepage);
401417
voidHnswInit(void);
402-
List*HnswSearchLayer(char*base,HnswQuery*q,List*ep,intef,intlc,Relationindex,HnswSupport*support,intm,boolinserting,HnswElementskipElement);
418+
List*HnswSearchLayer(char*base,HnswQuery*q,List*ep,intef,intlc,Relationindex,HnswSupport*support,intm,boolinserting,HnswElementskipElement,visited_hash*v,pairingheap**discarded,boolinitVisited,int64*tuples);
403419
HnswElementHnswGetEntryPoint(Relationindex);
404420
voidHnswGetMetaPageInfo(Relationindex,int*m,HnswElement*entryPoint);
405421
void*HnswAlloc(HnswAllocator*allocator,Sizesize);

‎src/hnswinsert.c‎

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ GetInsertPage(Relation index)
3636
* Check for a free offset
3737
*/
3838
staticbool
39-
HnswFreeOffset(Relationindex,Bufferbuf,Pagepage,HnswElementelement,SizeetupSize,SizentupSize,Buffer*nbuf,Page*npage,OffsetNumber*freeOffno,OffsetNumber*freeNeighborOffno,BlockNumber*newInsertPage)
39+
HnswFreeOffset(Relationindex,Bufferbuf,Pagepage,HnswElementelement,SizeetupSize,SizentupSize,Buffer*nbuf,Page*npage,OffsetNumber*freeOffno,OffsetNumber*freeNeighborOffno,BlockNumber*newInsertPage,uint8*tupleVersion)
4040
{
4141
OffsetNumberoffno;
4242
OffsetNumbermaxoffno=PageGetMaxOffsetNumber(page);
@@ -98,6 +98,7 @@ HnswFreeOffset(Relation index, Buffer buf, Page page, HnswElement element, Size
9898
{
9999
*freeOffno=offno;
100100
*freeNeighborOffno=neighborOffno;
101+
*tupleVersion=etup->version;
101102
return true;
102103
}
103104
elseif (*nbuf!=buf)
@@ -153,6 +154,7 @@ AddElementOnDisk(Relation index, HnswElement e, int m, BlockNumber insertPage, B
153154
OffsetNumberfreeOffno=InvalidOffsetNumber;
154155
OffsetNumberfreeNeighborOffno=InvalidOffsetNumber;
155156
BlockNumbernewInsertPage=InvalidBlockNumber;
157+
uint8tupleVersion;
156158
char*base=NULL;
157159

158160
/* Calculate sizes */
@@ -202,7 +204,7 @@ AddElementOnDisk(Relation index, HnswElement e, int m, BlockNumber insertPage, B
202204
}
203205

204206
/* Next, try space from a deleted element */
205-
if (HnswFreeOffset(index,buf,page,e,etupSize,ntupSize,&nbuf,&npage,&freeOffno,&freeNeighborOffno,&newInsertPage))
207+
if (HnswFreeOffset(index,buf,page,e,etupSize,ntupSize,&nbuf,&npage,&freeOffno,&freeNeighborOffno,&newInsertPage,&tupleVersion))
206208
{
207209
if (nbuf!=buf)
208210
{
@@ -212,6 +214,10 @@ AddElementOnDisk(Relation index, HnswElement e, int m, BlockNumber insertPage, B
212214
npage=GenericXLogRegisterBuffer(state,nbuf,0);
213215
}
214216

217+
/* Set tuple version */
218+
etup->version=tupleVersion;
219+
ntup->version=tupleVersion;
220+
215221
break;
216222
}
217223

‎src/hnswscan.c‎

Lines changed: 131 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
#include"postgres.h"
22

3+
#include<math.h>
4+
35
#include"access/relscan.h"
46
#include"hnsw.h"
57
#include"pgstat.h"
@@ -21,25 +23,57 @@ GetScanItems(IndexScanDesc scan, Datum value)
2123
intm;
2224
HnswElemententryPoint;
2325
char*base=NULL;
24-
HnswQueryq;
25-
26-
q.value=value;
26+
HnswQuery*q=&so->q;
2727

2828
/* Get m and entry point */
2929
HnswGetMetaPageInfo(index,&m,&entryPoint);
3030

31+
q->value=value;
32+
so->m=m;
33+
3134
if (entryPoint==NULL)
3235
returnNIL;
3336

34-
ep=list_make1(HnswEntryCandidate(base,entryPoint,&q,index,support, false));
37+
ep=list_make1(HnswEntryCandidate(base,entryPoint,q,index,support, false));
3538

3639
for (intlc=entryPoint->level;lc >=1;lc--)
3740
{
38-
w=HnswSearchLayer(base,&q,ep,1,lc,index,support,m, false,NULL);
41+
w=HnswSearchLayer(base,q,ep,1,lc,index,support,m, false,NULL,NULL,NULL, true,NULL);
3942
ep=w;
4043
}
4144

42-
returnHnswSearchLayer(base,&q,ep,hnsw_ef_search,0,index,support,m, false,NULL);
45+
returnHnswSearchLayer(base,q,ep,hnsw_ef_search,0,index,support,m, false,NULL,&so->v,hnsw_iterative_search!=HNSW_ITERATIVE_SEARCH_OFF ?&so->discarded :NULL, true,&so->tuples);
46+
}
47+
48+
/*
49+
* Resume scan at ground level with discarded candidates
50+
*/
51+
staticList*
52+
ResumeScanItems(IndexScanDescscan)
53+
{
54+
HnswScanOpaqueso= (HnswScanOpaque)scan->opaque;
55+
Relationindex=scan->indexRelation;
56+
List*ep=NIL;
57+
char*base=NULL;
58+
intbatch_size=hnsw_ef_search;
59+
60+
if (pairingheap_is_empty(so->discarded))
61+
returnNIL;
62+
63+
/* Get next batch of candidates */
64+
for (inti=0;i<batch_size;i++)
65+
{
66+
HnswSearchCandidate*sc;
67+
68+
if (pairingheap_is_empty(so->discarded))
69+
break;
70+
71+
sc=HnswGetSearchCandidate(w_node,pairingheap_remove_first(so->discarded));
72+
73+
ep=lappend(ep,sc);
74+
}
75+
76+
returnHnswSearchLayer(base,&so->q,ep,batch_size,0,index,&so->support,so->m, false,NULL,&so->v,&so->discarded, false,&so->tuples);
4377
}
4478

4579
/*
@@ -83,6 +117,8 @@ hnswbeginscan(Relation index, int nkeys, int norderbys)
83117
so= (HnswScanOpaque)palloc(sizeof(HnswScanOpaqueData));
84118
so->typeInfo=HnswGetTypeInfo(index);
85119
so->first= true;
120+
so->v.tids=NULL;
121+
so->discarded=NULL;
86122
so->tmpCtx=AllocSetContextCreate(CurrentMemoryContext,
87123
"Hnsw scan temporary context",
88124
ALLOCSET_DEFAULT_SIZES);
@@ -103,7 +139,15 @@ hnswrescan(IndexScanDesc scan, ScanKey keys, int nkeys, ScanKey orderbys, int no
103139
{
104140
HnswScanOpaqueso= (HnswScanOpaque)scan->opaque;
105141

142+
if (so->v.tids!=NULL)
143+
tidhash_reset(so->v.tids);
144+
145+
if (so->discarded!=NULL)
146+
pairingheap_reset(so->discarded);
147+
106148
so->first= true;
149+
so->tuples=0;
150+
so->previousDistance=-INFINITY;
107151
MemoryContextReset(so->tmpCtx);
108152

109153
if (keys&&scan->numberOfKeys>0)
@@ -165,22 +209,100 @@ hnswgettuple(IndexScanDesc scan, ScanDirection dir)
165209
#endif
166210
}
167211

168-
while (list_length(so->w)>0)
212+
for (;;)
169213
{
170214
char*base=NULL;
171-
HnswSearchCandidate*sc=llast(so->w);
172-
HnswElementelement=HnswPtrAccess(base,sc->element);
215+
HnswSearchCandidate*sc;
216+
HnswElementelement;
173217
ItemPointerheaptid;
174218

219+
if (list_length(so->w)==0)
220+
{
221+
if (hnsw_iterative_search==HNSW_ITERATIVE_SEARCH_OFF)
222+
break;
223+
224+
/* Empty index */
225+
if (so->discarded==NULL)
226+
break;
227+
228+
/* Reached max number of additional tuples */
229+
if (hnsw_iterative_search_max_tuples!=-1&&so->tuples >=hnsw_iterative_search_max_tuples)
230+
{
231+
if (pairingheap_is_empty(so->discarded))
232+
break;
233+
234+
/* Return remaining tuples */
235+
so->w=lappend(so->w,HnswGetSearchCandidate(w_node,pairingheap_remove_first(so->discarded)));
236+
}
237+
/* Prevent scans from consuming too much memory */
238+
elseif (MemoryContextMemAllocated(so->tmpCtx, false)> (Size)work_mem*1024L)
239+
{
240+
if (pairingheap_is_empty(so->discarded))
241+
{
242+
ereport(DEBUG1,
243+
(errmsg("hnsw index scan exceeded work_mem after "INT64_FORMAT" tuples",so->tuples),
244+
errhint("Increase work_mem to scan more tuples.")));
245+
246+
break;
247+
}
248+
249+
/* Return remaining tuples */
250+
so->w=lappend(so->w,HnswGetSearchCandidate(w_node,pairingheap_remove_first(so->discarded)));
251+
}
252+
else
253+
{
254+
/*
255+
* Locking ensures when neighbors are read, the elements they
256+
* reference will not be deleted (and replaced) during the
257+
* iteration.
258+
*
259+
* Elements loaded into memory on previous iterations may have
260+
* been deleted (and replaced), so when reading neighbors, the
261+
* element version must be checked.
262+
*/
263+
LockPage(scan->indexRelation,HNSW_SCAN_LOCK,ShareLock);
264+
265+
so->w=ResumeScanItems(scan);
266+
267+
UnlockPage(scan->indexRelation,HNSW_SCAN_LOCK,ShareLock);
268+
269+
#if defined(HNSW_MEMORY)
270+
elog(INFO,"memory: %zu KB",MemoryContextMemAllocated(so->tmpCtx, false) /1024);
271+
#endif
272+
}
273+
274+
if (list_length(so->w)==0)
275+
break;
276+
}
277+
278+
sc=llast(so->w);
279+
element=HnswPtrAccess(base,sc->element);
280+
175281
/* Move to next element if no valid heap TIDs */
176282
if (element->heaptidsLength==0)
177283
{
178284
so->w=list_delete_last(so->w);
285+
286+
/* Mark memory as free for next iteration */
287+
if (hnsw_iterative_search!=HNSW_ITERATIVE_SEARCH_OFF)
288+
{
289+
pfree(element);
290+
pfree(sc);
291+
}
292+
179293
continue;
180294
}
181295

182296
heaptid=&element->heaptids[--element->heaptidsLength];
183297

298+
if (hnsw_iterative_search==HNSW_ITERATIVE_SEARCH_STRICT)
299+
{
300+
if (sc->distance<so->previousDistance)
301+
continue;
302+
303+
so->previousDistance=sc->distance;
304+
}
305+
184306
MemoryContextSwitchTo(oldCtx);
185307

186308
scan->xs_heaptid=*heaptid;

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp