Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit244407a

Browse files
committed
Fix efficiency problems in tuplestore_trim().
The original coding in tuplestore_trim() was only meant to work efficientlyin cases where each trim call deleted most of the tuples in the store.Which, in fact, was the pattern of the original usage with a Material nodesupporting mark/restore operations underneath a MergeJoin. However,WindowAgg now uses tuplestores and it has considerably less friendlytrimming behavior. In particular it can attempt to trim one tuple at atime off a large tuplestore. tuplestore_trim() had O(N^2) runtime in thissituation because of repeatedly shifting its tuple pointer array. Fix byavoiding shifting the array until a reasonably large number of tuples havebeen deleted. This can waste some pointer space, but we do still reclaimthe tuples themselves, so the percentage wastage should be pretty small.Per Jie Li's report of slow percent_rank() evaluation. cume_dist() andntile() would certainly be affected as well, along with any other windowfunction that has a moving frame start and requires reading substantiallyahead of the current row.Back-patch to 8.4, where window functions were introduced. There's noneed to tweak it before that.
1 parent663fc32 commit244407a

File tree

1 file changed

+36
-16
lines changed

1 file changed

+36
-16
lines changed

‎src/backend/utils/sort/tuplestore.c

Lines changed: 36 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -145,8 +145,15 @@ struct Tuplestorestate
145145
/*
146146
* This array holds pointers to tuples in memory if we are in state INMEM.
147147
* In states WRITEFILE and READFILE it's not used.
148+
*
149+
* When memtupdeleted > 0, the first memtupdeleted pointers are already
150+
* released due to a tuplestore_trim() operation, but we haven't expended
151+
* the effort to slide the remaining pointers down. These unused pointers
152+
* are set to NULL to catch any invalid accesses. Note that memtupcount
153+
* includes the deleted pointers.
148154
*/
149155
void**memtuples;/* array of pointers to palloc'd tuples */
156+
intmemtupdeleted;/* the first N slots are currently unused */
150157
intmemtupcount;/* number of tuples currently present */
151158
intmemtupsize;/* allocated length of memtuples array */
152159

@@ -252,6 +259,7 @@ tuplestore_begin_common(int eflags, bool interXact, int maxKBytes)
252259
state->context=CurrentMemoryContext;
253260
state->resowner=CurrentResourceOwner;
254261

262+
state->memtupdeleted=0;
255263
state->memtupcount=0;
256264
state->memtupsize=1024;/* initial guess */
257265
state->memtuples= (void**)palloc(state->memtupsize*sizeof(void*));
@@ -401,14 +409,15 @@ tuplestore_clear(Tuplestorestate *state)
401409
state->myfile=NULL;
402410
if (state->memtuples)
403411
{
404-
for (i=0;i<state->memtupcount;i++)
412+
for (i=state->memtupdeleted;i<state->memtupcount;i++)
405413
{
406414
FREEMEM(state,GetMemoryChunkSpace(state->memtuples[i]));
407415
pfree(state->memtuples[i]);
408416
}
409417
}
410418
state->status=TSS_INMEM;
411419
state->truncated= false;
420+
state->memtupdeleted=0;
412421
state->memtupcount=0;
413422
readptr=state->readptrs;
414423
for (i=0;i<state->readptrcount;readptr++,i++)
@@ -432,7 +441,7 @@ tuplestore_end(Tuplestorestate *state)
432441
BufFileClose(state->myfile);
433442
if (state->memtuples)
434443
{
435-
for (i=0;i<state->memtupcount;i++)
444+
for (i=state->memtupdeleted;i<state->memtupcount;i++)
436445
pfree(state->memtuples[i]);
437446
pfree(state->memtuples);
438447
}
@@ -774,14 +783,14 @@ tuplestore_gettuple(Tuplestorestate *state, bool forward,
774783
}
775784
else
776785
{
777-
if (readptr->current <=0)
786+
if (readptr->current <=state->memtupdeleted)
778787
{
779788
Assert(!state->truncated);
780789
returnNULL;
781790
}
782791
readptr->current--;/* last returned tuple */
783792
}
784-
if (readptr->current <=0)
793+
if (readptr->current <=state->memtupdeleted)
785794
{
786795
Assert(!state->truncated);
787796
returnNULL;
@@ -969,7 +978,7 @@ dumptuples(Tuplestorestate *state)
969978
{
970979
inti;
971980

972-
for (i=0;;i++)
981+
for (i=state->memtupdeleted;;i++)
973982
{
974983
TSReadPointer*readptr=state->readptrs;
975984
intj;
@@ -984,6 +993,7 @@ dumptuples(Tuplestorestate *state)
984993
break;
985994
WRITETUP(state,state->memtuples[i]);
986995
}
996+
state->memtupdeleted=0;
987997
state->memtupcount=0;
988998
}
989999

@@ -1153,40 +1163,50 @@ tuplestore_trim(Tuplestorestate *state)
11531163
nremove=oldest-1;
11541164
if (nremove <=0)
11551165
return;/* nothing to do */
1166+
1167+
Assert(nremove >=state->memtupdeleted);
11561168
Assert(nremove <=state->memtupcount);
11571169

11581170
/* Release no-longer-needed tuples */
1159-
for (i=0;i<nremove;i++)
1171+
for (i=state->memtupdeleted;i<nremove;i++)
11601172
{
11611173
FREEMEM(state,GetMemoryChunkSpace(state->memtuples[i]));
11621174
pfree(state->memtuples[i]);
1175+
state->memtuples[i]=NULL;
11631176
}
1177+
state->memtupdeleted=nremove;
1178+
1179+
/* mark tuplestore as truncated (used for Assert crosschecks only) */
1180+
state->truncated= true;
1181+
1182+
/*
1183+
* If nremove is less than 1/8th memtupcount, just stop here, leaving the
1184+
* "deleted" slots as NULL. This prevents us from expending O(N^2) time
1185+
* repeatedly memmove-ing a large pointer array. The worst case space
1186+
* wastage is pretty small, since it's just pointers and not whole tuples.
1187+
*/
1188+
if (nremove<state->memtupcount /8)
1189+
return;
11641190

11651191
/*
1166-
* Slide the array down and readjust pointers.This may look pretty
1167-
* stupid, but we expect that there will usually not be very many
1168-
* tuple-pointers to move, so this isn't that expensive; and it keeps a
1169-
* lot of other logic simple.
1192+
* Slide the array down and readjust pointers.
11701193
*
1171-
* In fact, in the current usage for merge joins, it's demonstrable that
1172-
* there will always be exactly one non-removed tuple; so optimize that
1173-
* case.
1194+
* In mergejoin's current usage, it's demonstrable that there will always
1195+
* be exactly one non-removed tuple; so optimize that case.
11741196
*/
11751197
if (nremove+1==state->memtupcount)
11761198
state->memtuples[0]=state->memtuples[nremove];
11771199
else
11781200
memmove(state->memtuples,state->memtuples+nremove,
11791201
(state->memtupcount-nremove)*sizeof(void*));
11801202

1203+
state->memtupdeleted=0;
11811204
state->memtupcount-=nremove;
11821205
for (i=0;i<state->readptrcount;i++)
11831206
{
11841207
if (!state->readptrs[i].eof_reached)
11851208
state->readptrs[i].current-=nremove;
11861209
}
1187-
1188-
/* mark tuplestore as truncated (used for Assert crosschecks only) */
1189-
state->truncated= true;
11901210
}
11911211

11921212
/*

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp