Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 569174f

Browse files
committed
btree: Support parallel index scans.
This isn't exposed to the optimizer or the executor yet; we'll add support for those things in a separate patch. But this puts the basic mechanism in place: several processes can attach to a parallel btree index scan, and each one will get a subset of the tuples that would have been produced by a non-parallel scan. Each index page becomes the responsibility of a single worker, which then returns all of the TIDs on that page. Rahila Syed, Amit Kapila, Robert Haas, reviewed and tested by Anastasia Lubennikova, Tushar Ahuja, and Haribabu Kommi.
1 parent 8569955 commit 569174f

File tree

8 files changed

+527
-50
lines changed

8 files changed

+527
-50
lines changed

‎doc/src/sgml/monitoring.sgml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1207,14 +1207,18 @@ postgres 27093 0.0 0.0 30096 2752 ? Ss 11:34 0:00 postgres: ser
12071207
<entry>Waiting in an extension.</entry>
12081208
</row>
12091209
<row>
1210-
<entry morerows="9"><literal>IPC</></entry>
1210+
<entry morerows="10"><literal>IPC</></entry>
12111211
<entry><literal>BgWorkerShutdown</></entry>
12121212
<entry>Waiting for background worker to shut down.</entry>
12131213
</row>
12141214
<row>
12151215
<entry><literal>BgWorkerStartup</></entry>
12161216
<entry>Waiting for background worker to start up.</entry>
12171217
</row>
1218+
<row>
1219+
<entry><literal>BtreePage</></entry>
1220+
<entry>Waiting for the page number needed to continue a parallel btree scan to become available.</entry>
1221+
</row>
12181222
<row>
12191223
<entry><literal>ExecuteGather</></entry>
12201224
<entry>Waiting for activity from child process when executing <literal>Gather</> node.</entry>

‎src/backend/access/nbtree/nbtree.c

Lines changed: 256 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@
2323
#include"access/xlog.h"
2424
#include"catalog/index.h"
2525
#include"commands/vacuum.h"
26+
#include"pgstat.h"
27+
#include"storage/condition_variable.h"
2628
#include"storage/indexfsm.h"
2729
#include"storage/ipc.h"
2830
#include"storage/lmgr.h"
@@ -63,6 +65,45 @@ typedef struct
6365
MemoryContextpagedelcontext;
6466
}BTVacState;
6567

68+
/*
69+
* BTPARALLEL_NOT_INITIALIZED indicates that the scan has not started.
70+
*
71+
* BTPARALLEL_ADVANCING indicates that some process is advancing the scan to
72+
* a new page; others must wait.
73+
*
74+
* BTPARALLEL_IDLE indicates that no backend is currently advancing the scan
75+
* to a new page; some process can start doing that.
76+
*
77+
* BTPARALLEL_DONE indicates that the scan is complete (including error exit).
78+
* We reach this state once for every distinct combination of array keys.
79+
*/
80+
typedefenum
81+
{
82+
BTPARALLEL_NOT_INITIALIZED,
83+
BTPARALLEL_ADVANCING,
84+
BTPARALLEL_IDLE,
85+
BTPARALLEL_DONE
86+
}BTPS_State;
87+
88+
/*
89+
* BTParallelScanDescData contains btree specific shared information required
90+
* for parallel scan.
91+
*/
92+
typedefstructBTParallelScanDescData
93+
{
94+
BlockNumberbtps_scanPage;/* latest or next page to be scanned */
95+
BTPS_Statebtps_pageStatus;/* indicates whether next page is available
96+
* for scan. see above for possible states of
97+
* parallel scan. */
98+
intbtps_arrayKeyCount;/* count indicating number of array
99+
* scan keys processed by parallel
100+
* scan */
101+
slock_tbtps_mutex;/* protects above variables */
102+
ConditionVariablebtps_cv;/* used to synchronize parallel scan */
103+
}BTParallelScanDescData;
104+
105+
typedefstructBTParallelScanDescData*BTParallelScanDesc;
106+
66107

67108
staticvoidbtbuildCallback(Relationindex,
68109
HeapTuplehtup,
@@ -118,9 +159,9 @@ bthandler(PG_FUNCTION_ARGS)
118159
amroutine->amendscan=btendscan;
119160
amroutine->ammarkpos=btmarkpos;
120161
amroutine->amrestrpos=btrestrpos;
121-
amroutine->amestimateparallelscan=NULL;
122-
amroutine->aminitparallelscan=NULL;
123-
amroutine->amparallelrescan=NULL;
162+
amroutine->amestimateparallelscan=btestimateparallelscan;
163+
amroutine->aminitparallelscan=btinitparallelscan;
164+
amroutine->amparallelrescan=btparallelrescan;
124165

125166
PG_RETURN_POINTER(amroutine);
126167
}
@@ -491,6 +532,7 @@ btrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys,
491532
}
492533

493534
so->markItemIndex=-1;
535+
so->arrayKeyCount=0;
494536
BTScanPosUnpinIfPinned(so->markPos);
495537
BTScanPosInvalidate(so->markPos);
496538

@@ -652,6 +694,217 @@ btrestrpos(IndexScanDesc scan)
652694
}
653695
}
654696

697+
/*
698+
* btestimateparallelscan -- estimate storage for BTParallelScanDescData
699+
*/
700+
Size
701+
btestimateparallelscan(void)
702+
{
703+
returnsizeof(BTParallelScanDescData);
704+
}
705+
706+
/*
707+
* btinitparallelscan -- initialize BTParallelScanDesc for parallel btree scan
708+
*/
709+
void
710+
btinitparallelscan(void*target)
711+
{
712+
BTParallelScanDescbt_target= (BTParallelScanDesc)target;
713+
714+
SpinLockInit(&bt_target->btps_mutex);
715+
bt_target->btps_scanPage=InvalidBlockNumber;
716+
bt_target->btps_pageStatus=BTPARALLEL_NOT_INITIALIZED;
717+
bt_target->btps_arrayKeyCount=0;
718+
ConditionVariableInit(&bt_target->btps_cv);
719+
}
720+
721+
/*
722+
*btparallelrescan() -- reset parallel scan
723+
*/
724+
void
725+
btparallelrescan(IndexScanDescscan)
726+
{
727+
BTParallelScanDescbtscan;
728+
ParallelIndexScanDescparallel_scan=scan->parallel_scan;
729+
730+
Assert(parallel_scan);
731+
732+
btscan= (BTParallelScanDesc)OffsetToPointer((void*)parallel_scan,
733+
parallel_scan->ps_offset);
734+
735+
/*
736+
* In theory, we don't need to acquire the spinlock here, because there
737+
* shouldn't be any other workers running at this point, but we do so for
738+
* consistency.
739+
*/
740+
SpinLockAcquire(&btscan->btps_mutex);
741+
btscan->btps_scanPage=InvalidBlockNumber;
742+
btscan->btps_pageStatus=BTPARALLEL_NOT_INITIALIZED;
743+
btscan->btps_arrayKeyCount=0;
744+
SpinLockRelease(&btscan->btps_mutex);
745+
}
746+
747+
/*
748+
* _bt_parallel_seize() -- Begin the process of advancing the scan to a new
749+
* page. Other scans must wait until we call _bt_parallel_release() or
750+
* _bt_parallel_done().
751+
*
752+
* The return value is true if we successfully seized the scan and false
753+
* if we did not. The latter case occurs if no pages remain for the current
754+
* set of scankeys.
755+
*
756+
* If the return value is true, *pageno returns the next or current page
757+
* of the scan (depending on the scan direction). An invalid block number
758+
* means the scan hasn't yet started, and P_NONE means we've reached the end.
759+
* The first time a participating process reaches the last page, it will return
760+
* true and set *pageno to P_NONE; after that, further attempts to seize the
761+
* scan will return false.
762+
*
763+
* Callers should ignore the value of pageno if the return value is false.
764+
*/
765+
bool
766+
_bt_parallel_seize(IndexScanDescscan,BlockNumber*pageno)
767+
{
768+
BTScanOpaqueso= (BTScanOpaque)scan->opaque;
769+
BTPS_StatepageStatus;
770+
boolexit_loop= false;
771+
boolstatus= true;
772+
ParallelIndexScanDescparallel_scan=scan->parallel_scan;
773+
BTParallelScanDescbtscan;
774+
775+
*pageno=P_NONE;
776+
777+
btscan= (BTParallelScanDesc)OffsetToPointer((void*)parallel_scan,
778+
parallel_scan->ps_offset);
779+
780+
while (1)
781+
{
782+
SpinLockAcquire(&btscan->btps_mutex);
783+
pageStatus=btscan->btps_pageStatus;
784+
785+
if (so->arrayKeyCount<btscan->btps_arrayKeyCount)
786+
{
787+
/* Parallel scan has already advanced to a new set of scankeys. */
788+
status= false;
789+
}
790+
elseif (pageStatus==BTPARALLEL_DONE)
791+
{
792+
/*
793+
* We're done with this set of scankeys. This may be the end, or
794+
* there could be more sets to try.
795+
*/
796+
status= false;
797+
}
798+
elseif (pageStatus!=BTPARALLEL_ADVANCING)
799+
{
800+
/*
801+
* We have successfully seized control of the scan for the purpose
802+
* of advancing it to a new page!
803+
*/
804+
btscan->btps_pageStatus=BTPARALLEL_ADVANCING;
805+
*pageno=btscan->btps_scanPage;
806+
exit_loop= true;
807+
}
808+
SpinLockRelease(&btscan->btps_mutex);
809+
if (exit_loop|| !status)
810+
break;
811+
ConditionVariableSleep(&btscan->btps_cv,WAIT_EVENT_BTREE_PAGE);
812+
}
813+
ConditionVariableCancelSleep();
814+
815+
returnstatus;
816+
}
817+
818+
/*
819+
* _bt_parallel_release() -- Complete the process of advancing the scan to a
820+
* new page. We now have the new value btps_scanPage; some other backend
820+
* can now begin advancing the scan.
822+
*/
823+
void
824+
_bt_parallel_release(IndexScanDescscan,BlockNumberscan_page)
825+
{
826+
ParallelIndexScanDescparallel_scan=scan->parallel_scan;
827+
BTParallelScanDescbtscan;
828+
829+
btscan= (BTParallelScanDesc)OffsetToPointer((void*)parallel_scan,
830+
parallel_scan->ps_offset);
831+
832+
SpinLockAcquire(&btscan->btps_mutex);
833+
btscan->btps_scanPage=scan_page;
834+
btscan->btps_pageStatus=BTPARALLEL_IDLE;
835+
SpinLockRelease(&btscan->btps_mutex);
836+
ConditionVariableSignal(&btscan->btps_cv);
837+
}
838+
839+
/*
840+
* _bt_parallel_done() -- Mark the parallel scan as complete.
841+
*
842+
* When there are no pages left to scan, this function should be called to
843+
* notify other workers. Otherwise, they might wait forever for the scan to
844+
* advance to the next page.
845+
*/
846+
void
847+
_bt_parallel_done(IndexScanDescscan)
848+
{
849+
BTScanOpaqueso= (BTScanOpaque)scan->opaque;
850+
ParallelIndexScanDescparallel_scan=scan->parallel_scan;
851+
BTParallelScanDescbtscan;
852+
boolstatus_changed= false;
853+
854+
/* Do nothing, for non-parallel scans */
855+
if (parallel_scan==NULL)
856+
return;
857+
858+
btscan= (BTParallelScanDesc)OffsetToPointer((void*)parallel_scan,
859+
parallel_scan->ps_offset);
860+
861+
/*
862+
* Mark the parallel scan as done for this combination of scan keys,
863+
* unless some other process already did so. See also
864+
* _bt_advance_array_keys.
865+
*/
866+
SpinLockAcquire(&btscan->btps_mutex);
867+
if (so->arrayKeyCount >=btscan->btps_arrayKeyCount&&
868+
btscan->btps_pageStatus!=BTPARALLEL_DONE)
869+
{
870+
btscan->btps_pageStatus=BTPARALLEL_DONE;
871+
status_changed= true;
872+
}
873+
SpinLockRelease(&btscan->btps_mutex);
874+
875+
/* wake up all the workers associated with this parallel scan */
876+
if (status_changed)
877+
ConditionVariableBroadcast(&btscan->btps_cv);
878+
}
879+
880+
/*
881+
* _bt_parallel_advance_array_keys() -- Advances the parallel scan for array
882+
*keys.
883+
*
884+
* Updates the count of array keys processed for both local and parallel
885+
* scans.
886+
*/
887+
void
888+
_bt_parallel_advance_array_keys(IndexScanDescscan)
889+
{
890+
BTScanOpaqueso= (BTScanOpaque)scan->opaque;
891+
ParallelIndexScanDescparallel_scan=scan->parallel_scan;
892+
BTParallelScanDescbtscan;
893+
894+
btscan= (BTParallelScanDesc)OffsetToPointer((void*)parallel_scan,
895+
parallel_scan->ps_offset);
896+
897+
so->arrayKeyCount++;
898+
SpinLockAcquire(&btscan->btps_mutex);
899+
if (btscan->btps_pageStatus==BTPARALLEL_DONE)
900+
{
901+
btscan->btps_scanPage=InvalidBlockNumber;
902+
btscan->btps_pageStatus=BTPARALLEL_NOT_INITIALIZED;
903+
btscan->btps_arrayKeyCount++;
904+
}
905+
SpinLockRelease(&btscan->btps_mutex);
906+
}
907+
655908
/*
656909
* Bulk deletion of all index entries pointing to a set of heap tuples.
657910
* The set of target tuples is specified via a callback routine that tells

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp