@@ -188,6 +188,37 @@ table_beginscan_parallel(Relation relation, ParallelTableScanDesc pscan)
188188pscan ,flags );
189189}
190190
191+ TableScanDesc
192+ table_beginscan_parallel_tidrange (Relation relation ,
193+ ParallelTableScanDesc pscan )
194+ {
195+ Snapshot snapshot ;
196+ uint32 flags = SO_TYPE_TIDRANGESCAN |SO_ALLOW_PAGEMODE ;
197+ TableScanDesc sscan ;
198+
199+ Assert (RelFileLocatorEquals (relation -> rd_locator ,pscan -> phs_locator ));
200+
201+ /* disable syncscan in parallel tid range scan. */
202+ pscan -> phs_syncscan = false;
203+
204+ if (!pscan -> phs_snapshot_any )
205+ {
206+ /* Snapshot was serialized -- restore it */
207+ snapshot = RestoreSnapshot ((char * )pscan + pscan -> phs_snapshot_off );
208+ RegisterSnapshot (snapshot );
209+ flags |=SO_TEMP_SNAPSHOT ;
210+ }
211+ else
212+ {
213+ /* SnapshotAny passed by caller (not serialized) */
214+ snapshot = SnapshotAny ;
215+ }
216+
217+ sscan = relation -> rd_tableam -> scan_begin (relation ,snapshot ,0 ,NULL ,
218+ pscan ,flags );
219+ return sscan ;
220+ }
221+
191222
192223/* ----------------------------------------------------------------------------
193224 * Index scan related functions.
@@ -398,6 +429,7 @@ table_block_parallelscan_initialize(Relation rel, ParallelTableScanDesc pscan)
398429bpscan -> phs_nblocks > NBuffers /4 ;
399430SpinLockInit (& bpscan -> phs_mutex );
400431bpscan -> phs_startblock = InvalidBlockNumber ;
432+ bpscan -> phs_numblock = InvalidBlockNumber ;
401433pg_atomic_init_u64 (& bpscan -> phs_nallocated ,0 );
402434
403435return sizeof (ParallelBlockTableScanDescData );
@@ -416,57 +448,59 @@ table_block_parallelscan_reinitialize(Relation rel, ParallelTableScanDesc pscan)
416448 *
417449 * Determine where the parallel seq scan should start. This function may be
418450 * called many times, once by each parallel worker. We must be careful only
419- * to set the startblock once.
451+ * to set the phs_startblock and phs_numblock fields once.
452+ *
453+ * Callers may optionally specify a non-InvalidBlockNumber value for
454+ * 'startblock' to force the scan to start at the given page. Likewise,
455+ * 'numblocks' can be specified as a non-InvalidBlockNumber to limit the
456+ * number of blocks to scan to that many blocks.
420457 */
421458void
422459table_block_parallelscan_startblock_init (Relation rel ,
423460ParallelBlockTableScanWorker pbscanwork ,
424- ParallelBlockTableScanDesc pbscan )
461+ ParallelBlockTableScanDesc pbscan ,
462+ BlockNumber startblock ,
463+ BlockNumber numblocks )
425464{
426465BlockNumber sync_startpage = InvalidBlockNumber ;
466+ BlockNumber scan_nblocks ;
427467
428468/* Reset the state we use for controlling allocation size. */
429469memset (pbscanwork ,0 ,sizeof (* pbscanwork ));
430470
431471StaticAssertStmt (MaxBlockNumber <=0xFFFFFFFE ,
432472"pg_nextpower2_32 may be too small for non-standard BlockNumber width" );
433473
434- /*
435- * We determine the chunk size based on the size of the relation. First we
436- * split the relation into PARALLEL_SEQSCAN_NCHUNKS chunks but we then
437- * take the next highest power of 2 number of the chunk size. This means
438- * we split the relation into somewhere between PARALLEL_SEQSCAN_NCHUNKS
439- * and PARALLEL_SEQSCAN_NCHUNKS / 2 chunks.
440- */
441- pbscanwork -> phsw_chunk_size = pg_nextpower2_32 (Max (pbscan -> phs_nblocks /
442- PARALLEL_SEQSCAN_NCHUNKS ,1 ));
443-
444- /*
445- * Ensure we don't go over the maximum chunk size with larger tables. This
446- * means we may get much more than PARALLEL_SEQSCAN_NCHUNKS for larger
447- * tables. Too large a chunk size has been shown to be detrimental to
448- * synchronous scan performance.
449- */
450- pbscanwork -> phsw_chunk_size = Min (pbscanwork -> phsw_chunk_size ,
451- PARALLEL_SEQSCAN_MAX_CHUNK_SIZE );
452-
453474retry :
454475/* Grab the spinlock. */
455476SpinLockAcquire (& pbscan -> phs_mutex );
456477
457478/*
458- * If the scan's startblock has not yet been initialized, we must do so
459- * now. If this is not a synchronized scan, we just start at block 0, but
460- * if it is a synchronized scan, we must get the starting position from
461- * the synchronized scan machinery. We can't hold the spinlock while
462- * doing that, though, so release the spinlock, get the information we
463- * need, and retry. If nobody else has initialized the scan in the
464- * meantime, we'll fill in the value we fetched on the second time
465- * through.
479+ * When the caller specified a limit on the number of blocks to scan, set
480+ * that in the ParallelBlockTableScanDesc, if it's not been done by
481+ * another worker already.
482+ */
483+ if (numblocks != InvalidBlockNumber &&
484+ pbscan -> phs_numblock == InvalidBlockNumber )
485+ {
486+ pbscan -> phs_numblock = numblocks ;
487+ }
488+
489+ /*
490+ * If the scan's phs_startblock has not yet been initialized, we must do
491+ * so now. If a startblock was specified, start there, otherwise if this
492+ * is not a synchronized scan, we just start at block 0, but if it is a
493+ * synchronized scan, we must get the starting position from the
494+ * synchronized scan machinery. We can't hold the spinlock while doing
495+ * that, though, so release the spinlock, get the information we need, and
496+ * retry. If nobody else has initialized the scan in the meantime, we'll
497+ * fill in the value we fetched on the second time through.
466498 */
467499if (pbscan -> phs_startblock == InvalidBlockNumber )
468500{
469- if (!pbscan -> base .phs_syncscan )
501+ if (startblock != InvalidBlockNumber )
502+ pbscan -> phs_startblock = startblock ;
503+ else if (!pbscan -> base .phs_syncscan )
470504pbscan -> phs_startblock = 0 ;
471505else if (sync_startpage != InvalidBlockNumber )
472506pbscan -> phs_startblock = sync_startpage ;
@@ -478,6 +512,34 @@ table_block_parallelscan_startblock_init(Relation rel,
478512}
479513}
480514SpinLockRelease (& pbscan -> phs_mutex );
515+
516+ /*
517+ * Figure out how many blocks we're going to scan; either all of them, or
518+ * just phs_numblock's worth, if a limit has been imposed.
519+ */
520+ if (pbscan -> phs_numblock == InvalidBlockNumber )
521+ scan_nblocks = pbscan -> phs_nblocks ;
522+ else
523+ scan_nblocks = pbscan -> phs_numblock ;
524+
525+ /*
526+ * We determine the chunk size based on scan_nblocks. First we split
527+ * scan_nblocks into PARALLEL_SEQSCAN_NCHUNKS chunks then we calculate the
528+ * next highest power of 2 number of the result. This means we split the
529+ * blocks we're scanning into somewhere between PARALLEL_SEQSCAN_NCHUNKS
530+ * and PARALLEL_SEQSCAN_NCHUNKS / 2 chunks.
531+ */
532+ pbscanwork -> phsw_chunk_size = pg_nextpower2_32 (Max (scan_nblocks /
533+ PARALLEL_SEQSCAN_NCHUNKS ,1 ));
534+
535+ /*
536+ * Ensure we don't go over the maximum chunk size with larger tables. This
537+ * means we may get much more than PARALLEL_SEQSCAN_NCHUNKS for larger
538+ * tables. Too large a chunk size has been shown to be detrimental to
539+ * sequential scan performance.
540+ */
541+ pbscanwork -> phsw_chunk_size = Min (pbscanwork -> phsw_chunk_size ,
542+ PARALLEL_SEQSCAN_MAX_CHUNK_SIZE );
481543}
482544
483545/*
@@ -493,6 +555,7 @@ table_block_parallelscan_nextpage(Relation rel,
493555ParallelBlockTableScanWorker pbscanwork ,
494556ParallelBlockTableScanDesc pbscan )
495557{
558+ BlockNumber scan_nblocks ;
496559BlockNumber page ;
497560uint64 nallocated ;
498561
@@ -513,7 +576,7 @@ table_block_parallelscan_nextpage(Relation rel,
513576 *
514577 * Here we name these ranges of blocks "chunks". The initial size of
515578 * these chunks is determined in table_block_parallelscan_startblock_init
516- * based on the size of the relation.  Towards the end of the scan, we
579+ * based on the number of blocks to scan.  Towards the end of the scan, we
517580 * start making reductions in the size of the chunks in order to attempt
518581 * to divide the remaining work over all the workers as evenly as
519582 * possible.
@@ -530,17 +593,23 @@ table_block_parallelscan_nextpage(Relation rel,
530593 * phs_nallocated counter will exceed rs_nblocks, because workers will
531594 * still increment the value, when they try to allocate the next block but
532595 * all blocks have been allocated already. The counter must be 64 bits
533- * wide because of that, to avoid wrapping around when rs_nblocks is close
534- * to 2^32.
596+ * wide because of that, to avoid wrapping around when scan_nblocks is
597+ * close to 2^32.
535598 *
536599 * The actual block to return is calculated by adding the counter to the
537- * starting block number, modulo nblocks.
600+ * starting block number, modulo phs_nblocks.
538601 */
539602
603+ /* First, figure out how many blocks we're planning on scanning */
604+ if (pbscan -> phs_numblock == InvalidBlockNumber )
605+ scan_nblocks = pbscan -> phs_nblocks ;
606+ else
607+ scan_nblocks = pbscan -> phs_numblock ;
608+
540609/*
541- * First check if we have any remaining blocks in a previous chunk for
542- * this worker.  We must consume all of the blocks from that before we
543- * allocate a new chunk to the worker.
610+ * Now check if we have any remaining blocks in a previous chunk for this
611+ * worker.  We must consume all of the blocks from that before we allocate
612+ * a new chunk to the worker.
544613 */
545614if (pbscanwork -> phsw_chunk_remaining > 0 )
546615{
@@ -562,7 +631,7 @@ table_block_parallelscan_nextpage(Relation rel,
562631 * chunk size set to 1.
563632 */
564633if (pbscanwork -> phsw_chunk_size > 1 &&
565- pbscanwork -> phsw_nallocated > pbscan -> phs_nblocks -
634+ pbscanwork -> phsw_nallocated > scan_nblocks -
566635(pbscanwork -> phsw_chunk_size * PARALLEL_SEQSCAN_RAMPDOWN_CHUNKS ))
567636pbscanwork -> phsw_chunk_size >>=1 ;
568637
@@ -577,7 +646,8 @@ table_block_parallelscan_nextpage(Relation rel,
577646pbscanwork -> phsw_chunk_remaining = pbscanwork -> phsw_chunk_size - 1 ;
578647}
579648
580- if (nallocated >=pbscan -> phs_nblocks )
649+ /* Check if we've run out of blocks to scan */
650+ if (nallocated >=scan_nblocks )
581651page = InvalidBlockNumber ;/* all blocks have been allocated */
582652else
583653page = (nallocated + pbscan -> phs_startblock ) %pbscan -> phs_nblocks ;