  *
  *
  * IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.234 2007/05/30 20:11:53 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.235 2007/06/08 18:23:52 tgl Exp $
  *
  *
  * INTERFACE ROUTINES
@@ -78,29 +78,44 @@ initscan(HeapScanDesc scan, ScanKey key)
      * Determine the number of blocks we have to scan.
      *
      * It is sufficient to do this once at scan start, since any tuples added
-     * while the scan is in progress will be invisible to my transaction
-     * anyway...
+     * while the scan is in progress will be invisible to my snapshot
+     * anyway. (That is not true when using a non-MVCC snapshot. However,
+     * we couldn't guarantee to return tuples added after scan start anyway,
+     * since they might go into pages we already scanned. To guarantee
+     * consistent results for a non-MVCC snapshot, the caller must hold some
+     * higher-level lock that ensures the interesting tuple(s) won't change.)
      */
     scan->rs_nblocks = RelationGetNumberOfBlocks(scan->rs_rd);
 
     /*
      * If the table is large relative to NBuffers, use a bulk-read access
-     * strategy, else use the default random-access strategy.  During a
-     * rescan, don't make a new strategy object if we don't have to.
+     * strategy and enable synchronized scanning (see syncscan.c). Although
+     * the thresholds for these features could be different, we make them the
+     * same so that there are only two behaviors to tune rather than four.
+     *
+     * During a rescan, don't make a new strategy object if we don't have to.
      */
     if (scan->rs_nblocks > NBuffers / 4 &&
         !scan->rs_rd->rd_istemp)
     {
         if (scan->rs_strategy == NULL)
             scan->rs_strategy = GetAccessStrategy(BAS_BULKREAD);
+
+        scan->rs_syncscan = true;
+        scan->rs_startblock = ss_get_location(scan->rs_rd, scan->rs_nblocks);
     }
     else
     {
         if (scan->rs_strategy != NULL)
             FreeAccessStrategy(scan->rs_strategy);
         scan->rs_strategy = NULL;
+
+        scan->rs_syncscan = false;
+        scan->rs_startblock = 0;
     }
 
+    /* rs_pageatatime was set when the snapshot was filled in */
+
     scan->rs_inited = false;
     scan->rs_ctup.t_data = NULL;
     ItemPointerSetInvalid(&scan->rs_ctup.t_self);
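
The hunk above keys both optimizations off the same size test. As a minimal standalone illustration (not part of the patch), the C sketch below mirrors that decision: enable the bulk-read strategy and synchronized scanning only when the table exceeds a quarter of the buffer pool and is not a temp table. The names plan_seqscan, pick_start_block and n_buffers are invented for this sketch; they stand in for the real NBuffers / rd_istemp / ss_get_location() machinery.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

typedef uint32_t BlockNumber;

/* Stand-in for ss_get_location(): this sketch just starts at block 0. */
static BlockNumber
pick_start_block(BlockNumber nblocks)
{
    (void) nblocks;
    return 0;
}

/*
 * Decide whether a sequential scan of 'nblocks' pages should use the
 * bulk-read strategy and synchronized scanning, mirroring the
 * "nblocks > NBuffers / 4 && !temp" test in the hunk above.
 */
static void
plan_seqscan(BlockNumber nblocks, int n_buffers, bool is_temp,
             bool *use_syncscan, BlockNumber *start_block)
{
    if (nblocks > (BlockNumber) (n_buffers / 4) && !is_temp)
    {
        *use_syncscan = true;
        *start_block = pick_start_block(nblocks);   /* ss_get_location() analogue */
    }
    else
    {
        *use_syncscan = false;
        *start_block = 0;
    }
}

int
main(void)
{
    bool        sync;
    BlockNumber start;

    plan_seqscan(100000, 16384, false, &sync, &start);  /* large table */
    printf("large table: syncscan=%d start=%u\n", sync, (unsigned) start);

    plan_seqscan(1000, 16384, false, &sync, &start);    /* small table */
    printf("small table: syncscan=%d start=%u\n", sync, (unsigned) start);
    return 0;
}
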
@@ -229,6 +244,7 @@ heapgettup(HeapScanDesc scan,
     Snapshot    snapshot = scan->rs_snapshot;
     bool        backward = ScanDirectionIsBackward(dir);
     BlockNumber page;
+    bool        finished;
     Page        dp;
     int         lines;
     OffsetNumber lineoff;
@@ -251,7 +267,7 @@ heapgettup(HeapScanDesc scan,
                 tuple->t_data = NULL;
                 return;
             }
-            page = 0;                       /* first page */
+            page = scan->rs_startblock;     /* first page */
             heapgetpage(scan, page);
             lineoff = FirstOffsetNumber;    /* first offnum */
             scan->rs_inited = true;
@@ -285,7 +301,18 @@ heapgettup(HeapScanDesc scan,
                 tuple->t_data = NULL;
                 return;
             }
-            page = scan->rs_nblocks - 1;    /* final page */
+            /*
+             * Disable reporting to syncscan logic in a backwards scan; it's
+             * not very likely anyone else is doing the same thing at the same
+             * time, and much more likely that we'll just bollix things for
+             * forward scanners.
+             */
+            scan->rs_syncscan = false;
+            /* start from last page of the scan */
+            if (scan->rs_startblock > 0)
+                page = scan->rs_startblock - 1;
+            else
+                page = scan->rs_nblocks - 1;
             heapgetpage(scan, page);
         }
         else
@@ -397,10 +424,43 @@ heapgettup(HeapScanDesc scan,
          */
         LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
 
+        /*
+         * advance to next/prior page and detect end of scan
+         */
+        if (backward)
+        {
+            finished = (page == scan->rs_startblock);
+            if (page == 0)
+                page = scan->rs_nblocks;
+            page--;
+        }
+        else
+        {
+            page++;
+            if (page >= scan->rs_nblocks)
+                page = 0;
+            finished = (page == scan->rs_startblock);
+
+            /*
+             * Report our new scan position for synchronization purposes.
+             * We don't do that when moving backwards, however. That would
+             * just mess up any other forward-moving scanners.
+             *
+             * Note: we do this before checking for end of scan so that the
+             * final state of the position hint is back at the start of the
+             * rel. That's not strictly necessary, but otherwise when you run
+             * the same query multiple times the starting position would shift
+             * a little bit backwards on every invocation, which is confusing.
+             * We don't guarantee any specific ordering in general, though.
+             */
+            if (scan->rs_syncscan)
+                ss_report_location(scan->rs_rd, page);
+        }
+
         /*
          * return NULL if we've exhausted all the pages
          */
-        if (backward ? (page == 0) : (page + 1 >= scan->rs_nblocks))
+        if (finished)
         {
             if (BufferIsValid(scan->rs_cbuf))
                 ReleaseBuffer(scan->rs_cbuf);
@@ -411,8 +471,6 @@ heapgettup(HeapScanDesc scan,
             return;
         }
 
-        page = backward ? (page - 1) : (page + 1);
-
         heapgetpage(scan, page);
 
         LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
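
The advance logic added above treats the heap as a circular list of pages: the scan starts at rs_startblock, wraps past the last block back to block zero, and is finished when it comes around to rs_startblock again (or, scanning backwards, once it has processed rs_startblock itself). The standalone sketch below reproduces that wrap-around and end-of-scan test; advance_page is an invented helper for illustration, not a PostgreSQL function.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

typedef uint32_t BlockNumber;

/*
 * Advance '*page' one step through a circular scan of 'nblocks' pages that
 * started at 'startblock', in the given direction. Returns true when the
 * scan has come all the way around and there are no more pages to visit.
 * Mirrors the advance/finished logic added in the hunks above.
 */
static bool
advance_page(BlockNumber *page, BlockNumber nblocks,
             BlockNumber startblock, bool backward)
{
    bool finished;

    if (backward)
    {
        /* a backward scan ends after processing the start block itself */
        finished = (*page == startblock);
        if (*page == 0)
            *page = nblocks;
        (*page)--;
    }
    else
    {
        (*page)++;
        if (*page >= nblocks)
            *page = 0;
        /* finished when the wrap brings us back to where we started */
        finished = (*page == startblock);
    }
    return finished;
}

int
main(void)
{
    BlockNumber nblocks = 5;
    BlockNumber startblock = 3;
    BlockNumber page = startblock;

    /* forward scan visits pages 3 4 0 1 2, then stops */
    do
    {
        printf("%u ", (unsigned) page);
    } while (!advance_page(&page, nblocks, startblock, false));
    printf("\n");
    return 0;
}
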
@@ -455,6 +513,7 @@ heapgettup_pagemode(HeapScanDesc scan,
     HeapTuple   tuple = &(scan->rs_ctup);
     bool        backward = ScanDirectionIsBackward(dir);
     BlockNumber page;
+    bool        finished;
     Page        dp;
     int         lines;
     int         lineindex;
@@ -478,7 +537,7 @@ heapgettup_pagemode(HeapScanDesc scan,
                 tuple->t_data = NULL;
                 return;
             }
-            page = 0;                       /* first page */
+            page = scan->rs_startblock;     /* first page */
             heapgetpage(scan, page);
             lineindex = 0;
             scan->rs_inited = true;
@@ -509,7 +568,18 @@ heapgettup_pagemode(HeapScanDesc scan,
                 tuple->t_data = NULL;
                 return;
             }
-            page = scan->rs_nblocks - 1;    /* final page */
+            /*
+             * Disable reporting to syncscan logic in a backwards scan; it's
+             * not very likely anyone else is doing the same thing at the same
+             * time, and much more likely that we'll just bollix things for
+             * forward scanners.
+             */
+            scan->rs_syncscan = false;
+            /* start from last page of the scan */
+            if (scan->rs_startblock > 0)
+                page = scan->rs_startblock - 1;
+            else
+                page = scan->rs_nblocks - 1;
             heapgetpage(scan, page);
         }
         else
@@ -616,11 +686,40 @@ heapgettup_pagemode(HeapScanDesc scan,
          * if we get here, it means we've exhausted the items on this page and
          * it's time to move to the next.
          */
+        if (backward)
+        {
+            finished = (page == scan->rs_startblock);
+            if (page == 0)
+                page = scan->rs_nblocks;
+            page--;
+        }
+        else
+        {
+            page++;
+            if (page >= scan->rs_nblocks)
+                page = 0;
+            finished = (page == scan->rs_startblock);
+
+            /*
+             * Report our new scan position for synchronization purposes.
+             * We don't do that when moving backwards, however. That would
+             * just mess up any other forward-moving scanners.
+             *
+             * Note: we do this before checking for end of scan so that the
+             * final state of the position hint is back at the start of the
+             * rel. That's not strictly necessary, but otherwise when you run
+             * the same query multiple times the starting position would shift
+             * a little bit backwards on every invocation, which is confusing.
+             * We don't guarantee any specific ordering in general, though.
+             */
+            if (scan->rs_syncscan)
+                ss_report_location(scan->rs_rd, page);
+        }
 
         /*
          * return NULL if we've exhausted all the pages
          */
-        if (backward ? (page == 0) : (page + 1 >= scan->rs_nblocks))
+        if (finished)
         {
             if (BufferIsValid(scan->rs_cbuf))
                 ReleaseBuffer(scan->rs_cbuf);
@@ -631,7 +730,6 @@ heapgettup_pagemode(HeapScanDesc scan,
             return;
         }
 
-        page = backward ? (page - 1) : (page + 1);
         heapgetpage(scan, page);
 
         dp = (Page) BufferGetPage(scan->rs_cbuf);
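
One subtlety worth spelling out is the note about calling ss_report_location() before the end-of-scan check: the last position a completed forward scan reports is the wrapped-around value, i.e. the block the scan started at, so rerunning the same query starts at the same block instead of drifting backwards by one page per invocation. Below is a standalone sketch of that behavior; report_hint and run_forward_scan are invented stand-ins, not the real syncscan API.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

typedef uint32_t BlockNumber;

static BlockNumber scan_hint;   /* stand-in for the shared syncscan hint */

/* stand-in for ss_report_location() */
static void
report_hint(BlockNumber page)
{
    scan_hint = page;
}

/* Run one full forward scan of 'nblocks' pages starting at 'startblock'. */
static void
run_forward_scan(BlockNumber nblocks, BlockNumber startblock)
{
    BlockNumber page = startblock;
    bool        finished;

    do
    {
        /* ... process page ... */
        page++;
        if (page >= nblocks)
            page = 0;
        finished = (page == startblock);
        /* report before checking 'finished', as in the patch */
        report_hint(page);
    } while (!finished);
}

int
main(void)
{
    run_forward_scan(10, 7);
    /* the hint ends up back at the start block, so a rerun starts at 7 again */
    printf("hint after scan = %u\n", (unsigned) scan_hint);
    return 0;
}
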