@@ -268,65 +268,20 @@ bool
 hashgettuple(IndexScanDesc scan, ScanDirection dir)
 {
     HashScanOpaque so = (HashScanOpaque) scan->opaque;
-    Relation    rel = scan->indexRelation;
-    Buffer      buf;
-    Page        page;
-    OffsetNumber offnum;
-    ItemPointer current;
     bool        res;
 
     /* Hash indexes are always lossy since we store only the hash code */
     scan->xs_recheck = true;
 
-    /*
-     * We hold pin but not lock on current buffer while outside the hash AM.
-     * Reacquire the read lock here.
-     */
-    if (BufferIsValid(so->hashso_curbuf))
-        LockBuffer(so->hashso_curbuf, BUFFER_LOCK_SHARE);
-
     /*
      * If we've already initialized this scan, we can just advance it in the
      * appropriate direction.  If we haven't done so yet, we call a routine to
      * get the first item in the scan.
      */
-    current = &(so->hashso_curpos);
-    if (ItemPointerIsValid(current))
+    if (!HashScanPosIsValid(so->currPos))
+        res = _hash_first(scan, dir);
+    else
     {
-        /*
-         * An insertion into the current index page could have happened while
-         * we didn't have read lock on it.  Re-find our position by looking
-         * for the TID we previously returned.  (Because we hold a pin on the
-         * primary bucket page, no deletions or splits could have occurred;
-         * therefore we can expect that the TID still exists in the current
-         * index page, at an offset >= where we were.)
-         */
-        OffsetNumber maxoffnum;
-
-        buf = so->hashso_curbuf;
-        Assert(BufferIsValid(buf));
-        page = BufferGetPage(buf);
-
-        /*
-         * We don't need test for old snapshot here as the current buffer is
-         * pinned, so vacuum can't clean the page.
-         */
-        maxoffnum = PageGetMaxOffsetNumber(page);
-        for (offnum = ItemPointerGetOffsetNumber(current);
-             offnum <= maxoffnum;
-             offnum = OffsetNumberNext(offnum))
-        {
-            IndexTuple  itup;
-
-            itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));
-            if (ItemPointerEquals(&(so->hashso_heappos), &(itup->t_tid)))
-                break;
-        }
-        if (offnum > maxoffnum)
-            elog(ERROR, "failed to re-find scan position within index \"%s\"",
-                 RelationGetRelationName(rel));
-        ItemPointerSetOffsetNumber(current, offnum);
-
         /*
          * Check to see if we should kill the previously-fetched tuple.
          */
@@ -341,47 +296,18 @@ hashgettuple(IndexScanDesc scan, ScanDirection dir)
             * entries.
             */
            if (so->killedItems == NULL)
-               so->killedItems = palloc(MaxIndexTuplesPerPage *
-                                        sizeof(HashScanPosItem));
+               so->killedItems = (int *)
+                   palloc(MaxIndexTuplesPerPage * sizeof(int));
 
            if (so->numKilled < MaxIndexTuplesPerPage)
-           {
-               so->killedItems[so->numKilled].heapTid = so->hashso_heappos;
-               so->killedItems[so->numKilled].indexOffset =
-                   ItemPointerGetOffsetNumber(&(so->hashso_curpos));
-               so->numKilled++;
-           }
+               so->killedItems[so->numKilled++] = so->currPos.itemIndex;
        }
 
        /*
        * Now continue the scan.
        */
        res = _hash_next(scan, dir);
    }
-    else
-        res = _hash_first(scan, dir);
-
-    /*
-     * Skip killed tuples if asked to.
-     */
-    if (scan->ignore_killed_tuples)
-    {
-        while (res)
-        {
-            offnum = ItemPointerGetOffsetNumber(current);
-            page = BufferGetPage(so->hashso_curbuf);
-            if (!ItemIdIsDead(PageGetItemId(page, offnum)))
-                break;
-            res = _hash_next(scan, dir);
-        }
-    }
-
-    /* Release read lock on current buffer, but keep it pinned */
-    if (BufferIsValid(so->hashso_curbuf))
-        LockBuffer(so->hashso_curbuf, BUFFER_LOCK_UNLOCK);
-
-    /* Return current heap TID on success */
-    scan->xs_ctup.t_self = so->hashso_heappos;
 
     return res;
 }
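The currPos field and the HashScanPosIsValid test above belong to the page-at-a-time scan state this patch introduces: a whole page's matching tuples are batched into local memory, which is why hashgettuple no longer has to relock the buffer and re-find its position on every call, and why a killed tuple can now be remembered as a plain int index into the batch rather than a full TID/offset pair. A minimal sketch of what that state might look like, using only the fields this diff itself relies on (the real definitions live in the hash AM's header and carry more fields):

    /* Sketch only: per-scan position state assumed by the code above. */
    typedef struct HashScanPosItem
    {
        ItemPointerData heapTid;        /* TID of referenced heap item */
        OffsetNumber    indexOffset;    /* index item's location within page */
    } HashScanPosItem;

    typedef struct HashScanPosData
    {
        Buffer      buf;                /* if valid, the buffer is pinned */
        BlockNumber currPage;           /* current hash index page */
        int         firstItem;          /* first valid index in items[] */
        int         lastItem;           /* last valid index in items[] */
        int         itemIndex;          /* current index in items[] */
        HashScanPosItem items[MaxIndexTuplesPerPage];   /* must be last */
    } HashScanPosData;

    /* Valid once a page's matching tuples have been batched into items[]. */
    #define HashScanPosIsValid(scanpos) \
        BlockNumberIsValid((scanpos).currPage)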
@@ -396,35 +322,21 @@ hashgetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
     HashScanOpaque so = (HashScanOpaque) scan->opaque;
     bool        res;
     int64       ntids = 0;
+    HashScanPosItem *currItem;
 
     res = _hash_first(scan, ForwardScanDirection);
 
     while (res)
     {
-        bool        add_tuple;
+        currItem = &so->currPos.items[so->currPos.itemIndex];
 
         /*
-         * Skip killed tuples if asked to.
+         * _hash_first and _hash_next handle eliminating dead index entries
+         * whenever scan->ignore_killed_tuples is true.  Therefore, there's
+         * nothing to do here except add the results to the TIDBitmap.
          */
-        if (scan->ignore_killed_tuples)
-        {
-            Page        page;
-            OffsetNumber offnum;
-
-            offnum = ItemPointerGetOffsetNumber(&(so->hashso_curpos));
-            page = BufferGetPage(so->hashso_curbuf);
-            add_tuple = !ItemIdIsDead(PageGetItemId(page, offnum));
-        }
-        else
-            add_tuple = true;
-
-        /* Save tuple ID, and continue scanning */
-        if (add_tuple)
-        {
-            /* Note we mark the tuple ID as requiring recheck */
-            tbm_add_tuples(tbm, &(so->hashso_heappos), 1, true);
-            ntids++;
-        }
+        tbm_add_tuples(tbm, &(currItem->heapTid), 1, true);
+        ntids++;
 
         res = _hash_next(scan, ForwardScanDirection);
     }
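With the batching in place, _hash_next can satisfy most calls without touching the buffer manager at all. A sketch of the consumption pattern the new hashgetbitmap loop depends on, forward direction only; load_next_page_items() is a hypothetical stand-in for the real page-reading logic:

    /* Sketch only: advance within the batched items[]; refill at page end. */
    static bool
    sketch_hash_next(HashScanOpaque so)
    {
        if (++so->currPos.itemIndex > so->currPos.lastItem)
        {
            /* Current page exhausted; batch the next page, if any. */
            if (!load_next_page_items(so))      /* hypothetical helper */
                return false;                   /* end of scan */
            so->currPos.itemIndex = so->currPos.firstItem;
        }
        /* so->currPos.items[so->currPos.itemIndex] now names the result */
        return true;
    }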
@@ -448,12 +360,9 @@ hashbeginscan(Relation rel, int nkeys, int norderbys)
     scan = RelationGetIndexScan(rel, nkeys, norderbys);
 
     so = (HashScanOpaque) palloc(sizeof(HashScanOpaqueData));
-    so->hashso_curbuf = InvalidBuffer;
+    HashScanPosInvalidate(so->currPos);
     so->hashso_bucket_buf = InvalidBuffer;
     so->hashso_split_bucket_buf = InvalidBuffer;
-    /* set position invalid (this will cause _hash_first call) */
-    ItemPointerSetInvalid(&(so->hashso_curpos));
-    ItemPointerSetInvalid(&(so->hashso_heappos));
 
     so->hashso_buc_populated = false;
     so->hashso_buc_split = false;
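HashScanPosInvalidate presumably resets every field of the position struct so that the next hashgettuple call takes the _hash_first path. A sketch consistent with the fields assumed earlier:

    /* Sketch only: reset currPos so HashScanPosIsValid() reports false. */
    #define HashScanPosInvalidate(scanpos) \
        do { \
            (scanpos).buf = InvalidBuffer; \
            (scanpos).currPage = InvalidBlockNumber; \
            (scanpos).firstItem = 0; \
            (scanpos).lastItem = 0; \
            (scanpos).itemIndex = 0; \
        } while (0)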
@@ -476,22 +385,17 @@ hashrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys,
     HashScanOpaque so = (HashScanOpaque) scan->opaque;
     Relation    rel = scan->indexRelation;
 
-    /*
-     * Before leaving current page, deal with any killed items. Also, ensure
-     * that we acquire lock on current page before calling _hash_kill_items.
-     */
-    if (so->numKilled > 0)
+    if (HashScanPosIsValid(so->currPos))
     {
-        LockBuffer(so->hashso_curbuf, BUFFER_LOCK_SHARE);
-        _hash_kill_items(scan);
-        LockBuffer(so->hashso_curbuf, BUFFER_LOCK_UNLOCK);
+        /* Before leaving current page, deal with any killed items */
+        if (so->numKilled > 0)
+            _hash_kill_items(scan);
     }
 
     _hash_dropscanbuf(rel, so);
 
     /* set position invalid (this will cause _hash_first call) */
-    ItemPointerSetInvalid(&(so->hashso_curpos));
-    ItemPointerSetInvalid(&(so->hashso_heappos));
+    HashScanPosInvalidate(so->currPos);
 
     /* Update scan key, if a new one is given */
     if (scankey && scan->numberOfKeys > 0)
@@ -514,15 +418,11 @@ hashendscan(IndexScanDesc scan)
     HashScanOpaque so = (HashScanOpaque) scan->opaque;
     Relation    rel = scan->indexRelation;
 
-    /*
-     * Before leaving current page, deal with any killed items. Also, ensure
-     * that we acquire lock on current page before calling _hash_kill_items.
-     */
-    if (so->numKilled > 0)
+    if (HashScanPosIsValid(so->currPos))
     {
-        LockBuffer(so->hashso_curbuf, BUFFER_LOCK_SHARE);
-        _hash_kill_items(scan);
-        LockBuffer(so->hashso_curbuf, BUFFER_LOCK_UNLOCK);
+        /* Before leaving current page, deal with any killed items */
+        if (so->numKilled > 0)
+            _hash_kill_items(scan);
     }
 
     _hash_dropscanbuf(rel, so);
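Note the pattern shared by hashrescan and hashendscan: the callers no longer bracket _hash_kill_items with LockBuffer calls, and the guard widens from numKilled alone to HashScanPosIsValid. The implication is that _hash_kill_items now manages its own locking, roughly along these lines (a sketch of the assumed behavior, not the function's actual body):

    /* Sketch only: _hash_kill_items takes the lock itself when the
     * scan position still holds a pinned buffer. */
    if (BufferIsValid(so->currPos.buf))
    {
        LockBuffer(so->currPos.buf, BUFFER_LOCK_SHARE);
        /* ... mark each killedItems[] entry's line pointer dead ... */
        LockBuffer(so->currPos.buf, BUFFER_LOCK_UNLOCK);
    }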
@@ -755,16 +655,15 @@ hashvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
  * primary bucket page.  The lock won't necessarily be held continuously,
  * though, because we'll release it when visiting overflow pages.
  *
- * It would be very bad if this function cleaned a page while some other
- * backend was in the midst of scanning it, because hashgettuple assumes
- * that the next valid TID will be greater than or equal to the current
- * valid TID.  There can't be any concurrent scans in progress when we first
- * enter this function because of the cleanup lock we hold on the primary
- * bucket page, but as soon as we release that lock, there might be.  We
- * handle that by conspiring to prevent those scans from passing our cleanup
- * scan.  To do that, we lock the next page in the bucket chain before
- * releasing the lock on the previous page.  (This type of lock chaining is
- * not ideal, so we might want to look for a better solution at some point.)
+ * There can't be any concurrent scans in progress when we first enter this
+ * function because of the cleanup lock we hold on the primary bucket page,
+ * but as soon as we release that lock, there might be.  If those scans got
+ * ahead of our cleanup scan, they might see a tuple before we kill it and
+ * wake up only after VACUUM has completed and the TID has been recycled for
+ * an unrelated tuple.  To avoid that calamity, we prevent scans from passing
+ * our cleanup scan by locking the next page in the bucket chain before
+ * releasing the lock on the previous page.  (This type of lock chaining is
+ * not ideal, so we might want to look for a better solution at some point.)
  *
  * We need to retain a pin on the primary bucket to ensure that no concurrent
  * split can start.
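The chaining that comment describes is easiest to see in code. This sketch leans on the hash AM's _hash_getbuf/_hash_relbuf helpers for illustration; it is a simplified picture, not the actual cleanup loop:

    /* Sketch only: lock page N+1 before releasing page N, so no concurrent
     * scan can overtake the cleanup scan within the bucket chain. */
    buf = bucket_buf;               /* cleanup lock on primary bucket held */
    for (;;)
    {
        Page            page = BufferGetPage(buf);
        HashPageOpaque  opaque = (HashPageOpaque) PageGetSpecialPointer(page);
        BlockNumber     next_blkno = opaque->hasho_nextblkno;
        Buffer          next_buf;

        /* ... remove killable tuples from this page ... */

        if (!BlockNumberIsValid(next_blkno))
            break;                  /* end of bucket chain */

        /* Lock the next page while still holding this page's lock. */
        next_buf = _hash_getbuf(rel, next_blkno, HASH_WRITE, LH_OVERFLOW_PAGE);

        /* Only now is it safe to let go of the current page. */
        if (buf == bucket_buf)
            LockBuffer(buf, BUFFER_LOCK_UNLOCK);    /* keep pin: blocks splits */
        else
            _hash_relbuf(rel, buf);                 /* unlock and unpin */

        buf = next_buf;
    }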