104
104
#include "access/brin.h"
105
105
#include "access/gin.h"
106
106
#include "access/htup_details.h"
107
+ #include "access/relscan.h"
107
108
#include "access/sysattr.h"
108
- #include "catalog/index .h"
109
+ #include "access/visibilitymap .h"
109
110
#include "catalog/pg_am.h"
110
111
#include "catalog/pg_collation.h"
111
112
#include "catalog/pg_operator.h"
112
113
#include "catalog/pg_opfamily.h"
113
114
#include "catalog/pg_statistic.h"
114
115
#include "catalog/pg_statistic_ext.h"
115
116
#include "catalog/pg_type.h"
116
- #include "executor/executor.h"
117
117
#include "mb/pg_wchar.h"
118
118
#include "miscadmin.h"
119
119
#include "nodes/makefuncs.h"
130
130
#include "parser/parse_coerce.h"
131
131
#include "parser/parsetree.h"
132
132
#include "statistics/statistics.h"
133
+ #include "storage/bufmgr.h"
133
134
#include "utils/acl.h"
134
135
#include "utils/builtins.h"
135
136
#include "utils/bytea.h"
138
139
#include "utils/fmgroids.h"
139
140
#include "utils/index_selfuncs.h"
140
141
#include "utils/lsyscache.h"
142
+ #include "utils/memutils.h"
141
143
#include "utils/nabstime.h"
142
144
#include "utils/pg_locale.h"
143
145
#include "utils/rel.h"
@@ -204,6 +206,14 @@ static bool get_actual_variable_range(PlannerInfo *root,
204
206
VariableStatData * vardata ,
205
207
Oid sortop ,
206
208
Datum * min ,Datum * max );
209
+ static bool get_actual_variable_endpoint (Relation heapRel ,
210
+ Relation indexRel ,
211
+ ScanDirection indexscandir ,
212
+ ScanKey scankeys ,
213
+ int16 typLen ,
214
+ bool typByVal ,
215
+ MemoryContext outercontext ,
216
+ Datum * endpointDatum );
207
217
static RelOptInfo * find_join_input_rel (PlannerInfo * root ,Relids relids );
208
218
static Selectivity prefix_selectivity (PlannerInfo * root ,
209
219
VariableStatData * vardata ,
@@ -5539,31 +5549,22 @@ get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata,
5539
5549
}
5540
5550
5541
5551
/*
5542
- * Found a suitable index to extract data from. We'll need an EState
5543
- * and a bunch of other infrastructure.
5552
+ * Found a suitable index to extract data from. Set up some data that
5553
+ * can be used by both invocations of get_actual_variable_endpoint.
5544
5554
*/
5545
5555
{
5546
- EState * estate ;
5547
- ExprContext * econtext ;
5548
5556
MemoryContext tmpcontext ;
5549
5557
MemoryContext oldcontext ;
5550
5558
Relation heapRel ;
5551
5559
Relation indexRel ;
5552
- IndexInfo * indexInfo ;
5553
- TupleTableSlot * slot ;
5554
5560
int16 typLen ;
5555
5561
bool typByVal ;
5556
5562
ScanKeyData scankeys [1 ];
5557
- IndexScanDesc index_scan ;
5558
- HeapTuple tup ;
5559
- Datum values [INDEX_MAX_KEYS ];
5560
- bool isnull [INDEX_MAX_KEYS ];
5561
- SnapshotData SnapshotNonVacuumable ;
5562
-
5563
- estate = CreateExecutorState ();
5564
- econtext = GetPerTupleExprContext (estate );
5565
- /* Make sure any cruft is generated in the econtext's memory */
5566
- tmpcontext = econtext -> ecxt_per_tuple_memory ;
5563
+
5564
+ /* Make sure any cruft gets recycled when we're done */
5565
+ tmpcontext = AllocSetContextCreate (CurrentMemoryContext ,
5566
+ "get_actual_variable_range workspace" ,
5567
+ ALLOCSET_DEFAULT_SIZES );
5567
5568
oldcontext = MemoryContextSwitchTo (tmpcontext );
5568
5569
5569
5570
/*
@@ -5574,14 +5575,8 @@ get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata,
5574
5575
heapRel = heap_open (rte -> relid ,NoLock );
5575
5576
indexRel = index_open (index -> indexoid ,AccessShareLock );
5576
5577
5577
- /* extract index key information from the index's pg_index info */
5578
- indexInfo = BuildIndexInfo (indexRel );
5579
-
5580
- /* some other stuff */
5581
- slot = MakeSingleTupleTableSlot (RelationGetDescr (heapRel ));
5582
- econtext -> ecxt_scantuple = slot ;
5578
+ /* build some stuff needed for indexscan execution */
5583
5579
get_typlenbyval (vardata -> atttype ,& typLen ,& typByVal );
5584
- InitNonVacuumableSnapshot (SnapshotNonVacuumable ,RecentGlobalXmin );
5585
5580
5586
5581
/* set up an IS NOT NULL scan key so that we ignore nulls */
5587
5582
ScanKeyEntryInitialize (& scankeys [0 ],
@@ -5593,108 +5588,44 @@ get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata,
5593
5588
InvalidOid ,/* no reg proc for this */
5594
5589
(Datum )0 );/* constant */
5595
5590
5596
- have_data = true;
5597
-
5598
5591
/* If min is requested ... */
5599
5592
if (min )
5600
5593
{
5601
- /*
5602
- * In principle, we should scan the index with our current
5603
- * active snapshot, which is the best approximation we've got
5604
- * to what the query will see when executed. But that won't
5605
- * be exact if a new snap is taken before running the query,
5606
- * and it can be very expensive if a lot of recently-dead or
5607
- * uncommitted rows exist at the beginning or end of the index
5608
- * (because we'll laboriously fetch each one and reject it).
5609
- * Instead, we use SnapshotNonVacuumable. That will accept
5610
- * recently-dead and uncommitted rows as well as normal
5611
- * visible rows. On the other hand, it will reject known-dead
5612
- * rows, and thus not give a bogus answer when the extreme
5613
- * value has been deleted (unless the deletion was quite
5614
- * recent); that case motivates not using SnapshotAny here.
5615
- *
5616
- * A crucial point here is that SnapshotNonVacuumable, with
5617
- * RecentGlobalXmin as horizon, yields the inverse of the
5618
- * condition that the indexscan will use to decide that index
5619
- * entries are killable (see heap_hot_search_buffer()).
5620
- * Therefore, if the snapshot rejects a tuple and we have to
5621
- * continue scanning past it, we know that the indexscan will
5622
- * mark that index entry killed. That means that the next
5623
- * get_actual_variable_range() call will not have to visit
5624
- * that heap entry. In this way we avoid repetitive work when
5625
- * this function is used a lot during planning.
5626
- */
5627
- index_scan = index_beginscan (heapRel ,indexRel ,
5628
- & SnapshotNonVacuumable ,
5629
- 1 ,0 );
5630
- index_rescan (index_scan ,scankeys ,1 ,NULL ,0 );
5631
-
5632
- /* Fetch first tuple in sortop's direction */
5633
- if ((tup = index_getnext (index_scan ,
5634
- indexscandir ))!= NULL )
5635
- {
5636
- /* Extract the index column values from the heap tuple */
5637
- ExecStoreTuple (tup ,slot ,InvalidBuffer , false);
5638
- FormIndexDatum (indexInfo ,slot ,estate ,
5639
- values ,isnull );
5640
-
5641
- /* Shouldn't have got a null, but be careful */
5642
- if (isnull [0 ])
5643
- elog (ERROR ,"found unexpected null value in index \"%s\"" ,
5644
- RelationGetRelationName (indexRel ));
5645
-
5646
- /* Copy the index column value out to caller's context */
5647
- MemoryContextSwitchTo (oldcontext );
5648
- * min = datumCopy (values [0 ],typByVal ,typLen );
5649
- MemoryContextSwitchTo (tmpcontext );
5650
- }
5651
- else
5652
- have_data = false;
5653
-
5654
- index_endscan (index_scan );
5594
+ have_data = get_actual_variable_endpoint (heapRel ,
5595
+ indexRel ,
5596
+ indexscandir ,
5597
+ scankeys ,
5598
+ typLen ,
5599
+ typByVal ,
5600
+ oldcontext ,
5601
+ min );
5602
+ }
5603
+ else
5604
+ {
5605
+ /* If min not requested, assume index is nonempty */
5606
+ have_data = true;
5655
5607
}
5656
5608
5657
5609
/* If max is requested, and we didn't find the index is empty */
5658
5610
if (max && have_data )
5659
5611
{
5660
- index_scan = index_beginscan (heapRel ,indexRel ,
5661
- & SnapshotNonVacuumable ,
5662
- 1 ,0 );
5663
- index_rescan (index_scan ,scankeys ,1 ,NULL ,0 );
5664
-
5665
- /* Fetch first tuple in reverse direction */
5666
- if ((tup = index_getnext (index_scan ,
5667
- - indexscandir ))!= NULL )
5668
- {
5669
- /* Extract the index column values from the heap tuple */
5670
- ExecStoreTuple (tup ,slot ,InvalidBuffer , false);
5671
- FormIndexDatum (indexInfo ,slot ,estate ,
5672
- values ,isnull );
5673
-
5674
- /* Shouldn't have got a null, but be careful */
5675
- if (isnull [0 ])
5676
- elog (ERROR ,"found unexpected null value in index \"%s\"" ,
5677
- RelationGetRelationName (indexRel ));
5678
-
5679
- /* Copy the index column value out to caller's context */
5680
- MemoryContextSwitchTo (oldcontext );
5681
- * max = datumCopy (values [0 ],typByVal ,typLen );
5682
- MemoryContextSwitchTo (tmpcontext );
5683
- }
5684
- else
5685
- have_data = false;
5686
-
5687
- index_endscan (index_scan );
5612
+ /* scan in the opposite direction; all else is the same */
5613
+ have_data = get_actual_variable_endpoint (heapRel ,
5614
+ indexRel ,
5615
+ - indexscandir ,
5616
+ scankeys ,
5617
+ typLen ,
5618
+ typByVal ,
5619
+ oldcontext ,
5620
+ max );
5688
5621
}
5689
5622
5690
5623
/* Clean everything up */
5691
- ExecDropSingleTupleTableSlot (slot );
5692
-
5693
5624
index_close (indexRel ,AccessShareLock );
5694
5625
heap_close (heapRel ,NoLock );
5695
5626
5696
5627
MemoryContextSwitchTo (oldcontext );
5697
- FreeExecutorState ( estate );
5628
+ MemoryContextDelete ( tmpcontext );
5698
5629
5699
5630
/* And we're done */
5700
5631
break ;
@@ -5704,6 +5635,133 @@ get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata,
5704
5635
return have_data ;
5705
5636
}
5706
5637
5638
+ /*
5639
+ * Get one endpoint datum (min or max depending on indexscandir) from the
5640
+ * specified index. Return true if successful, or false if the scan found
5641
+ * no usable entry (e.g. the index is empty). On success, the endpoint
5642
+ * value is copied into outercontext and stored to *endpointDatum.
5643
+ *
5644
+ * scankeys is a 1-element scankey array set up to reject nulls.
5645
+ * typLen/typByVal describe the datatype of the index's first column.
5646
+ * (We could compute these values locally, but that would mean computing them
5647
+ * twice when get_actual_variable_range needs both the min and the max.)
5648
+ */
5649
+ static bool
5650
+ get_actual_variable_endpoint (Relation heapRel ,
5651
+ Relation indexRel ,
5652
+ ScanDirection indexscandir ,
5653
+ ScanKey scankeys ,
5654
+ int16 typLen ,
5655
+ bool typByVal ,
5656
+ MemoryContext outercontext ,
5657
+ Datum * endpointDatum )
5658
+ {
5659
+ bool have_data = false; /* becomes true once an endpoint is stored */
5660
+ SnapshotData SnapshotNonVacuumable ;
5661
+ IndexScanDesc index_scan ;
5662
+ Buffer vmbuffer = InvalidBuffer ; /* handed to VM_ALL_VISIBLE; released before return */
5663
+ ItemPointer tid ;
5664
+ Datum values [INDEX_MAX_KEYS ];
5665
+ bool isnull [INDEX_MAX_KEYS ];
5666
+ MemoryContext oldcontext ;
5667
+
5668
+ /*
5669
+ * We use the index-only-scan machinery for this. With mostly-static
5670
+ * tables that's a win because it avoids a heap visit. It's also a win
5671
+ * for dynamic data, but the reason is less obvious; read on for details.
5672
+ *
5673
+ * In principle, we should scan the index with our current active
5674
+ * snapshot, which is the best approximation we've got to what the query
5675
+ * will see when executed. But that won't be exact if a new snap is taken
5676
+ * before running the query, and it can be very expensive if a lot of
5677
+ * recently-dead or uncommitted rows exist at the beginning or end of the
5678
+ * index (because we'll laboriously fetch each one and reject it).
5679
+ * Instead, we use SnapshotNonVacuumable. That will accept recently-dead
5680
+ * and uncommitted rows as well as normal visible rows. On the other
5681
+ * hand, it will reject known-dead rows, and thus not give a bogus answer
5682
+ * when the extreme value has been deleted (unless the deletion was quite
5683
+ * recent); that case motivates not using SnapshotAny here.
5684
+ *
5685
+ * A crucial point here is that SnapshotNonVacuumable, with
5686
+ * RecentGlobalXmin as horizon, yields the inverse of the condition that
5687
+ * the indexscan will use to decide that index entries are killable (see
5688
+ * heap_hot_search_buffer()). Therefore, if the snapshot rejects a tuple
5689
+ * (or more precisely, all tuples of a HOT chain) and we have to continue
5690
+ * scanning past it, we know that the indexscan will mark that index entry
5691
+ * killed. That means that the next get_actual_variable_endpoint() call
5692
+ * will not have to re-consider that index entry. In this way we avoid
5693
+ * repetitive work when this function is used a lot during planning.
5694
+ *
5695
+ * But using SnapshotNonVacuumable creates a hazard of its own. In a
5696
+ * recently-created index, some index entries may point at "broken" HOT
5697
+ * chains in which not all the tuple versions contain data matching the
5698
+ * index entry. The live tuple version(s) certainly do match the index,
5699
+ * but SnapshotNonVacuumable can accept recently-dead tuple versions that
5700
+ * don't match. Hence, if we took data from the selected heap tuple, we
5701
+ * might get a bogus answer that's not close to the index extremal value,
5702
+ * or could even be NULL. We avoid this hazard because we take the data
5703
+ * from the index entry not the heap.
5704
+ */
5705
+ InitNonVacuumableSnapshot (SnapshotNonVacuumable ,RecentGlobalXmin );
5706
+
5707
+ index_scan = index_beginscan (heapRel ,indexRel ,
5708
+ & SnapshotNonVacuumable ,
5709
+ 1 ,0 );
5710
+ /* Set it up for index-only scan */
5711
+ index_scan -> xs_want_itup = true;
5712
+ index_rescan (index_scan ,scankeys ,1 ,NULL ,0 );
5713
+
5714
+ /* Walk index entries in the specified direction until one is usable */
5715
+ while ((tid = index_getnext_tid (index_scan ,indexscandir ))!= NULL )
5716
+ {
5717
+ if (!VM_ALL_VISIBLE (heapRel ,
5718
+ ItemPointerGetBlockNumber (tid ),
5719
+ & vmbuffer ))
5720
+ {
5721
+ /* Rats, we have to visit the heap to check visibility */
5722
+ if (index_fetch_heap (index_scan )== NULL )
5723
+ continue ;/* no visible tuple, try next index entry */
5724
+
5725
+ /*
5726
+ * We don't care whether there's more than one visible tuple in
5727
+ * the HOT chain; if any are visible, that's good enough.
5728
+ */
5729
+ }
5730
+
5731
+ /*
5732
+ * We expect that btree will return data in IndexTuple not HeapTuple
5733
+ * format. It's not lossy either.
5734
+ */
5735
+ if (!index_scan -> xs_itup )
5736
+ elog (ERROR ,"no data returned for index-only scan" );
5737
+ if (index_scan -> xs_recheck )
5738
+ elog (ERROR ,"unexpected recheck indication from btree" );
5739
+
5740
+ /* OK to deconstruct the index tuple */
5741
+ index_deform_tuple (index_scan -> xs_itup ,
5742
+ index_scan -> xs_itupdesc ,
5743
+ values ,isnull );
5744
+
5745
+ /* Shouldn't have got a null, but be careful */
5746
+ if (isnull [0 ])
5747
+ elog (ERROR ,"found unexpected null value in index \"%s\"" ,
5748
+ RelationGetRelationName (indexRel ));
5749
+
5750
+ /* Copy the index column value out to caller's context */
5751
+ oldcontext = MemoryContextSwitchTo (outercontext );
5752
+ * endpointDatum = datumCopy (values [0 ],typByVal ,typLen );
5753
+ MemoryContextSwitchTo (oldcontext );
5754
+ have_data = true;
5755
+ break ; /* only the first usable entry is wanted */
5756
+ }
5757
+
5758
+ if (vmbuffer != InvalidBuffer )
5759
+ ReleaseBuffer (vmbuffer ); /* drop the buffer VM_ALL_VISIBLE left in vmbuffer */
5760
+ index_endscan (index_scan );
5761
+
5762
+ return have_data ;
5763
+ }
5764
+
5707
5765
/*
5708
5766
* find_join_input_rel
5709
5767
*Look up the input relation for a join.