3636#include "commands/trigger.h"
3737#include "commands/vacuum.h"
3838#include "miscadmin.h"
39+ #include "optimizer/planner.h"
3940#include "storage/bufmgr.h"
4041#include "storage/procarray.h"
4142#include "storage/smgr.h"
4950#include "utils/snapmgr.h"
5051#include "utils/syscache.h"
5152#include "utils/tqual.h"
53+ #include "utils/tuplesort.h"
5254
5355
5456/*
@@ -69,7 +71,10 @@ static void copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex,
6971int freeze_min_age ,int freeze_table_age ,
7072bool * pSwapToastByContent ,TransactionId * pFreezeXid );
7173static List * get_tables_to_cluster (MemoryContext cluster_context );
72-
74+ static void reform_and_rewrite_tuple (HeapTuple tuple ,
75+ TupleDesc oldTupDesc ,TupleDesc newTupDesc ,
76+ Datum * values ,bool * isnull ,
77+ bool newRelHasOids ,RewriteState rwstate );
7378
7479
7580/*---------------------------------------------------------------------------
@@ -759,6 +764,8 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex,
759764TransactionId OldestXmin ;
760765TransactionId FreezeXid ;
761766RewriteState rwstate ;
767+ bool use_sort ;
768+ Tuplesortstate * tuplesort ;
762769
763770/*
764771 * Open the relations we need.
@@ -845,12 +852,30 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex,
845852rwstate = begin_heap_rewrite (NewHeap ,OldestXmin ,FreezeXid ,use_wal );
846853
847854/*
848- * Scan through the OldHeap, either in OldIndex order or sequentially, and
849- * copy each tuple into the NewHeap. To ensure we see recently-dead
850- * tuples that still need to be copied, we scan with SnapshotAny and use
855+ * Decide whether to use an indexscan or seqscan-and-optional-sort to
856+ * scan the OldHeap. We know how to use a sort to duplicate the ordering
857+ * of a btree index, and will use seqscan-and-sort for that case if the
858+ * planner tells us it's cheaper. Otherwise, always indexscan if an
859+ * index is provided, else plain seqscan.
860+ */
861+ if (OldIndex != NULL && OldIndex -> rd_rel -> relam == BTREE_AM_OID )
862+ use_sort = plan_cluster_use_sort (OIDOldHeap ,OIDOldIndex );
863+ else
864+ use_sort = false;
865+
866+ /* Set up sorting if wanted */
867+ if (use_sort )
868+ tuplesort = tuplesort_begin_cluster (oldTupDesc ,OldIndex ,
869+ maintenance_work_mem , false);
870+ else
871+ tuplesort = NULL ;
872+
873+ /*
874+ * Prepare to scan the OldHeap. To ensure we see recently-dead tuples
875+ * that still need to be copied, we scan with SnapshotAny and use
851876 * HeapTupleSatisfiesVacuum for the visibility test.
852877 */
853- if (OldIndex != NULL )
878+ if (OldIndex != NULL && ! use_sort )
854879{
855880heapScan = NULL ;
856881indexScan = index_beginscan (OldHeap ,OldIndex ,
@@ -862,17 +887,21 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex,
862887indexScan = NULL ;
863888}
864889
890+ /*
891+ * Scan through the OldHeap, either in OldIndex order or sequentially;
892+ * copy each tuple into the NewHeap, or transiently to the tuplesort
893+ * module. Note that we don't bother sorting dead tuples (they won't
894+ * get to the new table anyway).
895+ */
865896for (;;)
866897{
867898HeapTuple tuple ;
868- HeapTuple copiedTuple ;
869899Buffer buf ;
870900bool isdead ;
871- int i ;
872901
873902CHECK_FOR_INTERRUPTS ();
874903
875- if (OldIndex != NULL )
904+ if (indexScan != NULL )
876905{
877906tuple = index_getnext (indexScan ,ForwardScanDirection );
878907if (tuple == NULL )
@@ -951,45 +980,50 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex,
951980continue ;
952981}
953982
954- /*
955- * We cannot simply copy the tuple as-is, for several reasons:
956- *
957- * 1. We'd like to squeeze out the values of any dropped columns, both
958- * to save space and to ensure we have no corner-case failures. (It's
959- * possible for example that the new table hasn't got a TOAST table
960- * and so is unable to store any large values of dropped cols.)
961- *
962- * 2. The tuple might not even be legal for the new table; this is
963- * currently only known to happen as an after-effect of ALTER TABLE
964- * SET WITHOUT OIDS.
965- *
966- * So, we must reconstruct the tuple from component Datums.
967- */
968- heap_deform_tuple (tuple ,oldTupDesc ,values ,isnull );
983+ if (tuplesort != NULL )
984+ tuplesort_putheaptuple (tuplesort ,tuple );
985+ else
986+ reform_and_rewrite_tuple (tuple ,
987+ oldTupDesc ,newTupDesc ,
988+ values ,isnull ,
989+ NewHeap -> rd_rel -> relhasoids ,rwstate );
990+ }
969991
970- /* Be sure to null out any dropped columns */
971- for (i = 0 ;i < natts ;i ++ )
992+ if (indexScan != NULL )
993+ index_endscan (indexScan );
994+ if (heapScan != NULL )
995+ heap_endscan (heapScan );
996+
997+ /*
998+ * In scan-and-sort mode, complete the sort, then read out all live
999+ * tuples from the tuplesort and write them to the new relation.
1000+ */
1001+ if (tuplesort != NULL )
1002+ {
1003+ tuplesort_performsort (tuplesort );
1004+
1005+ for (;;)
9721006{
973- if (newTupDesc -> attrs [i ]-> attisdropped )
974- isnull [i ]= true;
975- }
1007+ HeapTuple tuple ;
1008+ bool shouldfree ;
9761009
977- copiedTuple = heap_form_tuple ( newTupDesc , values , isnull );
1010+ CHECK_FOR_INTERRUPTS ( );
9781011
979- /* Preserve OID, if any */
980- if (NewHeap -> rd_rel -> relhasoids )
981- HeapTupleSetOid ( copiedTuple , HeapTupleGetOid ( tuple )) ;
1012+ tuple = tuplesort_getheaptuple ( tuplesort , true, & shouldfree );
1013+ if (tuple == NULL )
1014+ break ;
9821015
983- /* The heap rewrite module does the rest */
984- rewrite_heap_tuple (rwstate ,tuple ,copiedTuple );
1016+ reform_and_rewrite_tuple (tuple ,
1017+ oldTupDesc ,newTupDesc ,
1018+ values ,isnull ,
1019+ NewHeap -> rd_rel -> relhasoids ,rwstate );
9851020
986- heap_freetuple (copiedTuple );
987- }
1021+ if (shouldfree )
1022+ heap_freetuple (tuple );
1023+ }
9881024
989- if (OldIndex != NULL )
990- index_endscan (indexScan );
991- else
992- heap_endscan (heapScan );
1025+ tuplesort_end (tuplesort );
1026+ }
9931027
9941028/* Write out any remaining tuples, and fsync if needed */
9951029end_heap_rewrite (rwstate );
@@ -1488,3 +1522,50 @@ get_tables_to_cluster(MemoryContext cluster_context)
14881522
14891523return rvs ;
14901524}
1525+
1526+
1527+ /*
1528+ * Reconstruct the given tuple using newTupDesc and pass both the old and the rebuilt tuple to rewrite_heap_tuple().
1529+ *
1530+ * We cannot simply copy the tuple as-is, for several reasons:
1531+ *
1532+ * 1. We'd like to squeeze out the values of any dropped columns, both
1533+ * to save space and to ensure we have no corner-case failures. (It's
1534+ * possible for example that the new table hasn't got a TOAST table
1535+ * and so is unable to store any large values of dropped cols.)
1536+ *
1537+ * 2. The tuple might not even be legal for the new table; this is
1538+ * currently only known to happen as an after-effect of ALTER TABLE
1539+ * SET WITHOUT OIDS.
1540+ *
1541+ * So, we must reconstruct the tuple from component Datums. The rebuilt copy is freed before returning; the input tuple is not freed here.
1542+ */
1543+ static void
1544+ reform_and_rewrite_tuple (HeapTuple tuple ,	/* source tuple; left for the caller to free */
1545+ TupleDesc oldTupDesc ,TupleDesc newTupDesc ,	/* descriptor used to deform the source / descriptor used to form the copy */
1546+ Datum * values ,bool * isnull ,	/* caller-supplied scratch arrays, overwritten on every call; presumably sized for all columns -- confirm against caller */
1547+ bool newRelHasOids ,RewriteState rwstate )	/* whether to carry the OID over / state handed to rewrite_heap_tuple() */
1548+ {
1549+ HeapTuple copiedTuple ;
1550+ int i ;
1551+
1552+ heap_deform_tuple (tuple ,oldTupDesc ,values ,isnull );	/* split the source tuple into values[]/isnull[] per the old descriptor */
1553+
1554+ /* Mark every column that newTupDesc flags as dropped as NULL, so its old value is not carried into the copy */
1555+ for (i = 0 ;i < newTupDesc -> natts ;i ++ )
1556+ {
1557+ if (newTupDesc -> attrs [i ]-> attisdropped )
1558+ isnull [i ]= true;
1559+ }
1560+
1561+ copiedTuple = heap_form_tuple (newTupDesc ,values ,isnull );	/* build the replacement tuple from the adjusted arrays */
1562+
1563+ /* Carry the source tuple's OID over to the copy when the caller says the new relation has OIDs */
1564+ if (newRelHasOids )
1565+ HeapTupleSetOid (copiedTuple ,HeapTupleGetOid (tuple ));
1566+
1567+ /* The heap rewrite module does the rest; it is given both the original and the rebuilt tuple */
1568+ rewrite_heap_tuple (rwstate ,tuple ,copiedTuple );
1569+
1570+ heap_freetuple (copiedTuple );	/* the copy was formed in this function, so release it here */
1571+ }