2020#include "commands/copy.h"
2121#include "commands/defrem.h"
2222#include "commands/explain.h"
23+ #include "commands/vacuum.h"
2324#include "foreign/fdwapi.h"
2425#include "foreign/foreign.h"
2526#include "miscadmin.h"
2829#include "optimizer/pathnode.h"
2930#include "optimizer/planmain.h"
3031#include "optimizer/restrictinfo.h"
32+ #include "utils/memutils.h"
3133#include "utils/rel.h"
3234
3335PG_MODULE_MAGIC ;
@@ -123,6 +125,7 @@ static void fileBeginForeignScan(ForeignScanState *node, int eflags);
123125static TupleTableSlot * fileIterateForeignScan (ForeignScanState * node );
124126static void fileReScanForeignScan (ForeignScanState * node );
125127static void fileEndForeignScan (ForeignScanState * node );
128+ static AcquireSampleRowsFunc fileAnalyzeForeignTable (Relation relation );
126129
127130/*
128131 * Helper functions
@@ -136,6 +139,10 @@ static void estimate_size(PlannerInfo *root, RelOptInfo *baserel,
136139static void estimate_costs (PlannerInfo * root ,RelOptInfo * baserel ,
137140FileFdwPlanState * fdw_private ,
138141Cost * startup_cost ,Cost * total_cost );
142+ static int file_acquire_sample_rows (Relation onerel ,int elevel ,
143+ HeapTuple * rows ,int targrows ,
144+ double * totalrows ,double * totaldeadrows ,
145+ BlockNumber * totalpages );
139146
140147
141148/*
@@ -155,6 +162,7 @@ file_fdw_handler(PG_FUNCTION_ARGS)
155162fdwroutine -> IterateForeignScan = fileIterateForeignScan ;
156163fdwroutine -> ReScanForeignScan = fileReScanForeignScan ;
157164fdwroutine -> EndForeignScan = fileEndForeignScan ;
165+ fdwroutine -> AnalyzeForeignTable = fileAnalyzeForeignTable ;
158166
159167PG_RETURN_POINTER (fdwroutine );
160168}
@@ -613,6 +621,23 @@ fileIterateForeignScan(ForeignScanState *node)
613621return slot ;
614622}
615623
624+ /*
625+ * fileReScanForeignScan
626+ *Rescan table, possibly with new parameters
627+ */
628+ static void
629+ fileReScanForeignScan (ForeignScanState * node )
630+ {
631+ FileFdwExecutionState * festate = (FileFdwExecutionState * )node -> fdw_state ;
632+
633+ EndCopyFrom (festate -> cstate );
634+
635+ festate -> cstate = BeginCopyFrom (node -> ss .ss_currentRelation ,
636+ festate -> filename ,
637+ NIL ,
638+ festate -> options );
639+ }
640+
616641/*
617642 * fileEndForeignScan
618643 *Finish scanning foreign table and dispose objects used for this scan
@@ -628,20 +653,13 @@ fileEndForeignScan(ForeignScanState *node)
628653}
629654
630655/*
631- *fileReScanForeignScan
632- *Rescan table, possibly with new parameters
656+ *fileAnalyzeForeignTable
657+ *Test whether analyzing this foreign table is supported
633658 */
634- static void
635- fileReScanForeignScan ( ForeignScanState * node )
659+ static AcquireSampleRowsFunc
660+ fileAnalyzeForeignTable ( Relation relation )
636661{
637- FileFdwExecutionState * festate = (FileFdwExecutionState * )node -> fdw_state ;
638-
639- EndCopyFrom (festate -> cstate );
640-
641- festate -> cstate = BeginCopyFrom (node -> ss .ss_currentRelation ,
642- festate -> filename ,
643- NIL ,
644- festate -> options );
662+ return file_acquire_sample_rows ;
645663}
646664
647665/*
@@ -657,7 +675,6 @@ estimate_size(PlannerInfo *root, RelOptInfo *baserel,
657675{
658676struct stat stat_buf ;
659677BlockNumber pages ;
660- int tuple_width ;
661678double ntuples ;
662679double nrows ;
663680
@@ -674,26 +691,45 @@ estimate_size(PlannerInfo *root, RelOptInfo *baserel,
674691pages = (stat_buf .st_size + (BLCKSZ - 1 )) /BLCKSZ ;
675692if (pages < 1 )
676693pages = 1 ;
677-
678694fdw_private -> pages = pages ;
679695
680696/*
681- * Estimate the number of tuples in the file. We back into this estimate
682- * using the planner's idea of the relation width; which is bogus if not
683- * all columns are being read, not to mention that the text representation
684- * of a row probably isn't the same size as its internal representation.
685- * FIXME later.
697+ * Estimate the number of tuples in the file.
686698 */
687- tuple_width = MAXALIGN (baserel -> width )+ MAXALIGN (sizeof (HeapTupleHeaderData ));
699+ if (baserel -> pages > 0 )
700+ {
701+ /*
702+ * We have # of pages and # of tuples from pg_class (that is, from a
703+ * previous ANALYZE), so compute a tuples-per-page estimate and scale
704+ * that by the current file size.
705+ */
706+ double density ;
688707
689- ntuples = clamp_row_est ((double )stat_buf .st_size / (double )tuple_width );
708+ density = baserel -> tuples / (double )baserel -> pages ;
709+ ntuples = clamp_row_est (density * (double )pages );
710+ }
711+ else
712+ {
713+ /*
714+ * Otherwise we have to fake it. We back into this estimate using the
715+ * planner's idea of the relation width; which is bogus if not all
716+ * columns are being read, not to mention that the text representation
717+ * of a row probably isn't the same size as its internal
718+ * representation. Possibly we could do something better, but the
719+ * real answer to anyone who complains is "ANALYZE" ...
720+ */
721+ int tuple_width ;
690722
723+ tuple_width = MAXALIGN (baserel -> width )+
724+ MAXALIGN (sizeof (HeapTupleHeaderData ));
725+ ntuples = clamp_row_est ((double )stat_buf .st_size /
726+ (double )tuple_width );
727+ }
691728fdw_private -> ntuples = ntuples ;
692729
693730/*
694731 * Now estimate the number of rows returned by the scan after applying the
695- * baserestrictinfo quals.This is pretty bogus too, since the planner
696- * will have no stats about the relation, but it's better than nothing.
732+ * baserestrictinfo quals.
697733 */
698734nrows = ntuples *
699735clauselist_selectivity (root ,
@@ -736,3 +772,169 @@ estimate_costs(PlannerInfo *root, RelOptInfo *baserel,
736772run_cost += cpu_per_tuple * ntuples ;
737773* total_cost = * startup_cost + run_cost ;
738774}
775+
776+ /*
777+ * file_acquire_sample_rows -- acquire a random sample of rows from the table
778+ *
779+ * Selected rows are returned in the caller-allocated array rows[],
780+ * which must have at least targrows entries.
781+ * The actual number of rows selected is returned as the function result.
782+ * We also count the total number of rows in the file and return it into
783+ * *totalrows, and return the file's physical size in *totalpages.
784+ * Note that *totaldeadrows is always set to 0.
785+ *
786+ * Note that the returned list of rows is not always in order by physical
787+ * position in the file. Therefore, correlation estimates derived later
788+ * may be meaningless, but it's OK because we don't use the estimates
789+ * currently (the planner only pays attention to correlation for indexscans).
790+ */
791+ static int
792+ file_acquire_sample_rows (Relation onerel ,int elevel ,
793+ HeapTuple * rows ,int targrows ,
794+ double * totalrows ,double * totaldeadrows ,
795+ BlockNumber * totalpages )
796+ {
797+ int numrows = 0 ;
798+ double rowstoskip = -1 ;/* -1 means not set yet */
799+ double rstate ;
800+ TupleDesc tupDesc ;
801+ Datum * values ;
802+ bool * nulls ;
803+ bool found ;
804+ char * filename ;
805+ struct stat stat_buf ;
806+ List * options ;
807+ CopyState cstate ;
808+ ErrorContextCallback errcontext ;
809+ MemoryContext oldcontext = CurrentMemoryContext ;
810+ MemoryContext tupcontext ;
811+
812+ Assert (onerel );
813+ Assert (targrows > 0 );
814+
815+ tupDesc = RelationGetDescr (onerel );
816+ values = (Datum * )palloc (tupDesc -> natts * sizeof (Datum ));
817+ nulls = (bool * )palloc (tupDesc -> natts * sizeof (bool ));
818+
819+ /* Fetch options of foreign table */
820+ fileGetOptions (RelationGetRelid (onerel ),& filename ,& options );
821+
822+ /*
823+ * Get size of the file.
824+ */
825+ if (stat (filename ,& stat_buf )< 0 )
826+ ereport (ERROR ,
827+ (errcode_for_file_access (),
828+ errmsg ("could not stat file \"%s\": %m" ,
829+ filename )));
830+
831+ /*
832+ * Convert size to pages for use in I/O cost estimate.
833+ */
834+ * totalpages = (stat_buf .st_size + (BLCKSZ - 1 )) /BLCKSZ ;
835+ if (* totalpages < 1 )
836+ * totalpages = 1 ;
837+
838+ /*
839+ * Create CopyState from FDW options.
840+ */
841+ cstate = BeginCopyFrom (onerel ,filename ,NIL ,options );
842+
843+ /*
844+ * Use per-tuple memory context to prevent leak of memory used to read rows
845+ * from the file with Copy routines.
846+ */
847+ tupcontext = AllocSetContextCreate (CurrentMemoryContext ,
848+ "file_fdw temporary context" ,
849+ ALLOCSET_DEFAULT_MINSIZE ,
850+ ALLOCSET_DEFAULT_INITSIZE ,
851+ ALLOCSET_DEFAULT_MAXSIZE );
852+
853+ /* Prepare for sampling rows */
854+ rstate = anl_init_selection_state (targrows );
855+
856+ /* Set up callback to identify error line number. */
857+ errcontext .callback = CopyFromErrorCallback ;
858+ errcontext .arg = (void * )cstate ;
859+ errcontext .previous = error_context_stack ;
860+ error_context_stack = & errcontext ;
861+
862+ * totalrows = 0 ;
863+ * totaldeadrows = 0 ;
864+ for (;;)
865+ {
866+ /* Check for user-requested abort or sleep */
867+ vacuum_delay_point ();
868+
869+ /* Fetch next row */
870+ MemoryContextReset (tupcontext );
871+ MemoryContextSwitchTo (tupcontext );
872+
873+ found = NextCopyFrom (cstate ,NULL ,values ,nulls ,NULL );
874+
875+ MemoryContextSwitchTo (oldcontext );
876+
877+ if (!found )
878+ break ;
879+
880+ /*
881+ * The first targrows sample rows are simply copied into the
882+ * reservoir. Then we start replacing tuples in the sample until we
883+ * reach the end of the relation. This algorithm is from Jeff Vitter's
884+ * paper (see more info in commands/analyze.c).
885+ */
886+ if (numrows < targrows )
887+ {
888+ rows [numrows ++ ]= heap_form_tuple (tupDesc ,values ,nulls );
889+ }
890+ else
891+ {
892+ /*
893+ * t in Vitter's paper is the number of records already processed.
894+ * If we need to compute a new S value, we must use the
895+ * not-yet-incremented value of totalrows as t.
896+ */
897+ if (rowstoskip < 0 )
898+ rowstoskip = anl_get_next_S (* totalrows ,targrows ,& rstate );
899+
900+ if (rowstoskip <=0 )
901+ {
902+ /*
903+ * Found a suitable tuple, so save it, replacing one
904+ * old tuple at random
905+ */
906+ int k = (int ) (targrows * anl_random_fract ());
907+
908+ Assert (k >=0 && k < targrows );
909+ heap_freetuple (rows [k ]);
910+ rows [k ]= heap_form_tuple (tupDesc ,values ,nulls );
911+ }
912+
913+ rowstoskip -= 1 ;
914+ }
915+
916+ * totalrows += 1 ;
917+ }
918+
919+ /* Remove error callback. */
920+ error_context_stack = errcontext .previous ;
921+
922+ /* Clean up. */
923+ MemoryContextDelete (tupcontext );
924+
925+ EndCopyFrom (cstate );
926+
927+ pfree (values );
928+ pfree (nulls );
929+
930+ /*
931+ * Emit some interesting relation info
932+ */
933+ ereport (elevel ,
934+ (errmsg ("\"%s\": scanned %u pages containing %.0f rows; "
935+ "%d rows in sample" ,
936+ RelationGetRelationName (onerel ),
937+ * totalpages ,* totalrows ,numrows )));
938+
939+ return numrows ;
940+ }