Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 263d9de

Browse files
committed
Allow statistics to be collected for foreign tables.
ANALYZE now accepts foreign tables and allows the table's FDW to control how the sample rows are collected. (But only manual ANALYZEs will touch foreign tables, for the moment, since among other things it's not very clear how to handle remote permissions checks in an auto-analyze.) contrib/file_fdw is extended to support this. Etsuro Fujita, reviewed by Shigeru Hanada, some further tweaking by me.
1 parent 8cb5365, commit 263d9de

File tree

13 files changed

+484
-99
lines changed

13 files changed

+484
-99
lines changed

‎contrib/file_fdw/file_fdw.c

Lines changed: 225 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,7 @@
2020
#include"commands/copy.h"
2121
#include"commands/defrem.h"
2222
#include"commands/explain.h"
23+
#include"commands/vacuum.h"
2324
#include"foreign/fdwapi.h"
2425
#include"foreign/foreign.h"
2526
#include"miscadmin.h"
@@ -28,6 +29,7 @@
2829
#include"optimizer/pathnode.h"
2930
#include"optimizer/planmain.h"
3031
#include"optimizer/restrictinfo.h"
32+
#include"utils/memutils.h"
3133
#include"utils/rel.h"
3234

3335
PG_MODULE_MAGIC;
@@ -123,6 +125,7 @@ static void fileBeginForeignScan(ForeignScanState *node, int eflags);
123125
staticTupleTableSlot*fileIterateForeignScan(ForeignScanState*node);
124126
staticvoidfileReScanForeignScan(ForeignScanState*node);
125127
staticvoidfileEndForeignScan(ForeignScanState*node);
128+
staticAcquireSampleRowsFuncfileAnalyzeForeignTable(Relationrelation);
126129

127130
/*
128131
* Helper functions
@@ -136,6 +139,10 @@ static void estimate_size(PlannerInfo *root, RelOptInfo *baserel,
136139
staticvoidestimate_costs(PlannerInfo*root,RelOptInfo*baserel,
137140
FileFdwPlanState*fdw_private,
138141
Cost*startup_cost,Cost*total_cost);
142+
staticintfile_acquire_sample_rows(Relationonerel,intelevel,
143+
HeapTuple*rows,inttargrows,
144+
double*totalrows,double*totaldeadrows,
145+
BlockNumber*totalpages);
139146

140147

141148
/*
@@ -155,6 +162,7 @@ file_fdw_handler(PG_FUNCTION_ARGS)
155162
fdwroutine->IterateForeignScan=fileIterateForeignScan;
156163
fdwroutine->ReScanForeignScan=fileReScanForeignScan;
157164
fdwroutine->EndForeignScan=fileEndForeignScan;
165+
fdwroutine->AnalyzeForeignTable=fileAnalyzeForeignTable;
158166

159167
PG_RETURN_POINTER(fdwroutine);
160168
}
@@ -613,6 +621,23 @@ fileIterateForeignScan(ForeignScanState *node)
613621
returnslot;
614622
}
615623

624+
/*
625+
* fileReScanForeignScan
626+
*Rescan table, possibly with new parameters
627+
*/
628+
staticvoid
629+
fileReScanForeignScan(ForeignScanState*node)
630+
{
631+
FileFdwExecutionState*festate= (FileFdwExecutionState*)node->fdw_state;
632+
633+
EndCopyFrom(festate->cstate);
634+
635+
festate->cstate=BeginCopyFrom(node->ss.ss_currentRelation,
636+
festate->filename,
637+
NIL,
638+
festate->options);
639+
}
640+
616641
/*
617642
* fileEndForeignScan
618643
*Finish scanning foreign table and dispose objects used for this scan
@@ -628,20 +653,13 @@ fileEndForeignScan(ForeignScanState *node)
628653
}
629654

630655
/*
631-
*fileReScanForeignScan
632-
*Rescan table, possibly with new parameters
656+
*fileAnalyzeForeignTable
657+
*Test whether analyzing this foreign table is supported
633658
*/
634-
staticvoid
635-
fileReScanForeignScan(ForeignScanState*node)
659+
staticAcquireSampleRowsFunc
660+
fileAnalyzeForeignTable(Relationrelation)
636661
{
637-
FileFdwExecutionState*festate= (FileFdwExecutionState*)node->fdw_state;
638-
639-
EndCopyFrom(festate->cstate);
640-
641-
festate->cstate=BeginCopyFrom(node->ss.ss_currentRelation,
642-
festate->filename,
643-
NIL,
644-
festate->options);
662+
returnfile_acquire_sample_rows;
645663
}
646664

647665
/*
@@ -657,7 +675,6 @@ estimate_size(PlannerInfo *root, RelOptInfo *baserel,
657675
{
658676
structstatstat_buf;
659677
BlockNumberpages;
660-
inttuple_width;
661678
doublentuples;
662679
doublenrows;
663680

@@ -674,26 +691,45 @@ estimate_size(PlannerInfo *root, RelOptInfo *baserel,
674691
pages= (stat_buf.st_size+ (BLCKSZ-1)) /BLCKSZ;
675692
if (pages<1)
676693
pages=1;
677-
678694
fdw_private->pages=pages;
679695

680696
/*
681-
* Estimate the number of tuples in the file. We back into this estimate
682-
* using the planner's idea of the relation width; which is bogus if not
683-
* all columns are being read, not to mention that the text representation
684-
* of a row probably isn't the same size as its internal representation.
685-
* FIXME later.
697+
* Estimate the number of tuples in the file.
686698
*/
687-
tuple_width=MAXALIGN(baserel->width)+MAXALIGN(sizeof(HeapTupleHeaderData));
699+
if (baserel->pages>0)
700+
{
701+
/*
702+
* We have # of pages and # of tuples from pg_class (that is, from a
703+
* previous ANALYZE), so compute a tuples-per-page estimate and scale
704+
* that by the current file size.
705+
*/
706+
doubledensity;
688707

689-
ntuples=clamp_row_est((double)stat_buf.st_size / (double)tuple_width);
708+
density=baserel->tuples / (double)baserel->pages;
709+
ntuples=clamp_row_est(density* (double)pages);
710+
}
711+
else
712+
{
713+
/*
714+
* Otherwise we have to fake it. We back into this estimate using the
715+
* planner's idea of the relation width; which is bogus if not all
716+
* columns are being read, not to mention that the text representation
717+
* of a row probably isn't the same size as its internal
718+
* representation. Possibly we could do something better, but the
719+
* real answer to anyone who complains is "ANALYZE" ...
720+
*/
721+
inttuple_width;
690722

723+
tuple_width=MAXALIGN(baserel->width)+
724+
MAXALIGN(sizeof(HeapTupleHeaderData));
725+
ntuples=clamp_row_est((double)stat_buf.st_size /
726+
(double)tuple_width);
727+
}
691728
fdw_private->ntuples=ntuples;
692729

693730
/*
694731
* Now estimate the number of rows returned by the scan after applying the
695-
* baserestrictinfo quals.This is pretty bogus too, since the planner
696-
* will have no stats about the relation, but it's better than nothing.
732+
* baserestrictinfo quals.
697733
*/
698734
nrows=ntuples*
699735
clauselist_selectivity(root,
@@ -736,3 +772,169 @@ estimate_costs(PlannerInfo *root, RelOptInfo *baserel,
736772
run_cost+=cpu_per_tuple*ntuples;
737773
*total_cost=*startup_cost+run_cost;
738774
}
775+
776+
/*
777+
* file_acquire_sample_rows -- acquire a random sample of rows from the table
778+
*
779+
* Selected rows are returned in the caller-allocated array rows[],
780+
* which must have at least targrows entries.
781+
* The actual number of rows selected is returned as the function result.
782+
* We also count the total number of rows in the file and return it into
783+
* *totalrows, and return the file's physical size in *totalpages.
784+
* Note that *totaldeadrows is always set to 0.
785+
*
786+
* Note that the returned list of rows is not always in order by physical
787+
* position in the file. Therefore, correlation estimates derived later
788+
* may be meaningless, but it's OK because we don't use the estimates
789+
* currently (the planner only pays attention to correlation for indexscans).
790+
*/
791+
staticint
792+
file_acquire_sample_rows(Relationonerel,intelevel,
793+
HeapTuple*rows,inttargrows,
794+
double*totalrows,double*totaldeadrows,
795+
BlockNumber*totalpages)
796+
{
797+
intnumrows=0;
798+
doublerowstoskip=-1;/* -1 means not set yet */
799+
doublerstate;
800+
TupleDesctupDesc;
801+
Datum*values;
802+
bool*nulls;
803+
boolfound;
804+
char*filename;
805+
structstatstat_buf;
806+
List*options;
807+
CopyStatecstate;
808+
ErrorContextCallbackerrcontext;
809+
MemoryContextoldcontext=CurrentMemoryContext;
810+
MemoryContexttupcontext;
811+
812+
Assert(onerel);
813+
Assert(targrows>0);
814+
815+
tupDesc=RelationGetDescr(onerel);
816+
values= (Datum*)palloc(tupDesc->natts*sizeof(Datum));
817+
nulls= (bool*)palloc(tupDesc->natts*sizeof(bool));
818+
819+
/* Fetch options of foreign table */
820+
fileGetOptions(RelationGetRelid(onerel),&filename,&options);
821+
822+
/*
823+
* Get size of the file.
824+
*/
825+
if (stat(filename,&stat_buf)<0)
826+
ereport(ERROR,
827+
(errcode_for_file_access(),
828+
errmsg("could not stat file \"%s\": %m",
829+
filename)));
830+
831+
/*
832+
* Convert size to pages for use in I/O cost estimate.
833+
*/
834+
*totalpages= (stat_buf.st_size+ (BLCKSZ-1)) /BLCKSZ;
835+
if (*totalpages<1)
836+
*totalpages=1;
837+
838+
/*
839+
* Create CopyState from FDW options.
840+
*/
841+
cstate=BeginCopyFrom(onerel,filename,NIL,options);
842+
843+
/*
844+
* Use per-tuple memory context to prevent leak of memory used to read rows
845+
* from the file with Copy routines.
846+
*/
847+
tupcontext=AllocSetContextCreate(CurrentMemoryContext,
848+
"file_fdw temporary context",
849+
ALLOCSET_DEFAULT_MINSIZE,
850+
ALLOCSET_DEFAULT_INITSIZE,
851+
ALLOCSET_DEFAULT_MAXSIZE);
852+
853+
/* Prepare for sampling rows */
854+
rstate=anl_init_selection_state(targrows);
855+
856+
/* Set up callback to identify error line number. */
857+
errcontext.callback=CopyFromErrorCallback;
858+
errcontext.arg= (void*)cstate;
859+
errcontext.previous=error_context_stack;
860+
error_context_stack=&errcontext;
861+
862+
*totalrows=0;
863+
*totaldeadrows=0;
864+
for (;;)
865+
{
866+
/* Check for user-requested abort or sleep */
867+
vacuum_delay_point();
868+
869+
/* Fetch next row */
870+
MemoryContextReset(tupcontext);
871+
MemoryContextSwitchTo(tupcontext);
872+
873+
found=NextCopyFrom(cstate,NULL,values,nulls,NULL);
874+
875+
MemoryContextSwitchTo(oldcontext);
876+
877+
if (!found)
878+
break;
879+
880+
/*
881+
* The first targrows sample rows are simply copied into the
882+
* reservoir. Then we start replacing tuples in the sample until we
883+
* reach the end of the relation. This algorithm is from Jeff Vitter's
884+
* paper (see more info in commands/analyze.c).
885+
*/
886+
if (numrows<targrows)
887+
{
888+
rows[numrows++]=heap_form_tuple(tupDesc,values,nulls);
889+
}
890+
else
891+
{
892+
/*
893+
* t in Vitter's paper is the number of records already processed.
894+
* If we need to compute a new S value, we must use the
895+
* not-yet-incremented value of totalrows as t.
896+
*/
897+
if (rowstoskip<0)
898+
rowstoskip=anl_get_next_S(*totalrows,targrows,&rstate);
899+
900+
if (rowstoskip <=0)
901+
{
902+
/*
903+
* Found a suitable tuple, so save it, replacing one
904+
* old tuple at random
905+
*/
906+
intk= (int) (targrows*anl_random_fract());
907+
908+
Assert(k >=0&&k<targrows);
909+
heap_freetuple(rows[k]);
910+
rows[k]=heap_form_tuple(tupDesc,values,nulls);
911+
}
912+
913+
rowstoskip-=1;
914+
}
915+
916+
*totalrows+=1;
917+
}
918+
919+
/* Remove error callback. */
920+
error_context_stack=errcontext.previous;
921+
922+
/* Clean up. */
923+
MemoryContextDelete(tupcontext);
924+
925+
EndCopyFrom(cstate);
926+
927+
pfree(values);
928+
pfree(nulls);
929+
930+
/*
931+
* Emit some interesting relation info
932+
*/
933+
ereport(elevel,
934+
(errmsg("\"%s\": scanned %u pages containing %.0f rows; "
935+
"%d rows in sample",
936+
RelationGetRelationName(onerel),
937+
*totalpages,*totalrows,numrows)));
938+
939+
returnnumrows;
940+
}

‎doc/src/sgml/fdwhandler.sgml

Lines changed: 39 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -10,8 +10,8 @@
1010

1111
<para>
1212
All operations on a foreign table are handled through its foreign data
13-
wrapper, which consists of a set of functions that the planner and
14-
executor call. The foreign data wrapper is responsible for fetching
13+
wrapper, which consists of a set of functions that the core server
14+
calls. The foreign data wrapper is responsible for fetching
1515
data from the remote data source and returning it to the
1616
<productname>PostgreSQL</productname> executor. This chapter outlines how
1717
to write a new foreign data wrapper.
@@ -47,7 +47,8 @@
4747

4848
<para>
4949
The handler function simply returns a struct of function pointers to
50-
callback functions that will be called by the planner and executor.
50+
callback functions that will be called by the planner, executor, and
51+
various maintenance commands.
5152
Most of the effort in writing an FDW is in implementing these callback
5253
functions.
5354
The handler function must be registered with
@@ -276,6 +277,41 @@ EndForeignScan (ForeignScanState *node);
276277
to remote servers should be cleaned up.
277278
</para>
278279

280+
<para>
281+
<programlisting>
282+
AcquireSampleRowsFunc
283+
AnalyzeForeignTable (Relation relation);
284+
</programlisting>
285+
286+
This function is called when <xref linkend="sql-analyze"> is executed on
287+
a foreign table. If the FDW supports collecting statistics for this
288+
foreign table, it should return a pointer to a function that will collect
289+
sample rows from the table. Otherwise, return <literal>NULL</>. If the
290+
FDW does not support collecting statistics for any tables, the
291+
<function>AnalyzeForeignTable</> pointer can be set to <literal>NULL</>.
292+
</para>
293+
294+
<para>
295+
If provided, the sample collection function must have the signature
296+
<programlisting>
297+
int
298+
AcquireSampleRowsFunc (Relation relation, int elevel,
299+
HeapTuple *rows, int targrows,
300+
double *totalrows,
301+
double *totaldeadrows,
302+
BlockNumber *totalpages);
303+
</programlisting>
304+
305+
A random sample of up to <parameter>targrows</> rows should be collected
306+
from the table and stored into the caller-provided <parameter>rows</>
307+
array. The actual number of rows collected must be returned. In
308+
addition, store estimates of the total numbers of live rows, dead rows,
309+
and pages in the table into the output parameters
310+
<parameter>totalrows</>, <parameter>totaldeadrows</>, and
311+
<parameter>totalpages</>. These numbers will be recorded in the table's
312+
<structname>pg_class</> entry for future use.
313+
</para>
314+
279315
<para>
280316
The <structname>FdwRoutine</> struct type is declared in
281317
<filename>src/include/foreign/fdwapi.h</>, which see for additional

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp