Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit8ad51b5

Browse files
committed
Sample postgres_fdw tables remotely during ANALYZE
When collecting ANALYZE sample on foreign tables, postgres_fdw fetchedall rows and performed the sampling locally. For large tables this meanstransferring and immediately discarding large amounts of data.This commit allows the sampling to be performed on the remote server,transferring only the much smaller sample. The sampling is performedusing the built-in TABLESAMPLE methods (system, bernoulli) or random()function, depending on the remote server version.Remote sampling can be enabled by analyze_sampling on the foreign serverand/or foreign table, with supported values 'off', 'auto', 'system','bernoulli' and 'random'. The default value is 'auto' which uses either'bernoulli' (TABLESAMPLE method) or 'random' (for remote servers withoutTABLESAMPLE support).
1 parent02699bc commit8ad51b5

File tree

7 files changed

+397
-9
lines changed

7 files changed

+397
-9
lines changed

‎contrib/postgres_fdw/deparse.c

Lines changed: 70 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2367,14 +2367,57 @@ deparseAnalyzeSizeSql(StringInfo buf, Relation rel)
23672367
appendStringInfo(buf,"::pg_catalog.regclass) / %d",BLCKSZ);
23682368
}
23692369

2370+
/*
2371+
* Construct SELECT statement to acquire the number of rows of a relation.
2372+
*
2373+
* Note: we just return the remote server's reltuples value, which might
2374+
* be off a good deal, but it doesn't seem worth working harder. See
2375+
* comments in postgresAcquireSampleRowsFunc.
2376+
*/
2377+
void
2378+
deparseAnalyzeTuplesSql(StringInfobuf,Relationrel)
2379+
{
2380+
StringInfoDatarelname;
2381+
2382+
/* We'll need the remote relation name as a literal. */
2383+
initStringInfo(&relname);
2384+
deparseRelation(&relname,rel);
2385+
2386+
appendStringInfoString(buf,"SELECT reltuples FROM pg_catalog.pg_class WHERE oid = ");
2387+
deparseStringLiteral(buf,relname.data);
2388+
appendStringInfoString(buf,"::pg_catalog.regclass");
2389+
}
2390+
23702391
/*
23712392
* Construct SELECT statement to acquire sample rows of given relation.
23722393
*
23732394
* SELECT command is appended to buf, and list of columns retrieved
23742395
* is returned to *retrieved_attrs.
2396+
*
2397+
* We only support sampling methods we can decide based on server version.
2398+
* Allowing custom TSM modules (like tsm_system_rows) might be useful, but it
2399+
* would require detecting which extensions are installed, to allow automatic
2400+
* fall-back. Moreover, the methods may use different parameters like number
2401+
* of rows (and not sampling rate). So we leave this for future improvements.
2402+
*
2403+
* Using random() to sample rows on the remote server has the advantage that
2404+
* this works on all PostgreSQL versions (unlike TABLESAMPLE), and that it
2405+
* does the sampling on the remote side (without transferring everything and
2406+
* then discarding most rows).
2407+
*
2408+
* The disadvantage is that we still have to read all rows and evaluate the
2409+
* random(), while TABLESAMPLE (at least with the "system" method) may skip.
2410+
* It's not that different from the "bernoulli" method, though.
2411+
*
2412+
* We could also do "ORDER BY random() LIMIT x", which would always pick
2413+
* the expected number of rows, but it requires sorting so it may be much
2414+
* more expensive (particularly on large tables, which is what what the
2415+
* remote sampling is meant to improve).
23752416
*/
23762417
void
2377-
deparseAnalyzeSql(StringInfobuf,Relationrel,List**retrieved_attrs)
2418+
deparseAnalyzeSql(StringInfobuf,Relationrel,
2419+
PgFdwSamplingMethodsample_method,doublesample_frac,
2420+
List**retrieved_attrs)
23782421
{
23792422
Oidrelid=RelationGetRelid(rel);
23802423
TupleDesctupdesc=RelationGetDescr(rel);
@@ -2422,10 +2465,35 @@ deparseAnalyzeSql(StringInfo buf, Relation rel, List **retrieved_attrs)
24222465
appendStringInfoString(buf,"NULL");
24232466

24242467
/*
2425-
* Construct FROM clause
2468+
* Construct FROM clause, and perhaps WHERE clause too, depending on the
2469+
* selected sampling method.
24262470
*/
24272471
appendStringInfoString(buf," FROM ");
24282472
deparseRelation(buf,rel);
2473+
2474+
switch (sample_method)
2475+
{
2476+
caseANALYZE_SAMPLE_OFF:
2477+
/* nothing to do here */
2478+
break;
2479+
2480+
caseANALYZE_SAMPLE_RANDOM:
2481+
appendStringInfo(buf," WHERE pg_catalog.random() < %f",sample_frac);
2482+
break;
2483+
2484+
caseANALYZE_SAMPLE_SYSTEM:
2485+
appendStringInfo(buf," TABLESAMPLE SYSTEM(%f)", (100.0*sample_frac));
2486+
break;
2487+
2488+
caseANALYZE_SAMPLE_BERNOULLI:
2489+
appendStringInfo(buf," TABLESAMPLE BERNOULLI(%f)", (100.0*sample_frac));
2490+
break;
2491+
2492+
caseANALYZE_SAMPLE_AUTO:
2493+
/* should have been resolved into actual method */
2494+
elog(ERROR,"unexpected sampling method");
2495+
break;
2496+
}
24292497
}
24302498

24312499
/*

‎contrib/postgres_fdw/expected/postgres_fdw.out

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11779,3 +11779,28 @@ SELECT * FROM prem2;
1177911779

1178011780
ALTER SERVER loopback OPTIONS (DROP parallel_commit);
1178111781
ALTER SERVER loopback2 OPTIONS (DROP parallel_commit);
11782+
-- ===================================================================
11783+
-- test for ANALYZE sampling
11784+
-- ===================================================================
11785+
CREATE TABLE analyze_table (id int, a text, b bigint);
11786+
CREATE FOREIGN TABLE analyze_ftable (id int, a text, b bigint)
11787+
SERVER loopback OPTIONS (table_name 'analyze_rtable1');
11788+
INSERT INTO analyze_table (SELECT x FROM generate_series(1,1000) x);
11789+
ANALYZE analyze_table;
11790+
SET default_statistics_target = 10;
11791+
ANALYZE analyze_table;
11792+
ALTER SERVER loopback OPTIONS (analyze_sampling 'invalid');
11793+
ERROR: invalid value for string option "analyze_sampling": invalid
11794+
ALTER SERVER loopback OPTIONS (analyze_sampling 'auto');
11795+
ANALYZE analyze_table;
11796+
ALTER SERVER loopback OPTIONS (SET analyze_sampling 'system');
11797+
ANALYZE analyze_table;
11798+
ALTER SERVER loopback OPTIONS (SET analyze_sampling 'bernoulli');
11799+
ANALYZE analyze_table;
11800+
ALTER SERVER loopback OPTIONS (SET analyze_sampling 'random');
11801+
ANALYZE analyze_table;
11802+
ALTER SERVER loopback OPTIONS (SET analyze_sampling 'off');
11803+
ANALYZE analyze_table;
11804+
-- cleanup
11805+
DROP FOREIGN TABLE analyze_ftable;
11806+
DROP TABLE analyze_table;

‎contrib/postgres_fdw/option.c

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -210,6 +210,23 @@ postgres_fdw_validator(PG_FUNCTION_ARGS)
210210
errmsg("sslcert and sslkey are superuser-only"),
211211
errhint("User mappings with the sslcert or sslkey options set may only be created or modified by the superuser.")));
212212
}
213+
elseif (strcmp(def->defname,"analyze_sampling")==0)
214+
{
215+
char*value;
216+
217+
value=defGetString(def);
218+
219+
/* we recognize off/auto/random/system/bernoulli */
220+
if (strcmp(value,"off")!=0&&
221+
strcmp(value,"auto")!=0&&
222+
strcmp(value,"random")!=0&&
223+
strcmp(value,"system")!=0&&
224+
strcmp(value,"bernoulli")!=0)
225+
ereport(ERROR,
226+
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
227+
errmsg("invalid value for string option \"%s\": %s",
228+
def->defname,value)));
229+
}
213230
}
214231

215232
PG_RETURN_VOID();
@@ -257,6 +274,10 @@ InitPgFdwOptions(void)
257274
{"keep_connections",ForeignServerRelationId, false},
258275
{"password_required",UserMappingRelationId, false},
259276

277+
/* sampling is available on both server and table */
278+
{"analyze_sampling",ForeignServerRelationId, false},
279+
{"analyze_sampling",ForeignTableRelationId, false},
280+
260281
/*
261282
* sslcert and sslkey are in fact libpq options, but we repeat them
262283
* here to allow them to appear in both foreign server context (when

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp