Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit036bdce

Browse files
committed
Teach planner how to estimate rows for timestamp generate_series
This provides the planner with row estimates forgenerate_series(TIMESTAMP, TIMESTAMP, INTERVAL),generate_series(TIMESTAMPTZ, TIMESTAMPTZ, INTERVAL) andgenerate_series(TIMESTAMPTZ, TIMESTAMPTZ, INTERVAL, TEXT) when the inputparameter values can be estimated during planning.Author: David RowleyReviewed-by: jian he <jian.universality@gmail.com>Discussion:https://postgr.es/m/CAApHDvrBE%3D%2BASo_sGYmQJ3GvO8GPvX5yxXhRS%3Dt_ybd4odFkhQ%40mail.gmail.com
1 parent5193ca8 commit036bdce

File tree

4 files changed

+291
-3
lines changed

4 files changed

+291
-3
lines changed

‎src/backend/utils/adt/timestamp.c

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
#include"funcapi.h"
2828
#include"libpq/pqformat.h"
2929
#include"miscadmin.h"
30+
#include"optimizer/optimizer.h"
3031
#include"nodes/nodeFuncs.h"
3132
#include"nodes/supportnodes.h"
3233
#include"parser/scansup.h"
@@ -6680,6 +6681,93 @@ generate_series_timestamptz_at_zone(PG_FUNCTION_ARGS)
66806681
returngenerate_series_timestamptz_internal(fcinfo);
66816682
}
66826683

6684+
/*
6685+
* Planner support function for generate_series(timestamp, timestamp, interval)
6686+
*/
6687+
Datum
6688+
generate_series_timestamp_support(PG_FUNCTION_ARGS)
6689+
{
6690+
Node*rawreq= (Node*)PG_GETARG_POINTER(0);
6691+
Node*ret=NULL;
6692+
6693+
if (IsA(rawreq,SupportRequestRows))
6694+
{
6695+
/* Try to estimate the number of rows returned */
6696+
SupportRequestRows*req= (SupportRequestRows*)rawreq;
6697+
6698+
if (is_funcclause(req->node))/* be paranoid */
6699+
{
6700+
List*args= ((FuncExpr*)req->node)->args;
6701+
Node*arg1,
6702+
*arg2,
6703+
*arg3;
6704+
6705+
/* We can use estimated argument values here */
6706+
arg1=estimate_expression_value(req->root,linitial(args));
6707+
arg2=estimate_expression_value(req->root,lsecond(args));
6708+
arg3=estimate_expression_value(req->root,lthird(args));
6709+
6710+
/*
6711+
* If any argument is constant NULL, we can safely assume that
6712+
* zero rows are returned. Otherwise, if they're all non-NULL
6713+
* constants, we can calculate the number of rows that will be
6714+
* returned.
6715+
*/
6716+
if ((IsA(arg1,Const)&& ((Const*)arg1)->constisnull)||
6717+
(IsA(arg2,Const)&& ((Const*)arg2)->constisnull)||
6718+
(IsA(arg3,Const)&& ((Const*)arg3)->constisnull))
6719+
{
6720+
req->rows=0;
6721+
ret= (Node*)req;
6722+
}
6723+
elseif (IsA(arg1,Const)&&IsA(arg2,Const)&&IsA(arg3,Const))
6724+
{
6725+
Timestampstart,
6726+
finish;
6727+
Interval*step;
6728+
Datumdiff;
6729+
doubledstep;
6730+
int64dummy;
6731+
6732+
start=DatumGetTimestamp(((Const*)arg1)->constvalue);
6733+
finish=DatumGetTimestamp(((Const*)arg2)->constvalue);
6734+
step=DatumGetIntervalP(((Const*)arg3)->constvalue);
6735+
6736+
/*
6737+
* Perform some prechecks which could cause timestamp_mi to
6738+
* raise an ERROR. It's much better to just return some
6739+
* default estimate than error out in a support function.
6740+
*/
6741+
if (!TIMESTAMP_NOT_FINITE(start)&& !TIMESTAMP_NOT_FINITE(finish)&&
6742+
!pg_sub_s64_overflow(finish,start,&dummy))
6743+
{
6744+
diff=DirectFunctionCall2(timestamp_mi,
6745+
TimestampGetDatum(finish),
6746+
TimestampGetDatum(start));
6747+
6748+
#defineINTERVAL_TO_MICROSECONDS(i) ((((double) (i)->month * DAYS_PER_MONTH + (i)->day)) * USECS_PER_DAY + (i)->time)
6749+
6750+
dstep=INTERVAL_TO_MICROSECONDS(step);
6751+
6752+
/* This equation works for either sign of step */
6753+
if (dstep!=0.0)
6754+
{
6755+
Interval*idiff=DatumGetIntervalP(diff);
6756+
doubleddiff=INTERVAL_TO_MICROSECONDS(idiff);
6757+
6758+
req->rows=floor(ddiff /dstep+1.0);
6759+
ret= (Node*)req;
6760+
}
6761+
#undef INTERVAL_TO_MICROSECONDS
6762+
}
6763+
}
6764+
}
6765+
}
6766+
6767+
PG_RETURN_POINTER(ret);
6768+
}
6769+
6770+
66836771
/* timestamp_at_local()
66846772
* timestamptz_at_local()
66856773
*

‎src/include/catalog/pg_proc.dat

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8331,19 +8331,25 @@
83318331
prorettype => 'numeric', proargtypes => 'numeric numeric',
83328332
prosrc => 'generate_series_numeric' },
83338333
{ oid => '938', descr => 'non-persistent series generator',
8334-
proname => 'generate_series', prorows => '1000', proretset => 't',
8334+
proname => 'generate_series', prorows => '1000',
8335+
prosupport => 'generate_series_timestamp_support', proretset => 't',
83358336
prorettype => 'timestamp', proargtypes => 'timestamp timestamp interval',
83368337
prosrc => 'generate_series_timestamp' },
83378338
{ oid => '939', descr => 'non-persistent series generator',
8338-
proname => 'generate_series', prorows => '1000', proretset => 't',
8339+
proname => 'generate_series', prorows => '1000',
8340+
prosupport => 'generate_series_timestamp_support', proretset => 't',
83398341
provolatile => 's', prorettype => 'timestamptz',
83408342
proargtypes => 'timestamptz timestamptz interval',
83418343
prosrc => 'generate_series_timestamptz' },
83428344
{ oid => '6274', descr => 'non-persistent series generator',
8343-
proname => 'generate_series', prorows => '1000', proretset => 't',
8345+
proname => 'generate_series', prorows => '1000',
8346+
prosupport => 'generate_series_timestamp_support', proretset => 't',
83448347
prorettype => 'timestamptz',
83458348
proargtypes => 'timestamptz timestamptz interval text',
83468349
prosrc => 'generate_series_timestamptz_at_zone' },
8350+
{ oid => '8402', descr => 'planner support for generate_series',
8351+
proname => 'generate_series_timestamp_support', prorettype => 'internal',
8352+
proargtypes => 'internal', prosrc => 'generate_series_timestamp_support' },
83478353

83488354
# boolean aggregates
83498355
{ oid => '2515', descr => 'aggregate transition function',

‎src/test/regress/expected/misc_functions.out

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,43 @@
22
\getenv libdir PG_LIBDIR
33
\getenv dlsuffix PG_DLSUFFIX
44
\set regresslib :libdir '/regress' :dlsuffix
5+
-- Function to assist with verifying EXPLAIN which includes costs. A series
6+
-- of bool flags allows control over which portions are masked out
7+
CREATE FUNCTION explain_mask_costs(query text, do_analyze bool,
8+
hide_costs bool, hide_row_est bool, hide_width bool) RETURNS setof text
9+
LANGUAGE plpgsql AS
10+
$$
11+
DECLARE
12+
ln text;
13+
analyze_str text;
14+
BEGIN
15+
IF do_analyze = true THEN
16+
analyze_str := 'on';
17+
ELSE
18+
analyze_str := 'off';
19+
END IF;
20+
21+
FOR ln IN
22+
EXECUTE format('explain (analyze %s, costs on, summary off, timing off) %s',
23+
analyze_str, query)
24+
LOOP
25+
IF hide_costs = true THEN
26+
ln := regexp_replace(ln, 'cost=\d+\.\d\d\.\.\d+\.\d\d', 'cost=N..N');
27+
END IF;
28+
29+
IF hide_row_est = true THEN
30+
-- don't use 'g' so that we leave the actual rows intact
31+
ln := regexp_replace(ln, 'rows=\d+', 'rows=N');
32+
END IF;
33+
34+
IF hide_width = true THEN
35+
ln := regexp_replace(ln, 'width=\d+', 'width=N');
36+
END IF;
37+
38+
RETURN NEXT ln;
39+
END LOOP;
40+
END;
41+
$$;
542
--
643
-- num_nulls()
744
--
@@ -594,6 +631,78 @@ SELECT * FROM tenk1 a JOIN my_gen_series(1,10) g ON a.unique1 = g;
594631
Index Cond: (unique1 = g.g)
595632
(4 rows)
596633

634+
--
635+
-- Test the SupportRequestRows support function for generate_series_timestamp()
636+
--
637+
-- Ensure the row estimate matches the actual rows
638+
SELECT explain_mask_costs($$
639+
SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '1 day') g(s);$$,
640+
true, true, false, true);
641+
explain_mask_costs
642+
------------------------------------------------------------------------------------------
643+
Function Scan on generate_series g (cost=N..N rows=30 width=N) (actual rows=30 loops=1)
644+
(1 row)
645+
646+
-- As above but with generate_series_timestamp
647+
SELECT explain_mask_costs($$
648+
SELECT * FROM generate_series(TIMESTAMP '2024-02-01', TIMESTAMP '2024-03-01', INTERVAL '1 day') g(s);$$,
649+
true, true, false, true);
650+
explain_mask_costs
651+
------------------------------------------------------------------------------------------
652+
Function Scan on generate_series g (cost=N..N rows=30 width=N) (actual rows=30 loops=1)
653+
(1 row)
654+
655+
-- As above but with generate_series_timestamptz_at_zone()
656+
SELECT explain_mask_costs($$
657+
SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '1 day', 'UTC') g(s);$$,
658+
true, true, false, true);
659+
explain_mask_costs
660+
------------------------------------------------------------------------------------------
661+
Function Scan on generate_series g (cost=N..N rows=30 width=N) (actual rows=30 loops=1)
662+
(1 row)
663+
664+
-- Ensure the estimated and actual row counts match when the range isn't
665+
-- evenly divisible by the step
666+
SELECT explain_mask_costs($$
667+
SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '7 day') g(s);$$,
668+
true, true, false, true);
669+
explain_mask_costs
670+
----------------------------------------------------------------------------------------
671+
Function Scan on generate_series g (cost=N..N rows=5 width=N) (actual rows=5 loops=1)
672+
(1 row)
673+
674+
-- Ensure the estimates match when step is decreasing
675+
SELECT explain_mask_costs($$
676+
SELECT * FROM generate_series(TIMESTAMPTZ '2024-03-01', TIMESTAMPTZ '2024-02-01', INTERVAL '-1 day') g(s);$$,
677+
true, true, false, true);
678+
explain_mask_costs
679+
------------------------------------------------------------------------------------------
680+
Function Scan on generate_series g (cost=N..N rows=30 width=N) (actual rows=30 loops=1)
681+
(1 row)
682+
683+
-- Ensure an empty range estimates 1 row
684+
SELECT explain_mask_costs($$
685+
SELECT * FROM generate_series(TIMESTAMPTZ '2024-03-01', TIMESTAMPTZ '2024-02-01', INTERVAL '1 day') g(s);$$,
686+
true, true, false, true);
687+
explain_mask_costs
688+
----------------------------------------------------------------------------------------
689+
Function Scan on generate_series g (cost=N..N rows=1 width=N) (actual rows=0 loops=1)
690+
(1 row)
691+
692+
-- Ensure we get the default row estimate for infinity values
693+
SELECT explain_mask_costs($$
694+
SELECT * FROM generate_series(TIMESTAMPTZ '-infinity', TIMESTAMPTZ 'infinity', INTERVAL '1 day') g(s);$$,
695+
false, true, false, true);
696+
explain_mask_costs
697+
-------------------------------------------------------------------
698+
Function Scan on generate_series g (cost=N..N rows=1000 width=N)
699+
(1 row)
700+
701+
-- Ensure the row estimate behaves correctly when step size is zero.
702+
-- We expect generate_series_timestamp() to throw the error rather than in
703+
-- the support function.
704+
SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '0 day') g(s);
705+
ERROR: step size cannot equal zero
597706
-- Test functions for control data
598707
SELECT count(*) > 0 AS ok FROM pg_control_checkpoint();
599708
ok
@@ -706,3 +815,4 @@ SELECT pg_column_toast_chunk_id(a) IS NULL,
706815
(1 row)
707816

708817
DROP TABLE test_chunk_id;
818+
DROP FUNCTION explain_mask_costs(text, bool, bool, bool, bool);

‎src/test/regress/sql/misc_functions.sql

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,44 @@
44

55
\set regresslib :libdir'/regress' :dlsuffix
66

7+
-- Function to assist with verifying EXPLAIN which includes costs. A series
8+
-- of bool flags allows control over which portions are masked out
9+
CREATEFUNCTIONexplain_mask_costs(querytext, do_analyze bool,
10+
hide_costs bool, hide_row_est bool, hide_width bool) RETURNS setoftext
11+
LANGUAGE plpgsqlAS
12+
$$
13+
DECLARE
14+
lntext;
15+
analyze_strtext;
16+
BEGIN
17+
IF do_analyze= true THEN
18+
analyze_str :='on';
19+
ELSE
20+
analyze_str :='off';
21+
END IF;
22+
23+
FOR lnIN
24+
EXECUTE format('explain (analyze %s, costs on, summary off, timing off) %s',
25+
analyze_str, query)
26+
LOOP
27+
IF hide_costs= true THEN
28+
ln := regexp_replace(ln,'cost=\d+\.\d\d\.\.\d+\.\d\d','cost=N..N');
29+
END IF;
30+
31+
IF hide_row_est= true THEN
32+
-- don't use 'g' so that we leave the actual rows intact
33+
ln := regexp_replace(ln,'rows=\d+','rows=N');
34+
END IF;
35+
36+
IF hide_width= true THEN
37+
ln := regexp_replace(ln,'width=\d+','width=N');
38+
END IF;
39+
40+
RETURN NEXT ln;
41+
END LOOP;
42+
END;
43+
$$;
44+
745
--
846
-- num_nulls()
947
--
@@ -224,6 +262,51 @@ SELECT * FROM tenk1 a JOIN my_gen_series(1,1000) g ON a.unique1 = g;
224262
EXPLAIN (COSTS OFF)
225263
SELECT*FROM tenk1 aJOIN my_gen_series(1,10) gONa.unique1= g;
226264

265+
--
266+
-- Test the SupportRequestRows support function for generate_series_timestamp()
267+
--
268+
269+
-- Ensure the row estimate matches the actual rows
270+
SELECT explain_mask_costs($$
271+
SELECT*FROM generate_series(TIMESTAMPTZ'2024-02-01',TIMESTAMPTZ'2024-03-01', INTERVAL'1 day') g(s);$$,
272+
true, true, false, true);
273+
274+
-- As above but with generate_series_timestamp
275+
SELECT explain_mask_costs($$
276+
SELECT*FROM generate_series(TIMESTAMP'2024-02-01',TIMESTAMP'2024-03-01', INTERVAL'1 day') g(s);$$,
277+
true, true, false, true);
278+
279+
-- As above but with generate_series_timestamptz_at_zone()
280+
SELECT explain_mask_costs($$
281+
SELECT*FROM generate_series(TIMESTAMPTZ'2024-02-01',TIMESTAMPTZ'2024-03-01', INTERVAL'1 day','UTC') g(s);$$,
282+
true, true, false, true);
283+
284+
-- Ensure the estimated and actual row counts match when the range isn't
285+
-- evenly divisible by the step
286+
SELECT explain_mask_costs($$
287+
SELECT*FROM generate_series(TIMESTAMPTZ'2024-02-01',TIMESTAMPTZ'2024-03-01', INTERVAL'7 day') g(s);$$,
288+
true, true, false, true);
289+
290+
-- Ensure the estimates match when step is decreasing
291+
SELECT explain_mask_costs($$
292+
SELECT*FROM generate_series(TIMESTAMPTZ'2024-03-01',TIMESTAMPTZ'2024-02-01', INTERVAL'-1 day') g(s);$$,
293+
true, true, false, true);
294+
295+
-- Ensure an empty range estimates 1 row
296+
SELECT explain_mask_costs($$
297+
SELECT*FROM generate_series(TIMESTAMPTZ'2024-03-01',TIMESTAMPTZ'2024-02-01', INTERVAL'1 day') g(s);$$,
298+
true, true, false, true);
299+
300+
-- Ensure we get the default row estimate for infinity values
301+
SELECT explain_mask_costs($$
302+
SELECT*FROM generate_series(TIMESTAMPTZ'-infinity',TIMESTAMPTZ'infinity', INTERVAL'1 day') g(s);$$,
303+
false, true, false, true);
304+
305+
-- Ensure the row estimate behaves correctly when step size is zero.
306+
-- We expect generate_series_timestamp() to throw the error rather than in
307+
-- the support function.
308+
SELECT*FROM generate_series(TIMESTAMPTZ'2024-02-01',TIMESTAMPTZ'2024-03-01', INTERVAL'0 day') g(s);
309+
227310
-- Test functions for control data
228311
SELECTcount(*)>0AS okFROM pg_control_checkpoint();
229312
SELECTcount(*)>0AS okFROM pg_control_init();
@@ -273,3 +356,4 @@ SELECT pg_column_toast_chunk_id(a) IS NULL,
273356
pg_column_toast_chunk_id(b)IN (SELECT chunk_idFROM pg_toast.:toastrel)
274357
FROM test_chunk_id;
275358
DROPTABLE test_chunk_id;
359+
DROPFUNCTION explain_mask_costs(text, bool, bool, bool, bool);

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp