Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit959d00e

Browse files
committed
Use Append rather than MergeAppend for scanning ordered partitions.
If we need ordered output from a scan of a partitioned table, butthe ordering matches the partition ordering, then we don't need touse a MergeAppend to combine the pre-ordered per-partition scanresults: a plain Append will produce the same results. Thisboth saves useless comparison work inside the MergeAppend proper,and allows us to start returning tuples after istarting up justthe first child node not all of them.However, all is not peaches and cream, because if some of thechild nodes have high startup costs then there will be bigdiscontinuities in the tuples-returned-versus-elapsed-time curve.The planner's cost model cannot handle that (yet, anyway).If we model the Append's startup cost as being just the firstchild's startup cost, we may drastically underestimate the costof fetching slightly more tuples than are available from the firstchild. Since we've had bad experiences with over-optimistic choicesof "fast start" plans for ORDER BY LIMIT queries, that seems scary.As a klugy workaround, set the startup cost estimate for an orderedAppend to be the sum of its children's startup costs (as MergeAppendwould). This doesn't really describe reality, but it's less likelyto cause a bad plan choice than an underestimated startup cost would.In practice, the cases where we really care about this optimizationwill have child plans that are IndexScans with zero startup cost,so that the overly conservative estimate is still just zero.David Rowley, reviewed by Julien Rouhaud and Antonin HouskaDiscussion:https://postgr.es/m/CAKJS1f-hAqhPLRk_RaSFTgYxd=Tz5hA7kQ2h4-DhJufQk8TGuw@mail.gmail.com
1 parent9f06d79 commit959d00e

File tree

19 files changed

+1044
-135
lines changed

19 files changed

+1044
-135
lines changed

‎src/backend/executor/execProcnode.c

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -840,6 +840,19 @@ ExecSetTupleBound(int64 tuples_needed, PlanState *child_node)
840840
sortState->bound=tuples_needed;
841841
}
842842
}
843+
elseif (IsA(child_node,AppendState))
844+
{
845+
/*
846+
* If it is an Append, we can apply the bound to any nodes that are
847+
* children of the Append, since the Append surely need read no more
848+
* than that many tuples from any one input.
849+
*/
850+
AppendState*aState= (AppendState*)child_node;
851+
inti;
852+
853+
for (i=0;i<aState->as_nplans;i++)
854+
ExecSetTupleBound(tuples_needed,aState->appendplans[i]);
855+
}
843856
elseif (IsA(child_node,MergeAppendState))
844857
{
845858
/*

‎src/backend/nodes/outfuncs.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1847,6 +1847,7 @@ _outAppendPath(StringInfo str, const AppendPath *node)
18471847
WRITE_NODE_FIELD(partitioned_rels);
18481848
WRITE_NODE_FIELD(subpaths);
18491849
WRITE_INT_FIELD(first_partial_path);
1850+
WRITE_FLOAT_FIELD(limit_tuples,"%.0f");
18501851
}
18511852

18521853
staticvoid

‎src/backend/optimizer/path/allpaths.c

Lines changed: 198 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444
#include"optimizer/tlist.h"
4545
#include"parser/parse_clause.h"
4646
#include"parser/parsetree.h"
47+
#include"partitioning/partbounds.h"
4748
#include"partitioning/partprune.h"
4849
#include"rewrite/rewriteManip.h"
4950
#include"utils/lsyscache.h"
@@ -96,15 +97,16 @@ static void set_append_rel_size(PlannerInfo *root, RelOptInfo *rel,
9697
Indexrti,RangeTblEntry*rte);
9798
staticvoidset_append_rel_pathlist(PlannerInfo*root,RelOptInfo*rel,
9899
Indexrti,RangeTblEntry*rte);
99-
staticvoidgenerate_mergeappend_paths(PlannerInfo*root,RelOptInfo*rel,
100-
List*live_childrels,
101-
List*all_child_pathkeys,
102-
List*partitioned_rels);
100+
staticvoidgenerate_orderedappend_paths(PlannerInfo*root,RelOptInfo*rel,
101+
List*live_childrels,
102+
List*all_child_pathkeys,
103+
List*partitioned_rels);
103104
staticPath*get_cheapest_parameterized_child_path(PlannerInfo*root,
104105
RelOptInfo*rel,
105106
Relidsrequired_outer);
106107
staticvoidaccumulate_append_subpath(Path*path,
107108
List**subpaths,List**special_subpaths);
109+
staticPath*get_singleton_append_subpath(Path*path);
108110
staticvoidset_dummy_rel_pathlist(RelOptInfo*rel);
109111
staticvoidset_subquery_pathlist(PlannerInfo*root,RelOptInfo*rel,
110112
Indexrti,RangeTblEntry*rte);
@@ -1520,7 +1522,7 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
15201522
*/
15211523
if (subpaths_valid)
15221524
add_path(rel, (Path*)create_append_path(root,rel,subpaths,NIL,
1523-
NULL,0, false,
1525+
NIL,NULL,0, false,
15241526
partitioned_rels,-1));
15251527

15261528
/*
@@ -1562,7 +1564,7 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
15621564

15631565
/* Generate a partial append path. */
15641566
appendpath=create_append_path(root,rel,NIL,partial_subpaths,
1565-
NULL,parallel_workers,
1567+
NIL,NULL,parallel_workers,
15661568
enable_parallel_append,
15671569
partitioned_rels,-1);
15681570

@@ -1612,19 +1614,19 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
16121614

16131615
appendpath=create_append_path(root,rel,pa_nonpartial_subpaths,
16141616
pa_partial_subpaths,
1615-
NULL,parallel_workers, true,
1617+
NIL,NULL,parallel_workers, true,
16161618
partitioned_rels,partial_rows);
16171619
add_partial_path(rel, (Path*)appendpath);
16181620
}
16191621

16201622
/*
1621-
* Also build unparameterizedMergeAppend paths based on the collected
1623+
* Also build unparameterizedordered append paths based on the collected
16221624
* list of child pathkeys.
16231625
*/
16241626
if (subpaths_valid)
1625-
generate_mergeappend_paths(root,rel,live_childrels,
1626-
all_child_pathkeys,
1627-
partitioned_rels);
1627+
generate_orderedappend_paths(root,rel,live_childrels,
1628+
all_child_pathkeys,
1629+
partitioned_rels);
16281630

16291631
/*
16301632
* Build Append paths for each parameterization seen among the child rels.
@@ -1674,7 +1676,7 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
16741676
if (subpaths_valid)
16751677
add_path(rel, (Path*)
16761678
create_append_path(root,rel,subpaths,NIL,
1677-
required_outer,0, false,
1679+
NIL,required_outer,0, false,
16781680
partitioned_rels,-1));
16791681
}
16801682

@@ -1703,26 +1705,30 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
17031705
continue;
17041706

17051707
appendpath=create_append_path(root,rel,NIL,list_make1(path),
1706-
NULL,path->parallel_workers,
1707-
true,
1708+
NIL,NULL,
1709+
path->parallel_workers,true,
17081710
partitioned_rels,partial_rows);
17091711
add_partial_path(rel, (Path*)appendpath);
17101712
}
17111713
}
17121714
}
17131715

17141716
/*
1715-
*generate_mergeappend_paths
1716-
*GenerateMergeAppend paths for an append relation
1717+
*generate_orderedappend_paths
1718+
*Generateordered append paths for an append relation
17171719
*
1718-
* Generate a path for each ordering (pathkey list) appearing in
1720+
* Usually we generate MergeAppend paths here, but there are some special
1721+
* cases where we can generate simple Append paths, because the subpaths
1722+
* can provide tuples in the required order already.
1723+
*
1724+
* We generate a path for each ordering (pathkey list) appearing in
17191725
* all_child_pathkeys.
17201726
*
17211727
* We consider both cheapest-startup and cheapest-total cases, ie, for each
17221728
* interesting ordering, collect all the cheapest startup subpaths and all the
1723-
* cheapest total paths, and build aMergeAppend path for each case.
1729+
* cheapest total paths, and build asuitable path for each case.
17241730
*
1725-
* We don't currently generate any parameterizedMergeAppend paths. While
1731+
* We don't currently generate any parameterizedordered paths here. While
17261732
* it would not take much more code here to do so, it's very unclear that it
17271733
* is worth the planning cycles to investigate such paths: there's little
17281734
* use for an ordered path on the inside of a nestloop. In fact, it's likely
@@ -1731,24 +1737,80 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
17311737
* and a parameterized MergeAppend is going to be more expensive than the
17321738
* corresponding parameterized Append path. If we ever try harder to support
17331739
* parameterized mergejoin plans, it might be worth adding support for
1734-
* parameterizedMergeAppends to feed such joins. (See notes in
1740+
* parameterizedpaths here to feed such joins. (See notes in
17351741
* optimizer/README for why that might not ever happen, though.)
17361742
*/
17371743
staticvoid
1738-
generate_mergeappend_paths(PlannerInfo*root,RelOptInfo*rel,
1739-
List*live_childrels,
1740-
List*all_child_pathkeys,
1741-
List*partitioned_rels)
1744+
generate_orderedappend_paths(PlannerInfo*root,RelOptInfo*rel,
1745+
List*live_childrels,
1746+
List*all_child_pathkeys,
1747+
List*partitioned_rels)
17421748
{
17431749
ListCell*lcp;
1750+
List*partition_pathkeys=NIL;
1751+
List*partition_pathkeys_desc=NIL;
1752+
boolpartition_pathkeys_partial= true;
1753+
boolpartition_pathkeys_desc_partial= true;
1754+
1755+
/*
1756+
* Some partitioned table setups may allow us to use an Append node
1757+
* instead of a MergeAppend. This is possible in cases such as RANGE
1758+
* partitioned tables where it's guaranteed that an earlier partition must
1759+
* contain rows which come earlier in the sort order. To detect whether
1760+
* this is relevant, build pathkey descriptions of the partition ordering,
1761+
* for both forward and reverse scans.
1762+
*/
1763+
if (rel->part_scheme!=NULL&&IS_SIMPLE_REL(rel)&&
1764+
partitions_are_ordered(rel->boundinfo,rel->nparts))
1765+
{
1766+
partition_pathkeys=build_partition_pathkeys(root,rel,
1767+
ForwardScanDirection,
1768+
&partition_pathkeys_partial);
1769+
1770+
partition_pathkeys_desc=build_partition_pathkeys(root,rel,
1771+
BackwardScanDirection,
1772+
&partition_pathkeys_desc_partial);
1773+
1774+
/*
1775+
* You might think we should truncate_useless_pathkeys here, but
1776+
* allowing partition keys which are a subset of the query's pathkeys
1777+
* can often be useful. For example, consider a table partitioned by
1778+
* RANGE (a, b), and a query with ORDER BY a, b, c. If we have child
1779+
* paths that can produce the a, b, c ordering (perhaps via indexes on
1780+
* (a, b, c)) then it works to consider the appendrel output as
1781+
* ordered by a, b, c.
1782+
*/
1783+
}
17441784

1785+
/* Now consider each interesting sort ordering */
17451786
foreach(lcp,all_child_pathkeys)
17461787
{
17471788
List*pathkeys= (List*)lfirst(lcp);
17481789
List*startup_subpaths=NIL;
17491790
List*total_subpaths=NIL;
17501791
boolstartup_neq_total= false;
17511792
ListCell*lcr;
1793+
boolmatch_partition_order;
1794+
boolmatch_partition_order_desc;
1795+
1796+
/*
1797+
* Determine if this sort ordering matches any partition pathkeys we
1798+
* have, for both ascending and descending partition order. If the
1799+
* partition pathkeys happen to be contained in pathkeys then it still
1800+
* works, as described above, providing that the partition pathkeys
1801+
* are complete and not just a prefix of the partition keys. (In such
1802+
* cases we'll be relying on the child paths to have sorted the
1803+
* lower-order columns of the required pathkeys.)
1804+
*/
1805+
match_partition_order=
1806+
pathkeys_contained_in(pathkeys,partition_pathkeys)||
1807+
(!partition_pathkeys_partial&&
1808+
pathkeys_contained_in(partition_pathkeys,pathkeys));
1809+
1810+
match_partition_order_desc= !match_partition_order&&
1811+
(pathkeys_contained_in(pathkeys,partition_pathkeys_desc)||
1812+
(!partition_pathkeys_desc_partial&&
1813+
pathkeys_contained_in(partition_pathkeys_desc,pathkeys)));
17521814

17531815
/* Select the child paths for this ordering... */
17541816
foreach(lcr,live_childrels)
@@ -1791,26 +1853,94 @@ generate_mergeappend_paths(PlannerInfo *root, RelOptInfo *rel,
17911853
if (cheapest_startup!=cheapest_total)
17921854
startup_neq_total= true;
17931855

1794-
accumulate_append_subpath(cheapest_startup,
1795-
&startup_subpaths,NULL);
1796-
accumulate_append_subpath(cheapest_total,
1797-
&total_subpaths,NULL);
1856+
/*
1857+
* Collect the appropriate child paths. The required logic varies
1858+
* for the Append and MergeAppend cases.
1859+
*/
1860+
if (match_partition_order)
1861+
{
1862+
/*
1863+
* We're going to make a plain Append path. We don't need
1864+
* most of what accumulate_append_subpath would do, but we do
1865+
* want to cut out child Appends or MergeAppends if they have
1866+
* just a single subpath (and hence aren't doing anything
1867+
* useful).
1868+
*/
1869+
cheapest_startup=get_singleton_append_subpath(cheapest_startup);
1870+
cheapest_total=get_singleton_append_subpath(cheapest_total);
1871+
1872+
startup_subpaths=lappend(startup_subpaths,cheapest_startup);
1873+
total_subpaths=lappend(total_subpaths,cheapest_total);
1874+
}
1875+
elseif (match_partition_order_desc)
1876+
{
1877+
/*
1878+
* As above, but we need to reverse the order of the children,
1879+
* because nodeAppend.c doesn't know anything about reverse
1880+
* ordering and will scan the children in the order presented.
1881+
*/
1882+
cheapest_startup=get_singleton_append_subpath(cheapest_startup);
1883+
cheapest_total=get_singleton_append_subpath(cheapest_total);
1884+
1885+
startup_subpaths=lcons(cheapest_startup,startup_subpaths);
1886+
total_subpaths=lcons(cheapest_total,total_subpaths);
1887+
}
1888+
else
1889+
{
1890+
/*
1891+
* Otherwise, rely on accumulate_append_subpath to collect the
1892+
* child paths for the MergeAppend.
1893+
*/
1894+
accumulate_append_subpath(cheapest_startup,
1895+
&startup_subpaths,NULL);
1896+
accumulate_append_subpath(cheapest_total,
1897+
&total_subpaths,NULL);
1898+
}
17981899
}
17991900

1800-
/* ... and build the MergeAppend paths */
1801-
add_path(rel, (Path*)create_merge_append_path(root,
1802-
rel,
1803-
startup_subpaths,
1804-
pathkeys,
1805-
NULL,
1806-
partitioned_rels));
1807-
if (startup_neq_total)
1901+
/* ... and build the Append or MergeAppend paths */
1902+
if (match_partition_order||match_partition_order_desc)
1903+
{
1904+
/* We only need Append */
1905+
add_path(rel, (Path*)create_append_path(root,
1906+
rel,
1907+
startup_subpaths,
1908+
NIL,
1909+
pathkeys,
1910+
NULL,
1911+
0,
1912+
false,
1913+
partitioned_rels,
1914+
-1));
1915+
if (startup_neq_total)
1916+
add_path(rel, (Path*)create_append_path(root,
1917+
rel,
1918+
total_subpaths,
1919+
NIL,
1920+
pathkeys,
1921+
NULL,
1922+
0,
1923+
false,
1924+
partitioned_rels,
1925+
-1));
1926+
}
1927+
else
1928+
{
1929+
/* We need MergeAppend */
18081930
add_path(rel, (Path*)create_merge_append_path(root,
18091931
rel,
1810-
total_subpaths,
1932+
startup_subpaths,
18111933
pathkeys,
18121934
NULL,
18131935
partitioned_rels));
1936+
if (startup_neq_total)
1937+
add_path(rel, (Path*)create_merge_append_path(root,
1938+
rel,
1939+
total_subpaths,
1940+
pathkeys,
1941+
NULL,
1942+
partitioned_rels));
1943+
}
18141944
}
18151945
}
18161946

@@ -1901,7 +2031,6 @@ get_cheapest_parameterized_child_path(PlannerInfo *root, RelOptInfo *rel,
19012031
* omitting a sort step, which seems fine: if the parent is to be an Append,
19022032
* its result would be unsorted anyway, while if the parent is to be a
19032033
* MergeAppend, there's no point in a separate sort on a child.
1904-
* its result would be unsorted anyway.
19052034
*
19062035
* Normally, either path is a partial path and subpaths is a list of partial
19072036
* paths, or else path is a non-partial plan and subpaths is a list of those.
@@ -1951,6 +2080,36 @@ accumulate_append_subpath(Path *path, List **subpaths, List **special_subpaths)
19512080
*subpaths=lappend(*subpaths,path);
19522081
}
19532082

2083+
/*
2084+
* get_singleton_append_subpath
2085+
*Returns the single subpath of an Append/MergeAppend, or just
2086+
*return 'path' if it's not a single sub-path Append/MergeAppend.
2087+
*
2088+
* Note: 'path' must not be a parallel-aware path.
2089+
*/
2090+
staticPath*
2091+
get_singleton_append_subpath(Path*path)
2092+
{
2093+
Assert(!path->parallel_aware);
2094+
2095+
if (IsA(path,AppendPath))
2096+
{
2097+
AppendPath*apath= (AppendPath*)path;
2098+
2099+
if (list_length(apath->subpaths)==1)
2100+
return (Path*)linitial(apath->subpaths);
2101+
}
2102+
elseif (IsA(path,MergeAppendPath))
2103+
{
2104+
MergeAppendPath*mpath= (MergeAppendPath*)path;
2105+
2106+
if (list_length(mpath->subpaths)==1)
2107+
return (Path*)linitial(mpath->subpaths);
2108+
}
2109+
2110+
returnpath;
2111+
}
2112+
19542113
/*
19552114
* set_dummy_rel_pathlist
19562115
* Build a dummy path for a relation that's been excluded by constraints
@@ -1975,7 +2134,7 @@ set_dummy_rel_pathlist(RelOptInfo *rel)
19752134

19762135
/* Set up the dummy path */
19772136
add_path(rel, (Path*)create_append_path(NULL,rel,NIL,NIL,
1978-
rel->lateral_relids,
2137+
NIL,rel->lateral_relids,
19792138
0, false,NIL,-1));
19802139

19812140
/*

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp