77 *
88 *
99 * IDENTIFICATION
10- * $Header: /cvsroot/pgsql/src/backend/optimizer/path/pathkeys.c,v 1.12 1999/07/16 04:59:15 momjian Exp $
10+ * $Header: /cvsroot/pgsql/src/backend/optimizer/path/pathkeys.c,v 1.13 1999/08/13 01:17:16 tgl Exp $
1111 *
1212 *-------------------------------------------------------------------------
1313 */
@@ -33,18 +33,24 @@ static List *new_join_pathkey(List *pathkeys, List *join_rel_tlist,
3333 *order of the result generated by the Path.
3434 *
3535 *In single/base relation RelOptInfo's, the Path's represent various ways
36- *ofgenerating the relation and the resulting ordering of the tuples.
36+ *ofscanning the relation and the resulting ordering of the tuples.
3737 *Sequential scan Paths have NIL pathkeys, indicating no known ordering.
38- *Index scans have Path.pathkeys that represent the chosen index.
39- *A single-key index pathkeys would be { {tab1_indexkey1} }.For a
40- *multi-key index pathkeys would be { {tab1_indexkey1}, {tab1_indexkey2} },
41- *indicating major sort by indexkey1 and minor sort by indexkey2.
38+ *Index scans have Path.pathkeys that represent the chosen index's ordering,
39+ * if any. A single-key index would create a pathkey with a single sublist,
40+ *e.g. ( (tab1_indexkey1) ). A multi-key index generates a sublist per key,
41+ *e.g. ( (tab1_indexkey1) (tab1_indexkey2) ) which shows major sort by
42+ *indexkey1 and minor sort by indexkey2.
43+ *
44+ *Note that a multi-pass indexscan (OR clause scan) has NIL pathkeys since
45+ *we can say nothing about the overall order of its result. Also, an index
46+ *scan on an unordered type of index generates no useful pathkeys. However,
47+ *we can always create a pathkey by doing an explicit sort.
4248 *
4349 *Multi-relation RelOptInfo Path's are more complicated. Mergejoins are
4450 *only performed with equijoins ("="). Because of this, the multi-relation
4551 *path actually has more than one primary Var key. For example, a
46- *mergejoin Path of "tab1.col1 = tab2.col1" would generatea pathkeys of
47- *{ { tab1.col1, tab2.col1} } , indicating that the major sort order of the
52+ *mergejoin Path of "tab1.col1 = tab2.col1" would generate pathkeys of
53+ *( ( tab1.col1 tab2.col1) ) , indicating that the major sort order of the
4854 *Path can be taken to be *either* tab1.col1 or tab2.col1.
4955 *They are equal, so they are both primary sort keys. This allows future
5056 *joins to use either Var as a pre-sorted key to prevent upper Mergejoins
@@ -53,21 +59,30 @@ static List *new_join_pathkey(List *pathkeys, List *join_rel_tlist,
5359 *Note that while the order of the top list is meaningful (primary vs.
5460 *secondary sort key), the order of each sublist is arbitrary.
5561 *
56- *For multi-key sorts, if the outer is sorted by a multi-key index, the
57- *multi-key index remains after the join. If the inner has a multi-key
58- *sort, only the primary key of the inner is added to the result.
59- *Mergejoins only join on the primary key. Currently, non-primary keys
60- *in the pathkeys List are of limited value.
62+ *We can actually keep all of the keys of the outer path of a merge or
63+ *nestloop join, since the ordering of the outer path will be reflected
64+ *in the result. We add to each pathkey sublist any inner vars that are
65+ *equijoined to any of the outer vars in the sublist. In the nestloop
66+ *case we have to be careful to consider only equijoin operators; the
67+ *nestloop's join clauses might include non-equijoin operators.
68+ *(Currently, we do this by considering only mergejoinable operators while
69+ *making the pathkeys, since we have no separate marking for operators that
70+ *are equijoins but aren't mergejoinable.)
6171 *
6272 *Although Hashjoins also work only with equijoin operators, it is *not*
6373 *safe to consider the output of a Hashjoin to be sorted in any particular
6474 *order --- not even the outer path's order. This is true because the
65- *executor might have to split the join into multiple batches.
75+ *executor might have to split the join into multiple batches. Therefore
76+ *a Hashjoin is always given NIL pathkeys.
6677 *
67- *NestJoin does not perform sorting, and allows non-equijoins, so it does
68- *not allow useful pathkeys.(But couldn't we use the outer path's order?)
78+ *Notice that pathkeys only say *what* is being ordered, and not *how*
79+ *it is ordered. The actual sort ordering is indicated by a separate
80+ *data structure, the PathOrder. The PathOrder provides a sort operator
81+ *OID for each of the sublists of the path key. This is fairly bogus,
82+ *since in cross-datatype cases we really want to keep track of more than
83+ *one sort operator...
6984 *
70- *-- bjm
85+ *-- bjm & tgl
7186 *--------------------
7287 */
7388
@@ -328,17 +343,32 @@ make_pathkeys_from_joinkeys(List *joinkeys,
328343
329344/*
330345 * new_join_pathkeys
331- * Find the path keys for a join relation by finding all vars in the list
332- * of join clauses 'joinclauses' such that:
333- *(1) the var corresponding to the outer join relation is a
334- *key on the outer path
335- *(2) the var appears in the target list of the join relation
336- * In other words, add to each outer path key the inner path keys that
337- * are required for qualification.
346+ * Build the path keys for a join relation constructed by mergejoin or
347+ * nestloop join. These keys should include all the path key vars of the
348+ * outer path (since the join will retain the ordering of the outer path)
349+ * plus any vars of the inner path that are mergejoined to the outer vars.
350+ *
351+ * Per the discussion at the top of this file, mergejoined inner vars
352+ * can be considered path keys of the result, just the same as the outer
353+ * vars they were joined with.
354+ *
355+ * We can also use inner path vars as pathkeys of a nestloop join, but we
356+ * must be careful that we only consider equijoin clauses and not general
357+ * join clauses. For example, "t1.a < t2.b" might be a join clause of a
358+ * nestloop, but it doesn't result in b acquiring the ordering of a!
359+ * joinpath.c handles that problem by only passing this routine clauses
360+ * that are marked mergejoinable, even if a nestloop join is being built.
361+ * Therefore we only have 't1.a = t2.b' style clauses, and can expect that
362+ * the inner var will acquire the outer's ordering no matter which join
363+ * method is actually used.
364+ *
365+ * All vars in the result are copied from the join relation's tlist, not from
366+ * the given pathkeys or the join clauses. (Is that necessary? I suspect
367+ * not --- tgl)
338368 *
339369 * 'outer_pathkeys' is the list of the outer path's path keys
340370 * 'join_rel_tlist' is the target list of the join relation
341- * 'joinclauses' is the list ofrestricting join clauses
371+ * 'joinclauses' is the list ofmergejoinable join clauses
342372 *
343373 * Returns the list of new path keys.
344374 *
@@ -358,8 +388,13 @@ new_join_pathkeys(List *outer_pathkeys,
358388
359389new_pathkey = new_join_pathkey (outer_pathkey ,join_rel_tlist ,
360390joinclauses );
361- if (new_pathkey != NIL )
362- final_pathkeys = lappend (final_pathkeys ,new_pathkey );
391+ /* if we can find no sortable vars for the n'th sort key,
392+ * then we're done generating pathkeys; can't expect to order
393+ * subsequent vars. Not clear that this can really happen.
394+ */
395+ if (new_pathkey == NIL )
396+ break ;
397+ final_pathkeys = lappend (final_pathkeys ,new_pathkey );
363398}
364399return final_pathkeys ;
365400}
@@ -372,7 +407,7 @@ new_join_pathkeys(List *outer_pathkeys,
372407 * at the top of this file).
373408 *
374409 * Note that each returned pathkey is the var node found in
375- * 'join_rel_tlist' rather than the joinclause var node.
410+ * 'join_rel_tlist' rather than theinput pathkey or joinclause var node.
376411 * (Is this important?)Also, we return a fully copied list
377412 * that does not share any subnodes with existing data structures.
378413 * (Is that important, either?)