77 *
88 *
99 * IDENTIFICATION
10- * $Header: /cvsroot/pgsql/src/backend/optimizer/path/pathkeys.c,v 1.8 1999/04/30 03:59:06 tgl Exp $
10+ * $Header: /cvsroot/pgsql/src/backend/optimizer/path/pathkeys.c,v 1.9 1999/05/17 00:26:33 tgl Exp $
1111 *
1212 *-------------------------------------------------------------------------
1313 */
@@ -29,42 +29,51 @@ static int match_pathkey_joinkeys(List *pathkey, List *joinkeys,
2929int outer_or_inner );
3030static List * new_join_pathkey (List * pathkeys ,List * join_rel_tlist ,
3131List * joinclauses );
32- static List * get_joinvars_for_var (Var * pathkey ,List * * considered_pathkeys ,
33- List * join_rel_tlist ,List * joinclauses );
3432
3533
36- /*
34+ /*--------------------
3735 *Explanation of Path.pathkeys
3836 *
3937 *Path.pathkeys is a List of List of Var nodes that represent the sort
4038 *order of the result generated by the Path.
4139 *
4240 *In single/base relation RelOptInfo's, the Path's represent various ways
43- *of generate the relation. Sequential scan Paths have a NIL pathkeys.
44- *Index scans have Path.pathkeys that represent the chosen index. A
45- *single-key index pathkeys would be { {tab1_indexkey1} }. The pathkeys
46- *entry for a multi-key index would be { {tab1_indexkey1}, {tab1_indexkey2} }.
41+ *of generating the relation and the resulting ordering of the tuples.
42+ *Sequential scan Paths have NIL pathkeys, indicating no known ordering.
43+ *Index scans have Path.pathkeys that represent the chosen index.
44+ *A single-key index pathkeys would be { {tab1_indexkey1} }. For a
45+ *multi-key index pathkeys would be { {tab1_indexkey1}, {tab1_indexkey2} },
46+ *indicating major sort by indexkey1 and minor sort by indexkey2.
4747 *
4848 *Multi-relation RelOptInfo Path's are more complicated. Mergejoins are
49- *only performed withequajoins ("="). Because of this, the multi-relation
49+ *only performed withequijoins ("="). Because of this, the multi-relation
5050 *path actually has more than one primary Var key. For example, a
5151 *mergejoin Path of "tab1.col1 = tab2.col1" would generate a pathkeys of
52- *{ {tab1.col1, tab2.col1} }. This allows future joins to use either Var
53- *as a pre-sorted key to prevent Mergejoins from having to re-sort the Path.
54- *They are equal, so they are both primary sort keys. This is why pathkeys
55- *is a List of Lists.
52+ *{ {tab1.col1, tab2.col1} }, indicating that the major sort order of the
53+ *Path can be taken to be *either* tab1.col1 or tab2.col1.
54+ *They are equal, so they are both primary sort keys. This allows future
55+ *joins to use either Var as a pre-sorted key to prevent upper Mergejoins
56+ *from having to re-sort the Path. This is why pathkeys is a List of Lists.
57+ *
58+ *Note that while the order of the top list is meaningful (primary vs.
59+ *secondary sort key), the order of each sublist is arbitrary.
5660 *
5761 *For multi-key sorts, if the outer is sorted by a multi-key index, the
5862 *multi-key index remains after the join. If the inner has a multi-key
5963 *sort, only the primary key of the inner is added to the result.
6064 *Mergejoins only join on the primary key. Currently, non-primary keys
6165 *in the pathkeys List are of limited value.
6266 *
63- *HashJoin has similar functionality. NestJoin does not perform sorting,
64- *and allows non-equajoins, so it does not allow useful pathkeys.
67+ *Although Hashjoins also work only with equijoin operators, it is *not*
68+ *safe to consider the output of a Hashjoin to be sorted in any particular
69+ *order --- not even the outer path's order. This is true because the
70+ *executor might have to split the join into multiple batches.
71+ *
72+ *NestJoin does not perform sorting, and allows non-equijoins, so it does
73+ *not allow useful pathkeys. (But couldn't we use the outer path's order?)
6574 *
6675 *-- bjm
67- *
76+ *--------------------
6877 */
6978
7079/****************************************************************************
@@ -228,7 +237,7 @@ get_cheapest_path_for_joinkeys(List *joinkeys,
228237int outer_or_inner )
229238{
230239Path * matched_path = NULL ;
231- List * i = NIL ;
240+ List * i ;
232241
233242foreach (i ,paths )
234243{
@@ -337,16 +346,16 @@ new_join_pathkeys(List *outer_pathkeys,
337346List * join_rel_tlist ,
338347List * joinclauses )
339348{
340- List * outer_pathkey = NIL ;
341349List * final_pathkeys = NIL ;
342- List * new_pathkey ;
343- List * i = NIL ;
350+ List * i ;
344351
345352foreach (i ,outer_pathkeys )
346353{
347- outer_pathkey = lfirst (i );
354+ List * outer_pathkey = lfirst (i );
355+ List * new_pathkey ;
356+
348357new_pathkey = new_join_pathkey (outer_pathkey ,join_rel_tlist ,
349- joinclauses );
358+ joinclauses );
350359if (new_pathkey != NIL )
351360final_pathkeys = lappend (final_pathkeys ,new_pathkey );
352361}
@@ -355,104 +364,52 @@ new_join_pathkeys(List *outer_pathkeys,
355364
356365/*
357366 * new_join_pathkey
358- * Finds new vars that become pathkeys due to qualification clauses that
359- * contain any previously considered pathkeys. These new pathkeys plus the
360- * pathkeys from 'pathkeys' form a new pathkey for the join relation.
367+ * Generate an individual pathkey sublist, consisting of the outer vars
368+ * already mentioned in 'pathkey' plus any inner vars that are joined to
369+ * them (and thus can now also be considered path keys, per discussion
370+ * at the top of this file).
361371 *
362372 * Note that each returned pathkey is the var node found in
363373 * 'join_rel_tlist' rather than the joinclause var node.
374+ * (Is this important?) Also, we return a fully copied list
375+ * that does not share any subnodes with existing data structures.
376+ * (Is that important, either?)
364377 *
365- * 'pathkeys' is a list of pathkeys for which matching pathkeys are to be
366- *found
367- * 'considered_pathkeys' is the current list of all pathkeys corresponding
368- *to a given pathkey
369- *
370- * Returns a new pathkey(list of pathkeys).
378+ * Returns a new pathkey (list of pathkey variables).
371379 *
372380 */
373381static List *
374382new_join_pathkey (List * pathkey ,
375383List * join_rel_tlist ,
376384List * joinclauses )
377385{
378- List * final_pathkey = NIL ;
379- List * i = NIL ;
380- List * considered_pathkeys = NIL ;
386+ List * new_pathkey = NIL ;
387+ List * i ,
388+ * j ;
381389
382390foreach (i ,pathkey )
383391{
384392Var * key = (Var * )lfirst (i );
385- List * joined_keys ;
386393Expr * tlist_key ;
387394
388395Assert (key );
389- joined_keys = get_joinvars_for_var (key ,& considered_pathkeys ,
390- join_rel_tlist ,joinclauses );
391- if (joined_keys )
392- {
393- considered_pathkeys = nconc (considered_pathkeys ,joined_keys );
394- final_pathkey = nconc (final_pathkey ,joined_keys );
395- }
396396
397397tlist_key = matching_tlist_var (key ,join_rel_tlist );
398- if (tlist_key && !member (tlist_key ,considered_pathkeys ))
399- {
400- /*
401- *If pathkey is in the target list, and not considered,
402- *add it
403- */
404- considered_pathkeys = lcons (tlist_key ,considered_pathkeys );
405- final_pathkey = lcons (tlist_key ,final_pathkey );
406- }
407- }
408- return copyObject (final_pathkey );
409- }
398+ if (tlist_key && !member (tlist_key ,new_pathkey ))
399+ new_pathkey = lcons (copyObject (tlist_key ),new_pathkey );
410400
411- /*
412- * get_joinvars_for_var
413- * Returns a list of new pathkeys:
414- * (1) which are not listed in 'considered_pathkeys'
415- * (2) for which the "other" variable in some clause in 'joinclauses' is
416- * 'pathkey'
417- * (3) which are mentioned in 'join_rel_tlist'
418- *
419- * Note that each returned pathkey is the var node found in
420- * 'join_rel_tlist' rather than the joinclause var node.
421- *
422- * 'pathkey' is the var node for which we are trying to find matching
423- *clauses
424- *
425- * Returns a list of new pathkeys.
426- *
427- */
428- static List *
429- get_joinvars_for_var (Var * key ,
430- List * * considered_pathkeys ,
431- List * join_rel_tlist ,
432- List * joinclauses )
433- {
434- List * final_pathkey = NIL ;
435- Expr * joinclause ;
436- List * i ;
437- Expr * tlist_other_var ;
438-
439- foreach (i ,joinclauses )
440- {
441- joinclause = lfirst (i );
401+ foreach (j ,joinclauses )
402+ {
403+ Expr * joinclause = lfirst (j );
404+ Expr * tlist_other_var ;
442405
443- tlist_other_var = matching_tlist_var (
406+ tlist_other_var = matching_tlist_var (
444407other_join_clause_var (key ,joinclause ),
445408join_rel_tlist );
446- if (tlist_other_var &&
447- !member (tlist_other_var ,* considered_pathkeys ))
448- {
449- /*
450- *The key has a join variable that is in the target list,
451- *and has not been considered.
452- */
453- * considered_pathkeys = lcons (tlist_other_var ,* considered_pathkeys );
454- final_pathkey = lcons (tlist_other_var ,final_pathkey );
409+ if (tlist_other_var && !member (tlist_other_var ,new_pathkey ))
410+ new_pathkey = lcons (copyObject (tlist_other_var ),new_pathkey );
455411}
456412}
457- return final_pathkey ;
413+
414+ return new_pathkey ;
458415}