Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit6cda3ad

Browse files
committed
Cause planner to make use of average-column-width statistic that is now
collected by ANALYZE. Also, add some modest amount of intelligence toguesses that are used for varlena columns in the absence of any ANALYZEstatistics. The 'width' reported by EXPLAIN is finally something lessthan totally bogus for varlena columns ... and, in consequence, hashjoinestimating should be a little better ...
1 parentb66a0cc commit6cda3ad

File tree

4 files changed

+124
-42
lines changed

4 files changed

+124
-42
lines changed

‎doc/src/sgml/perform.sgml

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
<!--
2-
$Header: /cvsroot/pgsql/doc/src/sgml/perform.sgml,v 1.3 2001/03/24 23:03:26 petere Exp $
2+
$Header: /cvsroot/pgsql/doc/src/sgml/perform.sgml,v 1.4 2001/05/09 00:35:09 tgl Exp $
33
-->
44

55
<chapter id="performance-tips">
@@ -97,13 +97,6 @@ $Header: /cvsroot/pgsql/doc/src/sgml/perform.sgml,v 1.3 2001/03/24 23:03:26 pete
9797
by the query (again, without considering the effects of LIMIT).
9898
</para>
9999

100-
<para>
101-
Average width is pretty bogus because the thing really doesn't have
102-
any idea of the average length of variable-length columns. I'm thinking
103-
about improving that in the future, but it may not be worth the trouble,
104-
because the width isn't used for very much.
105-
</para>
106-
107100
<para>
108101
Here are some examples (using the regress test database after a
109102
vacuum analyze, and almost-7.0 sources):

‎src/backend/optimizer/path/costsize.c

Lines changed: 43 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@
4141
* Portions Copyright (c) 1994, Regents of the University of California
4242
*
4343
* IDENTIFICATION
44-
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.71 2001/05/07 00:43:20 tgl Exp $
44+
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.72 2001/05/09 00:35:09 tgl Exp $
4545
*
4646
*-------------------------------------------------------------------------
4747
*/
@@ -61,12 +61,6 @@
6161
#include"utils/syscache.h"
6262

6363

64-
/*
65-
* The length of a variable-length field in bytes (stupid estimate...)
66-
*/
67-
#define_DEFAULT_ATTRIBUTE_WIDTH_ 12
68-
69-
7064
#defineLOG2(x) (log(x) / 0.693147180559945)
7165
#defineLOG6(x) (log(x) / 1.79175946922805)
7266

@@ -90,7 +84,6 @@ boolenable_hashjoin = true;
9084

9185
staticboolcost_qual_eval_walker(Node*node,Cost*total);
9286
staticvoidset_rel_width(Query*root,RelOptInfo*rel);
93-
staticintcompute_attribute_width(TargetEntry*tlistentry);
9487
staticdoublerelation_byte_size(doubletuples,intwidth);
9588
staticdoublepage_size(doubletuples,intwidth);
9689

@@ -1082,36 +1075,54 @@ set_joinrel_size_estimates(Query *root, RelOptInfo *rel,
10821075
/*
10831076
* set_rel_width
10841077
*Set the estimated output width of the relation.
1078+
*
1079+
* NB: this works best on base relations because it prefers to look at
1080+
* real Vars. It will fail to make use of pg_statistic info when applied
1081+
* to a subquery relation, even if the subquery outputs are simple vars
1082+
* that we could have gotten info for. Is it worth trying to be smarter
1083+
* about subqueries?
10851084
*/
10861085
staticvoid
10871086
set_rel_width(Query*root,RelOptInfo*rel)
10881087
{
1089-
inttuple_width=0;
1090-
List*tle;
1088+
int32tuple_width=0;
1089+
List*tllist;
10911090

1092-
foreach(tle,rel->targetlist)
1093-
tuple_width+=compute_attribute_width((TargetEntry*)lfirst(tle));
1094-
Assert(tuple_width >=0);
1095-
rel->width=tuple_width;
1096-
}
1091+
foreach(tllist,rel->targetlist)
1092+
{
1093+
TargetEntry*tle= (TargetEntry*)lfirst(tllist);
1094+
int32item_width;
10971095

1098-
/*
1099-
* compute_attribute_width
1100-
* Given a target list entry, find the size in bytes of the attribute.
1101-
*
1102-
* If a field is variable-length, we make a default assumption.Would be
1103-
* better if VACUUM recorded some stats about the average field width...
1104-
* also, we have access to the atttypmod, but fail to use it...
1105-
*/
1106-
staticint
1107-
compute_attribute_width(TargetEntry*tlistentry)
1108-
{
1109-
intwidth=get_typlen(tlistentry->resdom->restype);
1096+
/*
1097+
* If it's a Var, try to get statistical info from pg_statistic.
1098+
*/
1099+
if (tle->expr&&IsA(tle->expr,Var))
1100+
{
1101+
Var*var= (Var*)tle->expr;
1102+
Oidrelid;
11101103

1111-
if (width<0)
1112-
return_DEFAULT_ATTRIBUTE_WIDTH_;
1113-
else
1114-
returnwidth;
1104+
relid=getrelid(var->varno,root->rtable);
1105+
if (relid!=InvalidOid)
1106+
{
1107+
item_width=get_attavgwidth(relid,var->varattno);
1108+
if (item_width>0)
1109+
{
1110+
tuple_width+=item_width;
1111+
continue;
1112+
}
1113+
}
1114+
}
1115+
/*
1116+
* Not a Var, or can't find statistics for it. Estimate using
1117+
* just the type info.
1118+
*/
1119+
item_width=get_typavgwidth(tle->resdom->restype,
1120+
tle->resdom->restypmod);
1121+
Assert(item_width>0);
1122+
tuple_width+=item_width;
1123+
}
1124+
Assert(tuple_width >=0);
1125+
rel->width=tuple_width;
11151126
}
11161127

11171128
/*
@@ -1122,7 +1133,7 @@ compute_attribute_width(TargetEntry *tlistentry)
11221133
staticdouble
11231134
relation_byte_size(doubletuples,intwidth)
11241135
{
1125-
returntuples* ((double) (width+sizeof(HeapTupleData)));
1136+
returntuples* ((double)MAXALIGN(width+sizeof(HeapTupleData)));
11261137
}
11271138

11281139
/*

‎src/backend/utils/cache/lsyscache.c

Lines changed: 77 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
* Portions Copyright (c) 1994, Regents of the University of California
88
*
99
* IDENTIFICATION
10-
* $Header: /cvsroot/pgsql/src/backend/utils/cache/lsyscache.c,v 1.53 2001/05/07 00:43:24 tgl Exp $
10+
* $Header: /cvsroot/pgsql/src/backend/utils/cache/lsyscache.c,v 1.54 2001/05/09 00:35:09 tgl Exp $
1111
*
1212
* NOTES
1313
* Eventually, the index information should go through here, too.
@@ -749,6 +749,56 @@ get_typdefault(Oid typid)
749749
returnreturnValue;
750750
}
751751

752+
/*
753+
* get_typavgwidth
754+
*
755+
* Given a type OID and a typmod value (pass -1 if typmod is unknown),
756+
* estimate the average width of values of the type. This is used by
757+
* the planner, which doesn't require absolutely correct results;
758+
* it's OK (and expected) to guess if we don't know for sure.
759+
*/
760+
int32
761+
get_typavgwidth(Oidtypid,int32typmod)
762+
{
763+
inttyplen=get_typlen(typid);
764+
int32maxwidth;
765+
766+
/*
767+
* Easy if it's a fixed-width type
768+
*/
769+
if (typlen>0)
770+
returntyplen;
771+
/*
772+
* type_maximum_size knows the encoding of typmod for some datatypes;
773+
* don't duplicate that knowledge here.
774+
*/
775+
maxwidth=type_maximum_size(typid,typmod);
776+
if (maxwidth>0)
777+
{
778+
/*
779+
* For BPCHAR, the max width is also the only width. Otherwise
780+
* we need to guess about the typical data width given the max.
781+
* A sliding scale for percentage of max width seems reasonable.
782+
*/
783+
if (typid==BPCHAROID)
784+
returnmaxwidth;
785+
if (maxwidth <=32)
786+
returnmaxwidth;/* assume full width */
787+
if (maxwidth<1000)
788+
return32+ (maxwidth-32) /2;/* assume 50% */
789+
/*
790+
* Beyond 1000, assume we're looking at something like
791+
* "varchar(10000)" where the limit isn't actually reached often,
792+
* and use a fixed estimate.
793+
*/
794+
return32+ (1000-32) /2;
795+
}
796+
/*
797+
* Ooops, we have no idea ... wild guess time.
798+
*/
799+
return32;
800+
}
801+
752802
/*
753803
* get_typtype
754804
*
@@ -782,6 +832,32 @@ get_typtype(Oid typid)
782832

783833
/*---------- STATISTICS CACHE ---------- */
784834

835+
/*
836+
* get_attavgwidth
837+
*
838+
* Given the table and attribute number of a column, get the average
839+
* width of entries in the column. Return zero if no data available.
840+
*/
841+
int32
842+
get_attavgwidth(Oidrelid,AttrNumberattnum)
843+
{
844+
HeapTupletp;
845+
846+
tp=SearchSysCache(STATRELATT,
847+
ObjectIdGetDatum(relid),
848+
Int16GetDatum(attnum),
849+
0,0);
850+
if (HeapTupleIsValid(tp))
851+
{
852+
int32stawidth= ((Form_pg_statistic)GETSTRUCT(tp))->stawidth;
853+
854+
ReleaseSysCache(tp);
855+
if (stawidth>0)
856+
returnstawidth;
857+
}
858+
return0;
859+
}
860+
785861
/*
786862
* get_attstatsslot
787863
*

‎src/include/utils/lsyscache.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
77
* Portions Copyright (c) 1994, Regents of the University of California
88
*
9-
* $Id: lsyscache.h,v 1.31 2001/05/07 00:43:26 tgl Exp $
9+
* $Id: lsyscache.h,v 1.32 2001/05/09 00:35:09 tgl Exp $
1010
*
1111
*-------------------------------------------------------------------------
1212
*/
@@ -39,6 +39,8 @@ extern bool get_typbyval(Oid typid);
3939
externvoidget_typlenbyval(Oidtypid,int16*typlen,bool*typbyval);
4040
externcharget_typstorage(Oidtypid);
4141
externDatumget_typdefault(Oidtypid);
42+
externint32get_typavgwidth(Oidtypid,int32typmod);
43+
externint32get_attavgwidth(Oidrelid,AttrNumberattnum);
4244
externboolget_attstatsslot(HeapTuplestatstuple,
4345
Oidatttype,int32atttypmod,
4446
intreqkind,Oidreqop,

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp