Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 82e1ba7

Browse files
committed
Make ANALYZE compute basic statistics even for types with no "=" operator.
Previously, ANALYZE simply ignored columns of datatypes that have neither a btree nor hash opclass (which means they have no recognized equality operator). Without a notion of equality, we can't identify most-common values nor estimate the number of distinct values. But we can still count nulls and compute the average physical column width, and those stats might be of value. Moreover there are some tools out there that don't work so well if rows are missing from pg_statistic. So let's add suitable logic for this case. While this is arguably a bug fix, it also has the potential to change query plans, and the gain seems not worth taking a risk of that in stable branches. So back-patch into 9.5 but not further. Oleksandr Shulgin, rewritten a bit by me.
1 parent a0d9f6e commit 82e1ba7

File tree

1 file changed

+104
-14
lines changed

1 file changed

+104
-14
lines changed

‎src/backend/commands/analyze.c

Lines changed: 104 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1689,10 +1689,14 @@ typedef struct
16891689
}CompareScalarsContext;
16901690

16911691

1692-
staticvoidcompute_minimal_stats(VacAttrStatsPstats,
1692+
staticvoidcompute_trivial_stats(VacAttrStatsPstats,
16931693
AnalyzeAttrFetchFuncfetchfunc,
16941694
intsamplerows,
16951695
doubletotalrows);
1696+
staticvoidcompute_distinct_stats(VacAttrStatsPstats,
1697+
AnalyzeAttrFetchFuncfetchfunc,
1698+
intsamplerows,
1699+
doubletotalrows);
16961700
staticvoidcompute_scalar_stats(VacAttrStatsPstats,
16971701
AnalyzeAttrFetchFuncfetchfunc,
16981702
intsamplerows,
@@ -1723,21 +1727,17 @@ std_typanalyze(VacAttrStats *stats)
17231727
&ltopr,&eqopr,NULL,
17241728
NULL);
17251729

1726-
/* If column has no "=" operator, we can't do much of anything */
1727-
if (!OidIsValid(eqopr))
1728-
return false;
1729-
17301730
/* Save the operator info for compute_stats routines */
17311731
mystats= (StdAnalyzeData*)palloc(sizeof(StdAnalyzeData));
17321732
mystats->eqopr=eqopr;
1733-
mystats->eqfunc=get_opcode(eqopr);
1733+
mystats->eqfunc=OidIsValid(eqopr) ?get_opcode(eqopr) :InvalidOid;
17341734
mystats->ltopr=ltopr;
17351735
stats->extra_data=mystats;
17361736

17371737
/*
17381738
* Determine which standard statistics algorithm to use
17391739
*/
1740-
if (OidIsValid(ltopr))
1740+
if (OidIsValid(eqopr)&&OidIsValid(ltopr))
17411741
{
17421742
/* Seems to be a scalar datatype */
17431743
stats->compute_stats=compute_scalar_stats;
@@ -1762,19 +1762,109 @@ std_typanalyze(VacAttrStats *stats)
17621762
*/
17631763
stats->minrows=300*attr->attstattarget;
17641764
}
1765+
elseif (OidIsValid(eqopr))
1766+
{
1767+
/* We can still recognize distinct values */
1768+
stats->compute_stats=compute_distinct_stats;
1769+
/* Might as well use the same minrows as above */
1770+
stats->minrows=300*attr->attstattarget;
1771+
}
17651772
else
17661773
{
1767-
/* Can't do much but the minimal stuff */
1768-
stats->compute_stats=compute_minimal_stats;
1774+
/* Can't do much but the trivial stuff */
1775+
stats->compute_stats=compute_trivial_stats;
17691776
/* Might as well use the same minrows as above */
17701777
stats->minrows=300*attr->attstattarget;
17711778
}
17721779

17731780
return true;
17741781
}
17751782

1783+
1784+
/*
1785+
*compute_trivial_stats() -- compute very basic column statistics
1786+
*
1787+
*We use this when we cannot find a hash "=" operator for the datatype.
1788+
*
1789+
*We determine the fraction of non-null rows and the average datum width.
1790+
*/
1791+
staticvoid
1792+
compute_trivial_stats(VacAttrStatsPstats,
1793+
AnalyzeAttrFetchFuncfetchfunc,
1794+
intsamplerows,
1795+
doubletotalrows)
1796+
{
1797+
inti;
1798+
intnull_cnt=0;
1799+
intnonnull_cnt=0;
1800+
doubletotal_width=0;
1801+
boolis_varlena= (!stats->attrtype->typbyval&&
1802+
stats->attrtype->typlen==-1);
1803+
boolis_varwidth= (!stats->attrtype->typbyval&&
1804+
stats->attrtype->typlen<0);
1805+
1806+
for (i=0;i<samplerows;i++)
1807+
{
1808+
Datumvalue;
1809+
boolisnull;
1810+
1811+
vacuum_delay_point();
1812+
1813+
value=fetchfunc(stats,i,&isnull);
1814+
1815+
/* Check for null/nonnull */
1816+
if (isnull)
1817+
{
1818+
null_cnt++;
1819+
continue;
1820+
}
1821+
nonnull_cnt++;
1822+
1823+
/*
1824+
* If it's a variable-width field, add up widths for average width
1825+
* calculation. Note that if the value is toasted, we use the toasted
1826+
* width. We don't bother with this calculation if it's a fixed-width
1827+
* type.
1828+
*/
1829+
if (is_varlena)
1830+
{
1831+
total_width+=VARSIZE_ANY(DatumGetPointer(value));
1832+
}
1833+
elseif (is_varwidth)
1834+
{
1835+
/* must be cstring */
1836+
total_width+=strlen(DatumGetCString(value))+1;
1837+
}
1838+
}
1839+
1840+
/* We can only compute average width if we found some non-null values. */
1841+
if (nonnull_cnt>0)
1842+
{
1843+
stats->stats_valid= true;
1844+
/* Do the simple null-frac and width stats */
1845+
stats->stanullfrac= (double)null_cnt / (double)samplerows;
1846+
if (is_varwidth)
1847+
stats->stawidth=total_width / (double)nonnull_cnt;
1848+
else
1849+
stats->stawidth=stats->attrtype->typlen;
1850+
stats->stadistinct=0.0;/* "unknown" */
1851+
}
1852+
elseif (null_cnt>0)
1853+
{
1854+
/* We found only nulls; assume the column is entirely null */
1855+
stats->stats_valid= true;
1856+
stats->stanullfrac=1.0;
1857+
if (is_varwidth)
1858+
stats->stawidth=0;/* "unknown" */
1859+
else
1860+
stats->stawidth=stats->attrtype->typlen;
1861+
stats->stadistinct=0.0;/* "unknown" */
1862+
}
1863+
}
1864+
1865+
17761866
/*
1777-
* compute_minimal_stats() -- compute minimal column statistics
1867+
*compute_distinct_stats() -- compute column statistics including ndistinct
17781868
*
17791869
*We use this when we can find only an "=" operator for the datatype.
17801870
*
@@ -1789,10 +1879,10 @@ std_typanalyze(VacAttrStats *stats)
17891879
*depend mainly on the length of the list we are willing to keep.
17901880
*/
17911881
staticvoid
1792-
compute_minimal_stats(VacAttrStatsPstats,
1793-
AnalyzeAttrFetchFuncfetchfunc,
1794-
intsamplerows,
1795-
doubletotalrows)
1882+
compute_distinct_stats(VacAttrStatsPstats,
1883+
AnalyzeAttrFetchFuncfetchfunc,
1884+
intsamplerows,
1885+
doubletotalrows)
17961886
{
17971887
inti;
17981888
intnull_cnt=0;

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp