Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit8004953

Browse files
committed
Speed up ruleutils' name de-duplication code, and fix overlength-name case.
Since commit11e1318, ruleutils.c hasattempted to ensure that each RTE in a query or plan tree has a uniquealias name. However, the code that was added for this could be quite slow,even as bad as O(N^3) if N identical RTE names must be replaced, as notedby Jeff Janes. Improve matters by building a transient hash table withinset_rtable_names. The hash table in itself reduces the cost of detecting aduplicate from O(N) to O(1), and we can save another factor of N by storingthe number of de-duplicated names already created for each entry, so thatwe don't have to re-try names already created. This way is probably a bitslower overall for small range tables, but almost by definition, such casesshould not be a performance problem.In principle the same problem applies to the column-name-de-duplicationcode; but in practice that seems to be less of a problem, first becauseN is limited since we don't support extremely wide tables, and secondbecause duplicate column names within an RTE are fairly rare, so that inpractice the cost is more like O(N^2) not O(N^3). It would be very muchmessier to fix the column-name code, so for now I've left that alone.An independent problem in the same area was that the de-duplication codepaid no attention to the identifier length limit, and would happily produceidentifiers that were longer than NAMEDATALEN and wouldn't be unique aftertruncation to NAMEDATALEN. This could result in dump/reload failures, orperhaps even views that silently behaved differently than before. We canfix that by shortening the base name as needed. Fix it for both therelation and column name cases.In passing, check for interrupts in set_rtable_names, just in case it'sstill slow enough to be an issue.Back-patch to 9.3 where this code was introduced.
1 parent179c97b commit8004953

File tree

3 files changed

+161
-46
lines changed

3 files changed

+161
-46
lines changed

‎src/backend/utils/adt/ruleutils.c

Lines changed: 125 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
#include"commands/tablespace.h"
3939
#include"executor/spi.h"
4040
#include"funcapi.h"
41+
#include"mb/pg_wchar.h"
4142
#include"miscadmin.h"
4243
#include"nodes/makefuncs.h"
4344
#include"nodes/nodeFuncs.h"
@@ -55,6 +56,7 @@
5556
#include"utils/array.h"
5657
#include"utils/builtins.h"
5758
#include"utils/fmgroids.h"
59+
#include"utils/hsearch.h"
5860
#include"utils/lsyscache.h"
5961
#include"utils/rel.h"
6062
#include"utils/ruleutils.h"
@@ -267,6 +269,15 @@ typedef struct
267269
#definedeparse_columns_fetch(rangetable_index,dpns) \
268270
((deparse_columns *) list_nth((dpns)->rtable_columns, (rangetable_index)-1))
269271

272+
/*
273+
* Entry in set_rtable_names' hash table
274+
*/
275+
typedefstruct
276+
{
277+
charname[NAMEDATALEN];/* Hash key --- must be first */
278+
intcounter;/* Largest addition used so far for name */
279+
}NameHashEntry;
280+
270281

271282
/* ----------
272283
* Global data
@@ -312,8 +323,6 @@ static void print_function_rettype(StringInfo buf, HeapTuple proctup);
312323
staticvoidprint_function_trftypes(StringInfobuf,HeapTupleproctup);
313324
staticvoidset_rtable_names(deparse_namespace*dpns,List*parent_namespaces,
314325
Bitmapset*rels_used);
315-
staticboolrefname_is_unique(char*refname,deparse_namespace*dpns,
316-
List*parent_namespaces);
317326
staticvoidset_deparse_for_query(deparse_namespace*dpns,Query*query,
318327
List*parent_namespaces);
319328
staticvoidset_simple_column_names(deparse_namespace*dpns);
@@ -2676,15 +2685,61 @@ static void
26762685
set_rtable_names(deparse_namespace*dpns,List*parent_namespaces,
26772686
Bitmapset*rels_used)
26782687
{
2688+
HASHCTLhash_ctl;
2689+
HTAB*names_hash;
2690+
NameHashEntry*hentry;
2691+
boolfound;
2692+
intrtindex;
26792693
ListCell*lc;
2680-
intrtindex=1;
26812694

26822695
dpns->rtable_names=NIL;
2696+
/* nothing more to do if empty rtable */
2697+
if (dpns->rtable==NIL)
2698+
return;
2699+
2700+
/*
2701+
* We use a hash table to hold known names, so that this process is O(N)
2702+
* not O(N^2) for N names.
2703+
*/
2704+
MemSet(&hash_ctl,0,sizeof(hash_ctl));
2705+
hash_ctl.keysize=NAMEDATALEN;
2706+
hash_ctl.entrysize=sizeof(NameHashEntry);
2707+
hash_ctl.hcxt=CurrentMemoryContext;
2708+
names_hash=hash_create("set_rtable_names names",
2709+
list_length(dpns->rtable),
2710+
&hash_ctl,
2711+
HASH_ELEM |HASH_CONTEXT);
2712+
/* Preload the hash table with names appearing in parent_namespaces */
2713+
foreach(lc,parent_namespaces)
2714+
{
2715+
deparse_namespace*olddpns= (deparse_namespace*)lfirst(lc);
2716+
ListCell*lc2;
2717+
2718+
foreach(lc2,olddpns->rtable_names)
2719+
{
2720+
char*oldname= (char*)lfirst(lc2);
2721+
2722+
if (oldname==NULL)
2723+
continue;
2724+
hentry= (NameHashEntry*)hash_search(names_hash,
2725+
oldname,
2726+
HASH_ENTER,
2727+
&found);
2728+
/* we do not complain about duplicate names in parent namespaces */
2729+
hentry->counter=0;
2730+
}
2731+
}
2732+
2733+
/* Now we can scan the rtable */
2734+
rtindex=1;
26832735
foreach(lc,dpns->rtable)
26842736
{
26852737
RangeTblEntry*rte= (RangeTblEntry*)lfirst(lc);
26862738
char*refname;
26872739

2740+
/* Just in case this takes an unreasonable amount of time ... */
2741+
CHECK_FOR_INTERRUPTS();
2742+
26882743
if (rels_used&& !bms_is_member(rtindex,rels_used))
26892744
{
26902745
/* Ignore unreferenced RTE */
@@ -2712,56 +2767,62 @@ set_rtable_names(deparse_namespace *dpns, List *parent_namespaces,
27122767
}
27132768

27142769
/*
2715-
* If the selected name isn't unique, append digits to make it so
2770+
* If the selected name isn't unique, append digits to make it so, and
2771+
* make a new hash entry for it once we've got a unique name. For a
2772+
* very long input name, we might have to truncate to stay within
2773+
* NAMEDATALEN.
27162774
*/
2717-
if (refname&&
2718-
!refname_is_unique(refname,dpns,parent_namespaces))
2775+
if (refname)
27192776
{
2720-
char*modname= (char*)palloc(strlen(refname)+32);
2721-
inti=0;
2777+
hentry= (NameHashEntry*)hash_search(names_hash,
2778+
refname,
2779+
HASH_ENTER,
2780+
&found);
2781+
if (found)
2782+
{
2783+
/* Name already in use, must choose a new one */
2784+
intrefnamelen=strlen(refname);
2785+
char*modname= (char*)palloc(refnamelen+16);
2786+
NameHashEntry*hentry2;
27222787

2723-
do
2788+
do
2789+
{
2790+
hentry->counter++;
2791+
for (;;)
2792+
{
2793+
/*
2794+
* We avoid using %.*s here because it can misbehave
2795+
* if the data is not valid in what libc thinks is the
2796+
* prevailing encoding.
2797+
*/
2798+
memcpy(modname,refname,refnamelen);
2799+
sprintf(modname+refnamelen,"_%d",hentry->counter);
2800+
if (strlen(modname)<NAMEDATALEN)
2801+
break;
2802+
/* drop chars from refname to keep all the digits */
2803+
refnamelen=pg_mbcliplen(refname,refnamelen,
2804+
refnamelen-1);
2805+
}
2806+
hentry2= (NameHashEntry*)hash_search(names_hash,
2807+
modname,
2808+
HASH_ENTER,
2809+
&found);
2810+
}while (found);
2811+
hentry2->counter=0;/* init new hash entry */
2812+
refname=modname;
2813+
}
2814+
else
27242815
{
2725-
sprintf(modname,"%s_%d",refname,++i);
2726-
}while (!refname_is_unique(modname,dpns,parent_namespaces));
2727-
refname=modname;
2816+
/* Name not previously used, need only initialize hentry */
2817+
hentry->counter=0;
2818+
}
27282819
}
27292820

27302821
dpns->rtable_names=lappend(dpns->rtable_names,refname);
27312822
rtindex++;
27322823
}
2733-
}
2734-
2735-
/*
2736-
* refname_is_unique: is refname distinct from all already-chosen RTE names?
2737-
*/
2738-
staticbool
2739-
refname_is_unique(char*refname,deparse_namespace*dpns,
2740-
List*parent_namespaces)
2741-
{
2742-
ListCell*lc;
27432824

2744-
foreach(lc,dpns->rtable_names)
2745-
{
2746-
char*oldname= (char*)lfirst(lc);
2747-
2748-
if (oldname&&strcmp(oldname,refname)==0)
2749-
return false;
2750-
}
2751-
foreach(lc,parent_namespaces)
2752-
{
2753-
deparse_namespace*olddpns= (deparse_namespace*)lfirst(lc);
2754-
ListCell*lc2;
2755-
2756-
foreach(lc2,olddpns->rtable_names)
2757-
{
2758-
char*oldname= (char*)lfirst(lc2);
2759-
2760-
if (oldname&&strcmp(oldname,refname)==0)
2761-
return false;
2762-
}
2763-
}
2764-
return true;
2825+
hash_destroy(names_hash);
27652826
}
27662827

27672828
/*
@@ -3589,16 +3650,34 @@ make_colname_unique(char *colname, deparse_namespace *dpns,
35893650
deparse_columns*colinfo)
35903651
{
35913652
/*
3592-
* If the selected name isn't unique, append digits to make it so
3653+
* If the selected name isn't unique, append digits to make it so. For a
3654+
* very long input name, we might have to truncate to stay within
3655+
* NAMEDATALEN.
35933656
*/
35943657
if (!colname_is_unique(colname,dpns,colinfo))
35953658
{
3596-
char*modname= (char*)palloc(strlen(colname)+32);
3659+
intcolnamelen=strlen(colname);
3660+
char*modname= (char*)palloc(colnamelen+16);
35973661
inti=0;
35983662

35993663
do
36003664
{
3601-
sprintf(modname,"%s_%d",colname,++i);
3665+
i++;
3666+
for (;;)
3667+
{
3668+
/*
3669+
* We avoid using %.*s here because it can misbehave if the
3670+
* data is not valid in what libc thinks is the prevailing
3671+
* encoding.
3672+
*/
3673+
memcpy(modname,colname,colnamelen);
3674+
sprintf(modname+colnamelen,"_%d",i);
3675+
if (strlen(modname)<NAMEDATALEN)
3676+
break;
3677+
/* drop chars from colname to keep all the digits */
3678+
colnamelen=pg_mbcliplen(colname,colnamelen,
3679+
colnamelen-1);
3680+
}
36023681
}while (!colname_is_unique(modname,dpns,colinfo));
36033682
colname=modname;
36043683
}

‎src/test/regress/expected/create_view.out

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1475,6 +1475,33 @@ select * from int8_tbl i where i.* in (values(i.*::int8_tbl));
14751475
4567890123456789 | -4567890123456789
14761476
(5 rows)
14771477

1478+
-- check unique-ification of overlength names
1479+
create view tt18v as
1480+
select * from int8_tbl xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxy
1481+
union all
1482+
select * from int8_tbl xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxz;
1483+
NOTICE: identifier "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxy" will be truncated to "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
1484+
NOTICE: identifier "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxz" will be truncated to "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
1485+
select pg_get_viewdef('tt18v', true);
1486+
pg_get_viewdef
1487+
-----------------------------------------------------------------------------------
1488+
SELECT xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx.q1, +
1489+
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx.q2 +
1490+
FROM int8_tbl xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx +
1491+
UNION ALL +
1492+
SELECT xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx.q1, +
1493+
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx.q2 +
1494+
FROM int8_tbl xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx;
1495+
(1 row)
1496+
1497+
explain (costs off) select * from tt18v;
1498+
QUERY PLAN
1499+
--------------------------------------------------------------------------------------------
1500+
Append
1501+
-> Seq Scan on int8_tbl xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
1502+
-> Seq Scan on int8_tbl xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx_1
1503+
(3 rows)
1504+
14781505
-- clean up all the random objects we made above
14791506
set client_min_messages = warning;
14801507
DROP SCHEMA temp_view_test CASCADE;

‎src/test/regress/sql/create_view.sql

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -487,6 +487,15 @@ select * from tt17v;
487487
select pg_get_viewdef('tt17v', true);
488488
select*from int8_tbl iwhere i.*in (values(i.*::int8_tbl));
489489

490+
-- check unique-ification of overlength names
491+
492+
createviewtt18vas
493+
select*from int8_tbl xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxy
494+
union all
495+
select*from int8_tbl xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxz;
496+
select pg_get_viewdef('tt18v', true);
497+
explain (costs off)select*from tt18v;
498+
490499
-- clean up all the random objects we made above
491500
set client_min_messages= warning;
492501
DROPSCHEMA temp_view_test CASCADE;

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp