Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commitfaf18a9

Browse files
committed
Speed up ruleutils' name de-duplication code, and fix overlength-name case.
Since commit11e1318, ruleutils.c hasattempted to ensure that each RTE in a query or plan tree has a uniquealias name. However, the code that was added for this could be quite slow,even as bad as O(N^3) if N identical RTE names must be replaced, as notedby Jeff Janes. Improve matters by building a transient hash table withinset_rtable_names. The hash table in itself reduces the cost of detecting aduplicate from O(N) to O(1), and we can save another factor of N by storingthe number of de-duplicated names already created for each entry, so thatwe don't have to re-try names already created. This way is probably a bitslower overall for small range tables, but almost by definition, such casesshould not be a performance problem.In principle the same problem applies to the column-name-de-duplicationcode; but in practice that seems to be less of a problem, first becauseN is limited since we don't support extremely wide tables, and secondbecause duplicate column names within an RTE are fairly rare, so that inpractice the cost is more like O(N^2) not O(N^3). It would be very muchmessier to fix the column-name code, so for now I've left that alone.An independent problem in the same area was that the de-duplication codepaid no attention to the identifier length limit, and would happily produceidentifiers that were longer than NAMEDATALEN and wouldn't be unique aftertruncation to NAMEDATALEN. This could result in dump/reload failures, orperhaps even views that silently behaved differently than before. We canfix that by shortening the base name as needed. Fix it for both therelation and column name cases.In passing, check for interrupts in set_rtable_names, just in case it'sstill slow enough to be an issue.Back-patch to 9.3 where this code was introduced.
1 parent7d0e872 commitfaf18a9

File tree

3 files changed

+161
-46
lines changed

3 files changed

+161
-46
lines changed

‎src/backend/utils/adt/ruleutils.c

Lines changed: 125 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
#include"commands/tablespace.h"
3737
#include"executor/spi.h"
3838
#include"funcapi.h"
39+
#include"mb/pg_wchar.h"
3940
#include"miscadmin.h"
4041
#include"nodes/makefuncs.h"
4142
#include"nodes/nodeFuncs.h"
@@ -52,6 +53,7 @@
5253
#include"utils/array.h"
5354
#include"utils/builtins.h"
5455
#include"utils/fmgroids.h"
56+
#include"utils/hsearch.h"
5557
#include"utils/lsyscache.h"
5658
#include"utils/rel.h"
5759
#include"utils/syscache.h"
@@ -260,6 +262,15 @@ typedef struct
260262
#definedeparse_columns_fetch(rangetable_index,dpns) \
261263
((deparse_columns *) list_nth((dpns)->rtable_columns, (rangetable_index)-1))
262264

265+
/*
266+
* Entry in set_rtable_names' hash table
267+
*/
268+
typedefstruct
269+
{
270+
charname[NAMEDATALEN];/* Hash key --- must be first */
271+
intcounter;/* Largest addition used so far for name */
272+
}NameHashEntry;
273+
263274

264275
/* ----------
265276
* Global data
@@ -304,8 +315,6 @@ static int print_function_arguments(StringInfo buf, HeapTuple proctup,
304315
staticvoidprint_function_rettype(StringInfobuf,HeapTupleproctup);
305316
staticvoidset_rtable_names(deparse_namespace*dpns,List*parent_namespaces,
306317
Bitmapset*rels_used);
307-
staticboolrefname_is_unique(char*refname,deparse_namespace*dpns,
308-
List*parent_namespaces);
309318
staticvoidset_deparse_for_query(deparse_namespace*dpns,Query*query,
310319
List*parent_namespaces);
311320
staticvoidset_simple_column_names(deparse_namespace*dpns);
@@ -2491,15 +2500,61 @@ static void
24912500
set_rtable_names(deparse_namespace*dpns,List*parent_namespaces,
24922501
Bitmapset*rels_used)
24932502
{
2503+
HASHCTLhash_ctl;
2504+
HTAB*names_hash;
2505+
NameHashEntry*hentry;
2506+
boolfound;
2507+
intrtindex;
24942508
ListCell*lc;
2495-
intrtindex=1;
24962509

24972510
dpns->rtable_names=NIL;
2511+
/* nothing more to do if empty rtable */
2512+
if (dpns->rtable==NIL)
2513+
return;
2514+
2515+
/*
2516+
* We use a hash table to hold known names, so that this process is O(N)
2517+
* not O(N^2) for N names.
2518+
*/
2519+
MemSet(&hash_ctl,0,sizeof(hash_ctl));
2520+
hash_ctl.keysize=NAMEDATALEN;
2521+
hash_ctl.entrysize=sizeof(NameHashEntry);
2522+
hash_ctl.hcxt=CurrentMemoryContext;
2523+
names_hash=hash_create("set_rtable_names names",
2524+
list_length(dpns->rtable),
2525+
&hash_ctl,
2526+
HASH_ELEM |HASH_CONTEXT);
2527+
/* Preload the hash table with names appearing in parent_namespaces */
2528+
foreach(lc,parent_namespaces)
2529+
{
2530+
deparse_namespace*olddpns= (deparse_namespace*)lfirst(lc);
2531+
ListCell*lc2;
2532+
2533+
foreach(lc2,olddpns->rtable_names)
2534+
{
2535+
char*oldname= (char*)lfirst(lc2);
2536+
2537+
if (oldname==NULL)
2538+
continue;
2539+
hentry= (NameHashEntry*)hash_search(names_hash,
2540+
oldname,
2541+
HASH_ENTER,
2542+
&found);
2543+
/* we do not complain about duplicate names in parent namespaces */
2544+
hentry->counter=0;
2545+
}
2546+
}
2547+
2548+
/* Now we can scan the rtable */
2549+
rtindex=1;
24982550
foreach(lc,dpns->rtable)
24992551
{
25002552
RangeTblEntry*rte= (RangeTblEntry*)lfirst(lc);
25012553
char*refname;
25022554

2555+
/* Just in case this takes an unreasonable amount of time ... */
2556+
CHECK_FOR_INTERRUPTS();
2557+
25032558
if (rels_used&& !bms_is_member(rtindex,rels_used))
25042559
{
25052560
/* Ignore unreferenced RTE */
@@ -2527,56 +2582,62 @@ set_rtable_names(deparse_namespace *dpns, List *parent_namespaces,
25272582
}
25282583

25292584
/*
2530-
* If the selected name isn't unique, append digits to make it so
2585+
* If the selected name isn't unique, append digits to make it so, and
2586+
* make a new hash entry for it once we've got a unique name. For a
2587+
* very long input name, we might have to truncate to stay within
2588+
* NAMEDATALEN.
25312589
*/
2532-
if (refname&&
2533-
!refname_is_unique(refname,dpns,parent_namespaces))
2590+
if (refname)
25342591
{
2535-
char*modname= (char*)palloc(strlen(refname)+32);
2536-
inti=0;
2592+
hentry= (NameHashEntry*)hash_search(names_hash,
2593+
refname,
2594+
HASH_ENTER,
2595+
&found);
2596+
if (found)
2597+
{
2598+
/* Name already in use, must choose a new one */
2599+
intrefnamelen=strlen(refname);
2600+
char*modname= (char*)palloc(refnamelen+16);
2601+
NameHashEntry*hentry2;
25372602

2538-
do
2603+
do
2604+
{
2605+
hentry->counter++;
2606+
for (;;)
2607+
{
2608+
/*
2609+
* We avoid using %.*s here because it can misbehave
2610+
* if the data is not valid in what libc thinks is the
2611+
* prevailing encoding.
2612+
*/
2613+
memcpy(modname,refname,refnamelen);
2614+
sprintf(modname+refnamelen,"_%d",hentry->counter);
2615+
if (strlen(modname)<NAMEDATALEN)
2616+
break;
2617+
/* drop chars from refname to keep all the digits */
2618+
refnamelen=pg_mbcliplen(refname,refnamelen,
2619+
refnamelen-1);
2620+
}
2621+
hentry2= (NameHashEntry*)hash_search(names_hash,
2622+
modname,
2623+
HASH_ENTER,
2624+
&found);
2625+
}while (found);
2626+
hentry2->counter=0;/* init new hash entry */
2627+
refname=modname;
2628+
}
2629+
else
25392630
{
2540-
sprintf(modname,"%s_%d",refname,++i);
2541-
}while (!refname_is_unique(modname,dpns,parent_namespaces));
2542-
refname=modname;
2631+
/* Name not previously used, need only initialize hentry */
2632+
hentry->counter=0;
2633+
}
25432634
}
25442635

25452636
dpns->rtable_names=lappend(dpns->rtable_names,refname);
25462637
rtindex++;
25472638
}
2548-
}
2549-
2550-
/*
2551-
* refname_is_unique: is refname distinct from all already-chosen RTE names?
2552-
*/
2553-
staticbool
2554-
refname_is_unique(char*refname,deparse_namespace*dpns,
2555-
List*parent_namespaces)
2556-
{
2557-
ListCell*lc;
25582639

2559-
foreach(lc,dpns->rtable_names)
2560-
{
2561-
char*oldname= (char*)lfirst(lc);
2562-
2563-
if (oldname&&strcmp(oldname,refname)==0)
2564-
return false;
2565-
}
2566-
foreach(lc,parent_namespaces)
2567-
{
2568-
deparse_namespace*olddpns= (deparse_namespace*)lfirst(lc);
2569-
ListCell*lc2;
2570-
2571-
foreach(lc2,olddpns->rtable_names)
2572-
{
2573-
char*oldname= (char*)lfirst(lc2);
2574-
2575-
if (oldname&&strcmp(oldname,refname)==0)
2576-
return false;
2577-
}
2578-
}
2579-
return true;
2640+
hash_destroy(names_hash);
25802641
}
25812642

25822643
/*
@@ -3404,16 +3465,34 @@ make_colname_unique(char *colname, deparse_namespace *dpns,
34043465
deparse_columns*colinfo)
34053466
{
34063467
/*
3407-
* If the selected name isn't unique, append digits to make it so
3468+
* If the selected name isn't unique, append digits to make it so. For a
3469+
* very long input name, we might have to truncate to stay within
3470+
* NAMEDATALEN.
34083471
*/
34093472
if (!colname_is_unique(colname,dpns,colinfo))
34103473
{
3411-
char*modname= (char*)palloc(strlen(colname)+32);
3474+
intcolnamelen=strlen(colname);
3475+
char*modname= (char*)palloc(colnamelen+16);
34123476
inti=0;
34133477

34143478
do
34153479
{
3416-
sprintf(modname,"%s_%d",colname,++i);
3480+
i++;
3481+
for (;;)
3482+
{
3483+
/*
3484+
* We avoid using %.*s here because it can misbehave if the
3485+
* data is not valid in what libc thinks is the prevailing
3486+
* encoding.
3487+
*/
3488+
memcpy(modname,colname,colnamelen);
3489+
sprintf(modname+colnamelen,"_%d",i);
3490+
if (strlen(modname)<NAMEDATALEN)
3491+
break;
3492+
/* drop chars from colname to keep all the digits */
3493+
colnamelen=pg_mbcliplen(colname,colnamelen,
3494+
colnamelen-1);
3495+
}
34173496
}while (!colname_is_unique(modname,dpns,colinfo));
34183497
colname=modname;
34193498
}

‎src/test/regress/expected/create_view.out

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1475,6 +1475,33 @@ select * from int8_tbl i where i.* in (values(i.*::int8_tbl));
14751475
4567890123456789 | -4567890123456789
14761476
(5 rows)
14771477

1478+
-- check unique-ification of overlength names
1479+
create view tt18v as
1480+
select * from int8_tbl xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxy
1481+
union all
1482+
select * from int8_tbl xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxz;
1483+
NOTICE: identifier "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxy" will be truncated to "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
1484+
NOTICE: identifier "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxz" will be truncated to "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
1485+
select pg_get_viewdef('tt18v', true);
1486+
pg_get_viewdef
1487+
-----------------------------------------------------------------------------------
1488+
SELECT xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx.q1, +
1489+
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx.q2 +
1490+
FROM int8_tbl xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx +
1491+
UNION ALL +
1492+
SELECT xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx.q1, +
1493+
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx.q2 +
1494+
FROM int8_tbl xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx;
1495+
(1 row)
1496+
1497+
explain (costs off) select * from tt18v;
1498+
QUERY PLAN
1499+
--------------------------------------------------------------------------------------------
1500+
Append
1501+
-> Seq Scan on int8_tbl xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
1502+
-> Seq Scan on int8_tbl xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx_1
1503+
(3 rows)
1504+
14781505
-- clean up all the random objects we made above
14791506
set client_min_messages = warning;
14801507
DROP SCHEMA temp_view_test CASCADE;

‎src/test/regress/sql/create_view.sql

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -487,6 +487,15 @@ select * from tt17v;
487487
select pg_get_viewdef('tt17v', true);
488488
select*from int8_tbl iwhere i.*in (values(i.*::int8_tbl));
489489

490+
-- check unique-ification of overlength names
491+
492+
createviewtt18vas
493+
select*from int8_tbl xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxy
494+
union all
495+
select*from int8_tbl xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxz;
496+
select pg_get_viewdef('tt18v', true);
497+
explain (costs off)select*from tt18v;
498+
490499
-- clean up all the random objects we made above
491500
set client_min_messages= warning;
492501
DROPSCHEMA temp_view_test CASCADE;

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp