Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit464326e

Browse files
committed
Fix nasty performance problem in tsquery_rewrite().
tsquery_rewrite() tries to find matches to subsets of AND/OR conditions;for example, in the query 'a | b | c' the substitution subquery 'a | c'should match and lead to replacement of the first and third items.That's fine, but the matching algorithm apparently takes about O(2^N)for an N-clause query (I say "apparently" because the code is also bothunintelligible and uncommented). We could probably do better than thateven without any extra assumptions --- but actually, we know that thesubclauses are sorted, indeed are depending on that elsewhere in this verysame function. So we can just scan the two lists a single time to detectmatches, as though we were doing a merge join.Also do a re-flattening call (QTNTernary()) in tsquery_rewrite_query, justto make sure that the tree fits the expectations of the next search cycle.I didn't try to devise a test case for this, but I'm pretty sure that theoversight could have led to failure to match in some cases where a matchwould be expected.Improve comments, and also stick a CHECK_FOR_INTERRUPTS intodofindsubquery, just in case it's still too slow for somebody.Per report from Andreas Seltenreich. Back-patch to all supported branches.Discussion: <8760oasf2y.fsf@credativ.de>
1 parent2a2b439 commit464326e

File tree

1 file changed

+110
-80
lines changed

1 file changed

+110
-80
lines changed

‎src/backend/utils/adt/tsquery_rewrite.c

Lines changed: 110 additions & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -21,47 +21,43 @@
2121
#include"utils/builtins.h"
2222

2323

24-
staticint
25-
addone(int*counters,intlast,inttotal)
26-
{
27-
/* since this function recurses, it could be driven to stack overflow. */
28-
check_stack_depth();
29-
30-
counters[last]++;
31-
if (counters[last] >=total)
32-
{
33-
if (last==0)
34-
return0;
35-
if (addone(counters,last-1,total-1)==0)
36-
return0;
37-
counters[last]=counters[last-1]+1;
38-
}
39-
return1;
40-
}
41-
4224
/*
43-
* If node is equal to ex, replace it with subs. Replacement is actually done
44-
* by returning either node or a copy of subs.
25+
* If "node" is equal to "ex", return a copy of "subs" instead.
26+
* If "ex" matches a subset of node's children, return a modified version
27+
* of "node" in which those children are replaced with a copy of "subs".
28+
* Otherwise return "node" unmodified.
29+
*
30+
* The QTN_NOCHANGE bit is set in successfully modified nodes, so that
31+
* we won't uselessly recurse into them.
32+
* Also, set *isfind true if we make a replacement.
4533
*/
4634
staticQTNode*
4735
findeq(QTNode*node,QTNode*ex,QTNode*subs,bool*isfind)
4836
{
37+
/* Can't match unless signature matches and node type matches. */
4938
if ((node->sign&ex->sign)!=ex->sign||
5039
node->valnode->type!=ex->valnode->type)
5140
returnnode;
5241

42+
/* Ignore nodes marked NOCHANGE, too. */
5343
if (node->flags&QTN_NOCHANGE)
5444
returnnode;
5545

5646
if (node->valnode->type==QI_OPR)
5747
{
48+
/* Must be same operator. */
5849
if (node->valnode->qoperator.oper!=ex->valnode->qoperator.oper)
5950
returnnode;
6051

6152
if (node->nchild==ex->nchild)
6253
{
54+
/*
55+
* Simple case: when same number of children, match if equal.
56+
* (This is reliable when the children were sorted earlier.)
57+
*/
6358
if (QTNEq(node,ex))
6459
{
60+
/* Match; delete node and return a copy of subs instead. */
6561
QTNFree(node);
6662
if (subs)
6763
{
@@ -73,79 +69,92 @@ findeq(QTNode *node, QTNode *ex, QTNode *subs, bool *isfind)
7369
*isfind= true;
7470
}
7571
}
76-
elseif (node->nchild>ex->nchild)
72+
elseif (node->nchild>ex->nchild&&ex->nchild>0)
7773
{
7874
/*
79-
* AND and NOT are commutative, so we check if a subset of the
80-
* children match. For example, if tnode is A | B | C, and ex is B
81-
* | C, we have a match after we convert tnode to A | (B | C).
75+
* AND and OR are commutative/associative, so we should check if a
76+
* subset of the children match. For example, if node is A|B|C,
77+
* and ex is B|C, we have a match after we notionally convert node
78+
* to A|(B|C). This does not work for NOT or PHRASE nodes, but we
79+
* can't get here for those node types because they have a fixed
80+
* number of children.
81+
*
82+
* Because we expect that the children are sorted, it suffices to
83+
* make one pass through the two lists to find the matches.
8284
*/
83-
int*counters= (int*)palloc(sizeof(int)*node->nchild);
84-
inti;
85-
QTNode*tnode= (QTNode*)palloc(sizeof(QTNode));
86-
87-
memset(tnode,0,sizeof(QTNode));
88-
tnode->child= (QTNode**)palloc(sizeof(QTNode*)*ex->nchild);
89-
tnode->nchild=ex->nchild;
90-
tnode->valnode= (QueryItem*)palloc(sizeof(QueryItem));
91-
*(tnode->valnode)=*(ex->valnode);
92-
93-
for (i=0;i<ex->nchild;i++)
94-
counters[i]=i;
95-
96-
do
85+
bool*matched;
86+
intnmatched;
87+
inti,
88+
j;
89+
90+
/* Assert that the subset rule is OK */
91+
Assert(node->valnode->qoperator.oper==OP_AND||
92+
node->valnode->qoperator.oper==OP_OR);
93+
94+
/* matched[] will record which children of node matched */
95+
matched= (bool*)palloc0(node->nchild*sizeof(bool));
96+
nmatched=0;
97+
i=j=0;
98+
while (i<node->nchild&&j<ex->nchild)
9799
{
98-
tnode->sign=0;
99-
for (i=0;i<ex->nchild;i++)
100+
intcmp=QTNodeCompare(node->child[i],ex->child[j]);
101+
102+
if (cmp==0)
100103
{
101-
tnode->child[i]=node->child[counters[i]];
102-
tnode->sign |=tnode->child[i]->sign;
104+
/* match! */
105+
matched[i]= true;
106+
nmatched++;
107+
i++,j++;
103108
}
109+
elseif (cmp<0)
110+
{
111+
/* node->child[i] has no match, ignore it */
112+
i++;
113+
}
114+
else
115+
{
116+
/* ex->child[j] has no match; we can give up immediately */
117+
break;
118+
}
119+
}
104120

105-
if (QTNEq(tnode,ex))
121+
if (nmatched==ex->nchild)
122+
{
123+
/* collapse out the matched children of node */
124+
j=0;
125+
for (i=0;i<node->nchild;i++)
106126
{
107-
intj=0;
108-
109-
pfree(tnode->valnode);
110-
pfree(tnode->child);
111-
pfree(tnode);
112-
if (subs)
113-
{
114-
tnode=QTNCopy(subs);
115-
tnode->flags=QTN_NOCHANGE |QTN_NEEDFREE;
116-
}
127+
if (matched[i])
128+
QTNFree(node->child[i]);
117129
else
118-
tnode=NULL;
119-
120-
node->child[counters[0]]=tnode;
121-
122-
for (i=1;i<ex->nchild;i++)
123-
node->child[counters[i]]=NULL;
124-
for (i=0;i<node->nchild;i++)
125-
{
126-
if (node->child[i])
127-
{
128-
node->child[j]=node->child[i];
129-
j++;
130-
}
131-
}
130+
node->child[j++]=node->child[i];
131+
}
132132

133-
node->nchild=j;
133+
/* and instead insert a copy of subs */
134+
if (subs)
135+
{
136+
subs=QTNCopy(subs);
137+
subs->flags |=QTN_NOCHANGE;
138+
node->child[j++]=subs;
139+
}
134140

135-
*isfind= true;
141+
node->nchild=j;
142+
143+
/*
144+
* Re-sort the node to put new child in the right place. This
145+
* is a bit bogus, because it won't matter for findsubquery's
146+
* remaining processing, and it's insufficient to prepare the
147+
* tree for another search (we would need to re-flatten as
148+
* well, and we don't want to do that because we'd lose the
149+
* QTN_NOCHANGE marking on the new child). But it's needed to
150+
* keep the results the same as the regression tests expect.
151+
*/
152+
QTNSort(node);
136153

137-
break;
138-
}
139-
}while (addone(counters,ex->nchild-1,node->nchild));
140-
if (tnode&& (tnode->flags&QTN_NOCHANGE)==0)
141-
{
142-
pfree(tnode->valnode);
143-
pfree(tnode->child);
144-
pfree(tnode);
154+
*isfind= true;
145155
}
146-
else
147-
QTNSort(node);
148-
pfree(counters);
156+
157+
pfree(matched);
149158
}
150159
}
151160
else
@@ -173,12 +182,20 @@ findeq(QTNode *node, QTNode *ex, QTNode *subs, bool *isfind)
173182
returnnode;
174183
}
175184

185+
/*
186+
* Recursive guts of findsubquery(): attempt to replace "ex" with "subs"
187+
* at the root node, and if we failed to do so, recursively match against
188+
* child nodes.
189+
*/
176190
staticQTNode*
177191
dofindsubquery(QTNode*root,QTNode*ex,QTNode*subs,bool*isfind)
178192
{
179193
/* since this function recurses, it could be driven to stack overflow. */
180194
check_stack_depth();
181195

196+
/* also, since it's a bit expensive, let's check for query cancel. */
197+
CHECK_FOR_INTERRUPTS();
198+
182199
root=findeq(root,ex,subs,isfind);
183200

184201
if (root&& (root->flags&QTN_NOCHANGE)==0&&root->valnode->type==QI_OPR)
@@ -192,6 +209,10 @@ dofindsubquery(QTNode *root, QTNode *ex, QTNode *subs, bool *isfind)
192209
returnroot;
193210
}
194211

212+
/*
213+
* Delete any void subtrees that may have been inserted when the replacement
214+
* subtree is void.
215+
*/
195216
staticQTNode*
196217
dropvoidsubtree(QTNode*root)
197218
{
@@ -231,6 +252,14 @@ dropvoidsubtree(QTNode *root)
231252
returnroot;
232253
}
233254

255+
/*
256+
* Substitute "subs" for "ex" throughout the QTNode tree at root.
257+
*
258+
* If isfind isn't NULL, set *isfind to show whether we made any substitution.
259+
*
260+
* Both "root" and "ex" must have been through QTNTernary and QTNSort
261+
* to ensure reliable matching.
262+
*/
234263
QTNode*
235264
findsubquery(QTNode*root,QTNode*ex,QTNode*subs,bool*isfind)
236265
{
@@ -344,6 +373,7 @@ tsquery_rewrite_query(PG_FUNCTION_ARGS)
344373
{
345374
/* ready the tree for another pass */
346375
QTNClearFlags(tree,QTN_NOCHANGE);
376+
QTNTernary(tree);
347377
QTNSort(tree);
348378
}
349379
}

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp