33 * spgtextproc.c
44 * implementation of radix tree (compressed trie) over text
55 *
6+ * In a text_ops SPGiST index, inner tuples can have a prefix which is the
7+ * common prefix of all strings indexed under that tuple. The node labels
8+ * represent the next byte of the string(s) after the prefix. Assuming we
9+ * always use the longest possible prefix, we will get more than one node
10+ * label unless the prefix length is restricted by SPGIST_MAX_PREFIX_LENGTH.
11+ *
12+ * To reconstruct the indexed string for any index entry, concatenate the
13+ * inner-tuple prefixes and node labels starting at the root and working
14+ * down to the leaf entry, then append the datum in the leaf entry.
15+ * (While descending the tree, "level" is the number of bytes reconstructed
16+ * so far.)
17+ *
18+ * However, there are two special cases for node labels: -1 indicates that
19+ * there are no more bytes after the prefix-so-far, and -2 indicates that we
20+ * had to split an existing allTheSame tuple (in such a case we have to create
21+ * a node label that doesn't correspond to any string byte). In either case,
22+ * the node label does not contribute anything to the reconstructed string.
23+ *
24+ * Previously, we used a node label of zero for both special cases, but
25+ * this was problematic because one can't tell whether a string ending at
26+ * the current level can be pushed down into such a child node. For
27+ * backwards compatibility, we still support such node labels for reading;
28+ * but no new entries will ever be pushed down into a zero-labeled child.
29+ * No new entries ever get pushed into a -2-labeled child, either.
30+ *
631 *
732 * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
833 * Portions Copyright (c) 1994, Regents of the University of California
2449
2550/*
2651 * In the worst case, an inner tuple in a text radix tree could have as many
27- * as 256 nodes (one for each possible byte value). Each node can take 16
28- * bytes on MAXALIGN=8 machines. The inner tuple must fit on an index page
29- * of size BLCKSZ. Rather than assuming we know the exact amount of overhead
30- * imposed by page headers, tuple headers, etc, we leave 100 bytes for that
31- * (the actual overhead should be no more than 56 bytes at this writing, so
32- * there is slop in this number). So we can safely create prefixes up to
33- * BLCKSZ - 256 * 16 - 100 bytes long. Unfortunately, because 256 * 16 is
34- * already 4K, there is no safe prefix length when BLCKSZ is less than 8K;
35- * it is always possible to get "SPGiST inner tuple size exceeds maximum"
36- * if there are too many distinct next-byte values at a given place in the
37- * tree. Since use of nonstandard block sizes appears to be negligible in
38- * the field, we just live with that fact for now, choosing a max prefix
39- * size of 32 bytes when BLCKSZ is configured smaller than default.
52+ * as 258 nodes (one for each possible byte value, plus the two special
53+ * cases). Each node can take 16 bytes on MAXALIGN=8 machines. The inner
54+ * tuple must fit on an index page of size BLCKSZ. Rather than assuming we
55+ * know the exact amount of overhead imposed by page headers, tuple headers,
56+ * etc, we leave 100 bytes for that (the actual overhead should be no more
57+ * than 56 bytes at this writing, so there is slop in this number).
58+ * So we can safely create prefixes up to BLCKSZ - 258 * 16 - 100 bytes long.
59+ * Unfortunately, because 258 * 16 is over 4K, there is no safe prefix length
60+ * when BLCKSZ is less than 8K; it is always possible to get "SPGiST inner
61+ * tuple size exceeds maximum" if there are too many distinct next-byte values
62+ * at a given place in the tree. Since use of nonstandard block sizes appears
63+ * to be negligible in the field, we just live with that fact for now,
64+ * choosing a max prefix size of 32 bytes when BLCKSZ is configured smaller
65+ * than default.
4066 */
41- #define SPGIST_MAX_PREFIX_LENGTH Max((int) (BLCKSZ -256 * 16 - 100), 32)
67+ #define SPGIST_MAX_PREFIX_LENGTH Max((int) (BLCKSZ -258 * 16 - 100), 32)
4268
4369/* Struct for sorting values in picksplit */
4470typedef struct spgNodePtr
4571{
4672Datum d ;
4773int i ;
48- uint8 c ;
74+ int16 c ;
4975}spgNodePtr ;
5076
5177
@@ -56,7 +82,7 @@ spg_text_config(PG_FUNCTION_ARGS)
5682spgConfigOut * cfg = (spgConfigOut * )PG_GETARG_POINTER (1 );
5783
5884cfg -> prefixType = TEXTOID ;
59- cfg -> labelType = CHAROID ;
85+ cfg -> labelType = INT2OID ;
6086cfg -> canReturnData = true;
6187cfg -> longValuesOK = true;/* suffixing will shorten long values */
6288PG_RETURN_VOID ();
@@ -107,20 +133,20 @@ commonPrefix(const char *a, const char *b, int lena, int lenb)
107133}
108134
109135/*
110- * Binary search an array ofuint8 datums for a match to c
136+ * Binary search an array ofint16 datums for a match to c
111137 *
112138 * On success, *i gets the match location; on failure, it gets where to insert
113139 */
114140static bool
115- searchChar (Datum * nodeLabels ,int nNodes ,uint8 c ,int * i )
141+ searchChar (Datum * nodeLabels ,int nNodes ,int16 c ,int * i )
116142{
117143int StopLow = 0 ,
118144StopHigh = nNodes ;
119145
120146while (StopLow < StopHigh )
121147{
122148int StopMiddle = (StopLow + StopHigh ) >>1 ;
123- uint8 middle = DatumGetUInt8 (nodeLabels [StopMiddle ]);
149+ int16 middle = DatumGetInt16 (nodeLabels [StopMiddle ]);
124150
125151if (c < middle )
126152StopHigh = StopMiddle ;
@@ -145,16 +171,19 @@ spg_text_choose(PG_FUNCTION_ARGS)
145171text * inText = DatumGetTextPP (in -> datum );
146172char * inStr = VARDATA_ANY (inText );
147173int inSize = VARSIZE_ANY_EXHDR (inText );
148- uint8 nodeChar = '\0' ;
149- int i = 0 ;
174+ char * prefixStr = NULL ;
175+ int prefixSize = 0 ;
150176int commonLen = 0 ;
177+ int16 nodeChar = 0 ;
178+ int i = 0 ;
151179
152180/* Check for prefix match, set nodeChar to first byte after prefix */
153181if (in -> hasPrefix )
154182{
155183text * prefixText = DatumGetTextPP (in -> prefixDatum );
156- char * prefixStr = VARDATA_ANY (prefixText );
157- int prefixSize = VARSIZE_ANY_EXHDR (prefixText );
184+
185+ prefixStr = VARDATA_ANY (prefixText );
186+ prefixSize = VARSIZE_ANY_EXHDR (prefixText );
158187
159188commonLen = commonPrefix (inStr + in -> level ,
160189prefixStr ,
@@ -164,9 +193,9 @@ spg_text_choose(PG_FUNCTION_ARGS)
164193if (commonLen == prefixSize )
165194{
166195if (inSize - in -> level > commonLen )
167- nodeChar = * (uint8 * ) (inStr + in -> level + commonLen );
196+ nodeChar = * (unsigned char * ) (inStr + in -> level + commonLen );
168197else
169- nodeChar = '\0' ;
198+ nodeChar = -1 ;
170199}
171200else
172201{
@@ -184,7 +213,7 @@ spg_text_choose(PG_FUNCTION_ARGS)
184213formTextDatum (prefixStr ,commonLen );
185214}
186215out -> result .splitTuple .nodeLabel =
187- UInt8GetDatum ( * (prefixStr + commonLen ));
216+ Int16GetDatum ( * ( unsigned char * ) (prefixStr + commonLen ));
188217
189218if (prefixSize - commonLen == 1 )
190219{
@@ -203,11 +232,11 @@ spg_text_choose(PG_FUNCTION_ARGS)
203232}
204233else if (inSize > in -> level )
205234{
206- nodeChar = * (uint8 * ) (inStr + in -> level );
235+ nodeChar = * (unsigned char * ) (inStr + in -> level );
207236}
208237else
209238{
210- nodeChar = '\0' ;
239+ nodeChar = -1 ;
211240}
212241
213242/* Look up nodeChar in the node label array */
@@ -219,13 +248,18 @@ spg_text_choose(PG_FUNCTION_ARGS)
219248 * to provide the correct levelAdd and restDatum values, and those are
220249 * the same regardless of which node gets chosen by core.)
221250 */
251+ int levelAdd ;
252+
222253out -> resultType = spgMatchNode ;
223254out -> result .matchNode .nodeN = i ;
224- out -> result .matchNode .levelAdd = commonLen + 1 ;
225- if (inSize - in -> level - commonLen - 1 > 0 )
255+ levelAdd = commonLen ;
256+ if (nodeChar >=0 )
257+ levelAdd ++ ;
258+ out -> result .matchNode .levelAdd = levelAdd ;
259+ if (inSize - in -> level - levelAdd > 0 )
226260out -> result .matchNode .restDatum =
227- formTextDatum (inStr + in -> level + commonLen + 1 ,
228- inSize - in -> level - commonLen - 1 );
261+ formTextDatum (inStr + in -> level + levelAdd ,
262+ inSize - in -> level - levelAdd );
229263else
230264out -> result .matchNode .restDatum =
231265formTextDatum (NULL ,0 );
@@ -234,21 +268,26 @@ spg_text_choose(PG_FUNCTION_ARGS)
234268{
235269/*
236270 * Can't use AddNode action, so split the tuple. The upper tuple has
237- * the same prefix as before and usesan empty node label for the
271+ * the same prefix as before and usesa dummy node label -2 for the
238272 * lower tuple. The lower tuple has no prefix and the same node
239273 * labels as the original tuple.
274+ *
275+ * Note: it might seem tempting to shorten the upper tuple's prefix,
276+ * if it has one, then use its last byte as label for the lower tuple.
277+ * But that doesn't win since we know the incoming value matches the
278+ * whole prefix: we'd just end up splitting the lower tuple again.
240279 */
241280out -> resultType = spgSplitTuple ;
242281out -> result .splitTuple .prefixHasPrefix = in -> hasPrefix ;
243282out -> result .splitTuple .prefixPrefixDatum = in -> prefixDatum ;
244- out -> result .splitTuple .nodeLabel = UInt8GetDatum ( '\0' );
283+ out -> result .splitTuple .nodeLabel = Int16GetDatum ( -2 );
245284out -> result .splitTuple .postfixHasPrefix = false;
246285}
247286else
248287{
249288/* Add a node for the not-previously-seen nodeChar value */
250289out -> resultType = spgAddNode ;
251- out -> result .addNode .nodeLabel = UInt8GetDatum (nodeChar );
290+ out -> result .addNode .nodeLabel = Int16GetDatum (nodeChar );
252291out -> result .addNode .nodeN = i ;
253292}
254293
@@ -262,12 +301,7 @@ cmpNodePtr(const void *a, const void *b)
262301const spgNodePtr * aa = (const spgNodePtr * )a ;
263302const spgNodePtr * bb = (const spgNodePtr * )b ;
264303
265- if (aa -> c == bb -> c )
266- return 0 ;
267- else if (aa -> c > bb -> c )
268- return 1 ;
269- else
270- return -1 ;
304+ return aa -> c - bb -> c ;
271305}
272306
273307Datum
@@ -319,15 +353,15 @@ spg_text_picksplit(PG_FUNCTION_ARGS)
319353text * texti = DatumGetTextPP (in -> datums [i ]);
320354
321355if (commonLen < VARSIZE_ANY_EXHDR (texti ))
322- nodes [i ].c = * (uint8 * ) (VARDATA_ANY (texti )+ commonLen );
356+ nodes [i ].c = * (unsigned char * ) (VARDATA_ANY (texti )+ commonLen );
323357else
324- nodes [i ].c = '\0' ;/* use\0 if string is all common */
358+ nodes [i ].c = -1 ;/* use-1 if string is all common */
325359nodes [i ].i = i ;
326360nodes [i ].d = in -> datums [i ];
327361}
328362
329363/*
330- * Sort by labelbytes so that we can group the values into nodes. This
364+ * Sort by labelvalues so that we can group the values into nodes. This
331365 * also ensures that the nodes are ordered by label value, allowing the
332366 * use of binary search in searchChar.
333367 */
@@ -346,7 +380,7 @@ spg_text_picksplit(PG_FUNCTION_ARGS)
346380
347381if (i == 0 || nodes [i ].c != nodes [i - 1 ].c )
348382{
349- out -> nodeLabels [out -> nNodes ]= UInt8GetDatum (nodes [i ].c );
383+ out -> nodeLabels [out -> nNodes ]= Int16GetDatum (nodes [i ].c );
350384out -> nNodes ++ ;
351385}
352386
@@ -377,9 +411,9 @@ spg_text_inner_consistent(PG_FUNCTION_ARGS)
377411
378412/*
379413 * Reconstruct values represented at this tuple, including parent data,
380- * prefix of this tuple if any, and the node label ifany. in->level
381- * should be the length of the previously reconstructed value, and the
382- * number of bytes added here is prefixSize or prefixSize + 1.
414+ * prefix of this tuple if any, and the node label ifit's non-dummy.
415+ *in->level should be the length of the previously reconstructed value,
416+ *and the number of bytes added here is prefixSize or prefixSize + 1.
383417 *
384418 * Note: we assume that in->reconstructedValue isn't toasted and doesn't
385419 * have a short varlena header. This is okay because it must have been
@@ -422,17 +456,17 @@ spg_text_inner_consistent(PG_FUNCTION_ARGS)
422456
423457for (i = 0 ;i < in -> nNodes ;i ++ )
424458{
425- uint8 nodeChar = DatumGetUInt8 (in -> nodeLabels [i ]);
459+ int16 nodeChar = DatumGetInt16 (in -> nodeLabels [i ]);
426460int thisLen ;
427461bool res = true;
428462int j ;
429463
430- /* If nodeChar iszero , don't include it in data */
431- if (nodeChar == '\0' )
464+ /* If nodeChar isa dummy value , don't include it in data */
465+ if (nodeChar <= 0 )
432466thisLen = maxReconstrLen - 1 ;
433467else
434468{
435- ((char * )VARDATA (reconstrText ))[maxReconstrLen - 1 ]= nodeChar ;
469+ ((unsigned char * )VARDATA (reconstrText ))[maxReconstrLen - 1 ]= nodeChar ;
436470thisLen = maxReconstrLen ;
437471}
438472
@@ -447,7 +481,9 @@ spg_text_inner_consistent(PG_FUNCTION_ARGS)
447481 * If it's a collation-aware operator, but the collation is C, we
448482 * can treat it as non-collation-aware. With non-C collation we
449483 * need to traverse whole tree :-( so there's no point in making
450- * any check here.
484+ * any check here. (Note also that our reconstructed value may
485+ * well end with a partial multibyte character, so that applying
486+ * any encoding-sensitive test to it would be risky anyhow.)
451487 */
452488if (strategy > 10 )
453489{