Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commita76ef15

Browse files
committed
Add sort support routine for the UUID data type.
This introduces a simple encoding scheme to produce abbreviated keys:pack as many bytes of each UUID as will fit into a Datum. Onlittle-endian machines, a byteswap is also performed; the abbreviatedcomparator can therefore just consist of a simple 3-way unsigned integercomparison.The purpose of this change is to speed up sorting data on a columnof type UUID.Peter Geoghegan
1 parent5644419 commita76ef15

File tree

4 files changed

+191
-0
lines changed

4 files changed

+191
-0
lines changed

‎src/backend/utils/adt/uuid.c

Lines changed: 187 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,12 @@
1414
#include"postgres.h"
1515

1616
#include"access/hash.h"
17+
#include"lib/hyperloglog.h"
1718
#include"libpq/pqformat.h"
19+
#include"port/pg_bswap.h"
1820
#include"utils/builtins.h"
21+
#include"utils/guc.h"
22+
#include"utils/sortsupport.h"
1923
#include"utils/uuid.h"
2024

2125
/* uuid size in bytes */
@@ -27,8 +31,21 @@ struct pg_uuid_t
2731
unsignedchardata[UUID_LEN];
2832
};
2933

34+
/* sortsupport for uuid */
35+
typedefstruct
36+
{
37+
int64input_count;/* number of non-null values seen */
38+
boolestimating;/* true if estimating cardinality */
39+
40+
hyperLogLogStateabbr_card;/* cardinality estimator */
41+
}uuid_sortsupport_state;
42+
3043
staticvoidstring_to_uuid(constchar*source,pg_uuid_t*uuid);
3144
staticintuuid_internal_cmp(constpg_uuid_t*arg1,constpg_uuid_t*arg2);
45+
staticintuuid_fast_cmp(Datumx,Datumy,SortSupportssup);
46+
staticintuuid_cmp_abbrev(Datumx,Datumy,SortSupportssup);
47+
staticbooluuid_abbrev_abort(intmemtupcount,SortSupportssup);
48+
staticDatumuuid_abbrev_convert(Datumoriginal,SortSupportssup);
3249

3350
Datum
3451
uuid_in(PG_FUNCTION_ARGS)
@@ -222,6 +239,176 @@ uuid_cmp(PG_FUNCTION_ARGS)
222239
PG_RETURN_INT32(uuid_internal_cmp(arg1,arg2));
223240
}
224241

242+
/*
243+
* Sort support strategy routine
244+
*/
245+
Datum
246+
uuid_sortsupport(PG_FUNCTION_ARGS)
247+
{
248+
SortSupportssup= (SortSupport)PG_GETARG_POINTER(0);
249+
250+
ssup->comparator=uuid_fast_cmp;
251+
ssup->ssup_extra=NULL;
252+
253+
if (ssup->abbreviate)
254+
{
255+
uuid_sortsupport_state*uss;
256+
MemoryContextoldcontext;
257+
258+
oldcontext=MemoryContextSwitchTo(ssup->ssup_cxt);
259+
260+
uss=palloc(sizeof(uuid_sortsupport_state));
261+
uss->input_count=0;
262+
uss->estimating= true;
263+
initHyperLogLog(&uss->abbr_card,10);
264+
265+
ssup->ssup_extra=uss;
266+
267+
ssup->comparator=uuid_cmp_abbrev;
268+
ssup->abbrev_converter=uuid_abbrev_convert;
269+
ssup->abbrev_abort=uuid_abbrev_abort;
270+
ssup->abbrev_full_comparator=uuid_fast_cmp;
271+
272+
MemoryContextSwitchTo(oldcontext);
273+
}
274+
275+
PG_RETURN_VOID();
276+
}
277+
278+
/*
279+
* SortSupport comparison func
280+
*/
281+
staticint
282+
uuid_fast_cmp(Datumx,Datumy,SortSupportssup)
283+
{
284+
pg_uuid_t*arg1=DatumGetUUIDP(x);
285+
pg_uuid_t*arg2=DatumGetUUIDP(y);
286+
287+
returnuuid_internal_cmp(arg1,arg2);
288+
}
289+
290+
/*
291+
* Abbreviated key comparison func
292+
*/
293+
staticint
294+
uuid_cmp_abbrev(Datumx,Datumy,SortSupportssup)
295+
{
296+
if (x>y)
297+
return1;
298+
elseif (x==y)
299+
return0;
300+
else
301+
return-1;
302+
}
303+
304+
/*
305+
* Callback for estimating effectiveness of abbreviated key optimization.
306+
*
307+
* We pay no attention to the cardinality of the non-abbreviated data, because
308+
* there is no equality fast-path within authoritative uuid comparator.
309+
*/
310+
staticbool
311+
uuid_abbrev_abort(intmemtupcount,SortSupportssup)
312+
{
313+
uuid_sortsupport_state*uss=ssup->ssup_extra;
314+
doubleabbr_card;
315+
316+
if (memtupcount<10000||uss->input_count<10000|| !uss->estimating)
317+
return false;
318+
319+
abbr_card=estimateHyperLogLog(&uss->abbr_card);
320+
321+
/*
322+
* If we have >100k distinct values, then even if we were sorting many
323+
* billion rows we'd likely still break even, and the penalty of undoing
324+
* that many rows of abbrevs would probably not be worth it. Stop even
325+
* counting at that point.
326+
*/
327+
if (abbr_card>100000.0)
328+
{
329+
#ifdefTRACE_SORT
330+
if (trace_sort)
331+
elog(LOG,
332+
"uuid_abbrev: estimation ends at cardinality %f"
333+
" after "INT64_FORMAT" values (%d rows)",
334+
abbr_card,uss->input_count,memtupcount);
335+
#endif
336+
uss->estimating= false;
337+
return false;
338+
}
339+
340+
/*
341+
* Target minimum cardinality is 1 per ~2k of non-null inputs. 0.5 row
342+
* fudge factor allows us to abort earlier on genuinely pathological data
343+
* where we've had exactly one abbreviated value in the first 2k (non-null)
344+
* rows.
345+
*/
346+
if (abbr_card<uss->input_count /2000.0+0.5)
347+
{
348+
#ifdefTRACE_SORT
349+
if (trace_sort)
350+
elog(LOG,
351+
"uuid_abbrev: aborting abbreviation at cardinality %f"
352+
" below threshold %f after "INT64_FORMAT" values (%d rows)",
353+
abbr_card,uss->input_count /2000.0+0.5,uss->input_count,
354+
memtupcount);
355+
#endif
356+
return true;
357+
}
358+
359+
#ifdefTRACE_SORT
360+
if (trace_sort)
361+
elog(LOG,
362+
"uuid_abbrev: cardinality %f after "INT64_FORMAT
363+
" values (%d rows)",abbr_card,uss->input_count,memtupcount);
364+
#endif
365+
366+
return false;
367+
}
368+
369+
/*
370+
* Conversion routine for sortsupport. Converts original uuid representation
371+
* to abbreviated key representation. Our encoding strategy is simple -- pack
372+
* the first `sizeof(Datum)` bytes of uuid data into a Datum (on little-endian
373+
* machines, the bytes are stored in reverse order), and treat it as an
374+
* unsigned integer.
375+
*/
376+
staticDatum
377+
uuid_abbrev_convert(Datumoriginal,SortSupportssup)
378+
{
379+
uuid_sortsupport_state*uss=ssup->ssup_extra;
380+
pg_uuid_t*authoritative=DatumGetUUIDP(original);
381+
Datumres;
382+
383+
memcpy((char*)&res,authoritative->data,sizeof(Datum));
384+
uss->input_count+=1;
385+
386+
if (uss->estimating)
387+
{
388+
uint32tmp;
389+
390+
#ifSIZEOF_DATUM==8
391+
tmp= (uint32)res ^ (uint32) ((uint64)res >>32);
392+
#else/* SIZEOF_DATUM != 8 */
393+
tmp= (uint32)res;
394+
#endif
395+
396+
addHyperLogLog(&uss->abbr_card,DatumGetUInt32(hash_uint32(tmp)));
397+
}
398+
399+
/*
400+
* Byteswap on little-endian machines.
401+
*
402+
* This is needed so that uuid_cmp_abbrev() (an unsigned integer 3-way
403+
* comparator) works correctly on all platforms. If we didn't do this, the
404+
* comparator would have to call memcmp() with a pair of pointers to the
405+
* first byte of each abbreviated key, which is slower.
406+
*/
407+
res=DatumBigEndianToNative(res);
408+
409+
returnres;
410+
}
411+
225412
/* hash index support */
226413
Datum
227414
uuid_hash(PG_FUNCTION_ARGS)

‎src/include/catalog/pg_amproc.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,7 @@ DATA(insert (2233 703 703 1 380 ));
134134
DATA(insert (22347047041381 ));
135135
DATA(insert (2789272712794 ));
136136
DATA(insert (29682950295012960 ));
137+
DATA(insert (29682950295023300 ));
137138
DATA(insert (29942249224912987 ));
138139
DATA(insert (31942249224913187 ));
139140
DATA(insert (32533220322013251 ));

‎src/include/catalog/pg_proc.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4467,6 +4467,8 @@ DATA(insert OID = 2958 ( uuid_gt PGNSP PGUID 12 1 0 0 0 f f f t t f i s 2 0
44674467
DATA(insert OID = 2959 ( uuid_ne PGNSP PGUID 12 1 0 0 0 f f f t t f i s 2 0 16 "2950 2950" _null_ _null_ _null_ _null_ _null_ uuid_ne _null_ _null_ _null_ ));
44684468
DATA(insert OID = 2960 ( uuid_cmp PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 23 "2950 2950" _null_ _null_ _null_ _null_ _null_ uuid_cmp _null_ _null_ _null_ ));
44694469
DESCR("less-equal-greater");
4470+
DATA(insert OID = 3300 ( uuid_sortsupport PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 2278 "2281" _null_ _null_ _null_ _null_ _null_ uuid_sortsupport _null_ _null_ _null_ ));
4471+
DESCR("sort support");
44704472
DATA(insert OID = 2961 ( uuid_recv PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 2950 "2281" _null_ _null_ _null_ _null_ _null_ uuid_recv _null_ _null_ _null_ ));
44714473
DESCR("I/O");
44724474
DATA(insert OID = 2962 ( uuid_send PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 17 "2950" _null_ _null_ _null_ _null_ _null_ uuid_send _null_ _null_ _null_ ));

‎src/include/utils/builtins.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1174,6 +1174,7 @@ extern Datum uuid_ge(PG_FUNCTION_ARGS);
11741174
externDatumuuid_gt(PG_FUNCTION_ARGS);
11751175
externDatumuuid_ne(PG_FUNCTION_ARGS);
11761176
externDatumuuid_cmp(PG_FUNCTION_ARGS);
1177+
externDatumuuid_sortsupport(PG_FUNCTION_ARGS);
11771178
externDatumuuid_hash(PG_FUNCTION_ARGS);
11781179

11791180
/* windowfuncs.c */

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp