Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commitf90d23d

Browse files
committed
Implement SortSupport for macaddr data type
Introduces a scheme to produce abbreviated keys for the macaddr type.Bump catalog version.Author: Brandur LeachReviewed-by: Julien Rouhaud, Peter Geogheganhttps://commitfest.postgresql.org/13/743/
1 parent5baf869 commitf90d23d

File tree

4 files changed

+215
-2
lines changed

4 files changed

+215
-2
lines changed

‎src/backend/utils/adt/mac.c

Lines changed: 211 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,13 @@
1414
#include"postgres.h"
1515

1616
#include"access/hash.h"
17+
#include"lib/hyperloglog.h"
1718
#include"libpq/pqformat.h"
19+
#include"port/pg_bswap.h"
1820
#include"utils/builtins.h"
21+
#include"utils/guc.h"
1922
#include"utils/inet.h"
23+
#include"utils/sortsupport.h"
2024

2125

2226
/*
@@ -29,6 +33,21 @@
2933
#definelobits(addr) \
3034
((unsigned long)(((addr)->d<<16)|((addr)->e<<8)|((addr)->f)))
3135

36+
/* sortsupport for macaddr */
37+
typedefstruct
38+
{
39+
int64input_count;/* number of non-null values seen */
40+
boolestimating;/* true if estimating cardinality */
41+
42+
hyperLogLogStateabbr_card;/* cardinality estimator */
43+
}macaddr_sortsupport_state;
44+
45+
staticintmacaddr_cmp_internal(macaddr*a1,macaddr*a2);
46+
staticintmacaddr_fast_cmp(Datumx,Datumy,SortSupportssup);
47+
staticintmacaddr_cmp_abbrev(Datumx,Datumy,SortSupportssup);
48+
staticboolmacaddr_abbrev_abort(intmemtupcount,SortSupportssup);
49+
staticDatummacaddr_abbrev_convert(Datumoriginal,SortSupportssup);
50+
3251
/*
3352
*MAC address reader. Accepts several common notations.
3453
*/
@@ -159,7 +178,7 @@ macaddr_send(PG_FUNCTION_ARGS)
159178
*Comparison function for sorting:
160179
*/
161180

162-
staticint32
181+
staticint
163182
macaddr_cmp_internal(macaddr*a1,macaddr*a2)
164183
{
165184
if (hibits(a1)<hibits(a2))
@@ -326,3 +345,194 @@ macaddr_trunc(PG_FUNCTION_ARGS)
326345

327346
PG_RETURN_MACADDR_P(result);
328347
}
348+
349+
/*
350+
* SortSupport strategy function. Populates a SortSupport struct with the
351+
* information necessary to use comparison by abbreviated keys.
352+
*/
353+
Datum
354+
macaddr_sortsupport(PG_FUNCTION_ARGS)
355+
{
356+
SortSupportssup= (SortSupport)PG_GETARG_POINTER(0);
357+
358+
ssup->comparator=macaddr_fast_cmp;
359+
ssup->ssup_extra=NULL;
360+
361+
if (ssup->abbreviate)
362+
{
363+
macaddr_sortsupport_state*uss;
364+
MemoryContextoldcontext;
365+
366+
oldcontext=MemoryContextSwitchTo(ssup->ssup_cxt);
367+
368+
uss=palloc(sizeof(macaddr_sortsupport_state));
369+
uss->input_count=0;
370+
uss->estimating= true;
371+
initHyperLogLog(&uss->abbr_card,10);
372+
373+
ssup->ssup_extra=uss;
374+
375+
ssup->comparator=macaddr_cmp_abbrev;
376+
ssup->abbrev_converter=macaddr_abbrev_convert;
377+
ssup->abbrev_abort=macaddr_abbrev_abort;
378+
ssup->abbrev_full_comparator=macaddr_fast_cmp;
379+
380+
MemoryContextSwitchTo(oldcontext);
381+
}
382+
383+
PG_RETURN_VOID();
384+
}
385+
386+
/*
387+
* SortSupport "traditional" comparison function. Pulls two MAC addresses from
388+
* the heap and runs a standard comparison on them.
389+
*/
390+
staticint
391+
macaddr_fast_cmp(Datumx,Datumy,SortSupportssup)
392+
{
393+
macaddr*arg1=DatumGetMacaddrP(x);
394+
macaddr*arg2=DatumGetMacaddrP(y);
395+
396+
returnmacaddr_cmp_internal(arg1,arg2);
397+
}
398+
399+
/*
400+
* SortSupport abbreviated key comparison function. Compares two MAC addresses
401+
* quickly by treating them like integers, and without having to go the heap.
402+
*/
403+
staticint
404+
macaddr_cmp_abbrev(Datumx,Datumy,SortSupportssup)
405+
{
406+
if (x>y)
407+
return1;
408+
elseif (x==y)
409+
return0;
410+
else
411+
return-1;
412+
}
413+
414+
/*
415+
* Callback for estimating effectiveness of abbreviated key optimization.
416+
*
417+
* We pay no attention to the cardinality of the non-abbreviated data, because
418+
* there is no equality fast-path within authoritative macaddr comparator.
419+
*/
420+
staticbool
421+
macaddr_abbrev_abort(intmemtupcount,SortSupportssup)
422+
{
423+
macaddr_sortsupport_state*uss=ssup->ssup_extra;
424+
doubleabbr_card;
425+
426+
if (memtupcount<10000||uss->input_count<10000|| !uss->estimating)
427+
return false;
428+
429+
abbr_card=estimateHyperLogLog(&uss->abbr_card);
430+
431+
/*
432+
* If we have >100k distinct values, then even if we were sorting many
433+
* billion rows we'd likely still break even, and the penalty of undoing
434+
* that many rows of abbrevs would probably not be worth it. At this point
435+
* we stop counting because we know that we're now fully committed.
436+
*/
437+
if (abbr_card>100000.0)
438+
{
439+
#ifdefTRACE_SORT
440+
if (trace_sort)
441+
elog(LOG,
442+
"macaddr_abbrev: estimation ends at cardinality %f"
443+
" after "INT64_FORMAT" values (%d rows)",
444+
abbr_card,uss->input_count,memtupcount);
445+
#endif
446+
uss->estimating= false;
447+
return false;
448+
}
449+
450+
/*
451+
* Target minimum cardinality is 1 per ~2k of non-null inputs. 0.5 row
452+
* fudge factor allows us to abort earlier on genuinely pathological data
453+
* where we've had exactly one abbreviated value in the first 2k
454+
* (non-null) rows.
455+
*/
456+
if (abbr_card<uss->input_count /2000.0+0.5)
457+
{
458+
#ifdefTRACE_SORT
459+
if (trace_sort)
460+
elog(LOG,
461+
"macaddr_abbrev: aborting abbreviation at cardinality %f"
462+
" below threshold %f after "INT64_FORMAT" values (%d rows)",
463+
abbr_card,uss->input_count /2000.0+0.5,uss->input_count,
464+
memtupcount);
465+
#endif
466+
return true;
467+
}
468+
469+
#ifdefTRACE_SORT
470+
if (trace_sort)
471+
elog(LOG,
472+
"macaddr_abbrev: cardinality %f after "INT64_FORMAT
473+
" values (%d rows)",abbr_card,uss->input_count,memtupcount);
474+
#endif
475+
476+
return false;
477+
}
478+
479+
/*
480+
* SortSupport converstion routine. Converts original macaddr representation
481+
* to abbreviated key representation.
482+
*
483+
* Packs the bytes of a 6-byte MAC address into a Datum and treats it as an
484+
* unsigned integer for purposes of comparison. On a 64-bit machine, there
485+
* will be two zeroed bytes of padding. The integer is converted to native
486+
* endianness to facilitate easy comparison.
487+
*/
488+
staticDatum
489+
macaddr_abbrev_convert(Datumoriginal,SortSupportssup)
490+
{
491+
macaddr_sortsupport_state*uss=ssup->ssup_extra;
492+
macaddr*authoritative=DatumGetMacaddrP(original);
493+
Datumres;
494+
495+
/*
496+
* On a 64-bit machine, zero out the 8-byte datum and copy the 6 bytes of
497+
* the MAC address in. There will be two bytes of zero padding on the end
498+
* of the least significant bits.
499+
*/
500+
#ifSIZEOF_DATUM==8
501+
memset(&res,0,SIZEOF_DATUM);
502+
memcpy(&res,authoritative,sizeof(macaddr));
503+
#else/* SIZEOF_DATUM != 8 */
504+
memcpy(&res,authoritative,SIZEOF_DATUM);
505+
#endif
506+
uss->input_count+=1;
507+
508+
/*
509+
* Cardinality estimation. The estimate uses uint32, so on a 64-bit
510+
* architecture, XOR the two 32-bit halves together to produce slightly
511+
* more entropy. The two zeroed bytes won't have any practical impact on
512+
* this operation.
513+
*/
514+
if (uss->estimating)
515+
{
516+
uint32tmp;
517+
518+
#ifSIZEOF_DATUM==8
519+
tmp= (uint32)res ^ (uint32) ((uint64)res >>32);
520+
#else/* SIZEOF_DATUM != 8 */
521+
tmp= (uint32)res;
522+
#endif
523+
524+
addHyperLogLog(&uss->abbr_card,DatumGetUInt32(hash_uint32(tmp)));
525+
}
526+
527+
/*
528+
* Byteswap on little-endian machines.
529+
*
530+
* This is needed so that macaddr_cmp_abbrev() (an unsigned integer 3-way
531+
* comparator) works correctly on all platforms. Without this, the
532+
* comparator would have to call memcmp() with a pair of pointers to the
533+
* first byte of each abbreviated key, which is slower.
534+
*/
535+
res=DatumBigEndianToNative(res);
536+
537+
returnres;
538+
}

‎src/include/catalog/catversion.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,6 @@
5353
*/
5454

5555
/*yyyymmddN */
56-
#defineCATALOG_VERSION_NO201703291
56+
#defineCATALOG_VERSION_NO201703292
5757

5858
#endif

‎src/include/catalog/pg_amproc.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,7 @@ DATA(insert (1976 20 23 1 2189 ));
117117
DATA(insert (1976202112193 ));
118118
DATA(insert (19821186118611315 ));
119119
DATA(insert (19848298291836 ));
120+
DATA(insert (198482982923359 ));
120121
DATA(insert (198619191359 ));
121122
DATA(insert (1986191923135 ));
122123
DATA(insert (19881700170011769 ));

‎src/include/catalog/pg_proc.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2125,6 +2125,8 @@ DESCR("less-equal-greater");
21252125
DATA(insert OID = 3144 ( macaddr_notPGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 829 "829" _null_ _null_ _null_ _null_ _null_ macaddr_not _null_ _null_ _null_ ));
21262126
DATA(insert OID = 3145 ( macaddr_andPGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 829 "829 829" _null_ _null_ _null_ _null_ _null_ macaddr_and _null_ _null_ _null_ ));
21272127
DATA(insert OID = 3146 ( macaddr_orPGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 829 "829 829" _null_ _null_ _null_ _null_ _null_ macaddr_or _null_ _null_ _null_ ));
2128+
DATA(insert OID = 3359 ( macaddr_sortsupport PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 2278 "2281" _null_ _null_ _null_ _null_ _null_ macaddr_sortsupport _null_ _null_ _null_ ));
2129+
DESCR("sort support");
21282130

21292131
/* for macaddr8 type support */
21302132
DATA(insert OID = 4110 ( macaddr8_inPGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 774 "2275" _null_ _null_ _null_ _null_ _null_ macaddr8_in _null_ _null_ _null_ ));

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp