1313
1414#include "postgres.h"
1515
16+ #include <time.h> /* for clock_gettime() */
17+
1618#include "common/hashfn.h"
1719#include "lib/hyperloglog.h"
1820#include "libpq/pqformat.h"
2325#include "utils/timestamp.h"
2426#include "utils/uuid.h"
2527
/* Nanosecond conversion factors */
#define NS_PER_S	INT64CONST(1000000000)
#define NS_PER_MS	INT64CONST(1000000)
#define NS_PER_US	INT64CONST(1000)

/*
 * Sub-millisecond precision settings for UUID version 7.
 *
 * UUIDv7 stores a 1/4096 (2^12) fraction of a millisecond in the 12 "rand_a"
 * bits; SUBMS_BITS is that field width.
 *
 * Not every platform delivers a nanosecond-precision real-time clock. macOS
 * truncates real time to microseconds, and MSVC builds use a ported
 * gettimeofday() that returns microsecond precision. Microseconds provide
 * only about 10 bits of sub-millisecond information, so on those platforms
 * SUBMS_MINIMAL_STEP_BITS is 10 rather than 12. The stored fraction is still
 * expressed in 1/4096 parts of a millisecond; the 2 low-order bits are filled
 * with random data instead (see generate_uuidv7()).
 *
 * SUBMS_MINIMAL_STEP_NS is the smallest number of nanoseconds by which a
 * timestamp must advance to guarantee that the sub-millisecond value stored
 * in the UUID increases.
 */
#define SUBMS_BITS	12

#if defined(__darwin__) || defined(_MSC_VER)
#define SUBMS_MINIMAL_STEP_BITS 10
#else
#define SUBMS_MINIMAL_STEP_BITS 12
#endif

#define SUBMS_MINIMAL_STEP_NS ((NS_PER_MS / (1 << SUBMS_MINIMAL_STEP_BITS)) + 1)
55+
2656/* sortsupport for uuid */
2757typedef struct
2858{
@@ -37,6 +67,8 @@ static intuuid_internal_cmp(const pg_uuid_t *arg1, const pg_uuid_t *arg2);
3767static int uuid_fast_cmp (Datum x ,Datum y ,SortSupport ssup );
3868static bool uuid_abbrev_abort (int memtupcount ,SortSupport ssup );
3969static Datum uuid_abbrev_convert (Datum original ,SortSupport ssup );
70+ static inline void uuid_set_version (pg_uuid_t * uuid ,unsignedchar version );
71+ static inline int64 get_real_time_ns_ascending ();
4072
4173Datum
4274uuid_in (PG_FUNCTION_ARGS )
@@ -401,6 +433,25 @@ uuid_hash_extended(PG_FUNCTION_ARGS)
401433return hash_any_extended (key -> data ,UUID_LEN ,PG_GETARG_INT64 (1 ));
402434}
403435
436+ /*
437+ * Set the given UUID version and the variant bits
438+ */
439+ static inline void
440+ uuid_set_version (pg_uuid_t * uuid ,unsignedchar version )
441+ {
442+ /* set version field, top four bits */
443+ uuid -> data [6 ]= (uuid -> data [6 ]& 0x0f ) | (version <<4 );
444+
445+ /* set variant field, top two bits are 1, 0 */
446+ uuid -> data [8 ]= (uuid -> data [8 ]& 0x3f ) |0x80 ;
447+ }
448+
449+ /*
450+ * Generate UUID version 4.
451+ *
452+ * All UUID bytes are filled with strong random numbers except version and
453+ * variant bits.
454+ */
404455Datum
405456gen_random_uuid (PG_FUNCTION_ARGS )
406457{
@@ -412,21 +463,183 @@ gen_random_uuid(PG_FUNCTION_ARGS)
412463errmsg ("could not generate random values" )));
413464
414465/*
415- * Set magic numbers for a "version 4" (pseudorandom) UUID, see
416- *http ://tools .ietf.org/html/rfc4122#section-4. 4
466+ * Set magic numbers for a "version 4" (pseudorandom) UUID and variant,
467+ *see https ://datatracker .ietf.org/doc/ html/rfc9562#name-uuid-version- 4
417468 */
418- uuid -> data [6 ]= (uuid -> data [6 ]& 0x0f ) |0x40 ;/* time_hi_and_version */
419- uuid -> data [8 ]= (uuid -> data [8 ]& 0x3f ) |0x80 ;/* clock_seq_hi_and_reserved */
469+ uuid_set_version (uuid ,4 );
420470
421471PG_RETURN_UUID_P (uuid );
422472}
423473
424- #define UUIDV1_EPOCH_JDATE 2299161/* == date2j(1582,10,15) */
474+ /*
475+ * Get the current timestamp with nanosecond precision for UUID generation.
476+ * The returned timestamp is ensured to be at least SUBMS_MINIMAL_STEP greater
477+ * than the previous returned timestamp (on this backend).
478+ */
479+ static inline int64
480+ get_real_time_ns_ascending ()
481+ {
482+ static int64 previous_ns = 0 ;
483+ int64 ns ;
484+
485+ /* Get the current real timestamp */
486+
487+ #ifdef _MSC_VER
488+ struct timeval tmp ;
489+
490+ gettimeofday (& tmp ,NULL );
491+ ns = tmp .tv_sec * NS_PER_S + tmp .tv_usec * NS_PER_US ;
492+ #else
493+ struct timespec tmp ;
494+
495+ /*
496+ * We don't use gettimeofday(), instead use clock_gettime() with
497+ * CLOCK_REALTIME where available in order to get a high-precision
498+ * (nanoseconds) real timestamp.
499+ *
500+ * Note while a timestamp returned by clock_gettime() with CLOCK_REALTIME
501+ * is nanosecond-precision on most Unix-like platforms, on some platforms
502+ * such as macOS it's restricted to microsecond-precision.
503+ */
504+ clock_gettime (CLOCK_REALTIME ,& tmp );
505+ ns = tmp .tv_sec * NS_PER_S + tmp .tv_nsec ;
506+ #endif
507+
508+ /* Guarantee the minimal step advancement of the timestamp */
509+ if (previous_ns + SUBMS_MINIMAL_STEP_NS >=ns )
510+ ns = previous_ns + SUBMS_MINIMAL_STEP_NS ;
511+ previous_ns = ns ;
512+
513+ return ns ;
514+ }
515+
516+ /*
517+ * Generate UUID version 7 per RFC 9562, with the given timestamp.
518+ *
519+ * UUID version 7 consists of a Unix timestamp in milliseconds (48 bits) and
520+ * 74 random bits, excluding the required version and variant bits. To ensure
521+ * monotonicity in scenarios of high-frequency UUID generation, we employ the
522+ * method "Replace Leftmost Random Bits with Increased Clock Precision (Method 3)",
523+ * described in the RFC. This method utilizes 12 bits from the "rand_a" bits
524+ * to store a 1/4096 (or 2^12) fraction of sub-millisecond precision.
525+ *
526+ * ns is a number of nanoseconds since start of the UNIX epoch. This value is
527+ * used for time-dependent bits of UUID.
528+ */
529+ static pg_uuid_t *
530+ generate_uuidv7 (int64 ns )
531+ {
532+ pg_uuid_t * uuid = palloc (UUID_LEN );
533+ int64 unix_ts_ms ;
534+ int32 increased_clock_precision ;
535+
536+ unix_ts_ms = ns /NS_PER_MS ;
537+
538+ /* Fill in time part */
539+ uuid -> data [0 ]= (unsignedchar ) (unix_ts_ms >>40 );
540+ uuid -> data [1 ]= (unsignedchar ) (unix_ts_ms >>32 );
541+ uuid -> data [2 ]= (unsignedchar ) (unix_ts_ms >>24 );
542+ uuid -> data [3 ]= (unsignedchar ) (unix_ts_ms >>16 );
543+ uuid -> data [4 ]= (unsignedchar ) (unix_ts_ms >>8 );
544+ uuid -> data [5 ]= (unsignedchar )unix_ts_ms ;
545+
546+ /*
547+ * sub-millisecond timestamp fraction (SUBMS_BITS bits, not
548+ * SUBMS_MINIMAL_STEP_BITS)
549+ */
550+ increased_clock_precision = ((ns %NS_PER_MS )* (1 <<SUBMS_BITS )) /NS_PER_MS ;
551+
552+ /* Fill the increased clock precision to "rand_a" bits */
553+ uuid -> data [6 ]= (unsignedchar ) (increased_clock_precision >>8 );
554+ uuid -> data [7 ]= (unsignedchar ) (increased_clock_precision );
555+
556+ /* fill everything after the increased clock precision with random bytes */
557+ if (!pg_strong_random (& uuid -> data [8 ],UUID_LEN - 8 ))
558+ ereport (ERROR ,
559+ (errcode (ERRCODE_INTERNAL_ERROR ),
560+ errmsg ("could not generate random values" )));
561+
562+ #if SUBMS_MINIMAL_STEP_BITS == 10
563+
564+ /*
565+ * On systems that have only 10 bits of sub-ms precision, 2 least
566+ * significant are dependent on other time-specific bits, and they do not
567+ * contribute to uniqueness. To make these bit random we mix in two bits
568+ * from CSPRNG. SUBMS_MINIMAL_STEP is chosen so that we still guarantee
569+ * monotonicity despite altering these bits.
570+ */
571+ uuid -> data [7 ]= uuid -> data [7 ] ^ (uuid -> data [8 ] >>6 );
572+ #endif
573+
574+ /*
575+ * Set magic numbers for a "version 7" (pseudorandom) UUID and variant,
576+ * see https://www.rfc-editor.org/rfc/rfc9562#name-version-field
577+ */
578+ uuid_set_version (uuid ,7 );
579+
580+ return uuid ;
581+ }
582+
583+ /*
584+ * Generate UUID version 7 with the current timestamp.
585+ */
586+ Datum
587+ uuidv7 (PG_FUNCTION_ARGS )
588+ {
589+ pg_uuid_t * uuid = generate_uuidv7 (get_real_time_ns_ascending ());
590+
591+ PG_RETURN_UUID_P (uuid );
592+ }
593+
594+ /*
595+ * Similar to uuidv7() but with the timestamp adjusted by the given interval.
596+ */
597+ Datum
598+ uuidv7_interval (PG_FUNCTION_ARGS )
599+ {
600+ Interval * shift = PG_GETARG_INTERVAL_P (0 );
601+ TimestampTz ts ;
602+ pg_uuid_t * uuid ;
603+ int64 ns = get_real_time_ns_ascending ();
604+
605+ /*
606+ * Shift the current timestamp by the given interval. To calculate time
607+ * shift correctly, we convert the UNIX epoch to TimestampTz and use
608+ * timestamptz_pl_interval(). Since this calculation is done with
609+ * microsecond precision, we carry nanoseconds from original ns value to
610+ * shifted ns value.
611+ */
612+
613+ ts = (TimestampTz ) (ns /NS_PER_US )-
614+ (POSTGRES_EPOCH_JDATE - UNIX_EPOCH_JDATE )* SECS_PER_DAY * USECS_PER_SEC ;
615+
616+ /* Compute time shift */
617+ ts = DatumGetTimestampTz (DirectFunctionCall2 (timestamptz_pl_interval ,
618+ TimestampTzGetDatum (ts ),
619+ IntervalPGetDatum (shift )));
620+
621+ /*
622+ * Convert a TimestampTz value back to an UNIX epoch and back nanoseconds.
623+ */
624+ ns = (ts + (POSTGRES_EPOCH_JDATE - UNIX_EPOCH_JDATE )* SECS_PER_DAY * USECS_PER_SEC )
625+ * NS_PER_US + ns %NS_PER_US ;
626+
627+ /* Generate an UUIDv7 */
628+ uuid = generate_uuidv7 (ns );
629+
630+ PG_RETURN_UUID_P (uuid );
631+ }
632+
633+ /*
634+ * Start of a Gregorian epoch == date2j(1582,10,15)
635+ * We cast it to 64-bit because it's used in overflow-prone computations
636+ */
637+ #define GREGORIAN_EPOCH_JDATE INT64CONST(2299161)
425638
426639/*
427640 * Extract timestamp from UUID.
428641 *
429- * Returns null if not RFC4122 variant or not a version that has a timestamp.
642+ * Returns null if not RFC9562 variant or not a version that has a timestamp.
430643 */
431644Datum
432645uuid_extract_timestamp (PG_FUNCTION_ARGS )
@@ -436,7 +649,7 @@ uuid_extract_timestamp(PG_FUNCTION_ARGS)
436649uint64 tms ;
437650TimestampTz ts ;
438651
439- /* check if RFC4122 variant */
652+ /* check if RFC9562 variant */
440653if ((uuid -> data [8 ]& 0xc0 )!= 0x80 )
441654PG_RETURN_NULL ();
442655
@@ -455,7 +668,22 @@ uuid_extract_timestamp(PG_FUNCTION_ARGS)
455668
456669/* convert 100-ns intervals to us, then adjust */
457670ts = (TimestampTz ) (tms /10 )-
458- ((uint64 )POSTGRES_EPOCH_JDATE - UUIDV1_EPOCH_JDATE )* SECS_PER_DAY * USECS_PER_SEC ;
671+ ((uint64 )POSTGRES_EPOCH_JDATE - GREGORIAN_EPOCH_JDATE )* SECS_PER_DAY * USECS_PER_SEC ;
672+ PG_RETURN_TIMESTAMPTZ (ts );
673+ }
674+
675+ if (version == 7 )
676+ {
677+ tms = (uuid -> data [5 ])
678+ + (((uint64 )uuid -> data [4 ]) <<8 )
679+ + (((uint64 )uuid -> data [3 ]) <<16 )
680+ + (((uint64 )uuid -> data [2 ]) <<24 )
681+ + (((uint64 )uuid -> data [1 ]) <<32 )
682+ + (((uint64 )uuid -> data [0 ]) <<40 );
683+
684+ /* convert ms to us, then adjust */
685+ ts = (TimestampTz ) (tms * NS_PER_US )-
686+ (POSTGRES_EPOCH_JDATE - UNIX_EPOCH_JDATE )* SECS_PER_DAY * USECS_PER_SEC ;
459687
460688PG_RETURN_TIMESTAMPTZ (ts );
461689}
@@ -467,15 +695,15 @@ uuid_extract_timestamp(PG_FUNCTION_ARGS)
467695/*
468696 * Extract UUID version.
469697 *
470- * Returns null if not RFC4122 variant.
698+ * Returns null if not RFC9562 variant.
471699 */
472700Datum
473701uuid_extract_version (PG_FUNCTION_ARGS )
474702{
475703pg_uuid_t * uuid = PG_GETARG_UUID_P (0 );
476704uint16 version ;
477705
478- /* check if RFC4122 variant */
706+ /* check if RFC9562 variant */
479707if ((uuid -> data [8 ]& 0xc0 )!= 0x80 )
480708PG_RETURN_NULL ();
481709