@@ -1404,16 +1404,16 @@ pgstat_ping(void)
14041404 * pgstat_send_inquiry() -
14051405 *
14061406 *Notify collector that we need fresh data.
1407- *ts specifies the minimum acceptable timestamp for the stats file.
14081407 * ----------
14091408 */
14101409static void
1411- pgstat_send_inquiry (TimestampTz ts )
1410+ pgstat_send_inquiry (TimestampTz clock_time , TimestampTz cutoff_time )
14121411{
14131412PgStat_MsgInquiry msg ;
14141413
14151414pgstat_setheader (& msg .m_hdr ,PGSTAT_MTYPE_INQUIRY );
1416- msg .inquiry_time = ts ;
1415+ msg .clock_time = clock_time ;
1416+ msg .cutoff_time = cutoff_time ;
14171417pgstat_send (& msg ,sizeof (msg ));
14181418}
14191419
@@ -3633,7 +3633,7 @@ pgstat_read_statsfile(Oid onlydb, bool permanent)
36333633
36343634/*
36353635 * Set the current timestamp (will be kept only in case we can't load an
3636- * existing statsfile.
3636+ * existing statsfile).
36373637 */
36383638globalStats .stat_reset_timestamp = GetCurrentTimestamp ();
36393639
@@ -3922,53 +3922,98 @@ pgstat_read_statsfile_timestamp(bool permanent, TimestampTz *ts)
39223922static void
39233923backend_read_statsfile (void )
39243924{
3925- TimestampTz cur_ts ;
3926- TimestampTz min_ts ;
3925+ TimestampTz min_ts = 0 ;
3926+ TimestampTz ref_ts = 0 ;
39273927int count ;
39283928
39293929/* already read it? */
39303930if (pgStatDBHash )
39313931return ;
39323932Assert (!pgStatRunningInCollector );
39333933
3934- /*
3935- * We set the minimum acceptable timestamp to PGSTAT_STAT_INTERVAL msec
3936- * before now.This indirectly ensures that the collector needn't write
3937- * the file more often than PGSTAT_STAT_INTERVAL. In an autovacuum
3938- * worker, however, we want a lower delay to avoid using stale data, so we
3939- * use PGSTAT_RETRY_DELAY (since the number of worker is low, this
3940- * shouldn't be a problem).
3941- *
3942- * Note that we don't recompute min_ts after sleeping; so we might end up
3943- * accepting a file a bit older than PGSTAT_STAT_INTERVAL.In practice
3944- * that shouldn't happen, though, as long as the sleep time is less than
3945- * PGSTAT_STAT_INTERVAL; and we don't want to lie to the collector about
3946- * what our cutoff time really is.
3947- */
3948- cur_ts = GetCurrentTimestamp ();
3949- if (IsAutoVacuumWorkerProcess ())
3950- min_ts = TimestampTzPlusMilliseconds (cur_ts ,- PGSTAT_RETRY_DELAY );
3951- else
3952- min_ts = TimestampTzPlusMilliseconds (cur_ts ,- PGSTAT_STAT_INTERVAL );
3953-
39543934/*
39553935 * Loop until fresh enough stats file is available or we ran out of time.
39563936 * The stats inquiry message is sent repeatedly in case collector drops
39573937 * it; but not every single time, as that just swamps the collector.
39583938 */
39593939for (count = 0 ;count < PGSTAT_POLL_LOOP_COUNT ;count ++ )
39603940{
3941+ bool ok ;
39613942TimestampTz file_ts = 0 ;
3943+ TimestampTz cur_ts ;
39623944
39633945CHECK_FOR_INTERRUPTS ();
39643946
3965- if (pgstat_read_statsfile_timestamp (false,& file_ts )&&
3966- file_ts >=min_ts )
3947+ ok = pgstat_read_statsfile_timestamp (false,& file_ts );
3948+
3949+ cur_ts = GetCurrentTimestamp ();
3950+ /* Calculate min acceptable timestamp, if we didn't already */
3951+ if (count == 0 || cur_ts < ref_ts )
3952+ {
3953+ /*
3954+ * We set the minimum acceptable timestamp to PGSTAT_STAT_INTERVAL
3955+ * msec before now. This indirectly ensures that the collector
3956+ * needn't write the file more often than PGSTAT_STAT_INTERVAL.
3957+ * In an autovacuum worker, however, we want a lower delay to
3958+ * avoid using stale data, so we use PGSTAT_RETRY_DELAY (since the
3959+ * number of workers is low, this shouldn't be a problem).
3960+ *
3961+ * We don't recompute min_ts after sleeping, except in the
3962+ * unlikely case that cur_ts went backwards. So we might end up
3963+ * accepting a file a bit older than PGSTAT_STAT_INTERVAL. In
3964+ * practice that shouldn't happen, though, as long as the sleep
3965+ * time is less than PGSTAT_STAT_INTERVAL; and we don't want to
3966+ * tell the collector that our cutoff time is less than what we'd
3967+ * actually accept.
3968+ */
3969+ ref_ts = cur_ts ;
3970+ if (IsAutoVacuumWorkerProcess ())
3971+ min_ts = TimestampTzPlusMilliseconds (ref_ts ,
3972+ - PGSTAT_RETRY_DELAY );
3973+ else
3974+ min_ts = TimestampTzPlusMilliseconds (ref_ts ,
3975+ - PGSTAT_STAT_INTERVAL );
3976+ }
3977+
3978+ /*
3979+ * If the file timestamp is actually newer than cur_ts, we must have
3980+ * had a clock glitch (system time went backwards) or there is clock
3981+ * skew between our processor and the stats collector's processor.
3982+ * Accept the file, but send an inquiry message anyway to make
3983+ * pgstat_recv_inquiry do a sanity check on the collector's time.
3984+ */
3985+ if (ok && file_ts > cur_ts )
3986+ {
3987+ /*
3988+ * A small amount of clock skew between processors isn't terribly
3989+ * surprising, but a large difference is worth logging. We
3990+ * arbitrarily define "large" as 1000 msec.
3991+ */
3992+ if (file_ts >=TimestampTzPlusMilliseconds (cur_ts ,1000 ))
3993+ {
3994+ char * filetime ;
3995+ char * mytime ;
3996+
3997+ /* Copy because timestamptz_to_str returns a static buffer */
3998+ filetime = pstrdup (timestamptz_to_str (file_ts ));
3999+ mytime = pstrdup (timestamptz_to_str (cur_ts ));
4000+ elog (LOG ,"stats collector's time %s is later than backend local time %s" ,
4001+ filetime ,mytime );
4002+ pfree (filetime );
4003+ pfree (mytime );
4004+ }
4005+
4006+ pgstat_send_inquiry (cur_ts ,min_ts );
4007+ break ;
4008+ }
4009+
4010+ /* Normal acceptance case: file is not older than cutoff time */
4011+ if (ok && file_ts >=min_ts )
39674012break ;
39684013
39694014/* Not there or too old, so kick the collector and wait a bit */
39704015if ((count %PGSTAT_INQ_LOOP_COUNT )== 0 )
3971- pgstat_send_inquiry (min_ts );
4016+ pgstat_send_inquiry (cur_ts , min_ts );
39724017
39734018pg_usleep (PGSTAT_RETRY_DELAY * 1000L );
39744019}
@@ -4036,8 +4081,46 @@ pgstat_clear_snapshot(void)
40364081static void
40374082pgstat_recv_inquiry (PgStat_MsgInquiry * msg ,int len )
40384083{
4039- if (msg -> inquiry_time > last_statrequest )
4040- last_statrequest = msg -> inquiry_time ;
4084+ /*
4085+ * Advance last_statrequest if this requestor has a newer cutoff time
4086+ * than any previous request.
4087+ */
4088+ if (msg -> cutoff_time > last_statrequest )
4089+ last_statrequest = msg -> cutoff_time ;
4090+
4091+ /*
4092+ * If the requestor's local clock time is older than last_statwrite, we
4093+ * should suspect a clock glitch, i.e. system time going backwards; though
4094+ * the more likely explanation is just delayed message receipt. It is
4095+ * worth expending a GetCurrentTimestamp call to be sure, since a large
4096+ * retreat in the system clock reading could otherwise cause us to neglect
4097+ * to update the stats file for a long time.
4098+ */
4099+ if (msg -> clock_time < last_statwrite )
4100+ {
4101+ TimestampTz cur_ts = GetCurrentTimestamp ();
4102+
4103+ if (cur_ts < last_statwrite )
4104+ {
4105+ /*
4106+ * Sure enough, time went backwards. Force a new stats file write
4107+ * to get back in sync; but first, log a complaint.
4108+ */
4109+ char * writetime ;
4110+ char * mytime ;
4111+
4112+ /* Copy because timestamptz_to_str returns a static buffer */
4113+ writetime = pstrdup (timestamptz_to_str (last_statwrite ));
4114+ mytime = pstrdup (timestamptz_to_str (cur_ts ));
4115+ elog (LOG ,"last_statwrite %s is later than collector's time %s" ,
4116+ writetime ,mytime );
4117+ pfree (writetime );
4118+ pfree (mytime );
4119+
4120+ last_statrequest = cur_ts ;
4121+ last_statwrite = last_statrequest - 1 ;
4122+ }
4123+ }
40414124}
40424125
40434126