@@ -2,14 +2,17 @@ package prebuilds
22
33import (
44"context"
5+ "fmt"
6+ "sync/atomic"
57"time"
68
7- "cdr.dev/slog"
8-
99"github.com/prometheus/client_golang/prometheus"
10+ "golang.org/x/xerrors"
11+
12+ "cdr.dev/slog"
1013
1114"github.com/coder/coder/v2/coderd/database"
12- "github.com/coder/coder/v2/coderd/database/dbauthz "
15+ "github.com/coder/coder/v2/coderd/database/dbtime "
1316"github.com/coder/coder/v2/coderd/prebuilds"
1417)
1518
@@ -55,20 +58,34 @@ var (
5558labels ,
5659nil ,
5760)
61+ lastUpdateDesc = prometheus .NewDesc (
62+ "coderd_prebuilt_workspaces_metrics_last_updated" ,
63+ "The unix timestamp when the metrics related to prebuilt workspaces were last updated; these metrics are cached." ,
64+ []string {},
65+ nil ,
66+ )
67+ )
68+
69+ const (
70+ metricsUpdateInterval = time .Second * 15
71+ metricsUpdateTimeout = time .Second * 10
5872)
5973
6074type MetricsCollector struct {
6175database database.Store
6276logger slog.Logger
6377snapshotter prebuilds.StateSnapshotter
78+
79+ latestState atomic.Pointer [metricsState ]
6480}
6581
6682var _ prometheus.Collector = new (MetricsCollector )
6783
6884func NewMetricsCollector (db database.Store ,logger slog.Logger ,snapshotter prebuilds.StateSnapshotter )* MetricsCollector {
85+ log := logger .Named ("prebuilds_metrics_collector" )
6986return & MetricsCollector {
7087database :db ,
71- logger :logger . Named ( "prebuilds_metrics_collector" ) ,
88+ logger :log ,
7289snapshotter :snapshotter ,
7390}
7491}
@@ -80,38 +97,34 @@ func (*MetricsCollector) Describe(descCh chan<- *prometheus.Desc) {
8097descCh <- desiredPrebuildsDesc
8198descCh <- runningPrebuildsDesc
8299descCh <- eligiblePrebuildsDesc
100+ descCh <- lastUpdateDesc
83101}
84102
103+ // Collect uses the cached state to set configured metrics.
104+ // The state is cached because this function can be called multiple times per second and retrieving the current state
105+ // is an expensive operation.
85106func (mc * MetricsCollector )Collect (metricsCh chan <- prometheus.Metric ) {
86- // nolint:gocritic // We need to set an authz context to read metrics from the db.
87- ctx ,cancel := context .WithTimeout (dbauthz .AsPrebuildsOrchestrator (context .Background ()),10 * time .Second )
88- defer cancel ()
89- prebuildMetrics ,err := mc .database .GetPrebuildMetrics (ctx )
90- if err != nil {
91- mc .logger .Error (ctx ,"failed to get prebuild metrics" ,slog .Error (err ))
107+ currentState := mc .latestState .Load ()// Grab a copy; it's ok if it goes stale during the course of this func.
108+ if currentState == nil {
109+ mc .logger .Warn (context .Background (),"failed to set prebuilds metrics; state not set" )
110+ metricsCh <- prometheus .MustNewConstMetric (lastUpdateDesc ,prometheus .GaugeValue ,0 )
92111return
93112}
94113
95- for _ ,metric := range prebuildMetrics {
114+ for _ ,metric := range currentState . prebuildMetrics {
96115metricsCh <- prometheus .MustNewConstMetric (createdPrebuildsDesc ,prometheus .CounterValue ,float64 (metric .CreatedCount ),metric .TemplateName ,metric .PresetName ,metric .OrganizationName )
97116metricsCh <- prometheus .MustNewConstMetric (failedPrebuildsDesc ,prometheus .CounterValue ,float64 (metric .FailedCount ),metric .TemplateName ,metric .PresetName ,metric .OrganizationName )
98117metricsCh <- prometheus .MustNewConstMetric (claimedPrebuildsDesc ,prometheus .CounterValue ,float64 (metric .ClaimedCount ),metric .TemplateName ,metric .PresetName ,metric .OrganizationName )
99118}
100119
101- snapshot ,err := mc .snapshotter .SnapshotState (ctx ,mc .database )
102- if err != nil {
103- mc .logger .Error (ctx ,"failed to get latest prebuild state" ,slog .Error (err ))
104- return
105- }
106-
107- for _ ,preset := range snapshot .Presets {
120+ for _ ,preset := range currentState .snapshot .Presets {
108121if ! preset .UsingActiveVersion {
109122continue
110123}
111124
112- presetSnapshot ,err := snapshot .FilterByPreset (preset .ID )
125+ presetSnapshot ,err := currentState . snapshot .FilterByPreset (preset .ID )
113126if err != nil {
114- mc .logger .Error (ctx ,"failed to filter by preset" ,slog .Error (err ))
127+ mc .logger .Error (context . Background () ,"failed to filter by preset" ,slog .Error (err ))
115128continue
116129}
117130state := presetSnapshot .CalculateState ()
@@ -120,4 +133,57 @@ func (mc *MetricsCollector) Collect(metricsCh chan<- prometheus.Metric) {
120133metricsCh <- prometheus .MustNewConstMetric (runningPrebuildsDesc ,prometheus .GaugeValue ,float64 (state .Actual ),preset .TemplateName ,preset .Name ,preset .OrganizationName )
121134metricsCh <- prometheus .MustNewConstMetric (eligiblePrebuildsDesc ,prometheus .GaugeValue ,float64 (state .Eligible ),preset .TemplateName ,preset .Name ,preset .OrganizationName )
122135}
136+
137+ metricsCh <- prometheus .MustNewConstMetric (lastUpdateDesc ,prometheus .GaugeValue ,float64 (currentState .createdAt .Unix ()))
138+ }
139+
140+ type metricsState struct {
141+ prebuildMetrics []database.GetPrebuildMetricsRow
142+ snapshot * prebuilds.GlobalSnapshot
143+ createdAt time.Time
144+ }
145+
146+ // BackgroundFetch updates the metrics state every given interval.
147+ func (mc * MetricsCollector )BackgroundFetch (ctx context.Context ,updateInterval ,updateTimeout time.Duration ) {
148+ tick := time .NewTicker (time .Nanosecond )
149+ defer tick .Stop ()
150+
151+ for {
152+ select {
153+ case <- ctx .Done ():
154+ return
155+ case <- tick .C :
156+ // Tick immediately, then set regular interval.
157+ tick .Reset (updateInterval )
158+
159+ if err := mc .UpdateState (ctx ,updateTimeout );err != nil {
160+ mc .logger .Error (ctx ,"failed to update prebuilds metrics state" ,slog .Error (err ))
161+ }
162+ }
163+ }
164+ }
165+
166+ // UpdateState builds the current metrics state.
167+ func (mc * MetricsCollector )UpdateState (ctx context.Context ,timeout time.Duration )error {
168+ start := time .Now ()
169+ fetchCtx ,fetchCancel := context .WithTimeout (ctx ,timeout )
170+ defer fetchCancel ()
171+
172+ prebuildMetrics ,err := mc .database .GetPrebuildMetrics (fetchCtx )
173+ if err != nil {
174+ return xerrors .Errorf ("fetch prebuild metrics: %w" ,err )
175+ }
176+
177+ snapshot ,err := mc .snapshotter .SnapshotState (fetchCtx ,mc .database )
178+ if err != nil {
179+ return xerrors .Errorf ("snapshot state: %w" ,err )
180+ }
181+ mc .logger .Debug (ctx ,"fetched prebuilds metrics state" ,slog .F ("duration_secs" ,fmt .Sprintf ("%.2f" ,time .Since (start ).Seconds ())))
182+
183+ mc .latestState .Store (& metricsState {
184+ prebuildMetrics :prebuildMetrics ,
185+ snapshot :snapshot ,
186+ createdAt :dbtime .Now (),
187+ })
188+ return nil
123189}