Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commitb2a1de9

Browse files
authored
feat: fetch prebuilds metrics state in background (#17792)
`Collect()` is called whenever the `/metrics` endpoint is hit toretrieve metrics.The queries used in prebuilds metrics collection are quite heavy, and wewant to avoid having them running concurrently / too often to keep dbload down.Here I'm moving towards a background retrieval of the state required toset the metrics, which gets invalidated every interval.Also introduces `coderd_prebuilt_workspaces_metrics_last_updated` whichoperators can use to determine when these metrics go stale.See#17789 as well.---------Signed-off-by: Danny Kopping <dannykopping@gmail.com>
1 parent709445e commitb2a1de9

File tree

3 files changed

+109
-24
lines changed

3 files changed

+109
-24
lines changed

‎enterprise/coderd/prebuilds/metricscollector.go

Lines changed: 86 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,17 @@ package prebuilds
22

33
import (
44
"context"
5+
"fmt"
6+
"sync/atomic"
57
"time"
68

7-
"cdr.dev/slog"
8-
99
"github.com/prometheus/client_golang/prometheus"
10+
"golang.org/x/xerrors"
11+
12+
"cdr.dev/slog"
1013

1114
"github.com/coder/coder/v2/coderd/database"
12-
"github.com/coder/coder/v2/coderd/database/dbauthz"
15+
"github.com/coder/coder/v2/coderd/database/dbtime"
1316
"github.com/coder/coder/v2/coderd/prebuilds"
1417
)
1518

@@ -55,20 +58,34 @@ var (
5558
labels,
5659
nil,
5760
)
61+
lastUpdateDesc=prometheus.NewDesc(
62+
"coderd_prebuilt_workspaces_metrics_last_updated",
63+
"The unix timestamp when the metrics related to prebuilt workspaces were last updated; these metrics are cached.",
64+
[]string{},
65+
nil,
66+
)
67+
)
68+
69+
const (
70+
metricsUpdateInterval=time.Second*15
71+
metricsUpdateTimeout=time.Second*10
5872
)
5973

6074
typeMetricsCollectorstruct {
6175
database database.Store
6276
logger slog.Logger
6377
snapshotter prebuilds.StateSnapshotter
78+
79+
latestState atomic.Pointer[metricsState]
6480
}
6581

6682
var_ prometheus.Collector=new(MetricsCollector)
6783

6884
funcNewMetricsCollector(db database.Store,logger slog.Logger,snapshotter prebuilds.StateSnapshotter)*MetricsCollector {
85+
log:=logger.Named("prebuilds_metrics_collector")
6986
return&MetricsCollector{
7087
database:db,
71-
logger:logger.Named("prebuilds_metrics_collector"),
88+
logger:log,
7289
snapshotter:snapshotter,
7390
}
7491
}
@@ -80,38 +97,34 @@ func (*MetricsCollector) Describe(descCh chan<- *prometheus.Desc) {
8097
descCh<-desiredPrebuildsDesc
8198
descCh<-runningPrebuildsDesc
8299
descCh<-eligiblePrebuildsDesc
100+
descCh<-lastUpdateDesc
83101
}
84102

103+
// Collect uses the cached state to set configured metrics.
104+
// The state is cached because this function can be called multiple times per second and retrieving the current state
105+
// is an expensive operation.
85106
func (mc*MetricsCollector)Collect(metricsChchan<- prometheus.Metric) {
86-
// nolint:gocritic // We need to set an authz context to read metrics from the db.
87-
ctx,cancel:=context.WithTimeout(dbauthz.AsPrebuildsOrchestrator(context.Background()),10*time.Second)
88-
defercancel()
89-
prebuildMetrics,err:=mc.database.GetPrebuildMetrics(ctx)
90-
iferr!=nil {
91-
mc.logger.Error(ctx,"failed to get prebuild metrics",slog.Error(err))
107+
currentState:=mc.latestState.Load()// Grab a copy; it's ok if it goes stale during the course of this func.
108+
ifcurrentState==nil {
109+
mc.logger.Warn(context.Background(),"failed to set prebuilds metrics; state not set")
110+
metricsCh<-prometheus.MustNewConstMetric(lastUpdateDesc,prometheus.GaugeValue,0)
92111
return
93112
}
94113

95-
for_,metric:=rangeprebuildMetrics {
114+
for_,metric:=rangecurrentState.prebuildMetrics {
96115
metricsCh<-prometheus.MustNewConstMetric(createdPrebuildsDesc,prometheus.CounterValue,float64(metric.CreatedCount),metric.TemplateName,metric.PresetName,metric.OrganizationName)
97116
metricsCh<-prometheus.MustNewConstMetric(failedPrebuildsDesc,prometheus.CounterValue,float64(metric.FailedCount),metric.TemplateName,metric.PresetName,metric.OrganizationName)
98117
metricsCh<-prometheus.MustNewConstMetric(claimedPrebuildsDesc,prometheus.CounterValue,float64(metric.ClaimedCount),metric.TemplateName,metric.PresetName,metric.OrganizationName)
99118
}
100119

101-
snapshot,err:=mc.snapshotter.SnapshotState(ctx,mc.database)
102-
iferr!=nil {
103-
mc.logger.Error(ctx,"failed to get latest prebuild state",slog.Error(err))
104-
return
105-
}
106-
107-
for_,preset:=rangesnapshot.Presets {
120+
for_,preset:=rangecurrentState.snapshot.Presets {
108121
if!preset.UsingActiveVersion {
109122
continue
110123
}
111124

112-
presetSnapshot,err:=snapshot.FilterByPreset(preset.ID)
125+
presetSnapshot,err:=currentState.snapshot.FilterByPreset(preset.ID)
113126
iferr!=nil {
114-
mc.logger.Error(ctx,"failed to filter by preset",slog.Error(err))
127+
mc.logger.Error(context.Background(),"failed to filter by preset",slog.Error(err))
115128
continue
116129
}
117130
state:=presetSnapshot.CalculateState()
@@ -120,4 +133,57 @@ func (mc *MetricsCollector) Collect(metricsCh chan<- prometheus.Metric) {
120133
metricsCh<-prometheus.MustNewConstMetric(runningPrebuildsDesc,prometheus.GaugeValue,float64(state.Actual),preset.TemplateName,preset.Name,preset.OrganizationName)
121134
metricsCh<-prometheus.MustNewConstMetric(eligiblePrebuildsDesc,prometheus.GaugeValue,float64(state.Eligible),preset.TemplateName,preset.Name,preset.OrganizationName)
122135
}
136+
137+
metricsCh<-prometheus.MustNewConstMetric(lastUpdateDesc,prometheus.GaugeValue,float64(currentState.createdAt.Unix()))
138+
}
139+
140+
typemetricsStatestruct {
141+
prebuildMetrics []database.GetPrebuildMetricsRow
142+
snapshot*prebuilds.GlobalSnapshot
143+
createdAt time.Time
144+
}
145+
146+
// BackgroundFetch updates the metrics state every given interval.
147+
func (mc*MetricsCollector)BackgroundFetch(ctx context.Context,updateInterval,updateTimeout time.Duration) {
148+
tick:=time.NewTicker(time.Nanosecond)
149+
defertick.Stop()
150+
151+
for {
152+
select {
153+
case<-ctx.Done():
154+
return
155+
case<-tick.C:
156+
// Tick immediately, then set regular interval.
157+
tick.Reset(updateInterval)
158+
159+
iferr:=mc.UpdateState(ctx,updateTimeout);err!=nil {
160+
mc.logger.Error(ctx,"failed to update prebuilds metrics state",slog.Error(err))
161+
}
162+
}
163+
}
164+
}
165+
166+
// UpdateState builds the current metrics state.
167+
func (mc*MetricsCollector)UpdateState(ctx context.Context,timeout time.Duration)error {
168+
start:=time.Now()
169+
fetchCtx,fetchCancel:=context.WithTimeout(ctx,timeout)
170+
deferfetchCancel()
171+
172+
prebuildMetrics,err:=mc.database.GetPrebuildMetrics(fetchCtx)
173+
iferr!=nil {
174+
returnxerrors.Errorf("fetch prebuild metrics: %w",err)
175+
}
176+
177+
snapshot,err:=mc.snapshotter.SnapshotState(fetchCtx,mc.database)
178+
iferr!=nil {
179+
returnxerrors.Errorf("snapshot state: %w",err)
180+
}
181+
mc.logger.Debug(ctx,"fetched prebuilds metrics state",slog.F("duration_secs",fmt.Sprintf("%.2f",time.Since(start).Seconds())))
182+
183+
mc.latestState.Store(&metricsState{
184+
prebuildMetrics:prebuildMetrics,
185+
snapshot:snapshot,
186+
createdAt:dbtime.Now(),
187+
})
188+
returnnil
123189
}

‎enterprise/coderd/prebuilds/metricscollector_test.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ import (
1616
"github.com/coder/quartz"
1717

1818
"github.com/coder/coder/v2/coderd/database"
19+
"github.com/coder/coder/v2/coderd/database/dbauthz"
1920
"github.com/coder/coder/v2/coderd/database/dbgen"
2021
"github.com/coder/coder/v2/coderd/database/dbtestutil"
2122
agplprebuilds"github.com/coder/coder/v2/coderd/prebuilds"
@@ -248,6 +249,10 @@ func TestMetricsCollector(t *testing.T) {
248249
setupTestDBWorkspaceAgent(t,db,workspace.ID,eligible)
249250
}
250251

252+
// Force an update to the metrics state to allow the collector to collect fresh metrics.
253+
// nolint:gocritic // Authz context needed to retrieve state.
254+
require.NoError(t,collector.UpdateState(dbauthz.AsPrebuildsOrchestrator(ctx),testutil.WaitLong))
255+
251256
metricsFamilies,err:=registry.Gather()
252257
require.NoError(t,err)
253258

‎enterprise/coderd/prebuilds/reconcile.go

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import (
55
"database/sql"
66
"fmt"
77
"math"
8+
"sync"
89
"sync/atomic"
910
"time"
1011

@@ -67,10 +68,12 @@ func NewStoreReconciler(store database.Store,
6768
provisionNotifyCh:make(chan database.ProvisionerJob,10),
6869
}
6970

70-
reconciler.metrics=NewMetricsCollector(store,logger,reconciler)
71-
iferr:=registerer.Register(reconciler.metrics);err!=nil {
72-
// If the registerer fails to register the metrics collector, it's not fatal.
73-
logger.Error(context.Background(),"failed to register prometheus metrics",slog.Error(err))
71+
ifregisterer!=nil {
72+
reconciler.metrics=NewMetricsCollector(store,logger,reconciler)
73+
iferr:=registerer.Register(reconciler.metrics);err!=nil {
74+
// If the registerer fails to register the metrics collector, it's not fatal.
75+
logger.Error(context.Background(),"failed to register prometheus metrics",slog.Error(err))
76+
}
7477
}
7578

7679
returnreconciler
@@ -87,16 +90,27 @@ func (c *StoreReconciler) Run(ctx context.Context) {
8790
slog.F("backoff_interval",c.cfg.ReconciliationBackoffInterval.String()),
8891
slog.F("backoff_lookback",c.cfg.ReconciliationBackoffLookback.String()))
8992

93+
varwg sync.WaitGroup
9094
ticker:=c.clock.NewTicker(reconciliationInterval)
9195
deferticker.Stop()
9296
deferfunc() {
97+
wg.Wait()
9398
c.done<-struct{}{}
9499
}()
95100

96101
// nolint:gocritic // Reconciliation Loop needs Prebuilds Orchestrator permissions.
97102
ctx,cancel:=context.WithCancelCause(dbauthz.AsPrebuildsOrchestrator(ctx))
98103
c.cancelFn=cancel
99104

105+
// Start updating metrics in the background.
106+
ifc.metrics!=nil {
107+
wg.Add(1)
108+
gofunc() {
109+
deferwg.Done()
110+
c.metrics.BackgroundFetch(ctx,metricsUpdateInterval,metricsUpdateTimeout)
111+
}()
112+
}
113+
100114
// Everything is in place, reconciler can now be considered as running.
101115
//
102116
// NOTE: without this atomic bool, Stop might race with Run for the c.cancelFn above.

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp