Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit f0b3617

Browse files
committed
feat: add prebuilds reconciliation duration metric
1 parent 0d765f5 commit f0b3617

File tree

5 files changed

+119
-31
lines changed

5 files changed

+119
-31
lines changed

‎coderd/prebuilds/api.go‎

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -37,13 +37,18 @@ type ReconciliationOrchestrator interface {
3737
TrackResourceReplacement(ctx context.Context,workspaceID,buildID uuid.UUID,replacements []*sdkproto.ResourceReplacement)
3838
}
3939

40+
// ReconcileStats contains statistics about a reconciliation cycle.
41+
typeReconcileStatsstruct {
42+
Elapsed time.Duration
43+
}
44+
4045
typeReconcilerinterface {
4146
StateSnapshotter
4247

4348
// ReconcileAll orchestrates the reconciliation of all prebuilds across all templates.
4449
// It takes a global snapshot of the system state and then reconciles each preset
4550
// in parallel, creating or deleting prebuilds as needed to reach their desired states.
46-
ReconcileAll(ctx context.Context)error
51+
ReconcileAll(ctx context.Context)(ReconcileStats,error)
4752
}
4853

4954
// StateSnapshotter defines the operations necessary to capture workspace prebuilds state.

‎coderd/prebuilds/noop.go‎

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,11 @@ func (NoopReconciler) Run(context.Context) {}
1717
func (NoopReconciler)Stop(context.Context,error) {}
1818
func (NoopReconciler)TrackResourceReplacement(context.Context, uuid.UUID, uuid.UUID, []*sdkproto.ResourceReplacement) {
1919
}
20-
func (NoopReconciler)ReconcileAll(context.Context)error {returnnil }
20+
21+
func (NoopReconciler)ReconcileAll(context.Context) (ReconcileStats,error) {
22+
returnReconcileStats{},nil
23+
}
24+
2125
func (NoopReconciler)SnapshotState(context.Context, database.Store) (*GlobalSnapshot,error) {
2226
return&GlobalSnapshot{},nil
2327
}

‎enterprise/coderd/prebuilds/metricscollector_test.go‎

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -485,7 +485,7 @@ func TestMetricsCollector_ReconciliationPausedMetric(t *testing.T) {
485485
require.NoError(t,err)
486486

487487
// Run reconciliation to update the metric
488-
err=reconciler.ReconcileAll(ctx)
488+
_,err=reconciler.ReconcileAll(ctx)
489489
require.NoError(t,err)
490490

491491
// Check that the metric shows reconciliation is not paused
@@ -514,7 +514,7 @@ func TestMetricsCollector_ReconciliationPausedMetric(t *testing.T) {
514514
require.NoError(t,err)
515515

516516
// Run reconciliation to update the metric
517-
err=reconciler.ReconcileAll(ctx)
517+
_,err=reconciler.ReconcileAll(ctx)
518518
require.NoError(t,err)
519519

520520
// Check that the metric shows reconciliation is paused
@@ -543,7 +543,7 @@ func TestMetricsCollector_ReconciliationPausedMetric(t *testing.T) {
543543
require.NoError(t,err)
544544

545545
// Run reconciliation to update the metric
546-
err=reconciler.ReconcileAll(ctx)
546+
_,err=reconciler.ReconcileAll(ctx)
547547
require.NoError(t,err)
548548

549549
// Check that the metric shows reconciliation is not paused

‎enterprise/coderd/prebuilds/reconcile.go‎

Lines changed: 30 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -15,6 +15,7 @@ import (
1515
"github.com/google/uuid"
1616
"github.com/hashicorp/go-multierror"
1717
"github.com/prometheus/client_golang/prometheus"
18+
"github.com/prometheus/client_golang/prometheus/promauto"
1819
"golang.org/x/sync/errgroup"
1920
"golang.org/x/xerrors"
2021

@@ -44,7 +45,6 @@ type StoreReconciler struct {
4445
logger slog.Logger
4546
clock quartz.Clock
4647
registerer prometheus.Registerer
47-
metrics*MetricsCollector
4848
notifEnq notifications.Enqueuer
4949
buildUsageChecker*atomic.Pointer[wsbuilder.UsageChecker]
5050

@@ -53,6 +53,11 @@ type StoreReconciler struct {
5353
stopped atomic.Bool
5454
donechanstruct{}
5555
provisionNotifyChchan database.ProvisionerJob
56+
57+
// Prebuild state metrics
58+
metrics*MetricsCollector
59+
// Operational metrics
60+
reconciliationDuration prometheus.Histogram
5661
}
5762

5863
var_ prebuilds.ReconciliationOrchestrator=&StoreReconciler{}
@@ -87,6 +92,15 @@ func NewStoreReconciler(store database.Store,
8792
// If the registerer fails to register the metrics collector, it's not fatal.
8893
logger.Error(context.Background(),"failed to register prometheus metrics",slog.Error(err))
8994
}
95+
96+
factory:=promauto.With(registerer)
97+
reconciler.reconciliationDuration=factory.NewHistogram(prometheus.HistogramOpts{
98+
Namespace:"coderd",
99+
Subsystem:"prebuilds",
100+
Name:"reconciliation_duration_seconds",
101+
Help:"Duration of each prebuilds reconciliation cycle.",
102+
Buckets:prometheus.DefBuckets,
103+
})
90104
}
91105

92106
returnreconciler
@@ -158,10 +172,15 @@ func (c *StoreReconciler) Run(ctx context.Context) {
158172
// instead of waiting for the next reconciliation interval
159173
case<-ticker.C:
160174
// Trigger a new iteration on each tick.
161-
err:=c.ReconcileAll(ctx)
175+
stats,err:=c.ReconcileAll(ctx)
162176
iferr!=nil {
163177
c.logger.Error(context.Background(),"reconciliation failed",slog.Error(err))
164178
}
179+
180+
ifc.reconciliationDuration!=nil {
181+
c.reconciliationDuration.Observe(stats.Elapsed.Seconds())
182+
}
183+
c.logger.Debug(ctx,"reconciliation stats",slog.F("elapsed",stats.Elapsed))
165184
case<-ctx.Done():
166185
// nolint:gocritic // it's okay to use slog.F() for an error in this case
167186
// because we want to differentiate two different types of errors: ctx.Err() and context.Cause()
@@ -245,19 +264,24 @@ func (c *StoreReconciler) Stop(ctx context.Context, cause error) {
245264
// be reconciled again, leading to another workspace being provisioned. Two workspace builds will be occurring
246265
// simultaneously for the same preset, but once both jobs have completed the reconciliation loop will notice the
247266
// extraneous instance and delete it.
248-
func (c*StoreReconciler)ReconcileAll(ctx context.Context)error {
267+
func (c*StoreReconciler)ReconcileAll(ctx context.Context) (stats prebuilds.ReconcileStats,errerror) {
268+
start:=c.clock.Now()
269+
deferfunc() {
270+
stats.Elapsed=c.clock.Since(start)
271+
}()
272+
249273
logger:=c.logger.With(slog.F("reconcile_context","all"))
250274

251275
select {
252276
case<-ctx.Done():
253277
logger.Warn(context.Background(),"reconcile exiting prematurely; context done",slog.Error(ctx.Err()))
254-
returnnil
278+
returnstats,nil
255279
default:
256280
}
257281

258282
logger.Debug(ctx,"starting reconciliation")
259283

260-
err:=c.WithReconciliationLock(ctx,logger,func(ctx context.Context,_ database.Store)error {
284+
err=c.WithReconciliationLock(ctx,logger,func(ctx context.Context,_ database.Store)error {
261285
// Check if prebuilds reconciliation is paused
262286
settingsJSON,err:=c.store.GetPrebuildsSettings(ctx)
263287
iferr!=nil {
@@ -330,7 +354,7 @@ func (c *StoreReconciler) ReconcileAll(ctx context.Context) error {
330354
logger.Error(ctx,"failed to reconcile",slog.Error(err))
331355
}
332356

333-
returnerr
357+
returnstats,err
334358
}
335359

336360
func (c*StoreReconciler)reportHardLimitedPresets(snapshot*prebuilds.GlobalSnapshot) {

‎enterprise/coderd/prebuilds/reconcile_test.go‎

Lines changed: 75 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -72,7 +72,8 @@ func TestNoReconciliationActionsIfNoPresets(t *testing.T) {
7272
require.Equal(t,templateVersion,gotTemplateVersion)
7373

7474
// when we trigger the reconciliation loop for all templates
75-
require.NoError(t,controller.ReconcileAll(ctx))
75+
_,err=controller.ReconcileAll(ctx)
76+
require.NoError(t,err)
7677

7778
// then no reconciliation actions are taken
7879
// because without presets, there are no prebuilds
@@ -126,7 +127,8 @@ func TestNoReconciliationActionsIfNoPrebuilds(t *testing.T) {
126127
require.NotEmpty(t,presetParameters)
127128

128129
// when we trigger the reconciliation loop for all templates
129-
require.NoError(t,controller.ReconcileAll(ctx))
130+
_,err=controller.ReconcileAll(ctx)
131+
require.NoError(t,err)
130132

131133
// then no reconciliation actions are taken
132134
// because without prebuilds, there is nothing to reconcile
@@ -425,7 +427,8 @@ func (tc testCase) run(t *testing.T) {
425427
// Run the reconciliation multiple times to ensure idempotency
426428
// 8 was arbitrary, but large enough to reasonably trust the result
427429
fori:=1;i<=8;i++ {
428-
require.NoErrorf(t,controller.ReconcileAll(ctx),"failed on iteration %d",i)
430+
_,err:=controller.ReconcileAll(ctx)
431+
require.NoErrorf(t,err,"failed on iteration %d",i)
429432

430433
iftc.shouldCreateNewPrebuild!=nil {
431434
newPrebuildCount:=0
@@ -539,7 +542,8 @@ func TestMultiplePresetsPerTemplateVersion(t *testing.T) {
539542
// Run the reconciliation multiple times to ensure idempotency
540543
// 8 was arbitrary, but large enough to reasonably trust the result
541544
fori:=1;i<=8;i++ {
542-
require.NoErrorf(t,controller.ReconcileAll(ctx),"failed on iteration %d",i)
545+
_,err:=controller.ReconcileAll(ctx)
546+
require.NoErrorf(t,err,"failed on iteration %d",i)
543547

544548
newPrebuildCount:=0
545549
workspaces,err:=db.GetWorkspacesByTemplateID(ctx,template.ID)
@@ -665,7 +669,7 @@ func TestPrebuildScheduling(t *testing.T) {
665669
DesiredInstances:5,
666670
})
667671

668-
err:=controller.ReconcileAll(ctx)
672+
_,err:=controller.ReconcileAll(ctx)
669673
require.NoError(t,err)
670674

671675
// get workspace builds
@@ -748,7 +752,8 @@ func TestInvalidPreset(t *testing.T) {
748752
// Run the reconciliation multiple times to ensure idempotency
749753
// 8 was arbitrary, but large enough to reasonably trust the result
750754
fori:=1;i<=8;i++ {
751-
require.NoErrorf(t,controller.ReconcileAll(ctx),"failed on iteration %d",i)
755+
_,err:=controller.ReconcileAll(ctx)
756+
require.NoErrorf(t,err,"failed on iteration %d",i)
752757

753758
workspaces,err:=db.GetWorkspacesByTemplateID(ctx,template.ID)
754759
require.NoError(t,err)
@@ -814,7 +819,8 @@ func TestDeletionOfPrebuiltWorkspaceWithInvalidPreset(t *testing.T) {
814819
})
815820

816821
// Old prebuilt workspace should be deleted.
817-
require.NoError(t,controller.ReconcileAll(ctx))
822+
_,err=controller.ReconcileAll(ctx)
823+
require.NoError(t,err)
818824

819825
builds,err:=db.GetWorkspaceBuildsByWorkspaceID(ctx, database.GetWorkspaceBuildsByWorkspaceIDParams{
820826
WorkspaceID:prebuiltWorkspace.ID,
@@ -913,12 +919,15 @@ func TestSkippingHardLimitedPresets(t *testing.T) {
913919

914920
// Trigger reconciliation to attempt creating a new prebuild.
915921
// The outcome depends on whether the hard limit has been reached.
916-
require.NoError(t,controller.ReconcileAll(ctx))
922+
_,err=controller.ReconcileAll(ctx)
923+
require.NoError(t,err)
917924

918925
// These two additional calls to ReconcileAll should not trigger any notifications.
919926
// A notification is only sent once.
920-
require.NoError(t,controller.ReconcileAll(ctx))
921-
require.NoError(t,controller.ReconcileAll(ctx))
927+
_,err=controller.ReconcileAll(ctx)
928+
require.NoError(t,err)
929+
_,err=controller.ReconcileAll(ctx)
930+
require.NoError(t,err)
922931

923932
// Verify the final state after reconciliation.
924933
workspaces,err=db.GetWorkspacesByTemplateID(ctx,template.ID)
@@ -1090,12 +1099,15 @@ func TestHardLimitedPresetShouldNotBlockDeletion(t *testing.T) {
10901099

10911100
// Trigger reconciliation to attempt creating a new prebuild.
10921101
// The outcome depends on whether the hard limit has been reached.
1093-
require.NoError(t,controller.ReconcileAll(ctx))
1102+
_,err=controller.ReconcileAll(ctx)
1103+
require.NoError(t,err)
10941104

10951105
// These two additional calls to ReconcileAll should not trigger any notifications.
10961106
// A notification is only sent once.
1097-
require.NoError(t,controller.ReconcileAll(ctx))
1098-
require.NoError(t,controller.ReconcileAll(ctx))
1107+
_,err=controller.ReconcileAll(ctx)
1108+
require.NoError(t,err)
1109+
_,err=controller.ReconcileAll(ctx)
1110+
require.NoError(t,err)
10991111

11001112
// Verify the final state after reconciliation.
11011113
// When hard limit is reached, no new workspace should be created.
@@ -1138,7 +1150,8 @@ func TestHardLimitedPresetShouldNotBlockDeletion(t *testing.T) {
11381150
}
11391151

11401152
// Trigger reconciliation to make sure that successful, but outdated prebuilt workspace will be deleted.
1141-
require.NoError(t,controller.ReconcileAll(ctx))
1153+
_,err=controller.ReconcileAll(ctx)
1154+
require.NoError(t,err)
11421155

11431156
workspaces,err=db.GetWorkspacesByTemplateID(ctx,template.ID)
11441157
require.NoError(t,err)
@@ -1737,7 +1750,8 @@ func TestExpiredPrebuildsMultipleActions(t *testing.T) {
17371750
}
17381751

17391752
// Trigger reconciliation to process expired prebuilds and enforce desired state.
1740-
require.NoError(t,controller.ReconcileAll(ctx))
1753+
_,err=controller.ReconcileAll(ctx)
1754+
require.NoError(t,err)
17411755

17421756
// Sort non-expired workspaces by CreatedAt in ascending order (oldest first)
17431757
sort.Slice(nonExpiredWorkspaces,func(i,jint)bool {
@@ -2142,7 +2156,8 @@ func TestCancelPendingPrebuilds(t *testing.T) {
21422156
require.NoError(t,err)
21432157

21442158
// When: the reconciliation loop is triggered
2145-
require.NoError(t,reconciler.ReconcileAll(ctx))
2159+
_,err=reconciler.ReconcileAll(ctx)
2160+
require.NoError(t,err)
21462161

21472162
iftt.shouldCancel {
21482163
// Then: the prebuild related jobs from non-active version should be canceled
@@ -2306,7 +2321,8 @@ func TestCancelPendingPrebuilds(t *testing.T) {
23062321
templateBVersion3Pending:=setupPrebuilds(t,db,owner.OrganizationID,templateBID,templateBVersion3ID,templateBVersion3PresetID,1,true)
23072322

23082323
// When: the reconciliation loop is executed
2309-
require.NoError(t,reconciler.ReconcileAll(ctx))
2324+
_,err:=reconciler.ReconcileAll(ctx)
2325+
require.NoError(t,err)
23102326

23112327
// Then: template A version 1 running workspaces should not be canceled
23122328
checkIfJobCanceled(t,clock,ctx,db,false,templateAVersion1Running)
@@ -2328,6 +2344,45 @@ func TestCancelPendingPrebuilds(t *testing.T) {
23282344
})
23292345
}
23302346

2347+
funcTestReconciliationStats(t*testing.T) {
2348+
t.Parallel()
2349+
2350+
// Setup
2351+
clock:=quartz.NewReal()
2352+
db,ps:=dbtestutil.NewDB(t)
2353+
client,_,_:=coderdtest.NewWithAPI(t,&coderdtest.Options{
2354+
IncludeProvisionerDaemon:false,
2355+
Database:db,
2356+
Pubsub:ps,
2357+
Clock:clock,
2358+
})
2359+
fakeEnqueuer:=newFakeEnqueuer()
2360+
registry:=prometheus.NewRegistry()
2361+
cache:=files.New(registry,&coderdtest.FakeAuthorizer{})
2362+
logger:=slogtest.Make(t,&slogtest.Options{IgnoreErrors:false}).Leveled(slog.LevelDebug)
2363+
reconciler:=prebuilds.NewStoreReconciler(db,ps,cache, codersdk.PrebuildsConfig{},logger,clock,registry,fakeEnqueuer,newNoopUsageCheckerPtr())
2364+
owner:=coderdtest.CreateFirstUser(t,client)
2365+
2366+
ctx,cancel:=context.WithTimeout(context.Background(),testutil.WaitLong)
2367+
defercancel()
2368+
2369+
// Create a template version with a preset
2370+
dbfake.TemplateVersion(t,db).Seed(database.TemplateVersion{
2371+
OrganizationID:owner.OrganizationID,
2372+
CreatedBy:owner.UserID,
2373+
}).Preset(database.TemplateVersionPreset{
2374+
DesiredInstances: sql.NullInt32{
2375+
Int32:1,
2376+
Valid:true,
2377+
},
2378+
}).Do()
2379+
2380+
// Check reconciliation loop stats
2381+
stats,err:=reconciler.ReconcileAll(ctx)
2382+
require.NoError(t,err)
2383+
require.Greater(t,stats.Elapsed,time.Duration(0))
2384+
}
2385+
23312386
funcnewNoopEnqueuer()*notifications.NoopEnqueuer {
23322387
returnnotifications.NewNoopEnqueuer()
23332388
}
@@ -2822,7 +2877,7 @@ func TestReconciliationRespectsPauseSetting(t *testing.T) {
28222877
_=setupTestDBPreset(t,db,templateVersionID,2,"test")
28232878

28242879
// Initially, reconciliation should create prebuilds
2825-
err:=reconciler.ReconcileAll(ctx)
2880+
_,err:=reconciler.ReconcileAll(ctx)
28262881
require.NoError(t,err)
28272882

28282883
// Verify that prebuilds were created
@@ -2849,7 +2904,7 @@ func TestReconciliationRespectsPauseSetting(t *testing.T) {
28492904
require.Len(t,workspaces,0,"prebuilds should be deleted")
28502905

28512906
// Run reconciliation again - it should be paused and not recreate prebuilds
2852-
err=reconciler.ReconcileAll(ctx)
2907+
_,err=reconciler.ReconcileAll(ctx)
28532908
require.NoError(t,err)
28542909

28552910
// Verify that no new prebuilds were created because reconciliation is paused
@@ -2862,7 +2917,7 @@ func TestReconciliationRespectsPauseSetting(t *testing.T) {
28622917
require.NoError(t,err)
28632918

28642919
// Run reconciliation again - it should now recreate the prebuilds
2865-
err=reconciler.ReconcileAll(ctx)
2920+
_,err=reconciler.ReconcileAll(ctx)
28662921
require.NoError(t,err)
28672922

28682923
// Verify that prebuilds were recreated

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp