Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit6424093

Browse files
authored
feat: add prebuilds reconciliation duration metric (#20535) (#20581)
Related to PR:#20535(cherry picked from commitaad1b40)
1 parent2cf4b5c commit6424093

File tree

5 files changed

+125
-31
lines changed

5 files changed

+125
-31
lines changed

‎coderd/prebuilds/api.go‎

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,13 +37,18 @@ type ReconciliationOrchestrator interface {
3737
TrackResourceReplacement(ctx context.Context,workspaceID,buildID uuid.UUID,replacements []*sdkproto.ResourceReplacement)
3838
}
3939

40+
// ReconcileStats contains statistics about a reconciliation cycle.
41+
typeReconcileStatsstruct {
42+
Elapsed time.Duration
43+
}
44+
4045
typeReconcilerinterface {
4146
StateSnapshotter
4247

4348
// ReconcileAll orchestrates the reconciliation of all prebuilds across all templates.
4449
// It takes a global snapshot of the system state and then reconciles each preset
4550
// in parallel, creating or deleting prebuilds as needed to reach their desired states.
46-
ReconcileAll(ctx context.Context)error
51+
ReconcileAll(ctx context.Context)(ReconcileStats,error)
4752
}
4853

4954
// StateSnapshotter defines the operations necessary to capture workspace prebuilds state.

‎coderd/prebuilds/noop.go‎

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,11 @@ func (NoopReconciler) Run(context.Context) {}
1717
func (NoopReconciler)Stop(context.Context,error) {}
1818
func (NoopReconciler)TrackResourceReplacement(context.Context, uuid.UUID, uuid.UUID, []*sdkproto.ResourceReplacement) {
1919
}
20-
func (NoopReconciler)ReconcileAll(context.Context)error {returnnil }
20+
21+
func (NoopReconciler)ReconcileAll(context.Context) (ReconcileStats,error) {
22+
returnReconcileStats{},nil
23+
}
24+
2125
func (NoopReconciler)SnapshotState(context.Context, database.Store) (*GlobalSnapshot,error) {
2226
return&GlobalSnapshot{},nil
2327
}

‎enterprise/coderd/prebuilds/metricscollector_test.go‎

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -485,7 +485,7 @@ func TestMetricsCollector_ReconciliationPausedMetric(t *testing.T) {
485485
require.NoError(t,err)
486486

487487
// Run reconciliation to update the metric
488-
err=reconciler.ReconcileAll(ctx)
488+
_,err=reconciler.ReconcileAll(ctx)
489489
require.NoError(t,err)
490490

491491
// Check that the metric shows reconciliation is not paused
@@ -514,7 +514,7 @@ func TestMetricsCollector_ReconciliationPausedMetric(t *testing.T) {
514514
require.NoError(t,err)
515515

516516
// Run reconciliation to update the metric
517-
err=reconciler.ReconcileAll(ctx)
517+
_,err=reconciler.ReconcileAll(ctx)
518518
require.NoError(t,err)
519519

520520
// Check that the metric shows reconciliation is paused
@@ -543,7 +543,7 @@ func TestMetricsCollector_ReconciliationPausedMetric(t *testing.T) {
543543
require.NoError(t,err)
544544

545545
// Run reconciliation to update the metric
546-
err=reconciler.ReconcileAll(ctx)
546+
_,err=reconciler.ReconcileAll(ctx)
547547
require.NoError(t,err)
548548

549549
// Check that the metric shows reconciliation is not paused

‎enterprise/coderd/prebuilds/reconcile.go‎

Lines changed: 30 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ import (
1515
"github.com/google/uuid"
1616
"github.com/hashicorp/go-multierror"
1717
"github.com/prometheus/client_golang/prometheus"
18+
"github.com/prometheus/client_golang/prometheus/promauto"
1819
"golang.org/x/sync/errgroup"
1920
"golang.org/x/xerrors"
2021

@@ -44,7 +45,6 @@ type StoreReconciler struct {
4445
logger slog.Logger
4546
clock quartz.Clock
4647
registerer prometheus.Registerer
47-
metrics*MetricsCollector
4848
notifEnq notifications.Enqueuer
4949
buildUsageChecker*atomic.Pointer[wsbuilder.UsageChecker]
5050

@@ -53,6 +53,11 @@ type StoreReconciler struct {
5353
stopped atomic.Bool
5454
donechanstruct{}
5555
provisionNotifyChchan database.ProvisionerJob
56+
57+
// Prebuild state metrics
58+
metrics*MetricsCollector
59+
// Operational metrics
60+
reconciliationDuration prometheus.Histogram
5661
}
5762

5863
var_ prebuilds.ReconciliationOrchestrator=&StoreReconciler{}
@@ -105,6 +110,15 @@ func NewStoreReconciler(store database.Store,
105110
// If the registerer fails to register the metrics collector, it's not fatal.
106111
logger.Error(context.Background(),"failed to register prometheus metrics",slog.Error(err))
107112
}
113+
114+
factory:=promauto.With(registerer)
115+
reconciler.reconciliationDuration=factory.NewHistogram(prometheus.HistogramOpts{
116+
Namespace:"coderd",
117+
Subsystem:"prebuilds",
118+
Name:"reconciliation_duration_seconds",
119+
Help:"Duration of each prebuilds reconciliation cycle.",
120+
Buckets:prometheus.DefBuckets,
121+
})
108122
}
109123

110124
returnreconciler
@@ -176,10 +190,15 @@ func (c *StoreReconciler) Run(ctx context.Context) {
176190
// instead of waiting for the next reconciliation interval
177191
case<-ticker.C:
178192
// Trigger a new iteration on each tick.
179-
err:=c.ReconcileAll(ctx)
193+
stats,err:=c.ReconcileAll(ctx)
180194
iferr!=nil {
181195
c.logger.Error(context.Background(),"reconciliation failed",slog.Error(err))
182196
}
197+
198+
ifc.reconciliationDuration!=nil {
199+
c.reconciliationDuration.Observe(stats.Elapsed.Seconds())
200+
}
201+
c.logger.Debug(ctx,"reconciliation stats",slog.F("elapsed",stats.Elapsed))
183202
case<-ctx.Done():
184203
// nolint:gocritic // it's okay to use slog.F() for an error in this case
185204
// because we want to differentiate two different types of errors: ctx.Err() and context.Cause()
@@ -263,19 +282,24 @@ func (c *StoreReconciler) Stop(ctx context.Context, cause error) {
263282
// be reconciled again, leading to another workspace being provisioned. Two workspace builds will be occurring
264283
// simultaneously for the same preset, but once both jobs have completed the reconciliation loop will notice the
265284
// extraneous instance and delete it.
266-
func (c*StoreReconciler)ReconcileAll(ctx context.Context)error {
285+
func (c*StoreReconciler)ReconcileAll(ctx context.Context) (stats prebuilds.ReconcileStats,errerror) {
286+
start:=c.clock.Now()
287+
deferfunc() {
288+
stats.Elapsed=c.clock.Since(start)
289+
}()
290+
267291
logger:=c.logger.With(slog.F("reconcile_context","all"))
268292

269293
select {
270294
case<-ctx.Done():
271295
logger.Warn(context.Background(),"reconcile exiting prematurely; context done",slog.Error(ctx.Err()))
272-
returnnil
296+
returnstats,nil
273297
default:
274298
}
275299

276300
logger.Debug(ctx,"starting reconciliation")
277301

278-
err:=c.WithReconciliationLock(ctx,logger,func(ctx context.Context,_ database.Store)error {
302+
err=c.WithReconciliationLock(ctx,logger,func(ctx context.Context,_ database.Store)error {
279303
// Check if prebuilds reconciliation is paused
280304
settingsJSON,err:=c.store.GetPrebuildsSettings(ctx)
281305
iferr!=nil {
@@ -348,7 +372,7 @@ func (c *StoreReconciler) ReconcileAll(ctx context.Context) error {
348372
logger.Error(ctx,"failed to reconcile",slog.Error(err))
349373
}
350374

351-
returnerr
375+
returnstats,err
352376
}
353377

354378
func (c*StoreReconciler)reportHardLimitedPresets(snapshot*prebuilds.GlobalSnapshot) {

‎enterprise/coderd/prebuilds/reconcile_test.go‎

Lines changed: 81 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,8 @@ func TestNoReconciliationActionsIfNoPresets(t *testing.T) {
7272
require.Equal(t,templateVersion,gotTemplateVersion)
7373

7474
// when we trigger the reconciliation loop for all templates
75-
require.NoError(t,controller.ReconcileAll(ctx))
75+
_,err=controller.ReconcileAll(ctx)
76+
require.NoError(t,err)
7677

7778
// then no reconciliation actions are taken
7879
// because without presets, there are no prebuilds
@@ -126,7 +127,8 @@ func TestNoReconciliationActionsIfNoPrebuilds(t *testing.T) {
126127
require.NotEmpty(t,presetParameters)
127128

128129
// when we trigger the reconciliation loop for all templates
129-
require.NoError(t,controller.ReconcileAll(ctx))
130+
_,err=controller.ReconcileAll(ctx)
131+
require.NoError(t,err)
130132

131133
// then no reconciliation actions are taken
132134
// because without prebuilds, there is nothing to reconcile
@@ -428,7 +430,8 @@ func (tc testCase) run(t *testing.T) {
428430
// Run the reconciliation multiple times to ensure idempotency
429431
// 8 was arbitrary, but large enough to reasonably trust the result
430432
fori:=1;i<=8;i++ {
431-
require.NoErrorf(t,controller.ReconcileAll(ctx),"failed on iteration %d",i)
433+
_,err:=controller.ReconcileAll(ctx)
434+
require.NoErrorf(t,err,"failed on iteration %d",i)
432435

433436
iftc.shouldCreateNewPrebuild!=nil {
434437
newPrebuildCount:=0
@@ -542,7 +545,8 @@ func TestMultiplePresetsPerTemplateVersion(t *testing.T) {
542545
// Run the reconciliation multiple times to ensure idempotency
543546
// 8 was arbitrary, but large enough to reasonably trust the result
544547
fori:=1;i<=8;i++ {
545-
require.NoErrorf(t,controller.ReconcileAll(ctx),"failed on iteration %d",i)
548+
_,err:=controller.ReconcileAll(ctx)
549+
require.NoErrorf(t,err,"failed on iteration %d",i)
546550

547551
newPrebuildCount:=0
548552
workspaces,err:=db.GetWorkspacesByTemplateID(ctx,template.ID)
@@ -668,7 +672,7 @@ func TestPrebuildScheduling(t *testing.T) {
668672
DesiredInstances:5,
669673
})
670674

671-
err:=controller.ReconcileAll(ctx)
675+
_,err:=controller.ReconcileAll(ctx)
672676
require.NoError(t,err)
673677

674678
// get workspace builds
@@ -751,7 +755,8 @@ func TestInvalidPreset(t *testing.T) {
751755
// Run the reconciliation multiple times to ensure idempotency
752756
// 8 was arbitrary, but large enough to reasonably trust the result
753757
fori:=1;i<=8;i++ {
754-
require.NoErrorf(t,controller.ReconcileAll(ctx),"failed on iteration %d",i)
758+
_,err:=controller.ReconcileAll(ctx)
759+
require.NoErrorf(t,err,"failed on iteration %d",i)
755760

756761
workspaces,err:=db.GetWorkspacesByTemplateID(ctx,template.ID)
757762
require.NoError(t,err)
@@ -817,7 +822,8 @@ func TestDeletionOfPrebuiltWorkspaceWithInvalidPreset(t *testing.T) {
817822
})
818823

819824
// Old prebuilt workspace should be deleted.
820-
require.NoError(t,controller.ReconcileAll(ctx))
825+
_,err=controller.ReconcileAll(ctx)
826+
require.NoError(t,err)
821827

822828
builds,err:=db.GetWorkspaceBuildsByWorkspaceID(ctx, database.GetWorkspaceBuildsByWorkspaceIDParams{
823829
WorkspaceID:prebuiltWorkspace.ID,
@@ -916,12 +922,15 @@ func TestSkippingHardLimitedPresets(t *testing.T) {
916922

917923
// Trigger reconciliation to attempt creating a new prebuild.
918924
// The outcome depends on whether the hard limit has been reached.
919-
require.NoError(t,controller.ReconcileAll(ctx))
925+
_,err=controller.ReconcileAll(ctx)
926+
require.NoError(t,err)
920927

921928
// These two additional calls to ReconcileAll should not trigger any notifications.
922929
// A notification is only sent once.
923-
require.NoError(t,controller.ReconcileAll(ctx))
924-
require.NoError(t,controller.ReconcileAll(ctx))
930+
_,err=controller.ReconcileAll(ctx)
931+
require.NoError(t,err)
932+
_,err=controller.ReconcileAll(ctx)
933+
require.NoError(t,err)
925934

926935
// Verify the final state after reconciliation.
927936
workspaces,err=db.GetWorkspacesByTemplateID(ctx,template.ID)
@@ -1093,12 +1102,15 @@ func TestHardLimitedPresetShouldNotBlockDeletion(t *testing.T) {
10931102

10941103
// Trigger reconciliation to attempt creating a new prebuild.
10951104
// The outcome depends on whether the hard limit has been reached.
1096-
require.NoError(t,controller.ReconcileAll(ctx))
1105+
_,err=controller.ReconcileAll(ctx)
1106+
require.NoError(t,err)
10971107

10981108
// These two additional calls to ReconcileAll should not trigger any notifications.
10991109
// A notification is only sent once.
1100-
require.NoError(t,controller.ReconcileAll(ctx))
1101-
require.NoError(t,controller.ReconcileAll(ctx))
1110+
_,err=controller.ReconcileAll(ctx)
1111+
require.NoError(t,err)
1112+
_,err=controller.ReconcileAll(ctx)
1113+
require.NoError(t,err)
11021114

11031115
// Verify the final state after reconciliation.
11041116
// When hard limit is reached, no new workspace should be created.
@@ -1141,7 +1153,8 @@ func TestHardLimitedPresetShouldNotBlockDeletion(t *testing.T) {
11411153
}
11421154

11431155
// Trigger reconciliation to make sure that successful, but outdated prebuilt workspace will be deleted.
1144-
require.NoError(t,controller.ReconcileAll(ctx))
1156+
_,err=controller.ReconcileAll(ctx)
1157+
require.NoError(t,err)
11451158

11461159
workspaces,err=db.GetWorkspacesByTemplateID(ctx,template.ID)
11471160
require.NoError(t,err)
@@ -1740,7 +1753,8 @@ func TestExpiredPrebuildsMultipleActions(t *testing.T) {
17401753
}
17411754

17421755
// Trigger reconciliation to process expired prebuilds and enforce desired state.
1743-
require.NoError(t,controller.ReconcileAll(ctx))
1756+
_,err=controller.ReconcileAll(ctx)
1757+
require.NoError(t,err)
17441758

17451759
// Sort non-expired workspaces by CreatedAt in ascending order (oldest first)
17461760
sort.Slice(nonExpiredWorkspaces,func(i,jint)bool {
@@ -2145,7 +2159,8 @@ func TestCancelPendingPrebuilds(t *testing.T) {
21452159
require.NoError(t,err)
21462160

21472161
// When: the reconciliation loop is triggered
2148-
require.NoError(t,reconciler.ReconcileAll(ctx))
2162+
_,err=reconciler.ReconcileAll(ctx)
2163+
require.NoError(t,err)
21492164

21502165
iftt.shouldCancel {
21512166
// Then: the pending prebuild job from non-active version should be canceled
@@ -2347,7 +2362,8 @@ func TestCancelPendingPrebuilds(t *testing.T) {
23472362
templateBVersion3Pending:=setupPrebuilds(t,db,owner.OrganizationID,templateBID,templateBVersion3ID,templateBVersion3PresetID,1,true)
23482363

23492364
// When: the reconciliation loop is executed
2350-
require.NoError(t,reconciler.ReconcileAll(ctx))
2365+
_,err:=reconciler.ReconcileAll(ctx)
2366+
require.NoError(t,err)
23512367

23522368
// Then: template A version 1 running workspaces should not be canceled
23532369
checkIfJobCanceledAndDeleted(t,clock,ctx,db,false,templateAVersion1Running)
@@ -2369,6 +2385,51 @@ func TestCancelPendingPrebuilds(t *testing.T) {
23692385
})
23702386
}
23712387

2388+
funcTestReconciliationStats(t*testing.T) {
2389+
t.Parallel()
2390+
2391+
// Setup
2392+
clock:=quartz.NewReal()
2393+
db,ps:=dbtestutil.NewDB(t)
2394+
client,_,_:=coderdtest.NewWithAPI(t,&coderdtest.Options{
2395+
Database:db,
2396+
Pubsub:ps,
2397+
Clock:clock,
2398+
})
2399+
fakeEnqueuer:=newFakeEnqueuer()
2400+
registry:=prometheus.NewRegistry()
2401+
cache:=files.New(registry,&coderdtest.FakeAuthorizer{})
2402+
logger:=slogtest.Make(t,&slogtest.Options{IgnoreErrors:false}).Leveled(slog.LevelDebug)
2403+
reconciler:=prebuilds.NewStoreReconciler(db,ps,cache, codersdk.PrebuildsConfig{},logger,clock,registry,fakeEnqueuer,newNoopUsageCheckerPtr())
2404+
owner:=coderdtest.CreateFirstUser(t,client)
2405+
2406+
ctx,cancel:=context.WithTimeout(context.Background(),testutil.WaitShort)
2407+
defercancel()
2408+
2409+
// Create a template version with a preset
2410+
dbfake.TemplateVersion(t,db).Seed(database.TemplateVersion{
2411+
OrganizationID:owner.OrganizationID,
2412+
CreatedBy:owner.UserID,
2413+
}).Preset(database.TemplateVersionPreset{
2414+
DesiredInstances: sql.NullInt32{
2415+
Int32:1,
2416+
Valid:true,
2417+
},
2418+
}).Do()
2419+
2420+
// Verify that ReconcileAll tracks and returns elapsed time
2421+
start:=time.Now()
2422+
stats,err:=reconciler.ReconcileAll(ctx)
2423+
actualElapsed:=time.Since(start)
2424+
require.NoError(t,err)
2425+
require.Greater(t,stats.Elapsed,time.Duration(0))
2426+
2427+
// Verify stats.Elapsed matches actual execution time
2428+
require.InDelta(t,actualElapsed.Milliseconds(),stats.Elapsed.Milliseconds(),100)
2429+
// Verify reconciliation loop is not unexpectedly slow
2430+
require.Less(t,stats.Elapsed,5*time.Second)
2431+
}
2432+
23722433
funcnewNoopEnqueuer()*notifications.NoopEnqueuer {
23732434
returnnotifications.NewNoopEnqueuer()
23742435
}
@@ -2863,7 +2924,7 @@ func TestReconciliationRespectsPauseSetting(t *testing.T) {
28632924
_=setupTestDBPreset(t,db,templateVersionID,2,"test")
28642925

28652926
// Initially, reconciliation should create prebuilds
2866-
err:=reconciler.ReconcileAll(ctx)
2927+
_,err:=reconciler.ReconcileAll(ctx)
28672928
require.NoError(t,err)
28682929

28692930
// Verify that prebuilds were created
@@ -2890,7 +2951,7 @@ func TestReconciliationRespectsPauseSetting(t *testing.T) {
28902951
require.Len(t,workspaces,0,"prebuilds should be deleted")
28912952

28922953
// Run reconciliation again - it should be paused and not recreate prebuilds
2893-
err=reconciler.ReconcileAll(ctx)
2954+
_,err=reconciler.ReconcileAll(ctx)
28942955
require.NoError(t,err)
28952956

28962957
// Verify that no new prebuilds were created because reconciliation is paused
@@ -2903,7 +2964,7 @@ func TestReconciliationRespectsPauseSetting(t *testing.T) {
29032964
require.NoError(t,err)
29042965

29052966
// Run reconciliation again - it should now recreate the prebuilds
2906-
err=reconciler.ReconcileAll(ctx)
2967+
_,err=reconciler.ReconcileAll(ctx)
29072968
require.NoError(t,err)
29082969

29092970
// Verify that prebuilds were recreated

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp