Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

feat: add prebuilds reconciliation duration metric (#20535)#20581

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to ourterms of service andprivacy statement. We’ll occasionally send you account related emails.

Already on GitHub?Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes fromall commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletioncoderd/prebuilds/api.go
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -37,13 +37,18 @@ type ReconciliationOrchestrator interface {
TrackResourceReplacement(ctx context.Context, workspaceID, buildID uuid.UUID, replacements []*sdkproto.ResourceReplacement)
}

// ReconcileStats contains statistics about a reconciliation cycle.
type ReconcileStats struct {
Elapsed time.Duration
}

type Reconciler interface {
StateSnapshotter

// ReconcileAll orchestrates the reconciliation of all prebuilds across all templates.
// It takes a global snapshot of the system state and then reconciles each preset
// in parallel, creating or deleting prebuilds as needed to reach their desired states.
ReconcileAll(ctx context.Context) error
ReconcileAll(ctx context.Context)(ReconcileStats,error)
}

// StateSnapshotter defines the operations necessary to capture workspace prebuilds state.
Expand Down
6 changes: 5 additions & 1 deletioncoderd/prebuilds/noop.go
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -17,7 +17,11 @@ func (NoopReconciler) Run(context.Context) {}
func (NoopReconciler) Stop(context.Context, error) {}
func (NoopReconciler) TrackResourceReplacement(context.Context, uuid.UUID, uuid.UUID, []*sdkproto.ResourceReplacement) {
}
func (NoopReconciler) ReconcileAll(context.Context) error { return nil }

func (NoopReconciler) ReconcileAll(context.Context) (ReconcileStats, error) {
return ReconcileStats{}, nil
}

func (NoopReconciler) SnapshotState(context.Context, database.Store) (*GlobalSnapshot, error) {
return &GlobalSnapshot{}, nil
}
Expand Down
6 changes: 3 additions & 3 deletionsenterprise/coderd/prebuilds/metricscollector_test.go
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -485,7 +485,7 @@ func TestMetricsCollector_ReconciliationPausedMetric(t *testing.T) {
require.NoError(t, err)

// Run reconciliation to update the metric
err = reconciler.ReconcileAll(ctx)
_,err = reconciler.ReconcileAll(ctx)
require.NoError(t, err)

// Check that the metric shows reconciliation is not paused
Expand DownExpand Up@@ -514,7 +514,7 @@ func TestMetricsCollector_ReconciliationPausedMetric(t *testing.T) {
require.NoError(t, err)

// Run reconciliation to update the metric
err = reconciler.ReconcileAll(ctx)
_,err = reconciler.ReconcileAll(ctx)
require.NoError(t, err)

// Check that the metric shows reconciliation is paused
Expand DownExpand Up@@ -543,7 +543,7 @@ func TestMetricsCollector_ReconciliationPausedMetric(t *testing.T) {
require.NoError(t, err)

// Run reconciliation to update the metric
err = reconciler.ReconcileAll(ctx)
_,err = reconciler.ReconcileAll(ctx)
require.NoError(t, err)

// Check that the metric shows reconciliation is not paused
Expand Down
36 changes: 30 additions & 6 deletionsenterprise/coderd/prebuilds/reconcile.go
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -15,6 +15,7 @@ import (
"github.com/google/uuid"
"github.com/hashicorp/go-multierror"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
"golang.org/x/sync/errgroup"
"golang.org/x/xerrors"

Expand DownExpand Up@@ -44,7 +45,6 @@ type StoreReconciler struct {
logger slog.Logger
clock quartz.Clock
registerer prometheus.Registerer
metrics *MetricsCollector
notifEnq notifications.Enqueuer
buildUsageChecker *atomic.Pointer[wsbuilder.UsageChecker]

Expand All@@ -53,6 +53,11 @@ type StoreReconciler struct {
stopped atomic.Bool
done chan struct{}
provisionNotifyCh chan database.ProvisionerJob

// Prebuild state metrics
metrics *MetricsCollector
// Operational metrics
reconciliationDuration prometheus.Histogram
}

var _ prebuilds.ReconciliationOrchestrator = &StoreReconciler{}
Expand DownExpand Up@@ -105,6 +110,15 @@ func NewStoreReconciler(store database.Store,
// If the registerer fails to register the metrics collector, it's not fatal.
logger.Error(context.Background(), "failed to register prometheus metrics", slog.Error(err))
}

factory := promauto.With(registerer)
reconciler.reconciliationDuration = factory.NewHistogram(prometheus.HistogramOpts{
Namespace: "coderd",
Subsystem: "prebuilds",
Name: "reconciliation_duration_seconds",
Help: "Duration of each prebuilds reconciliation cycle.",
Buckets: prometheus.DefBuckets,
})
}

return reconciler
Expand DownExpand Up@@ -176,10 +190,15 @@ func (c *StoreReconciler) Run(ctx context.Context) {
// instead of waiting for the next reconciliation interval
case <-ticker.C:
// Trigger a new iteration on each tick.
err := c.ReconcileAll(ctx)
stats,err := c.ReconcileAll(ctx)
if err != nil {
c.logger.Error(context.Background(), "reconciliation failed", slog.Error(err))
}

if c.reconciliationDuration != nil {
c.reconciliationDuration.Observe(stats.Elapsed.Seconds())
}
c.logger.Debug(ctx, "reconciliation stats", slog.F("elapsed", stats.Elapsed))
case <-ctx.Done():
// nolint:gocritic // it's okay to use slog.F() for an error in this case
// because we want to differentiate two different types of errors: ctx.Err() and context.Cause()
Expand DownExpand Up@@ -263,19 +282,24 @@ func (c *StoreReconciler) Stop(ctx context.Context, cause error) {
// be reconciled again, leading to another workspace being provisioned. Two workspace builds will be occurring
// simultaneously for the same preset, but once both jobs have completed the reconciliation loop will notice the
// extraneous instance and delete it.
func (c *StoreReconciler) ReconcileAll(ctx context.Context) error {
func (c *StoreReconciler) ReconcileAll(ctx context.Context) (stats prebuilds.ReconcileStats, err error) {
start := c.clock.Now()
defer func() {
stats.Elapsed = c.clock.Since(start)
}()

logger := c.logger.With(slog.F("reconcile_context", "all"))

select {
case <-ctx.Done():
logger.Warn(context.Background(), "reconcile exiting prematurely; context done", slog.Error(ctx.Err()))
return nil
returnstats,nil
default:
}

logger.Debug(ctx, "starting reconciliation")

err:= c.WithReconciliationLock(ctx, logger, func(ctx context.Context, _ database.Store) error {
err = c.WithReconciliationLock(ctx, logger, func(ctx context.Context, _ database.Store) error {
// Check if prebuilds reconciliation is paused
settingsJSON, err := c.store.GetPrebuildsSettings(ctx)
if err != nil {
Expand DownExpand Up@@ -348,7 +372,7 @@ func (c *StoreReconciler) ReconcileAll(ctx context.Context) error {
logger.Error(ctx, "failed to reconcile", slog.Error(err))
}

return err
returnstats,err
}

func (c *StoreReconciler) reportHardLimitedPresets(snapshot *prebuilds.GlobalSnapshot) {
Expand Down
101 changes: 81 additions & 20 deletionsenterprise/coderd/prebuilds/reconcile_test.go
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -72,7 +72,8 @@ func TestNoReconciliationActionsIfNoPresets(t *testing.T) {
require.Equal(t, templateVersion, gotTemplateVersion)

// when we trigger the reconciliation loop for all templates
require.NoError(t, controller.ReconcileAll(ctx))
_, err = controller.ReconcileAll(ctx)
require.NoError(t, err)

// then no reconciliation actions are taken
// because without presets, there are no prebuilds
Expand DownExpand Up@@ -126,7 +127,8 @@ func TestNoReconciliationActionsIfNoPrebuilds(t *testing.T) {
require.NotEmpty(t, presetParameters)

// when we trigger the reconciliation loop for all templates
require.NoError(t, controller.ReconcileAll(ctx))
_, err = controller.ReconcileAll(ctx)
require.NoError(t, err)

// then no reconciliation actions are taken
// because without prebuilds, there is nothing to reconcile
Expand DownExpand Up@@ -428,7 +430,8 @@ func (tc testCase) run(t *testing.T) {
// Run the reconciliation multiple times to ensure idempotency
// 8 was arbitrary, but large enough to reasonably trust the result
for i := 1; i <= 8; i++ {
require.NoErrorf(t, controller.ReconcileAll(ctx), "failed on iteration %d", i)
_, err := controller.ReconcileAll(ctx)
require.NoErrorf(t, err, "failed on iteration %d", i)

if tc.shouldCreateNewPrebuild != nil {
newPrebuildCount := 0
Expand DownExpand Up@@ -542,7 +545,8 @@ func TestMultiplePresetsPerTemplateVersion(t *testing.T) {
// Run the reconciliation multiple times to ensure idempotency
// 8 was arbitrary, but large enough to reasonably trust the result
for i := 1; i <= 8; i++ {
require.NoErrorf(t, controller.ReconcileAll(ctx), "failed on iteration %d", i)
_, err := controller.ReconcileAll(ctx)
require.NoErrorf(t, err, "failed on iteration %d", i)

newPrebuildCount := 0
workspaces, err := db.GetWorkspacesByTemplateID(ctx, template.ID)
Expand DownExpand Up@@ -668,7 +672,7 @@ func TestPrebuildScheduling(t *testing.T) {
DesiredInstances: 5,
})

err := controller.ReconcileAll(ctx)
_,err := controller.ReconcileAll(ctx)
require.NoError(t, err)

// get workspace builds
Expand DownExpand Up@@ -751,7 +755,8 @@ func TestInvalidPreset(t *testing.T) {
// Run the reconciliation multiple times to ensure idempotency
// 8 was arbitrary, but large enough to reasonably trust the result
for i := 1; i <= 8; i++ {
require.NoErrorf(t, controller.ReconcileAll(ctx), "failed on iteration %d", i)
_, err := controller.ReconcileAll(ctx)
require.NoErrorf(t, err, "failed on iteration %d", i)

workspaces, err := db.GetWorkspacesByTemplateID(ctx, template.ID)
require.NoError(t, err)
Expand DownExpand Up@@ -817,7 +822,8 @@ func TestDeletionOfPrebuiltWorkspaceWithInvalidPreset(t *testing.T) {
})

// Old prebuilt workspace should be deleted.
require.NoError(t, controller.ReconcileAll(ctx))
_, err = controller.ReconcileAll(ctx)
require.NoError(t, err)

builds, err := db.GetWorkspaceBuildsByWorkspaceID(ctx, database.GetWorkspaceBuildsByWorkspaceIDParams{
WorkspaceID: prebuiltWorkspace.ID,
Expand DownExpand Up@@ -916,12 +922,15 @@ func TestSkippingHardLimitedPresets(t *testing.T) {

// Trigger reconciliation to attempt creating a new prebuild.
// The outcome depends on whether the hard limit has been reached.
require.NoError(t, controller.ReconcileAll(ctx))
_, err = controller.ReconcileAll(ctx)
require.NoError(t, err)

// These two additional calls to ReconcileAll should not trigger any notifications.
// A notification is only sent once.
require.NoError(t, controller.ReconcileAll(ctx))
require.NoError(t, controller.ReconcileAll(ctx))
_, err = controller.ReconcileAll(ctx)
require.NoError(t, err)
_, err = controller.ReconcileAll(ctx)
require.NoError(t, err)

// Verify the final state after reconciliation.
workspaces, err = db.GetWorkspacesByTemplateID(ctx, template.ID)
Expand DownExpand Up@@ -1093,12 +1102,15 @@ func TestHardLimitedPresetShouldNotBlockDeletion(t *testing.T) {

// Trigger reconciliation to attempt creating a new prebuild.
// The outcome depends on whether the hard limit has been reached.
require.NoError(t, controller.ReconcileAll(ctx))
_, err = controller.ReconcileAll(ctx)
require.NoError(t, err)

// These two additional calls to ReconcileAll should not trigger any notifications.
// A notification is only sent once.
require.NoError(t, controller.ReconcileAll(ctx))
require.NoError(t, controller.ReconcileAll(ctx))
_, err = controller.ReconcileAll(ctx)
require.NoError(t, err)
_, err = controller.ReconcileAll(ctx)
require.NoError(t, err)

// Verify the final state after reconciliation.
// When hard limit is reached, no new workspace should be created.
Expand DownExpand Up@@ -1141,7 +1153,8 @@ func TestHardLimitedPresetShouldNotBlockDeletion(t *testing.T) {
}

// Trigger reconciliation to make sure that successful, but outdated prebuilt workspace will be deleted.
require.NoError(t, controller.ReconcileAll(ctx))
_, err = controller.ReconcileAll(ctx)
require.NoError(t, err)

workspaces, err = db.GetWorkspacesByTemplateID(ctx, template.ID)
require.NoError(t, err)
Expand DownExpand Up@@ -1740,7 +1753,8 @@ func TestExpiredPrebuildsMultipleActions(t *testing.T) {
}

// Trigger reconciliation to process expired prebuilds and enforce desired state.
require.NoError(t, controller.ReconcileAll(ctx))
_, err = controller.ReconcileAll(ctx)
require.NoError(t, err)

// Sort non-expired workspaces by CreatedAt in ascending order (oldest first)
sort.Slice(nonExpiredWorkspaces, func(i, j int) bool {
Expand DownExpand Up@@ -2145,7 +2159,8 @@ func TestCancelPendingPrebuilds(t *testing.T) {
require.NoError(t, err)

// When: the reconciliation loop is triggered
require.NoError(t, reconciler.ReconcileAll(ctx))
_, err = reconciler.ReconcileAll(ctx)
require.NoError(t, err)

if tt.shouldCancel {
// Then: the pending prebuild job from non-active version should be canceled
Expand DownExpand Up@@ -2347,7 +2362,8 @@ func TestCancelPendingPrebuilds(t *testing.T) {
templateBVersion3Pending := setupPrebuilds(t, db, owner.OrganizationID, templateBID, templateBVersion3ID, templateBVersion3PresetID, 1, true)

// When: the reconciliation loop is executed
require.NoError(t, reconciler.ReconcileAll(ctx))
_, err := reconciler.ReconcileAll(ctx)
require.NoError(t, err)

// Then: template A version 1 running workspaces should not be canceled
checkIfJobCanceledAndDeleted(t, clock, ctx, db, false, templateAVersion1Running)
Expand All@@ -2369,6 +2385,51 @@ func TestCancelPendingPrebuilds(t *testing.T) {
})
}

func TestReconciliationStats(t *testing.T) {
t.Parallel()

// Setup
clock := quartz.NewReal()
db, ps := dbtestutil.NewDB(t)
client, _, _ := coderdtest.NewWithAPI(t, &coderdtest.Options{
Database: db,
Pubsub: ps,
Clock: clock,
})
fakeEnqueuer := newFakeEnqueuer()
registry := prometheus.NewRegistry()
cache := files.New(registry, &coderdtest.FakeAuthorizer{})
logger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: false}).Leveled(slog.LevelDebug)
reconciler := prebuilds.NewStoreReconciler(db, ps, cache, codersdk.PrebuildsConfig{}, logger, clock, registry, fakeEnqueuer, newNoopUsageCheckerPtr())
owner := coderdtest.CreateFirstUser(t, client)

ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitShort)
defer cancel()

// Create a template version with a preset
dbfake.TemplateVersion(t, db).Seed(database.TemplateVersion{
OrganizationID: owner.OrganizationID,
CreatedBy: owner.UserID,
}).Preset(database.TemplateVersionPreset{
DesiredInstances: sql.NullInt32{
Int32: 1,
Valid: true,
},
}).Do()

// Verify that ReconcileAll tracks and returns elapsed time
start := time.Now()
stats, err := reconciler.ReconcileAll(ctx)
actualElapsed := time.Since(start)
require.NoError(t, err)
require.Greater(t, stats.Elapsed, time.Duration(0))

// Verify stats.Elapsed matches actual execution time
require.InDelta(t, actualElapsed.Milliseconds(), stats.Elapsed.Milliseconds(), 100)
// Verify reconciliation loop is not unexpectedly slow
require.Less(t, stats.Elapsed, 5*time.Second)
}

func newNoopEnqueuer() *notifications.NoopEnqueuer {
return notifications.NewNoopEnqueuer()
}
Expand DownExpand Up@@ -2863,7 +2924,7 @@ func TestReconciliationRespectsPauseSetting(t *testing.T) {
_ = setupTestDBPreset(t, db, templateVersionID, 2, "test")

// Initially, reconciliation should create prebuilds
err := reconciler.ReconcileAll(ctx)
_,err := reconciler.ReconcileAll(ctx)
require.NoError(t, err)

// Verify that prebuilds were created
Expand All@@ -2890,7 +2951,7 @@ func TestReconciliationRespectsPauseSetting(t *testing.T) {
require.Len(t, workspaces, 0, "prebuilds should be deleted")

// Run reconciliation again - it should be paused and not recreate prebuilds
err = reconciler.ReconcileAll(ctx)
_,err = reconciler.ReconcileAll(ctx)
require.NoError(t, err)

// Verify that no new prebuilds were created because reconciliation is paused
Expand All@@ -2903,7 +2964,7 @@ func TestReconciliationRespectsPauseSetting(t *testing.T) {
require.NoError(t, err)

// Run reconciliation again - it should now recreate the prebuilds
err = reconciler.ReconcileAll(ctx)
_,err = reconciler.ReconcileAll(ctx)
require.NoError(t, err)

// Verify that prebuilds were recreated
Expand Down
Loading

[8]ページ先頭

©2009-2025 Movatter.jp