Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

fix: exclude deleted templates from metrics collection#17839

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to ourterms of service andprivacy statement. We’ll occasionally send you account related emails.

Already on GitHub?Sign in to your account

Merged
dannykopping merged 10 commits intomainfromyevhenii/metrics-fix
May 15, 2025
Merged
Show file tree
Hide file tree
Changes fromall commits
Commits
Show all changes
10 commits
Select commitHold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletionscoderd/database/queries.sql.go
View file
Open in desktop

Some generated files are not rendered by default. Learn more abouthow customized files appear on GitHub.

4 changes: 4 additions & 0 deletionscoderd/database/queries/prebuilds.sql
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -15,6 +15,7 @@ WHERE w.id IN (
AND b.template_version_id = t.active_version_id
AND p.current_preset_id = @preset_id::uuid
AND p.ready
AND NOT t.deleted
LIMIT 1 FOR UPDATE OF p SKIP LOCKED -- Ensure that a concurrent request will not select the same prebuild.
)
RETURNING w.id, w.name;
Expand All@@ -40,6 +41,7 @@ FROM templates t
INNER JOIN template_version_presets tvp ON tvp.template_version_id = tv.id
INNER JOIN organizations o ON o.id = t.organization_id
WHERE tvp.desired_instances IS NOT NULL -- Consider only presets that have a prebuild configuration.
-- AND NOT t.deleted -- We don't exclude deleted templates because there's no constraint in the DB preventing a soft deletion on a template while workspaces are running.
AND (t.id = sqlc.narg('template_id')::uuid OR sqlc.narg('template_id') IS NULL);

-- name: GetRunningPrebuiltWorkspaces :many
Expand DownExpand Up@@ -70,6 +72,7 @@ FROM workspace_latest_builds wlb
-- prebuilds that are still building.
INNER JOIN templates t ON t.active_version_id = wlb.template_version_id
WHERE wlb.job_status IN ('pending'::provisioner_job_status, 'running'::provisioner_job_status)
-- AND NOT t.deleted -- We don't exclude deleted templates because there's no constraint in the DB preventing a soft deletion on a template while workspaces are running.
GROUP BY t.id, wpb.template_version_id, wpb.transition, wlb.template_version_preset_id;

-- GetPresetsBackoff groups workspace builds by preset ID.
Expand DownExpand Up@@ -98,6 +101,7 @@ WITH filtered_builds AS (
WHERE tvp.desired_instances IS NOT NULL -- Consider only presets that have a prebuild configuration.
AND wlb.transition = 'start'::workspace_transition
AND w.owner_id = 'c42fdf75-3097-471c-8c33-fb52454d81c0'
AND NOT t.deleted
),
time_sorted_builds AS (
-- Group builds by preset, then sort each group by created_at.
Expand Down
4 changes: 4 additions & 0 deletionsenterprise/coderd/prebuilds/metricscollector.go
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -157,6 +157,10 @@ func (mc *MetricsCollector) Collect(metricsCh chan<- prometheus.Metric) {
continue
}

if preset.Deleted {
continue
}

presetSnapshot, err := currentState.snapshot.FilterByPreset(preset.ID)
if err != nil {
mc.logger.Error(context.Background(), "failed to filter by preset", slog.Error(err))
Expand Down
189 changes: 168 additions & 21 deletionsenterprise/coderd/prebuilds/metricscollector_test.go
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -19,6 +19,7 @@ import (
"github.com/coder/coder/v2/coderd/database/dbauthz"
"github.com/coder/coder/v2/coderd/database/dbgen"
"github.com/coder/coder/v2/coderd/database/dbtestutil"
"github.com/coder/coder/v2/coderd/database/dbtime"
agplprebuilds "github.com/coder/coder/v2/coderd/prebuilds"
"github.com/coder/coder/v2/codersdk"
"github.com/coder/coder/v2/enterprise/coderd/prebuilds"
Expand DownExpand Up@@ -165,27 +166,12 @@ func TestMetricsCollector(t *testing.T) {
eligible: []bool{false},
},
{
name: "deleted templates never desire prebuilds",
transitions: allTransitions,
jobStatuses: allJobStatuses,
initiatorIDs: []uuid.UUID{agplprebuilds.SystemUserID},
ownerIDs: []uuid.UUID{agplprebuilds.SystemUserID, uuid.New()},
metrics: []metricCheck{
{prebuilds.MetricDesiredGauge, ptr.To(0.0), false},
},
templateDeleted: []bool{true},
eligible: []bool{false},
},
{
name: "running prebuilds for deleted templates are still counted, so that they can be deleted",
transitions: []database.WorkspaceTransition{database.WorkspaceTransitionStart},
jobStatuses: []database.ProvisionerJobStatus{database.ProvisionerJobStatusSucceeded},
initiatorIDs: []uuid.UUID{agplprebuilds.SystemUserID},
ownerIDs: []uuid.UUID{agplprebuilds.SystemUserID},
metrics: []metricCheck{
{prebuilds.MetricRunningGauge, ptr.To(1.0), false},
{prebuilds.MetricEligibleGauge, ptr.To(0.0), false},
},
name: "deleted templates should not be included in exported metrics",
transitions: allTransitions,
jobStatuses: allJobStatuses,
initiatorIDs: []uuid.UUID{agplprebuilds.SystemUserID},
ownerIDs: []uuid.UUID{agplprebuilds.SystemUserID, uuid.New()},
metrics: nil,
templateDeleted: []bool{true},
eligible: []bool{false},
},
Expand DownExpand Up@@ -281,6 +267,12 @@ func TestMetricsCollector(t *testing.T) {
"organization_name": org.Name,
}

// If no expected metrics have been defined, ensure we don't find any metric series (i.e. metrics with given labels).
if test.metrics == nil {
series := findAllMetricSeries(metricsFamilies, labels)
require.Empty(t, series)
}

for _, check := range test.metrics {
metric := findMetric(metricsFamilies, check.name, labels)
if check.value == nil {
Expand All@@ -307,6 +299,131 @@ func TestMetricsCollector(t *testing.T) {
}
}

// TestMetricsCollector_DuplicateTemplateNames validates a bug that we saw previously which caused duplicate metric series
// registration when a template was deleted and a new one created with the same name (and preset name).
// We are now excluding deleted templates from our metric collection.
func TestMetricsCollector_DuplicateTemplateNames(t *testing.T) {
t.Parallel()

if !dbtestutil.WillUsePostgres() {
t.Skip("this test requires postgres")
}

type metricCheck struct {
name string
value *float64
isCounter bool
}

type testCase struct {
transition database.WorkspaceTransition
jobStatus database.ProvisionerJobStatus
initiatorID uuid.UUID
ownerID uuid.UUID
metrics []metricCheck
eligible bool
}

test := testCase{
transition: database.WorkspaceTransitionStart,
jobStatus: database.ProvisionerJobStatusSucceeded,
initiatorID: agplprebuilds.SystemUserID,
ownerID: agplprebuilds.SystemUserID,
metrics: []metricCheck{
{prebuilds.MetricCreatedCount, ptr.To(1.0), true},
{prebuilds.MetricClaimedCount, ptr.To(0.0), true},
{prebuilds.MetricFailedCount, ptr.To(0.0), true},
{prebuilds.MetricDesiredGauge, ptr.To(1.0), false},
{prebuilds.MetricRunningGauge, ptr.To(1.0), false},
{prebuilds.MetricEligibleGauge, ptr.To(1.0), false},
},
eligible: true,
}

logger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: true})
clock := quartz.NewMock(t)
db, pubsub := dbtestutil.NewDB(t)
reconciler := prebuilds.NewStoreReconciler(db, pubsub, codersdk.PrebuildsConfig{}, logger, quartz.NewMock(t), prometheus.NewRegistry(), newNoopEnqueuer())
ctx := testutil.Context(t, testutil.WaitLong)

collector := prebuilds.NewMetricsCollector(db, logger, reconciler)
registry := prometheus.NewPedanticRegistry()
registry.Register(collector)

presetName := "default-preset"
defaultOrg := dbgen.Organization(t, db, database.Organization{})
setupTemplateWithDeps := func() database.Template {
template := setupTestDBTemplateWithinOrg(t, db, test.ownerID, false, "default-template", defaultOrg)
templateVersionID := setupTestDBTemplateVersion(ctx, t, clock, db, pubsub, defaultOrg.ID, test.ownerID, template.ID)
preset := setupTestDBPreset(t, db, templateVersionID, 1, "default-preset")
workspace, _ := setupTestDBWorkspace(
t, clock, db, pubsub,
test.transition, test.jobStatus, defaultOrg.ID, preset, template.ID, templateVersionID, test.initiatorID, test.ownerID,
)
setupTestDBWorkspaceAgent(t, db, workspace.ID, test.eligible)
return template
}

// When: starting with a regular template.
template := setupTemplateWithDeps()
labels := map[string]string{
"template_name": template.Name,
"preset_name": presetName,
"organization_name": defaultOrg.Name,
}

// nolint:gocritic // Authz context needed to retrieve state.
ctx = dbauthz.AsPrebuildsOrchestrator(ctx)

// Then: metrics collect successfully.
require.NoError(t, collector.UpdateState(ctx, testutil.WaitLong))
metricsFamilies, err := registry.Gather()
require.NoError(t, err)
require.NotEmpty(t, findAllMetricSeries(metricsFamilies, labels))

// When: the template is deleted.
require.NoError(t, db.UpdateTemplateDeletedByID(ctx, database.UpdateTemplateDeletedByIDParams{
ID: template.ID,
Deleted: true,
UpdatedAt: dbtime.Now(),
}))

// Then: metrics collect successfully but are empty because the template is deleted.
require.NoError(t, collector.UpdateState(ctx, testutil.WaitLong))
metricsFamilies, err = registry.Gather()
require.NoError(t, err)
require.Empty(t, findAllMetricSeries(metricsFamilies, labels))

// When: a new template is created with the same name as the deleted template.
newTemplate := setupTemplateWithDeps()

// Ensure the database has both the new and old (delete) template.
{
deleted, err := db.GetTemplateByOrganizationAndName(ctx, database.GetTemplateByOrganizationAndNameParams{
OrganizationID: template.OrganizationID,
Deleted: true,
Name: template.Name,
})
require.NoError(t, err)
require.Equal(t, template.ID, deleted.ID)

current, err := db.GetTemplateByOrganizationAndName(ctx, database.GetTemplateByOrganizationAndNameParams{
// Use details from deleted template to ensure they're aligned.
OrganizationID: template.OrganizationID,
Deleted: false,
Name: template.Name,
})
require.NoError(t, err)
require.Equal(t, newTemplate.ID, current.ID)
}

// Then: metrics collect successfully.
require.NoError(t, collector.UpdateState(ctx, testutil.WaitLong))
metricsFamilies, err = registry.Gather()
require.NoError(t, err)
require.NotEmpty(t, findAllMetricSeries(metricsFamilies, labels))
}

func findMetric(metricsFamilies []*prometheus_client.MetricFamily, name string, labels map[string]string) *prometheus_client.Metric {
for _, metricFamily := range metricsFamilies {
if metricFamily.GetName() != name {
Expand DownExpand Up@@ -334,3 +451,33 @@ func findMetric(metricsFamilies []*prometheus_client.MetricFamily, name string,
}
return nil
}

// findAllMetricSeries finds all metrics with a given set of labels.
func findAllMetricSeries(metricsFamilies []*prometheus_client.MetricFamily, labels map[string]string) map[string]*prometheus_client.Metric {
series := make(map[string]*prometheus_client.Metric)
for _, metricFamily := range metricsFamilies {
for _, metric := range metricFamily.GetMetric() {
labelPairs := metric.GetLabel()

if len(labelPairs) != len(labels) {
continue
}

// Convert label pairs to map for easier lookup
metricLabels := make(map[string]string, len(labelPairs))
for _, label := range labelPairs {
metricLabels[label.GetName()] = label.GetValue()
}

// Check if all requested labels match
for wantName, wantValue := range labels {
if metricLabels[wantName] != wantValue {
continue
}
}

series[metricFamily.GetName()] = metric
}
}
return series
}
34 changes: 33 additions & 1 deletionenterprise/coderd/prebuilds/reconcile_test.go
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -294,10 +294,15 @@ func TestPrebuildReconciliation(t *testing.T) {
templateDeleted: []bool{false},
},
{
name: "delete prebuilds for deleted templates",
// Templates can be soft-deleted (`deleted=true`) or hard-deleted (row is removed).
// On the former there is *no* DB constraint to prevent soft deletion, so we have to ensure that if somehow
// the template was soft-deleted any running prebuilds will be removed.
// On the latter there is a DB constraint to prevent row deletion if any workspaces reference the deleting template.
name: "soft-deleted templates MAY have prebuilds",
prebuildLatestTransitions: []database.WorkspaceTransition{database.WorkspaceTransitionStart},
prebuildJobStatuses: []database.ProvisionerJobStatus{database.ProvisionerJobStatusSucceeded},
templateVersionActive: []bool{true, false},
shouldCreateNewPrebuild: ptr.To(false),
shouldDeleteOldPrebuild: ptr.To(true),
templateDeleted: []bool{true},
},
Expand DownExpand Up@@ -1060,6 +1065,33 @@ func setupTestDBTemplate(
return org, template
}

// nolint:revive // It's a control flag, but this is a test.
func setupTestDBTemplateWithinOrg(
t *testing.T,
db database.Store,
userID uuid.UUID,
templateDeleted bool,
templateName string,
org database.Organization,
) database.Template {
t.Helper()

template := dbgen.Template(t, db, database.Template{
Name: templateName,
CreatedBy: userID,
OrganizationID: org.ID,
CreatedAt: time.Now().Add(muchEarlier),
})
if templateDeleted {
ctx := testutil.Context(t, testutil.WaitShort)
require.NoError(t, db.UpdateTemplateDeletedByID(ctx, database.UpdateTemplateDeletedByIDParams{
ID: template.ID,
Deleted: true,
}))
}
return template
}

const (
earlier = -time.Hour
muchEarlier = -time.Hour * 2
Expand Down
Loading

[8]ページ先頭

©2009-2025 Movatter.jp