- Notifications
You must be signed in to change notification settings - Fork906
fix: exclude deleted templates from metrics collection#17839
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to ourterms of service andprivacy statement. We’ll occasionally send you account related emails.
Already on GitHub?Sign in to your account
Uh oh!
There was an error while loading.Please reload this page.
Changes fromall commits
fa348d0
c062274
d28fb15
9ba369d
e42a083
d320cbc
4c1c469
9e8bf7e
b539ddc
9cc5049
File filter
Filter by extension
Conversations
Uh oh!
There was an error while loading.Please reload this page.
Jump to
Uh oh!
There was an error while loading.Please reload this page.
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more abouthow customized files appear on GitHub.
Uh oh!
There was an error while loading.Please reload this page.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -157,6 +157,10 @@ func (mc *MetricsCollector) Collect(metricsCh chan<- prometheus.Metric) { | ||
continue | ||
} | ||
if preset.Deleted { | ||
SasSwart marked this conversation as resolved. Show resolvedHide resolvedUh oh!There was an error while loading.Please reload this page. | ||
continue | ||
} | ||
presetSnapshot, err := currentState.snapshot.FilterByPreset(preset.ID) | ||
if err != nil { | ||
mc.logger.Error(context.Background(), "failed to filter by preset", slog.Error(err)) | ||
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -19,6 +19,7 @@ import ( | ||
"github.com/coder/coder/v2/coderd/database/dbauthz" | ||
"github.com/coder/coder/v2/coderd/database/dbgen" | ||
"github.com/coder/coder/v2/coderd/database/dbtestutil" | ||
"github.com/coder/coder/v2/coderd/database/dbtime" | ||
agplprebuilds "github.com/coder/coder/v2/coderd/prebuilds" | ||
"github.com/coder/coder/v2/codersdk" | ||
"github.com/coder/coder/v2/enterprise/coderd/prebuilds" | ||
@@ -165,27 +166,12 @@ func TestMetricsCollector(t *testing.T) { | ||
eligible: []bool{false}, | ||
}, | ||
{ | ||
dannykopping marked this conversation as resolved. Show resolvedHide resolvedUh oh!There was an error while loading.Please reload this page. | ||
name: "deleted templates should not be included in exported metrics", | ||
transitions: allTransitions, | ||
jobStatuses: allJobStatuses, | ||
initiatorIDs: []uuid.UUID{agplprebuilds.SystemUserID}, | ||
ownerIDs: []uuid.UUID{agplprebuilds.SystemUserID, uuid.New()}, | ||
metrics: nil, | ||
templateDeleted: []bool{true}, | ||
eligible: []bool{false}, | ||
}, | ||
@@ -281,6 +267,12 @@ func TestMetricsCollector(t *testing.T) { | ||
"organization_name": org.Name, | ||
} | ||
// If no expected metrics have been defined, ensure we don't find any metric series (i.e. metrics with given labels). | ||
if test.metrics == nil { | ||
series := findAllMetricSeries(metricsFamilies, labels) | ||
require.Empty(t, series) | ||
} | ||
for _, check := range test.metrics { | ||
metric := findMetric(metricsFamilies, check.name, labels) | ||
if check.value == nil { | ||
@@ -307,6 +299,131 @@ func TestMetricsCollector(t *testing.T) { | ||
} | ||
} | ||
// TestMetricsCollector_DuplicateTemplateNames validates a bug that we saw previously which caused duplicate metric series | ||
// registration when a template was deleted and a new one created with the same name (and preset name). | ||
SasSwart marked this conversation as resolved. Show resolvedHide resolvedUh oh!There was an error while loading.Please reload this page. | ||
// We are now excluding deleted templates from our metric collection. | ||
func TestMetricsCollector_DuplicateTemplateNames(t *testing.T) { | ||
t.Parallel() | ||
if !dbtestutil.WillUsePostgres() { | ||
t.Skip("this test requires postgres") | ||
dannykopping marked this conversation as resolved. Show resolvedHide resolvedUh oh!There was an error while loading.Please reload this page. | ||
} | ||
type metricCheck struct { | ||
name string | ||
value *float64 | ||
isCounter bool | ||
} | ||
type testCase struct { | ||
transition database.WorkspaceTransition | ||
jobStatus database.ProvisionerJobStatus | ||
initiatorID uuid.UUID | ||
ownerID uuid.UUID | ||
metrics []metricCheck | ||
eligible bool | ||
} | ||
test := testCase{ | ||
transition: database.WorkspaceTransitionStart, | ||
jobStatus: database.ProvisionerJobStatusSucceeded, | ||
initiatorID: agplprebuilds.SystemUserID, | ||
ownerID: agplprebuilds.SystemUserID, | ||
metrics: []metricCheck{ | ||
{prebuilds.MetricCreatedCount, ptr.To(1.0), true}, | ||
{prebuilds.MetricClaimedCount, ptr.To(0.0), true}, | ||
{prebuilds.MetricFailedCount, ptr.To(0.0), true}, | ||
{prebuilds.MetricDesiredGauge, ptr.To(1.0), false}, | ||
{prebuilds.MetricRunningGauge, ptr.To(1.0), false}, | ||
{prebuilds.MetricEligibleGauge, ptr.To(1.0), false}, | ||
}, | ||
eligible: true, | ||
} | ||
logger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: true}) | ||
clock := quartz.NewMock(t) | ||
db, pubsub := dbtestutil.NewDB(t) | ||
reconciler := prebuilds.NewStoreReconciler(db, pubsub, codersdk.PrebuildsConfig{}, logger, quartz.NewMock(t), prometheus.NewRegistry(), newNoopEnqueuer()) | ||
ctx := testutil.Context(t, testutil.WaitLong) | ||
collector := prebuilds.NewMetricsCollector(db, logger, reconciler) | ||
registry := prometheus.NewPedanticRegistry() | ||
registry.Register(collector) | ||
presetName := "default-preset" | ||
defaultOrg := dbgen.Organization(t, db, database.Organization{}) | ||
setupTemplateWithDeps := func() database.Template { | ||
template := setupTestDBTemplateWithinOrg(t, db, test.ownerID, false, "default-template", defaultOrg) | ||
templateVersionID := setupTestDBTemplateVersion(ctx, t, clock, db, pubsub, defaultOrg.ID, test.ownerID, template.ID) | ||
preset := setupTestDBPreset(t, db, templateVersionID, 1, "default-preset") | ||
workspace, _ := setupTestDBWorkspace( | ||
t, clock, db, pubsub, | ||
test.transition, test.jobStatus, defaultOrg.ID, preset, template.ID, templateVersionID, test.initiatorID, test.ownerID, | ||
) | ||
setupTestDBWorkspaceAgent(t, db, workspace.ID, test.eligible) | ||
return template | ||
} | ||
// When: starting with a regular template. | ||
template := setupTemplateWithDeps() | ||
labels := map[string]string{ | ||
"template_name": template.Name, | ||
"preset_name": presetName, | ||
"organization_name": defaultOrg.Name, | ||
} | ||
// nolint:gocritic // Authz context needed to retrieve state. | ||
ctx = dbauthz.AsPrebuildsOrchestrator(ctx) | ||
// Then: metrics collect successfully. | ||
require.NoError(t, collector.UpdateState(ctx, testutil.WaitLong)) | ||
metricsFamilies, err := registry.Gather() | ||
require.NoError(t, err) | ||
require.NotEmpty(t, findAllMetricSeries(metricsFamilies, labels)) | ||
// When: the template is deleted. | ||
require.NoError(t, db.UpdateTemplateDeletedByID(ctx, database.UpdateTemplateDeletedByIDParams{ | ||
ID: template.ID, | ||
Deleted: true, | ||
UpdatedAt: dbtime.Now(), | ||
})) | ||
// Then: metrics collect successfully but are empty because the template is deleted. | ||
SasSwart marked this conversation as resolved. Show resolvedHide resolvedUh oh!There was an error while loading.Please reload this page. | ||
require.NoError(t, collector.UpdateState(ctx, testutil.WaitLong)) | ||
metricsFamilies, err = registry.Gather() | ||
require.NoError(t, err) | ||
require.Empty(t, findAllMetricSeries(metricsFamilies, labels)) | ||
// When: a new template is created with the same name as the deleted template. | ||
newTemplate := setupTemplateWithDeps() | ||
// Ensure the database has both the new and old (delete) template. | ||
{ | ||
deleted, err := db.GetTemplateByOrganizationAndName(ctx, database.GetTemplateByOrganizationAndNameParams{ | ||
OrganizationID: template.OrganizationID, | ||
Deleted: true, | ||
Name: template.Name, | ||
}) | ||
require.NoError(t, err) | ||
require.Equal(t, template.ID, deleted.ID) | ||
current, err := db.GetTemplateByOrganizationAndName(ctx, database.GetTemplateByOrganizationAndNameParams{ | ||
// Use details from deleted template to ensure they're aligned. | ||
OrganizationID: template.OrganizationID, | ||
Deleted: false, | ||
Name: template.Name, | ||
}) | ||
require.NoError(t, err) | ||
require.Equal(t, newTemplate.ID, current.ID) | ||
} | ||
// Then: metrics collect successfully. | ||
require.NoError(t, collector.UpdateState(ctx, testutil.WaitLong)) | ||
metricsFamilies, err = registry.Gather() | ||
require.NoError(t, err) | ||
require.NotEmpty(t, findAllMetricSeries(metricsFamilies, labels)) | ||
} | ||
func findMetric(metricsFamilies []*prometheus_client.MetricFamily, name string, labels map[string]string) *prometheus_client.Metric { | ||
for _, metricFamily := range metricsFamilies { | ||
if metricFamily.GetName() != name { | ||
@@ -334,3 +451,33 @@ func findMetric(metricsFamilies []*prometheus_client.MetricFamily, name string, | ||
} | ||
return nil | ||
} | ||
// findAllMetricSeries finds all metrics with a given set of labels. | ||
func findAllMetricSeries(metricsFamilies []*prometheus_client.MetricFamily, labels map[string]string) map[string]*prometheus_client.Metric { | ||
series := make(map[string]*prometheus_client.Metric) | ||
for _, metricFamily := range metricsFamilies { | ||
for _, metric := range metricFamily.GetMetric() { | ||
labelPairs := metric.GetLabel() | ||
if len(labelPairs) != len(labels) { | ||
continue | ||
} | ||
// Convert label pairs to map for easier lookup | ||
metricLabels := make(map[string]string, len(labelPairs)) | ||
for _, label := range labelPairs { | ||
metricLabels[label.GetName()] = label.GetValue() | ||
} | ||
// Check if all requested labels match | ||
for wantName, wantValue := range labels { | ||
SasSwart marked this conversation as resolved. Show resolvedHide resolvedUh oh!There was an error while loading.Please reload this page. | ||
if metricLabels[wantName] != wantValue { | ||
continue | ||
} | ||
} | ||
series[metricFamily.GetName()] = metric | ||
} | ||
} | ||
return series | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -294,10 +294,15 @@ func TestPrebuildReconciliation(t *testing.T) { | ||
templateDeleted: []bool{false}, | ||
}, | ||
{ | ||
// Templates can be soft-deleted (`deleted=true`) or hard-deleted (row is removed). | ||
// On the former there is *no* DB constraint to prevent soft deletion, so we have to ensure that if somehow | ||
// the template was soft-deleted any running prebuilds will be removed. | ||
// On the latter there is a DB constraint to prevent row deletion if any workspaces reference the deleting template. | ||
name: "soft-deleted templates MAY have prebuilds", | ||
dannykopping marked this conversation as resolved. Show resolvedHide resolvedUh oh!There was an error while loading.Please reload this page. | ||
prebuildLatestTransitions: []database.WorkspaceTransition{database.WorkspaceTransitionStart}, | ||
prebuildJobStatuses: []database.ProvisionerJobStatus{database.ProvisionerJobStatusSucceeded}, | ||
templateVersionActive: []bool{true, false}, | ||
shouldCreateNewPrebuild: ptr.To(false), | ||
shouldDeleteOldPrebuild: ptr.To(true), | ||
templateDeleted: []bool{true}, | ||
}, | ||
@@ -1060,6 +1065,33 @@ func setupTestDBTemplate( | ||
return org, template | ||
} | ||
// nolint:revive // It's a control flag, but this is a test. | ||
func setupTestDBTemplateWithinOrg( | ||
t *testing.T, | ||
db database.Store, | ||
userID uuid.UUID, | ||
templateDeleted bool, | ||
templateName string, | ||
org database.Organization, | ||
) database.Template { | ||
t.Helper() | ||
template := dbgen.Template(t, db, database.Template{ | ||
Name: templateName, | ||
CreatedBy: userID, | ||
OrganizationID: org.ID, | ||
CreatedAt: time.Now().Add(muchEarlier), | ||
}) | ||
if templateDeleted { | ||
ctx := testutil.Context(t, testutil.WaitShort) | ||
require.NoError(t, db.UpdateTemplateDeletedByID(ctx, database.UpdateTemplateDeletedByIDParams{ | ||
ID: template.ID, | ||
Deleted: true, | ||
})) | ||
} | ||
return template | ||
} | ||
const ( | ||
earlier = -time.Hour | ||
muchEarlier = -time.Hour * 2 | ||
Uh oh!
There was an error while loading.Please reload this page.