Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit53e8e9c

Browse files
fix: reduce cost of prebuild failure (#17697)
Relates to#17432### Part 1:Notes:- `GetPresetsAtFailureLimit` SQL query is added, which is similar to`GetPresetsBackoff`, they use same CTEs: `filtered_builds`,`time_sorted_builds`, but they are still different.- Query is executed on every loop iteration. We can consider markingspecific preset as permanently failed as an optimization to avoidexecuting query on every loop iteration. But I decided don't do it fornow.- By default `FailureHardLimit` is set to 3.- `FailureHardLimit` is configurable. Setting it to zero - means thathard limit is disabled.### Part 2Notes:- `PrebuildFailureLimitReached` notification is added.- Notification is sent to template admins.- Notification is sent only the first time, when hard limit is reached.But it will `log.Warn` on every loop iteration.- I introduced this enum:```sqlCREATE TYPE prebuild_status AS ENUM ( 'normal', -- Prebuilds are working as expected; this is the default, healthy state. 'hard_limited', -- Prebuilds have failed repeatedly and hit the configured hard failure limit; won't be retried anymore. 'validation_failed' -- Prebuilds failed due to a non-retryable validation error (e.g. template misconfiguration); won't be retried.);````validation_failed` not used in this PR, but I think it will be used innext one, so I wanted to save us an extra migration.- Notification looks like this:<img width="472" alt="image"src="https://github.com/user-attachments/assets/e10efea0-1790-4e7f-a65c-f94c40fced27"/>### Latest notification views:<img width="463" alt="image"src="https://github.com/user-attachments/assets/11310c58-68d1-4075-a497-f76d854633fe"/><img width="725" alt="image"src="https://github.com/user-attachments/assets/6bbfe21a-91ac-47c3-a9d1-21807bb0c53a"/>
1 parente1934fe commit53e8e9c

File tree

32 files changed

+1160
-60
lines changed

32 files changed

+1160
-60
lines changed

‎cli/testdata/server-config.yaml.golden

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -704,3 +704,7 @@ workspace_prebuilds:
704704
# backoff.
705705
# (default: 1h0m0s, type: duration)
706706
reconciliation_backoff_lookback_period: 1h0m0s
707+
# Maximum number of consecutive failed prebuilds before a preset hits the hard
708+
# limit; disabled when set to zero.
709+
# (default: 3, type: int)
710+
failure_hard_limit: 3

‎coderd/apidoc/docs.go

Lines changed: 4 additions & 0 deletions
Some generated files are not rendered by default. Learn more aboutcustomizing how changed files appear on GitHub.

‎coderd/apidoc/swagger.json

Lines changed: 4 additions & 0 deletions
Some generated files are not rendered by default. Learn more aboutcustomizing how changed files appear on GitHub.

‎coderd/database/dbauthz/dbauthz.go

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2226,6 +2226,15 @@ func (q *querier) GetPresetParametersByTemplateVersionID(ctx context.Context, ar
22262226
returnq.db.GetPresetParametersByTemplateVersionID(ctx,args)
22272227
}
22282228

2229+
func (q*querier)GetPresetsAtFailureLimit(ctx context.Context,hardLimitint64) ([]database.GetPresetsAtFailureLimitRow,error) {
2230+
// GetPresetsAtFailureLimit returns a list of template version presets that have reached the hard failure limit.
2231+
// Request the same authorization permissions as GetPresetsBackoff, since the methods are similar.
2232+
iferr:=q.authorizeContext(ctx,policy.ActionViewInsights,rbac.ResourceTemplate.All());err!=nil {
2233+
returnnil,err
2234+
}
2235+
returnq.db.GetPresetsAtFailureLimit(ctx,hardLimit)
2236+
}
2237+
22292238
func (q*querier)GetPresetsBackoff(ctx context.Context,lookback time.Time) ([]database.GetPresetsBackoffRow,error) {
22302239
// GetPresetsBackoff returns a list of template version presets along with metadata such as the number of failed prebuilds.
22312240
iferr:=q.authorizeContext(ctx,policy.ActionViewInsights,rbac.ResourceTemplate.All());err!=nil {
@@ -4201,6 +4210,24 @@ func (q *querier) UpdateOrganizationDeletedByID(ctx context.Context, arg databas
42014210
returndeleteQ(q.log,q.auth,q.db.GetOrganizationByID,deleteF)(ctx,arg.ID)
42024211
}
42034212

4213+
func (q*querier)UpdatePresetPrebuildStatus(ctx context.Context,arg database.UpdatePresetPrebuildStatusParams)error {
4214+
preset,err:=q.db.GetPresetByID(ctx,arg.PresetID)
4215+
iferr!=nil {
4216+
returnerr
4217+
}
4218+
4219+
object:=rbac.ResourceTemplate.
4220+
WithID(preset.TemplateID.UUID).
4221+
InOrg(preset.OrganizationID)
4222+
4223+
err=q.authorizeContext(ctx,policy.ActionUpdate,object)
4224+
iferr!=nil {
4225+
returnerr
4226+
}
4227+
4228+
returnq.db.UpdatePresetPrebuildStatus(ctx,arg)
4229+
}
4230+
42044231
func (q*querier)UpdateProvisionerDaemonLastSeenAt(ctx context.Context,arg database.UpdateProvisionerDaemonLastSeenAtParams)error {
42054232
iferr:=q.authorizeContext(ctx,policy.ActionUpdate,rbac.ResourceProvisionerDaemon);err!=nil {
42064233
returnerr

‎coderd/database/dbauthz/dbauthz_test.go

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4924,6 +4924,11 @@ func (s *MethodTestSuite) TestPrebuilds() {
49244924
Asserts(rbac.ResourceWorkspace.All(),policy.ActionRead).
49254925
ErrorsWithInMemDB(dbmem.ErrUnimplemented)
49264926
}))
4927+
s.Run("GetPresetsAtFailureLimit",s.Subtest(func(_ database.Store,check*expects) {
4928+
check.Args(int64(0)).
4929+
Asserts(rbac.ResourceTemplate.All(),policy.ActionViewInsights).
4930+
ErrorsWithInMemDB(dbmem.ErrUnimplemented)
4931+
}))
49274932
s.Run("GetPresetsBackoff",s.Subtest(func(_ database.Store,check*expects) {
49284933
check.Args(time.Time{}).
49294934
Asserts(rbac.ResourceTemplate.All(),policy.ActionViewInsights).
@@ -4971,8 +4976,34 @@ func (s *MethodTestSuite) TestPrebuilds() {
49714976
},
49724977
InvalidateAfterSecs:preset.InvalidateAfterSecs,
49734978
OrganizationID:org.ID,
4979+
PrebuildStatus:database.PrebuildStatusHealthy,
49744980
})
49754981
}))
4982+
s.Run("UpdatePresetPrebuildStatus",s.Subtest(func(db database.Store,check*expects) {
4983+
org:=dbgen.Organization(s.T(),db, database.Organization{})
4984+
user:=dbgen.User(s.T(),db, database.User{})
4985+
template:=dbgen.Template(s.T(),db, database.Template{
4986+
OrganizationID:org.ID,
4987+
CreatedBy:user.ID,
4988+
})
4989+
templateVersion:=dbgen.TemplateVersion(s.T(),db, database.TemplateVersion{
4990+
TemplateID: uuid.NullUUID{
4991+
UUID:template.ID,
4992+
Valid:true,
4993+
},
4994+
OrganizationID:org.ID,
4995+
CreatedBy:user.ID,
4996+
})
4997+
preset:=dbgen.Preset(s.T(),db, database.InsertPresetParams{
4998+
TemplateVersionID:templateVersion.ID,
4999+
})
5000+
req:= database.UpdatePresetPrebuildStatusParams{
5001+
PresetID:preset.ID,
5002+
Status:database.PrebuildStatusHealthy,
5003+
}
5004+
check.Args(req).
5005+
Asserts(rbac.ResourceTemplate.WithID(template.ID).InOrg(org.ID),policy.ActionUpdate)
5006+
}))
49765007
}
49775008

49785009
func (s*MethodTestSuite)TestOAuth2ProviderApps() {

‎coderd/database/dbmem/dbmem.go

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4287,6 +4287,7 @@ func (q *FakeQuerier) GetPresetByID(ctx context.Context, presetID uuid.UUID) (da
42874287
CreatedAt:preset.CreatedAt,
42884288
DesiredInstances:preset.DesiredInstances,
42894289
InvalidateAfterSecs:preset.InvalidateAfterSecs,
4290+
PrebuildStatus:preset.PrebuildStatus,
42904291
TemplateID:tv.TemplateID,
42914292
OrganizationID:tv.OrganizationID,
42924293
},nil
@@ -4352,6 +4353,10 @@ func (q *FakeQuerier) GetPresetParametersByTemplateVersionID(_ context.Context,
43524353
returnparameters,nil
43534354
}
43544355

4356+
func (q*FakeQuerier)GetPresetsAtFailureLimit(ctx context.Context,hardLimitint64) ([]database.GetPresetsAtFailureLimitRow,error) {
4357+
returnnil,ErrUnimplemented
4358+
}
4359+
43554360
func (*FakeQuerier)GetPresetsBackoff(_ context.Context,_ time.Time) ([]database.GetPresetsBackoffRow,error) {
43564361
returnnil,ErrUnimplemented
43574362
}
@@ -9089,6 +9094,7 @@ func (q *FakeQuerier) InsertPreset(_ context.Context, arg database.InsertPresetP
90899094
Int32:0,
90909095
Valid:true,
90919096
},
9097+
PrebuildStatus:database.PrebuildStatusHealthy,
90929098
}
90939099
q.presets=append(q.presets,preset)
90949100
returnpreset,nil
@@ -10917,6 +10923,25 @@ func (q *FakeQuerier) UpdateOrganizationDeletedByID(_ context.Context, arg datab
1091710923
returnsql.ErrNoRows
1091810924
}
1091910925

10926+
func (q*FakeQuerier)UpdatePresetPrebuildStatus(ctx context.Context,arg database.UpdatePresetPrebuildStatusParams)error {
10927+
err:=validateDatabaseType(arg)
10928+
iferr!=nil {
10929+
returnerr
10930+
}
10931+
10932+
q.mutex.RLock()
10933+
deferq.mutex.RUnlock()
10934+
10935+
for_,preset:=rangeq.presets {
10936+
ifpreset.ID==arg.PresetID {
10937+
preset.PrebuildStatus=arg.Status
10938+
returnnil
10939+
}
10940+
}
10941+
10942+
returnxerrors.Errorf("preset %v does not exist",arg.PresetID)
10943+
}
10944+
1092010945
func (q*FakeQuerier)UpdateProvisionerDaemonLastSeenAt(_ context.Context,arg database.UpdateProvisionerDaemonLastSeenAtParams)error {
1092110946
err:=validateDatabaseType(arg)
1092210947
iferr!=nil {

‎coderd/database/dbmetrics/querymetrics.go

Lines changed: 14 additions & 0 deletions
Some generated files are not rendered by default. Learn more aboutcustomizing how changed files appear on GitHub.

‎coderd/database/dbmock/dbmock.go

Lines changed: 29 additions & 0 deletions
Some generated files are not rendered by default. Learn more aboutcustomizing how changed files appear on GitHub.

‎coderd/database/dump.sql

Lines changed: 8 additions & 1 deletion
Some generated files are not rendered by default. Learn more aboutcustomizing how changed files appear on GitHub.
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
DELETEFROM notification_templatesWHERE id='414d9331-c1fc-4761-b40c-d1f4702279eb';
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
INSERT INTO notification_templates
2+
(id, name, title_template, body_template,"group", actions)
3+
VALUES ('414d9331-c1fc-4761-b40c-d1f4702279eb',
4+
'Prebuild Failure Limit Reached',
5+
E'There is a problem creating prebuilt workspaces',
6+
$$
7+
Thenumber of failed prebuild attempts has reached the hardlimit for template**{{ .Labels.template }}**and preset**{{ .Labels.preset }}**.
8+
9+
To resume prebuilds, fix the underlying issueand upload a new template version.
10+
11+
Refer to the documentation for more details:
12+
- [Troubleshooting templates](https://coder.com/docs/admin/templates/troubleshooting)
13+
- [Troubleshooting of prebuilt workspaces](https://coder.com/docs/admin/templates/extending-templates/prebuilt-workspaces#administration-and-troubleshooting)
14+
$$,
15+
'Template Events',
16+
'[
17+
{
18+
"label": "View failed prebuilt workspaces",
19+
"url": "{{base_url}}/workspaces?filter=owner:prebuilds+status:failed+template:{{.Labels.template}}"
20+
},
21+
{
22+
"label": "View template version",
23+
"url": "{{base_url}}/templates/{{.Labels.org}}/{{.Labels.template}}/versions/{{.Labels.template_version}}"
24+
}
25+
]'::jsonb);
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
-- Remove the column from the table first (must happen before dropping the enum type)
2+
ALTERTABLE template_version_presets DROP COLUMN prebuild_status;
3+
4+
-- Then drop the enum type
5+
DROPTYPE prebuild_status;
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
CREATETYPEprebuild_statusAS ENUM (
2+
'healthy',-- Prebuilds are working as expected; this is the default, healthy state.
3+
'hard_limited',-- Prebuilds have failed repeatedly and hit the configured hard failure limit; won't be retried anymore.
4+
'validation_failed'-- Prebuilds failed due to a non-retryable validation error (e.g. template misconfiguration); won't be retried.
5+
);
6+
7+
ALTERTABLE template_version_presets ADD COLUMN prebuild_status prebuild_statusNOT NULL DEFAULT'healthy'::prebuild_status;

‎coderd/database/models.go

Lines changed: 68 additions & 6 deletions
Some generated files are not rendered by default. Learn more aboutcustomizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp