@@ -31,9 +31,14 @@ const (
31
31
// PresetSnapshot is a filtered view of GlobalSnapshot focused on a single preset.
32
32
// It contains the raw data needed to calculate the current state of a preset's prebuilds,
33
33
// including running prebuilds, in-progress builds, and backoff information.
34
+ // - Running: prebuilds running and non-expired
35
+ // - Expired: prebuilds running and expired due to the preset's TTL
36
+ // - InProgress: prebuilds currently in progress
37
+ // - Backoff: holds failure info to decide if prebuild creation should be backed off
34
38
type PresetSnapshot struct {
35
39
Preset database.GetTemplatePresetsWithPrebuildsRow
36
40
Running []database.GetRunningPrebuiltWorkspacesRow
41
+ Expired []database.GetRunningPrebuiltWorkspacesRow
37
42
InProgress []database.CountInProgressPrebuildsRow
38
43
Backoff * database.GetPresetsBackoffRow
39
44
IsHardLimited bool
@@ -43,10 +48,11 @@ type PresetSnapshot struct {
43
48
// calculated from a PresetSnapshot. While PresetSnapshot contains raw data,
44
49
// ReconciliationState contains derived metrics that are directly used to
45
50
// determine what actions are needed (create, delete, or backoff).
46
- // For example, it calculates how many prebuilds are eligible, how many are
47
- // extraneous, and how many are in various transition states.
51
+ // For example, it calculates how many prebuilds are expired, how many are eligible,
52
+ // how many are extraneous, and how many are in various transition states.
48
53
type ReconciliationState struct {
49
- Actual int32 // Number of currently running prebuilds
54
+ Actual int32 // Number of currently valid running prebuilds, i.e., non-expired prebuilds
55
+ Expired int32 // Number of currently running prebuilds that exceeded their allowed time-to-live (TTL)
50
56
Desired int32 // Number of prebuilds desired as defined in the preset
51
57
Eligible int32 // Number of prebuilds that are ready to be claimed
52
58
Extraneous int32 // Number of extra running prebuilds beyond the desired count
@@ -78,7 +84,8 @@ func (ra *ReconciliationActions) IsNoop() bool {
78
84
}
79
85
80
86
// CalculateState computes the current state of prebuilds for a preset, including:
81
- // - Actual: Number of currently running prebuilds
87
+ // - Actual: Number of currently valid running prebuilds, i.e., non-expired prebuilds
88
+ // - Expired: Number of currently running expired prebuilds
82
89
// - Desired: Number of prebuilds desired as defined in the preset
83
90
// - Eligible: Number of prebuilds that are ready to be claimed
84
91
// - Extraneous: Number of extra running prebuilds beyond the desired count
@@ -92,13 +99,17 @@ func (p PresetSnapshot) CalculateState() *ReconciliationState {
92
99
var (
93
100
actual int32
94
101
desired int32
102
+ expired int32
95
103
eligible int32
96
104
extraneous int32
97
105
)
98
106
99
107
// #nosec G115 - Safe conversion as p.Running slice length is expected to be within int32 range
100
108
actual = int32 (len (p .Running ))
101
109
110
+ // #nosec G115 - Safe conversion as p.Expired slice length is expected to be within int32 range
111
+ expired = int32 (len (p .Expired ))
112
+
102
113
if p .isActive () {
103
114
desired = p .Preset .DesiredInstances .Int32
104
115
eligible = p .countEligible ()
@@ -109,6 +120,7 @@ func (p PresetSnapshot) CalculateState() *ReconciliationState {
109
120
110
121
return & ReconciliationState {
111
122
Actual :actual ,
123
+ Expired :expired ,
112
124
Desired :desired ,
113
125
Eligible :eligible ,
114
126
Extraneous :extraneous ,
@@ -125,15 +137,16 @@ func (p PresetSnapshot) CalculateState() *ReconciliationState {
125
137
// 2. If the preset is inactive (template version is not active), it will delete all running prebuilds
126
138
// 3. For active presets, it calculates the number of prebuilds to create or delete based on:
127
139
// - The desired number of instances
128
- // - Currently running prebuilds
140
+ // - Currently running non-expired prebuilds
141
+ // - Currently running expired prebuilds
129
142
// - Prebuilds in transition states (starting/stopping/deleting)
130
143
// - Any extraneous prebuilds that need to be removed
131
144
//
132
145
// The function returns a slice of ReconciliationActions, each of which has exactly one action type set:
133
146
// - ActionTypeBackoff: Only BackoffUntil is set, indicating when to retry
134
147
// - ActionTypeCreate: Only Create is set, indicating how many prebuilds to create
135
148
// - ActionTypeDelete: Only DeleteIDs is set, containing IDs of prebuilds to delete
136
- func (p PresetSnapshot )CalculateActions (clock quartz.Clock ,backoffInterval time.Duration ) (* ReconciliationActions ,error ) {
149
+ func (p PresetSnapshot )CalculateActions (clock quartz.Clock ,backoffInterval time.Duration ) ([] * ReconciliationActions ,error ) {
137
150
// TODO: align workspace states with how we represent them on the FE and the CLI
138
151
// right now there are some slight differences, which can lead to additional prebuilds being created
139
152
@@ -158,45 +171,74 @@ func (p PresetSnapshot) isActive() bool {
158
171
return p .Preset .UsingActiveVersion && ! p .Preset .Deleted && ! p .Preset .Deprecated
159
172
}
160
173
161
- // handleActiveTemplateVersion deletes excess prebuilds if there are too many,
162
- // otherwise creates new ones to reach the desired count.
163
- func (p PresetSnapshot )handleActiveTemplateVersion () (* ReconciliationActions ,error ) {
174
+ // handleActiveTemplateVersion determines the reconciliation actions for a preset with an active template version.
175
+ // It ensures the system moves towards the desired number of healthy prebuilds.
176
+ //
177
+ // The reconciliation follows this order:
178
+ // 1. Delete expired prebuilds: These are no longer valid and must be removed first.
179
+ // 2. Delete extraneous prebuilds: After expired ones are removed, if the number of running prebuilds (excluding expired)
180
+ // still exceeds the desired count, the oldest prebuilds are deleted to reduce excess.
181
+ // 3. Create missing prebuilds: If the number of non-expired, non-starting prebuilds is still below the desired count,
182
+ // create the necessary number of prebuilds to reach the target.
183
+ //
184
+ // The function returns a list of actions to be executed to achieve the desired state.
185
+ func (p PresetSnapshot )handleActiveTemplateVersion () (actions []* ReconciliationActions ,err error ) {
164
186
state := p .CalculateState ()
165
187
166
- // If we have more prebuilds than desired, delete the oldest ones
188
+ // If we have expired prebuilds, delete them
189
+ if state .Expired > 0 {
190
+ var deleteIDs []uuid.UUID
191
+ for _ ,expired := range p .Expired {
192
+ deleteIDs = append (deleteIDs ,expired .ID )
193
+ }
194
+ actions = append (actions ,
195
+ & ReconciliationActions {
196
+ ActionType :ActionTypeDelete ,
197
+ DeleteIDs :deleteIDs ,
198
+ })
199
+ }
200
+
201
+ // If we still have more prebuilds than desired, delete the oldest ones
167
202
if state .Extraneous > 0 {
168
- return & ReconciliationActions {
169
- ActionType :ActionTypeDelete ,
170
- DeleteIDs :p .getOldestPrebuildIDs (int (state .Extraneous )),
171
- },nil
203
+ actions = append (actions ,
204
+ & ReconciliationActions {
205
+ ActionType :ActionTypeDelete ,
206
+ DeleteIDs :p .getOldestPrebuildIDs (int (state .Extraneous )),
207
+ })
172
208
}
173
209
174
210
// Calculate how many new prebuilds we need to create
175
211
// We subtract starting prebuilds since they're already being created
176
212
prebuildsToCreate := max (state .Desired - state .Actual - state .Starting ,0 )
213
+ if prebuildsToCreate > 0 {
214
+ actions = append (actions ,
215
+ & ReconciliationActions {
216
+ ActionType :ActionTypeCreate ,
217
+ Create :prebuildsToCreate ,
218
+ })
219
+ }
177
220
178
- return & ReconciliationActions {
179
- ActionType :ActionTypeCreate ,
180
- Create :prebuildsToCreate ,
181
- },nil
221
+ return actions ,nil
182
222
}
183
223
184
224
// handleInactiveTemplateVersion deletes all running prebuilds except those already being deleted
185
225
// to avoid duplicate deletion attempts.
186
- func (p PresetSnapshot )handleInactiveTemplateVersion () (* ReconciliationActions ,error ) {
226
+ func (p PresetSnapshot )handleInactiveTemplateVersion () ([] * ReconciliationActions ,error ) {
187
227
prebuildsToDelete := len (p .Running )
188
228
deleteIDs := p .getOldestPrebuildIDs (prebuildsToDelete )
189
229
190
- return & ReconciliationActions {
191
- ActionType :ActionTypeDelete ,
192
- DeleteIDs :deleteIDs ,
230
+ return []* ReconciliationActions {
231
+ {
232
+ ActionType :ActionTypeDelete ,
233
+ DeleteIDs :deleteIDs ,
234
+ },
193
235
},nil
194
236
}
195
237
196
238
// needsBackoffPeriod checks if we should delay prebuild creation due to recent failures.
197
239
// If there were failures, it calculates a backoff period based on the number of failures
198
240
// and returns true if we're still within that period.
199
- func (p PresetSnapshot )needsBackoffPeriod (clock quartz.Clock ,backoffInterval time.Duration ) (* ReconciliationActions ,bool ) {
241
+ func (p PresetSnapshot )needsBackoffPeriod (clock quartz.Clock ,backoffInterval time.Duration ) ([] * ReconciliationActions ,bool ) {
200
242
if p .Backoff == nil || p .Backoff .NumFailed == 0 {
201
243
return nil ,false
202
244
}
@@ -205,9 +247,11 @@ func (p PresetSnapshot) needsBackoffPeriod(clock quartz.Clock, backoffInterval t
205
247
return nil ,false
206
248
}
207
249
208
- return & ReconciliationActions {
209
- ActionType :ActionTypeBackoff ,
210
- BackoffUntil :backoffUntil ,
250
+ return []* ReconciliationActions {
251
+ {
252
+ ActionType :ActionTypeBackoff ,
253
+ BackoffUntil :backoffUntil ,
254
+ },
211
255
},true
212
256
}
213
257