@@ -31,9 +31,14 @@ const (
3131// PresetSnapshot is a filtered view of GlobalSnapshot focused on a single preset.
3232// It contains the raw data needed to calculate the current state of a preset's prebuilds,
3333// including running prebuilds, in-progress builds, and backoff information.
34+ // - Running: prebuilds running and non-expired
35+ // - Expired: prebuilds running and expired due to the preset's TTL
36+ // - InProgress: prebuilds currently in progress
37+ // - Backoff: holds failure info to decide if prebuild creation should be backed off
3438type PresetSnapshot struct {
3539Preset database.GetTemplatePresetsWithPrebuildsRow
3640Running []database.GetRunningPrebuiltWorkspacesRow
41+ Expired []database.GetRunningPrebuiltWorkspacesRow
3742InProgress []database.CountInProgressPrebuildsRow
3843Backoff * database.GetPresetsBackoffRow
3944IsHardLimited bool
@@ -43,10 +48,11 @@ type PresetSnapshot struct {
4348// calculated from a PresetSnapshot. While PresetSnapshot contains raw data,
4449// ReconciliationState contains derived metrics that are directly used to
4550// determine what actions are needed (create, delete, or backoff).
46- // For example, it calculates how many prebuilds areeligible, how many are
47- // extraneous, and how many are in various transition states.
51+ // For example, it calculates how many prebuilds are expired, how many are eligible,
52+ // how many are extraneous, and how many are in various transition states.
4853type ReconciliationState struct {
49- Actual int32 // Number of currently running prebuilds
54+ Actual int32 // Number of currently running prebuilds, i.e., non-expired, expired and extraneous prebuilds
55+ Expired int32 // Number of currently running prebuilds that exceeded their allowed time-to-live (TTL)
5056Desired int32 // Number of prebuilds desired as defined in the preset
5157Eligible int32 // Number of prebuilds that are ready to be claimed
5258Extraneous int32 // Number of extra running prebuilds beyond the desired count
@@ -78,7 +84,8 @@ func (ra *ReconciliationActions) IsNoop() bool {
7884}
7985
8086// CalculateState computes the current state of prebuilds for a preset, including:
81- // - Actual: Number of currently running prebuilds
87+ // - Actual: Number of currently running prebuilds, i.e., non-expired and expired prebuilds
88+ // - Expired: Number of currently running expired prebuilds
8289// - Desired: Number of prebuilds desired as defined in the preset
8390// - Eligible: Number of prebuilds that are ready to be claimed
8491// - Extraneous: Number of extra running prebuilds beyond the desired count
@@ -92,23 +99,28 @@ func (p PresetSnapshot) CalculateState() *ReconciliationState {
9299var (
93100actual int32
94101desired int32
102+ expired int32
95103eligible int32
96104extraneous int32
97105)
98106
99- // #nosec G115 - Safe conversion as p.Running slice length is expected to be within int32 range
100- actual = int32 (len (p .Running ))
107+ // #nosec G115 - Safe conversion as p.Running and p.Expired slice length is expected to be within int32 range
108+ actual = int32 (len (p .Running )+ len (p .Expired ))
109+
110+ // #nosec G115 - Safe conversion as p.Expired slice length is expected to be within int32 range
111+ expired = int32 (len (p .Expired ))
101112
102113if p .isActive () {
103114desired = p .Preset .DesiredInstances .Int32
104115eligible = p .countEligible ()
105- extraneous = max (actual - desired ,0 )
116+ extraneous = max (actual - expired - desired ,0 )
106117}
107118
108119starting ,stopping ,deleting := p .countInProgress ()
109120
110121return & ReconciliationState {
111122Actual :actual ,
123+ Expired :expired ,
112124Desired :desired ,
113125Eligible :eligible ,
114126Extraneous :extraneous ,
@@ -126,14 +138,15 @@ func (p PresetSnapshot) CalculateState() *ReconciliationState {
126138// 3. For active presets, it calculates the number of prebuilds to create or delete based on:
127139// - The desired number of instances
128140// - Currently running prebuilds
141+ // - Currently running expired prebuilds
129142// - Prebuilds in transition states (starting/stopping/deleting)
130143// - Any extraneous prebuilds that need to be removed
131144//
132145// The function returns a list of ReconciliationActions, each of which has exactly one action type set:
133146// - ActionTypeBackoff: Only BackoffUntil is set, indicating when to retry
134147// - ActionTypeCreate: Only Create is set, indicating how many prebuilds to create
135148// - ActionTypeDelete: Only DeleteIDs is set, containing IDs of prebuilds to delete
136- func (p PresetSnapshot )CalculateActions (clock quartz.Clock ,backoffInterval time.Duration ) (* ReconciliationActions ,error ) {
149+ func (p PresetSnapshot )CalculateActions (clock quartz.Clock ,backoffInterval time.Duration ) ([] * ReconciliationActions ,error ) {
137150// TODO: align workspace states with how we represent them on the FE and the CLI
138151// right now there's some slight differences which can lead to additional prebuilds being created
139152
@@ -158,45 +171,77 @@ func (p PresetSnapshot) isActive() bool {
158171return p .Preset .UsingActiveVersion && ! p .Preset .Deleted && ! p .Preset .Deprecated
159172}
160173
161- // handleActiveTemplateVersion deletes excess prebuilds if there are too many,
162- // otherwise creates new ones to reach the desired count.
163- func (p PresetSnapshot )handleActiveTemplateVersion () (* ReconciliationActions ,error ) {
174+ // handleActiveTemplateVersion determines the reconciliation actions for a preset with an active template version.
175+ // It ensures the system moves towards the desired number of healthy prebuilds.
176+ //
177+ // The reconciliation follows this order:
178+ // 1. Delete expired prebuilds: These are no longer valid and must be removed first.
179+ // 2. Delete extraneous prebuilds: After expired ones are removed, if the number of running non-expired prebuilds
180+ // still exceeds the desired count, the oldest prebuilds are deleted to reduce excess.
181+ // 3. Create missing prebuilds: If the number of non-expired, non-starting prebuilds is still below the desired count,
182+ // create the necessary number of prebuilds to reach the target.
183+ //
184+ // The function returns a list of actions to be executed to achieve the desired state.
185+ func (p PresetSnapshot )handleActiveTemplateVersion () (actions []* ReconciliationActions ,err error ) {
164186state := p .CalculateState ()
165187
166- // If we have more prebuilds than desired, delete the oldest ones
188+ // If we have expired prebuilds, delete them
189+ if state .Expired > 0 {
190+ var deleteIDs []uuid.UUID
191+ for _ ,expired := range p .Expired {
192+ deleteIDs = append (deleteIDs ,expired .ID )
193+ }
194+ actions = append (actions ,
195+ & ReconciliationActions {
196+ ActionType :ActionTypeDelete ,
197+ DeleteIDs :deleteIDs ,
198+ })
199+ }
200+
201+ // If we still have more prebuilds than desired, delete the oldest ones
167202if state .Extraneous > 0 {
168- return & ReconciliationActions {
169- ActionType :ActionTypeDelete ,
170- DeleteIDs :p .getOldestPrebuildIDs (int (state .Extraneous )),
171- },nil
203+ actions = append (actions ,
204+ & ReconciliationActions {
205+ ActionType :ActionTypeDelete ,
206+ DeleteIDs :p .getOldestPrebuildIDs (int (state .Extraneous )),
207+ })
172208}
173209
210+ // Number of running prebuilds excluding the recently deleted Expired
211+ runningValid := state .Actual - state .Expired
212+
174213// Calculate how many new prebuilds we need to create
175214// We subtract starting prebuilds since they're already being created
176- prebuildsToCreate := max (state .Desired - state .Actual - state .Starting ,0 )
215+ prebuildsToCreate := max (state .Desired - runningValid - state .Starting ,0 )
216+ if prebuildsToCreate > 0 {
217+ actions = append (actions ,
218+ & ReconciliationActions {
219+ ActionType :ActionTypeCreate ,
220+ Create :prebuildsToCreate ,
221+ })
222+ }
177223
178- return & ReconciliationActions {
179- ActionType :ActionTypeCreate ,
180- Create :prebuildsToCreate ,
181- },nil
224+ return actions ,nil
182225}
183226
184227// handleInactiveTemplateVersion deletes all running prebuilds except those already being deleted
185228// to avoid duplicate deletion attempts.
186- func (p PresetSnapshot )handleInactiveTemplateVersion () (* ReconciliationActions ,error ) {
229+ func (p PresetSnapshot )handleInactiveTemplateVersion () ([] * ReconciliationActions ,error ) {
187230prebuildsToDelete := len (p .Running )
188231deleteIDs := p .getOldestPrebuildIDs (prebuildsToDelete )
189232
190- return & ReconciliationActions {
191- ActionType :ActionTypeDelete ,
192- DeleteIDs :deleteIDs ,
233+ return []* ReconciliationActions {
234+ {
235+ ActionType :ActionTypeDelete ,
236+ DeleteIDs :deleteIDs ,
237+ },
193238},nil
194239}
195240
196241// needsBackoffPeriod checks if we should delay prebuild creation due to recent failures.
197242// If there were failures, it calculates a backoff period based on the number of failures
198243// and returns true if we're still within that period.
199- func (p PresetSnapshot )needsBackoffPeriod (clock quartz.Clock ,backoffInterval time.Duration ) (* ReconciliationActions ,bool ) {
244+ func (p PresetSnapshot )needsBackoffPeriod (clock quartz.Clock ,backoffInterval time.Duration ) ([] * ReconciliationActions ,bool ) {
200245if p .Backoff == nil || p .Backoff .NumFailed == 0 {
201246return nil ,false
202247}
@@ -205,9 +250,11 @@ func (p PresetSnapshot) needsBackoffPeriod(clock quartz.Clock, backoffInterval t
205250return nil ,false
206251}
207252
208- return & ReconciliationActions {
209- ActionType :ActionTypeBackoff ,
210- BackoffUntil :backoffUntil ,
253+ return []* ReconciliationActions {
254+ {
255+ ActionType :ActionTypeBackoff ,
256+ BackoffUntil :backoffUntil ,
257+ },
211258},true
212259}
213260