@@ -31,9 +31,14 @@ const (
3131// PresetSnapshot is a filtered view of GlobalSnapshot focused on a single preset.
3232// It contains the raw data needed to calculate the current state of a preset's prebuilds,
3333// including running prebuilds, in-progress builds, and backoff information.
34+ // - Running: prebuilds running and non-expired
35+ // - Expired: prebuilds running and expired due to the preset's TTL
36+ // - InProgress: prebuilds currently in progress
37+ // - Backoff: holds failure info to decide if prebuild creation should be backed off
3438type PresetSnapshot struct {
3539Preset database.GetTemplatePresetsWithPrebuildsRow
3640Running []database.GetRunningPrebuiltWorkspacesRow
41+ Expired []database.GetRunningPrebuiltWorkspacesRow
3742InProgress []database.CountInProgressPrebuildsRow
3843Backoff * database.GetPresetsBackoffRow
3944IsHardLimited bool
@@ -43,10 +48,11 @@ type PresetSnapshot struct {
4348// calculated from a PresetSnapshot. While PresetSnapshot contains raw data,
4449// ReconciliationState contains derived metrics that are directly used to
4550// determine what actions are needed (create, delete, or backoff).
46- // For example, it calculates how many prebuilds are eligible, how many are
47- // extraneous, and how many are in various transition states.
51+ // For example, it calculates how many prebuilds are expired, how many are
52+ // eligible, how many are extraneous, and how many are in various transition states.
4853type ReconciliationState struct {
4954Actual int32 // Number of currently running prebuilds
55+ Expired int32 // Number of currently running prebuilds that exceeded their allowed time-to-live (TTL)
5056Desired int32 // Number of prebuilds desired as defined in the preset
5157Eligible int32 // Number of prebuilds that are ready to be claimed
5258Extraneous int32 // Number of extra running prebuilds beyond the desired count
@@ -78,7 +84,8 @@ func (ra *ReconciliationActions) IsNoop() bool {
7884}
7985
8086// CalculateState computes the current state of prebuilds for a preset, including:
81- // - Actual: Number of currently running prebuilds
87+ // - Actual: Number of currently valid running prebuilds, i.e., non-expired prebuilds
88+ // - Expired: Number of currently running expired prebuilds
8289// - Desired: Number of prebuilds desired as defined in the preset
8390// - Eligible: Number of prebuilds that are ready to be claimed
8491// - Extraneous: Number of extra running prebuilds beyond the desired count
@@ -92,13 +99,17 @@ func (p PresetSnapshot) CalculateState() *ReconciliationState {
9299var (
93100actual int32
94101desired int32
102+ expired int32
95103eligible int32
96104extraneous int32
97105)
98106
99107// #nosec G115 - Safe conversion as p.Running slice length is expected to be within int32 range
100108actual = int32 (len (p .Running ))
101109
110+ // #nosec G115 - Safe conversion as p.Expired slice length is expected to be within int32 range
111+ expired = int32 (len (p .Expired ))
112+
102113if p .isActive () {
103114desired = p .Preset .DesiredInstances .Int32
104115eligible = p .countEligible ()
@@ -109,6 +120,7 @@ func (p PresetSnapshot) CalculateState() *ReconciliationState {
109120
110121return & ReconciliationState {
111122Actual :actual ,
123+ Expired :expired ,
112124Desired :desired ,
113125Eligible :eligible ,
114126Extraneous :extraneous ,
@@ -125,15 +137,16 @@ func (p PresetSnapshot) CalculateState() *ReconciliationState {
125137// 2. If the preset is inactive (template version is not active), it will delete all running prebuilds
126138// 3. For active presets, it calculates the number of prebuilds to create or delete based on:
127139// - The desired number of instances
128- // - Currently running prebuilds
140+ // - Currently running non-expired prebuilds
141+ // - Currently running expired prebuilds
129142// - Prebuilds in transition states (starting/stopping/deleting)
130143// - Any extraneous prebuilds that need to be removed
131144//
132145// The function returns a list of ReconciliationActions; each entry has exactly one action type set:
133146// - ActionTypeBackoff: Only BackoffUntil is set, indicating when to retry
134147// - ActionTypeCreate: Only Create is set, indicating how many prebuilds to create
135148// - ActionTypeDelete: Only DeleteIDs is set, containing IDs of prebuilds to delete
136- func (p PresetSnapshot )CalculateActions (clock quartz.Clock ,backoffInterval time.Duration ) (* ReconciliationActions ,error ) {
149+ func (p PresetSnapshot )CalculateActions (clock quartz.Clock ,backoffInterval time.Duration ) ([] * ReconciliationActions ,error ) {
137150// TODO: align workspace states with how we represent them on the FE and the CLI
138151// right now there's some slight differences which can lead to additional prebuilds being created
139152
@@ -158,45 +171,74 @@ func (p PresetSnapshot) isActive() bool {
158171return p .Preset .UsingActiveVersion && ! p .Preset .Deleted && ! p .Preset .Deprecated
159172}
160173
161- // handleActiveTemplateVersion deletes excess prebuilds if there are too many,
162- // otherwise creates new ones to reach the desired count.
163- func (p PresetSnapshot )handleActiveTemplateVersion () (* ReconciliationActions ,error ) {
174+ // handleActiveTemplateVersion determines the reconciliation actions for a preset with an active template version.
175+ // It ensures the system moves towards the desired number of healthy prebuilds.
176+ //
177+ // The reconciliation follows this order:
178+ // 1. Delete expired prebuilds: These are no longer valid and must be removed first.
179+ // 2. Delete extraneous prebuilds: After expired ones are removed, if the number of running prebuilds (excluding expired)
180+ // still exceeds the desired count, the oldest prebuilds are deleted to reduce excess.
181+ // 3. Create missing prebuilds: If the non-expired running prebuilds plus those already starting are still below
182+ // the desired count, create the difference (desired - running - starting) to reach the target.
183+ //
184+ // The function returns a list of actions to be executed to achieve the desired state.
185+ func (p PresetSnapshot )handleActiveTemplateVersion () (actions []* ReconciliationActions ,err error ) {
164186state := p .CalculateState ()
165187
166- // If we have more prebuilds than desired, delete the oldest ones
188+ // If we have expired prebuilds, delete them
189+ if state .Expired > 0 {
190+ var deleteIDs []uuid.UUID
191+ for _ ,expired := range p .Expired {
192+ deleteIDs = append (deleteIDs ,expired .ID )
193+ }
194+ actions = append (actions ,
195+ & ReconciliationActions {
196+ ActionType :ActionTypeDelete ,
197+ DeleteIDs :deleteIDs ,
198+ })
199+ }
200+
201+ // If we still have more prebuilds than desired, delete the oldest ones
167202if state .Extraneous > 0 {
168- return & ReconciliationActions {
169- ActionType :ActionTypeDelete ,
170- DeleteIDs :p .getOldestPrebuildIDs (int (state .Extraneous )),
171- },nil
203+ actions = append (actions ,
204+ & ReconciliationActions {
205+ ActionType :ActionTypeDelete ,
206+ DeleteIDs :p .getOldestPrebuildIDs (int (state .Extraneous )),
207+ })
172208}
173209
174210// Calculate how many new prebuilds we need to create
175211// We subtract starting prebuilds since they're already being created
176212prebuildsToCreate := max (state .Desired - state .Actual - state .Starting ,0 )
213+ if prebuildsToCreate > 0 {
214+ actions = append (actions ,
215+ & ReconciliationActions {
216+ ActionType :ActionTypeCreate ,
217+ Create :prebuildsToCreate ,
218+ })
219+ }
177220
178- return & ReconciliationActions {
179- ActionType :ActionTypeCreate ,
180- Create :prebuildsToCreate ,
181- },nil
221+ return actions ,nil
182222}
183223
184224// handleInactiveTemplateVersion deletes all running prebuilds except those already being deleted
185225// to avoid duplicate deletion attempts.
186- func (p PresetSnapshot )handleInactiveTemplateVersion () (* ReconciliationActions ,error ) {
226+ func (p PresetSnapshot )handleInactiveTemplateVersion () ([] * ReconciliationActions ,error ) {
187227prebuildsToDelete := len (p .Running )
188228deleteIDs := p .getOldestPrebuildIDs (prebuildsToDelete )
189229
190- return & ReconciliationActions {
191- ActionType :ActionTypeDelete ,
192- DeleteIDs :deleteIDs ,
230+ return []* ReconciliationActions {
231+ {
232+ ActionType :ActionTypeDelete ,
233+ DeleteIDs :deleteIDs ,
234+ },
193235},nil
194236}
195237
196238// needsBackoffPeriod checks if we should delay prebuild creation due to recent failures.
197239// If there were failures, it calculates a backoff period based on the number of failures
198240// and returns true if we're still within that period.
199- func (p PresetSnapshot )needsBackoffPeriod (clock quartz.Clock ,backoffInterval time.Duration ) (* ReconciliationActions ,bool ) {
241+ func (p PresetSnapshot )needsBackoffPeriod (clock quartz.Clock ,backoffInterval time.Duration ) ([] * ReconciliationActions ,bool ) {
200242if p .Backoff == nil || p .Backoff .NumFailed == 0 {
201243return nil ,false
202244}
@@ -205,9 +247,11 @@ func (p PresetSnapshot) needsBackoffPeriod(clock quartz.Clock, backoffInterval t
205247return nil ,false
206248}
207249
208- return & ReconciliationActions {
209- ActionType :ActionTypeBackoff ,
210- BackoffUntil :backoffUntil ,
250+ return []* ReconciliationActions {
251+ {
252+ ActionType :ActionTypeBackoff ,
253+ BackoffUntil :backoffUntil ,
254+ },
211255},true
212256}
213257