|
| 1 | +package prebuilds |
| 2 | + |
| 3 | +import ( |
| 4 | +"math" |
| 5 | +"slices" |
| 6 | +"time" |
| 7 | + |
| 8 | +"github.com/coder/quartz" |
| 9 | + |
| 10 | +"github.com/coder/coder/v2/coderd/database" |
| 11 | +) |
| 12 | + |
| 13 | +func (pPresetState)CalculateActions(clock quartz.Clock,backoffInterval time.Duration) (*ReconciliationActions,error) { |
| 14 | +// TODO: align workspace states with how we represent them on the FE and the CLI |
| 15 | +// right now there's some slight differences which can lead to additional prebuilds being created |
| 16 | + |
| 17 | +// TODO: add mechanism to prevent prebuilds being reconciled from being claimable by users; i.e. if a prebuild is |
| 18 | +// about to be deleted, it should not be deleted if it has been claimed - beware of TOCTOU races! |
| 19 | + |
| 20 | +var ( |
| 21 | +actualint32// Running prebuilds for active version. |
| 22 | +desiredint32// Active template version's desired instances as defined in preset. |
| 23 | +eligibleint32// Prebuilds which can be claimed. |
| 24 | +outdatedint32// Prebuilds which no longer match the active template version. |
| 25 | +extraneousint32// Extra running prebuilds for active version (somehow). |
| 26 | +starting,stopping,deletingint32// Prebuilds currently being provisioned up or down. |
| 27 | +) |
| 28 | + |
| 29 | +ifp.Preset.UsingActiveVersion { |
| 30 | +actual=int32(len(p.Running)) |
| 31 | +desired=p.Preset.DesiredInstances.Int32 |
| 32 | +} |
| 33 | + |
| 34 | +for_,prebuild:=rangep.Running { |
| 35 | +ifp.Preset.UsingActiveVersion { |
| 36 | +ifprebuild.Ready { |
| 37 | +eligible++ |
| 38 | +} |
| 39 | + |
| 40 | +extraneous=int32(math.Max(float64(actual-p.Preset.DesiredInstances.Int32),0)) |
| 41 | +} |
| 42 | + |
| 43 | +ifprebuild.TemplateVersionID==p.Preset.TemplateVersionID&&!p.Preset.UsingActiveVersion { |
| 44 | +outdated++ |
| 45 | +} |
| 46 | +} |
| 47 | + |
| 48 | +// In-progress builds are common across all presets belonging to a given template. |
| 49 | +// In other words: these values will be identical across all presets belonging to this template. |
| 50 | +for_,progress:=rangep.InProgress { |
| 51 | +num:=progress.Count |
| 52 | +switchprogress.Transition { |
| 53 | +casedatabase.WorkspaceTransitionStart: |
| 54 | +starting+=num |
| 55 | +casedatabase.WorkspaceTransitionStop: |
| 56 | +stopping+=num |
| 57 | +casedatabase.WorkspaceTransitionDelete: |
| 58 | +deleting+=num |
| 59 | +} |
| 60 | +} |
| 61 | + |
| 62 | +var ( |
| 63 | +toCreate=int(math.Max(0,float64( |
| 64 | +desired-(actual+starting)),// The number of prebuilds currently being stopped (should be 0) |
| 65 | +)) |
| 66 | +toDelete=int(math.Max(0,float64( |
| 67 | +outdated-// The number of prebuilds running above the desired count for active version |
| 68 | +deleting),// The number of prebuilds currently being deleted |
| 69 | +)) |
| 70 | + |
| 71 | +actions=&ReconciliationActions{ |
| 72 | +Actual:actual, |
| 73 | +Desired:desired, |
| 74 | +Eligible:eligible, |
| 75 | +Outdated:outdated, |
| 76 | +Extraneous:extraneous, |
| 77 | +Starting:starting, |
| 78 | +Stopping:stopping, |
| 79 | +Deleting:deleting, |
| 80 | +} |
| 81 | +) |
| 82 | + |
| 83 | +// If the template has become deleted or deprecated since the last reconciliation, we need to ensure we |
| 84 | +// scale those prebuilds down to zero. |
| 85 | +ifp.Preset.Deleted||p.Preset.Deprecated { |
| 86 | +toCreate=0 |
| 87 | +toDelete=int(actual+outdated) |
| 88 | +actions.Desired=0 |
| 89 | +} |
| 90 | + |
| 91 | +// We backoff when the last build failed, to give the operator some time to investigate the issue and to not provision |
| 92 | +// a tonne of prebuilds (_n_ on each reconciliation iteration). |
| 93 | +ifp.Backoff!=nil&&p.Backoff.NumFailed>0 { |
| 94 | +actions.Failed=p.Backoff.NumFailed |
| 95 | + |
| 96 | +backoffUntil:=p.Backoff.LastBuildAt.Add(time.Duration(p.Backoff.NumFailed)*backoffInterval) |
| 97 | + |
| 98 | +ifclock.Now().Before(backoffUntil) { |
| 99 | +actions.Create=0 |
| 100 | +actions.DeleteIDs=nil |
| 101 | +actions.BackoffUntil=backoffUntil |
| 102 | + |
| 103 | +// Return early here; we should not perform any reconciliation actions if we're in a backoff period. |
| 104 | +returnactions,nil |
| 105 | +} |
| 106 | +} |
| 107 | + |
| 108 | +// It's possible that an operator could stop/start prebuilds which interfere with the reconciliation loop, so |
| 109 | +// we check if there are somehow more prebuilds than we expect, and then pick random victims to be deleted. |
| 110 | +ifextraneous>0 { |
| 111 | +// Sort running IDs by creation time so we always delete the oldest prebuilds. |
| 112 | +// In general, we want fresher prebuilds (imagine a mono-repo is cloned; newer is better). |
| 113 | +slices.SortFunc(p.Running,func(a,b database.GetRunningPrebuiltWorkspacesRow)int { |
| 114 | +ifa.CreatedAt.Before(b.CreatedAt) { |
| 115 | +return-1 |
| 116 | +} |
| 117 | +ifa.CreatedAt.After(b.CreatedAt) { |
| 118 | +return1 |
| 119 | +} |
| 120 | + |
| 121 | +return0 |
| 122 | +}) |
| 123 | + |
| 124 | +fori:=0;i<int(extraneous);i++ { |
| 125 | +ifi>=len(p.Running) { |
| 126 | +// This should never happen. |
| 127 | +// TODO: move up |
| 128 | +// c.logger.Warn(ctx, "unexpected reconciliation state; extraneous count exceeds running prebuilds count!", |
| 129 | +//slog.F("running_count", len(p.Running)), |
| 130 | +//slog.F("extraneous", extraneous)) |
| 131 | +continue |
| 132 | +} |
| 133 | + |
| 134 | +actions.DeleteIDs=append(actions.DeleteIDs,p.Running[i].ID) |
| 135 | +} |
| 136 | + |
| 137 | +// TODO: move up |
| 138 | +// c.logger.Warn(ctx, "found extra prebuilds running, picking random victim(s)", |
| 139 | +//slog.F("template_id", p.Preset.TemplateID.String()), slog.F("desired", desired), slog.F("actual", actual), slog.F("extra", extraneous), |
| 140 | +//slog.F("victims", victims)) |
| 141 | + |
| 142 | +// Prevent the rest of the reconciliation from completing |
| 143 | +returnactions,nil |
| 144 | +} |
| 145 | + |
| 146 | +actions.Create=int32(toCreate) |
| 147 | + |
| 148 | +// if toDelete > 0 && len(p.Running) != toDelete { |
| 149 | +// TODO: move up |
| 150 | +// c.logger.Warn(ctx, "mismatch between running prebuilds and expected deletion count!", |
| 151 | +//slog.F("template_id", s.preset.TemplateID.String()), slog.F("running", len(p.Running)), slog.F("to_delete", toDelete)) |
| 152 | +// } |
| 153 | + |
| 154 | +// TODO: implement lookup to not perform same action on workspace multiple times in $period |
| 155 | +// i.e. a workspace cannot be deleted for some reason, which continually makes it eligible for deletion |
| 156 | +fori:=0;i<toDelete;i++ { |
| 157 | +ifi>=len(p.Running) { |
| 158 | +// TODO: move up |
| 159 | +// Above warning will have already addressed this. |
| 160 | +continue |
| 161 | +} |
| 162 | + |
| 163 | +actions.DeleteIDs=append(actions.DeleteIDs,p.Running[i].ID) |
| 164 | +} |
| 165 | + |
| 166 | +returnactions,nil |
| 167 | +} |