|
| 1 | +package autostart |
| 2 | + |
| 3 | +import ( |
| 4 | +"context" |
| 5 | +"fmt" |
| 6 | +"io" |
| 7 | +"time" |
| 8 | + |
| 9 | +"golang.org/x/xerrors" |
| 10 | + |
| 11 | +"cdr.dev/slog" |
| 12 | +"cdr.dev/slog/sloggers/sloghuman" |
| 13 | +"github.com/coder/coder/v2/coderd/tracing" |
| 14 | +"github.com/coder/coder/v2/codersdk" |
| 15 | +"github.com/coder/coder/v2/scaletest/createusers" |
| 16 | +"github.com/coder/coder/v2/scaletest/harness" |
| 17 | +"github.com/coder/coder/v2/scaletest/loadtestutil" |
| 18 | +"github.com/coder/coder/v2/scaletest/workspacebuild" |
| 19 | +) |
| 20 | + |
| 21 | +typeRunnerstruct { |
| 22 | +client*codersdk.Client |
| 23 | +cfgConfig |
| 24 | + |
| 25 | +createUserRunner*createusers.Runner |
| 26 | +workspacebuildRunner*workspacebuild.Runner |
| 27 | + |
| 28 | +autostartTotalLatency time.Duration |
| 29 | +autostartJobCreationLatency time.Duration |
| 30 | +autostartJobAcquiredLatency time.Duration |
| 31 | +} |
| 32 | + |
| 33 | +funcNewRunner(client*codersdk.Client,cfgConfig)*Runner { |
| 34 | +return&Runner{ |
| 35 | +client:client, |
| 36 | +cfg:cfg, |
| 37 | +} |
| 38 | +} |
| 39 | + |
| 40 | +var ( |
| 41 | +_ harness.Runnable=&Runner{} |
| 42 | +_ harness.Cleanable=&Runner{} |
| 43 | +_ harness.Collectable=&Runner{} |
| 44 | +) |
| 45 | + |
| 46 | +func (r*Runner)Run(ctx context.Context,idstring,logs io.Writer)error { |
| 47 | +ctx,span:=tracing.StartSpan(ctx) |
| 48 | +deferspan.End() |
| 49 | + |
| 50 | +reachedBarrier:=false |
| 51 | +deferfunc() { |
| 52 | +if!reachedBarrier { |
| 53 | +r.cfg.SetupBarrier.Done() |
| 54 | +} |
| 55 | +}() |
| 56 | + |
| 57 | +logs=loadtestutil.NewSyncWriter(logs) |
| 58 | +logger:=slog.Make(sloghuman.Sink(logs)).Leveled(slog.LevelDebug) |
| 59 | +r.client.SetLogger(logger) |
| 60 | +r.client.SetLogBodies(true) |
| 61 | + |
| 62 | +r.createUserRunner=createusers.NewRunner(r.client,r.cfg.User) |
| 63 | +newUserAndToken,err:=r.createUserRunner.RunReturningUser(ctx,id,logs) |
| 64 | +iferr!=nil { |
| 65 | +r.cfg.Metrics.AddError("","create_user") |
| 66 | +returnxerrors.Errorf("create user: %w",err) |
| 67 | +} |
| 68 | +newUser:=newUserAndToken.User |
| 69 | + |
| 70 | +newUserClient:=codersdk.New(r.client.URL, |
| 71 | +codersdk.WithSessionToken(newUserAndToken.SessionToken), |
| 72 | +codersdk.WithLogger(logger), |
| 73 | +codersdk.WithLogBodies()) |
| 74 | + |
| 75 | +//nolint:gocritic // short log is fine |
| 76 | +logger.Info(ctx,"user created",slog.F("username",newUser.Username),slog.F("user_id",newUser.ID.String())) |
| 77 | + |
| 78 | +workspaceBuildConfig:=r.cfg.Workspace |
| 79 | +workspaceBuildConfig.OrganizationID=r.cfg.User.OrganizationID |
| 80 | +workspaceBuildConfig.UserID=newUser.ID.String() |
| 81 | +// We'll wait for the build ourselves to avoid multiple API requests |
| 82 | +workspaceBuildConfig.NoWaitForBuild=true |
| 83 | + |
| 84 | +r.workspacebuildRunner=workspacebuild.NewRunner(newUserClient,workspaceBuildConfig) |
| 85 | +workspace,err:=r.workspacebuildRunner.RunReturningWorkspace(ctx,id,logs) |
| 86 | +iferr!=nil { |
| 87 | +r.cfg.Metrics.AddError(newUser.Username,"create_workspace") |
| 88 | +returnxerrors.Errorf("create workspace: %w",err) |
| 89 | +} |
| 90 | + |
| 91 | +watchCtx,cancel:=context.WithCancel(ctx) |
| 92 | +defercancel() |
| 93 | +workspaceUpdates,err:=newUserClient.WatchWorkspace(watchCtx,workspace.ID) |
| 94 | +iferr!=nil { |
| 95 | +r.cfg.Metrics.AddError(newUser.Username,"watch_workspace") |
| 96 | +returnxerrors.Errorf("watch workspace: %w",err) |
| 97 | +} |
| 98 | + |
| 99 | +createWorkspaceCtx,cancel2:=context.WithTimeout(ctx,r.cfg.WorkspaceJobTimeout) |
| 100 | +defercancel2() |
| 101 | + |
| 102 | +err=waitForWorkspaceUpdate(createWorkspaceCtx,logger,workspaceUpdates,func(ws codersdk.Workspace)bool { |
| 103 | +returnws.LatestBuild.Transition==codersdk.WorkspaceTransitionStart&& |
| 104 | +ws.LatestBuild.Job.Status==codersdk.ProvisionerJobSucceeded |
| 105 | +}) |
| 106 | +iferr!=nil { |
| 107 | +r.cfg.Metrics.AddError(newUser.Username,"wait_for_initial_build") |
| 108 | +returnxerrors.Errorf("timeout waiting for initial workspace build to complete: %w",err) |
| 109 | +} |
| 110 | + |
| 111 | +logger.Info(ctx,"stopping workspace",slog.F("workspace_name",workspace.Name)) |
| 112 | + |
| 113 | +_,err=newUserClient.CreateWorkspaceBuild(ctx,workspace.ID, codersdk.CreateWorkspaceBuildRequest{ |
| 114 | +Transition:codersdk.WorkspaceTransitionStop, |
| 115 | +}) |
| 116 | +iferr!=nil { |
| 117 | +r.cfg.Metrics.AddError(newUser.Username,"create_stop_build") |
| 118 | +returnxerrors.Errorf("create stop build: %w",err) |
| 119 | +} |
| 120 | + |
| 121 | +stopBuildCtx,cancel3:=context.WithTimeout(ctx,r.cfg.WorkspaceJobTimeout) |
| 122 | +defercancel3() |
| 123 | + |
| 124 | +err=waitForWorkspaceUpdate(stopBuildCtx,logger,workspaceUpdates,func(ws codersdk.Workspace)bool { |
| 125 | +returnws.LatestBuild.Transition==codersdk.WorkspaceTransitionStop&& |
| 126 | +ws.LatestBuild.Job.Status==codersdk.ProvisionerJobSucceeded |
| 127 | +}) |
| 128 | +iferr!=nil { |
| 129 | +r.cfg.Metrics.AddError(newUser.Username,"wait_for_stop_build") |
| 130 | +returnxerrors.Errorf("timeout waiting for stop build to complete: %w",err) |
| 131 | +} |
| 132 | + |
| 133 | +logger.Info(ctx,"workspace stopped successfully",slog.F("workspace_name",workspace.Name)) |
| 134 | + |
| 135 | +logger.Info(ctx,"waiting for all runners to reach barrier") |
| 136 | +reachedBarrier=true |
| 137 | +r.cfg.SetupBarrier.Done() |
| 138 | +r.cfg.SetupBarrier.Wait() |
| 139 | +logger.Info(ctx,"all runners reached barrier, proceeding with autostart schedule") |
| 140 | + |
| 141 | +testStartTime:=time.Now().UTC() |
| 142 | +autostartTime:=testStartTime.Add(r.cfg.AutostartDelay).Round(time.Minute) |
| 143 | +schedule:=fmt.Sprintf("CRON_TZ=UTC %d %d * * *",autostartTime.Minute(),autostartTime.Hour()) |
| 144 | + |
| 145 | +logger.Info(ctx,"setting autostart schedule for workspace",slog.F("workspace_name",workspace.Name),slog.F("schedule",schedule)) |
| 146 | + |
| 147 | +err=newUserClient.UpdateWorkspaceAutostart(ctx,workspace.ID, codersdk.UpdateWorkspaceAutostartRequest{ |
| 148 | +Schedule:&schedule, |
| 149 | +}) |
| 150 | +iferr!=nil { |
| 151 | +r.cfg.Metrics.AddError(newUser.Username,"update_workspace_autostart") |
| 152 | +returnxerrors.Errorf("update workspace autostart: %w",err) |
| 153 | +} |
| 154 | + |
| 155 | +logger.Info(ctx,"waiting for workspace to autostart",slog.F("workspace_name",workspace.Name)) |
| 156 | + |
| 157 | +autostartInitiateCtx,cancel4:=context.WithDeadline(ctx,autostartTime.Add(r.cfg.AutostartDelay)) |
| 158 | +defercancel4() |
| 159 | + |
| 160 | +logger.Info(ctx,"listening for workspace updates to detect autostart build") |
| 161 | + |
| 162 | +err=waitForWorkspaceUpdate(autostartInitiateCtx,logger,workspaceUpdates,func(ws codersdk.Workspace)bool { |
| 163 | +ifws.LatestBuild.Transition!=codersdk.WorkspaceTransitionStart { |
| 164 | +returnfalse |
| 165 | +} |
| 166 | + |
| 167 | +// The job has been created, but it might be pending |
| 168 | +ifr.autostartJobCreationLatency==0 { |
| 169 | +r.autostartJobCreationLatency=time.Since(autostartTime) |
| 170 | +r.cfg.Metrics.RecordJobCreation(r.autostartJobCreationLatency,newUser.Username,workspace.Name) |
| 171 | +} |
| 172 | + |
| 173 | +ifws.LatestBuild.Job.Status==codersdk.ProvisionerJobRunning|| |
| 174 | +ws.LatestBuild.Job.Status==codersdk.ProvisionerJobSucceeded { |
| 175 | +// Job is no longer pending, but it might not have finished |
| 176 | +ifr.autostartJobAcquiredLatency==0 { |
| 177 | +r.autostartJobAcquiredLatency=time.Since(autostartTime) |
| 178 | +r.cfg.Metrics.RecordJobAcquired(r.autostartJobAcquiredLatency,newUser.Username,workspace.Name) |
| 179 | +} |
| 180 | +returnws.LatestBuild.Job.Status==codersdk.ProvisionerJobSucceeded |
| 181 | +} |
| 182 | + |
| 183 | +returnfalse |
| 184 | +}) |
| 185 | +iferr!=nil { |
| 186 | +r.cfg.Metrics.AddError(newUser.Username,"wait_for_autostart_build") |
| 187 | +returnxerrors.Errorf("timeout waiting for autostart build to be created: %w",err) |
| 188 | +} |
| 189 | + |
| 190 | +r.autostartTotalLatency=time.Since(autostartTime) |
| 191 | + |
| 192 | +logger.Info(ctx,"autostart workspace build complete",slog.F("duration",r.autostartTotalLatency)) |
| 193 | +r.cfg.Metrics.RecordCompletion(r.autostartTotalLatency,newUser.Username,workspace.Name) |
| 194 | + |
| 195 | +returnnil |
| 196 | +} |
| 197 | + |
| 198 | +funcwaitForWorkspaceUpdate(ctx context.Context,logger slog.Logger,updates<-chan codersdk.Workspace,shouldBreakfunc(codersdk.Workspace)bool)error { |
| 199 | +for { |
| 200 | +select { |
| 201 | +case<-ctx.Done(): |
| 202 | +returnctx.Err() |
| 203 | +caseupdatedWorkspace,ok:=<-updates: |
| 204 | +if!ok { |
| 205 | +returnxerrors.New("workspace updates channel closed") |
| 206 | +} |
| 207 | +logger.Debug(ctx,"received workspace update",slog.F("update",updatedWorkspace)) |
| 208 | +ifshouldBreak(updatedWorkspace) { |
| 209 | +returnnil |
| 210 | +} |
| 211 | +} |
| 212 | +} |
| 213 | +} |
| 214 | + |
| 215 | +func (r*Runner)Cleanup(ctx context.Context,idstring,logs io.Writer)error { |
| 216 | +ifr.workspacebuildRunner!=nil { |
| 217 | +_,_=fmt.Fprintln(logs,"Cleaning up workspace...") |
| 218 | +iferr:=r.workspacebuildRunner.Cleanup(ctx,id,logs);err!=nil { |
| 219 | +returnxerrors.Errorf("cleanup workspace: %w",err) |
| 220 | +} |
| 221 | +} |
| 222 | + |
| 223 | +ifr.createUserRunner!=nil { |
| 224 | +_,_=fmt.Fprintln(logs,"Cleaning up user...") |
| 225 | +iferr:=r.createUserRunner.Cleanup(ctx,id,logs);err!=nil { |
| 226 | +returnxerrors.Errorf("cleanup user: %w",err) |
| 227 | +} |
| 228 | +} |
| 229 | + |
| 230 | +returnnil |
| 231 | +} |
| 232 | + |
| 233 | +const ( |
| 234 | +AutostartTotalLatencyMetric="autostart_total_latency_seconds" |
| 235 | +AutostartJobCreationLatencyMetric="autostart_job_creation_latency_seconds" |
| 236 | +AutostartJobAcquiredLatencyMetric="autostart_job_acquired_latency_seconds" |
| 237 | +) |
| 238 | + |
| 239 | +func (r*Runner)GetMetrics()map[string]any { |
| 240 | +returnmap[string]any{ |
| 241 | +AutostartTotalLatencyMetric:r.autostartTotalLatency.Seconds(), |
| 242 | +AutostartJobCreationLatencyMetric:r.autostartJobCreationLatency.Seconds(), |
| 243 | +AutostartJobAcquiredLatencyMetric:r.autostartJobAcquiredLatency.Seconds(), |
| 244 | +} |
| 245 | +} |