@@ -179,6 +179,14 @@ func (d *Detector) run(t time.Time) Stats {
179
179
stats .Error = xerrors .Errorf ("get hung provisioner jobs: %w" ,err )
180
180
return stats
181
181
}
182
+ // Also fetch provisioner jobs that have not started, using a cutoff of
183
+ // t minus HungJobDuration, and append them to the jobs to unhang.
184
+ if err != nil {
185
+ stats .Error = xerrors .Errorf ("get not started provisioner jobs: %w" ,err )
186
+ return stats
187
+ }
188
+ jobsUnstarted ,err := d .db .GetNotStartedProvisionerJobs (ctx ,t .Add (- HungJobDuration ))
189
+ jobs = append (jobs ,jobsUnstarted ... )
182
190
183
191
// Limit the number of jobs we'll unhang in a single run to avoid
184
192
// timing out.
@@ -229,14 +237,6 @@ func unhangJob(ctx context.Context, log slog.Logger, db database.Store, pub pubs
229
237
return xerrors .Errorf ("get provisioner job: %w" ,err )
230
238
}
231
239
232
- // Check if we should still unhang it.
233
- if ! job .StartedAt .Valid {
234
- // This shouldn't be possible to hit because the query only selects
235
- // started and not completed jobs, and a job can't be "un-started".
236
- return jobIneligibleError {
237
- Err :xerrors .New ("job is not started" ),
238
- }
239
- }
240
240
if job .CompletedAt .Valid {
241
241
return jobIneligibleError {
242
242
Err :xerrors .Errorf ("job is completed (status %s)" ,job .JobStatus ),