Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

feat: cancel stuck pending jobs #17803

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
ibetitsmike merged 24 commits into main from mike/16488-cancel-stuck-pending-jobs
May 20, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commitHold shift + click to select a range
0f51f35
added queries for fetching NotStartedProvisionerJobs
ibetitsmikeApr 1, 2025
2f3d606
added detector handling of not started jobs
ibetitsmikeMay 9, 2025
4b252eb
filling out started_at when unhanging not started jobs
ibetitsmikeMay 9, 2025
ca49519
WIP
ibetitsmikeMay 13, 2025
af994c2
refactored to reaper & added tests
ibetitsmikeMay 13, 2025
3815727
Revert "filling out started_at when unhanging not started jobs"
ibetitsmikeMay 13, 2025
b65f620
created new ORM update to avoid forcing setting StartedAt on every Co…
ibetitsmikeMay 13, 2025
3c7c323
added missing dbauthz tests
ibetitsmikeMay 13, 2025
35df01f
added checks for StartedAt value in the updated jobs
ibetitsmikeMay 13, 2025
8aa1ee2
refactor from reaper to jobreaper
ibetitsmikeMay 14, 2025
4385933
WIP
ibetitsmikeMay 14, 2025
96fee51
WIP
ibetitsmikeMay 14, 2025
d8db119
WIP
ibetitsmikeMay 15, 2025
5120fb1
WIP
ibetitsmikeMay 15, 2025
8d4fa5a
fixed sql comments
ibetitsmikeMay 15, 2025
18b809c
taking a step back with RBAC
ibetitsmikeMay 16, 2025
0fe1404
WIP
ibetitsmikeMay 16, 2025
77be34e
WIP
ibetitsmikeMay 16, 2025
4351529
WIP
ibetitsmikeMay 16, 2025
c03bfa3
fixed InOrg check for provisionerjob resource
ibetitsmikeMay 19, 2025
a15bd1c
PR review; naming in the comments, added comments for SQL, less verbo…
ibetitsmikeMay 19, 2025
5b9348f
fixes to tests after lint remove rand
ibetitsmikeMay 19, 2025
91d2d32
readded rand to fix gen failing in CI
ibetitsmikeMay 19, 2025
767cb77
adjusted TODOs
ibetitsmikeMay 20, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions cli/server.go
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -87,6 +87,7 @@ import (
"github.com/coder/coder/v2/coderd/externalauth"
"github.com/coder/coder/v2/coderd/gitsshkey"
"github.com/coder/coder/v2/coderd/httpmw"
"github.com/coder/coder/v2/coderd/jobreaper"
"github.com/coder/coder/v2/coderd/notifications"
"github.com/coder/coder/v2/coderd/oauthpki"
"github.com/coder/coder/v2/coderd/prometheusmetrics"
Expand All@@ -95,7 +96,6 @@ import (
"github.com/coder/coder/v2/coderd/schedule"
"github.com/coder/coder/v2/coderd/telemetry"
"github.com/coder/coder/v2/coderd/tracing"
"github.com/coder/coder/v2/coderd/unhanger"
"github.com/coder/coder/v2/coderd/updatecheck"
"github.com/coder/coder/v2/coderd/util/ptr"
"github.com/coder/coder/v2/coderd/util/slice"
Expand DownExpand Up@@ -1127,11 +1127,11 @@ func (r *RootCmd) Server(newAPI func(context.Context, *coderd.Options) (*coderd.
ctx, options.Database, options.Pubsub, options.PrometheusRegistry, coderAPI.TemplateScheduleStore, &coderAPI.Auditor, coderAPI.AccessControlStore, logger, autobuildTicker.C, options.NotificationsEnqueuer)
autobuildExecutor.Run()

hangDetectorTicker := time.NewTicker(vals.JobHangDetectorInterval.Value())
defer hangDetectorTicker.Stop()
hangDetector := unhanger.New(ctx, options.Database, options.Pubsub, logger, hangDetectorTicker.C)
hangDetector.Start()
defer hangDetector.Close()
jobReaperTicker := time.NewTicker(vals.JobReaperDetectorInterval.Value())
defer jobReaperTicker.Stop()
jobReaper := jobreaper.New(ctx, options.Database, options.Pubsub, logger, jobReaperTicker.C)
jobReaper.Start()
defer jobReaper.Close()

waitForProvisionerJobs := false
// Currently there is no way to ask the server to shut
Expand Down
2 changes: 1 addition & 1 deletion cli/testdata/server-config.yaml.golden
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -183,7 +183,7 @@ networking:
# Interval to poll for scheduled workspace builds.
# (default: 1m0s, type: duration)
autobuildPollInterval: 1m0s
# Interval to poll for hung jobs and automatically terminate them.
# Interval to poll for hung and pending jobs and automatically terminate them.
# (default: 1m0s, type: duration)
jobHangDetectorInterval: 1m0s
introspection:
Expand Down
12 changes: 6 additions & 6 deletions coderd/coderdtest/coderdtest.go
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -68,14 +68,14 @@ import (
"github.com/coder/coder/v2/coderd/externalauth"
"github.com/coder/coder/v2/coderd/gitsshkey"
"github.com/coder/coder/v2/coderd/httpmw"
"github.com/coder/coder/v2/coderd/jobreaper"
"github.com/coder/coder/v2/coderd/notifications"
"github.com/coder/coder/v2/coderd/notifications/notificationstest"
"github.com/coder/coder/v2/coderd/rbac"
"github.com/coder/coder/v2/coderd/rbac/policy"
"github.com/coder/coder/v2/coderd/runtimeconfig"
"github.com/coder/coder/v2/coderd/schedule"
"github.com/coder/coder/v2/coderd/telemetry"
"github.com/coder/coder/v2/coderd/unhanger"
"github.com/coder/coder/v2/coderd/updatecheck"
"github.com/coder/coder/v2/coderd/util/ptr"
"github.com/coder/coder/v2/coderd/webpush"
Expand DownExpand Up@@ -365,11 +365,11 @@ func NewOptions(t testing.TB, options *Options) (func(http.Handler), context.Can
).WithStatsChannel(options.AutobuildStats)
lifecycleExecutor.Run()

hangDetectorTicker := time.NewTicker(options.DeploymentValues.JobHangDetectorInterval.Value())
defer hangDetectorTicker.Stop()
hangDetector := unhanger.New(ctx, options.Database, options.Pubsub, options.Logger.Named("unhanger.detector"), hangDetectorTicker.C)
hangDetector.Start()
t.Cleanup(hangDetector.Close)
jobReaperTicker := time.NewTicker(options.DeploymentValues.JobReaperDetectorInterval.Value())
defer jobReaperTicker.Stop()
jobReaper := jobreaper.New(ctx, options.Database, options.Pubsub, options.Logger.Named("reaper.detector"), jobReaperTicker.C)
jobReaper.Start()
t.Cleanup(jobReaper.Close)

if options.TelemetryReporter == nil {
options.TelemetryReporter = telemetry.NewNoop()
Expand Down
139 changes: 80 additions & 59 deletions coderd/database/dbauthz/dbauthz.go
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -170,10 +170,10 @@ var (
Identifier: rbac.RoleIdentifier{Name: "provisionerd"},
DisplayName: "Provisioner Daemon",
Site: rbac.Permissions(map[string][]policy.Action{
// TODO: Add ProvisionerJob resource type.
rbac.ResourceFile.Type: {policy.ActionRead},
rbac.ResourceSystem.Type: {policy.WildcardSymbol},
rbac.ResourceTemplate.Type: {policy.ActionRead, policy.ActionUpdate},
rbac.ResourceProvisionerJobs.Type: {policy.ActionRead, policy.ActionUpdate, policy.ActionCreate},
rbac.ResourceFile.Type: {policy.ActionRead},
rbac.ResourceSystem.Type: {policy.WildcardSymbol},
rbac.ResourceTemplate.Type: {policy.ActionRead, policy.ActionUpdate},
// Unsure why provisionerd needs update and read personal
rbac.ResourceUser.Type: {policy.ActionRead, policy.ActionReadPersonal, policy.ActionUpdatePersonal},
rbac.ResourceWorkspaceDormant.Type: {policy.ActionDelete, policy.ActionRead, policy.ActionUpdate, policy.ActionWorkspaceStop},
Expand DownExpand Up@@ -219,19 +219,20 @@ var (
Scope: rbac.ScopeAll,
}.WithCachedASTValue()

// See unhanger package.
subjectHangDetector = rbac.Subject{
Type: rbac.SubjectTypeHangDetector,
FriendlyName: "Hang Detector",
// See reaper package.
subjectJobReaper = rbac.Subject{
Type: rbac.SubjectTypeJobReaper,
FriendlyName: "Job Reaper",
ID: uuid.Nil.String(),
Roles: rbac.Roles([]rbac.Role{
{
Identifier: rbac.RoleIdentifier{Name: "hangdetector"},
DisplayName: "Hang Detector Daemon",
Identifier: rbac.RoleIdentifier{Name: "jobreaper"},
DisplayName: "Job Reaper Daemon",
Site: rbac.Permissions(map[string][]policy.Action{
rbac.ResourceSystem.Type: {policy.WildcardSymbol},
rbac.ResourceTemplate.Type: {policy.ActionRead},
rbac.ResourceWorkspace.Type: {policy.ActionRead, policy.ActionUpdate},
rbac.ResourceSystem.Type: {policy.WildcardSymbol},
rbac.ResourceTemplate.Type: {policy.ActionRead},
rbac.ResourceWorkspace.Type: {policy.ActionRead, policy.ActionUpdate},
rbac.ResourceProvisionerJobs.Type: {policy.ActionRead, policy.ActionUpdate},
}),
Org: map[string][]rbac.Permission{},
User: []rbac.Permission{},
Expand DownExpand Up@@ -346,6 +347,7 @@ var (
rbac.ResourceNotificationTemplate.Type: {policy.ActionCreate, policy.ActionUpdate, policy.ActionDelete},
rbac.ResourceCryptoKey.Type: {policy.ActionCreate, policy.ActionUpdate, policy.ActionDelete},
rbac.ResourceFile.Type: {policy.ActionCreate, policy.ActionRead},
rbac.ResourceProvisionerJobs.Type: {policy.ActionRead, policy.ActionUpdate, policy.ActionCreate},
}),
Org: map[string][]rbac.Permission{},
User: []rbac.Permission{},
Expand DownExpand Up@@ -407,10 +409,10 @@ func AsAutostart(ctx context.Context) context.Context {
return As(ctx, subjectAutostart)
}

// AsHangDetector returns a context with an actor that has permissions required
// for unhanger.Detector to function.
func AsHangDetector(ctx context.Context) context.Context {
return As(ctx, subjectHangDetector)
// AsJobReaper returns a context with an actor that has permissions required
// for reaper.Detector to function.
func AsJobReaper(ctx context.Context) context.Context {
return As(ctx, subjectJobReaper)
}

// AsKeyRotator returns a context with an actor that has permissions required for rotating crypto keys.
Expand DownExpand Up@@ -1085,11 +1087,10 @@ func (q *querier) AcquireNotificationMessages(ctx context.Context, arg database.
return q.db.AcquireNotificationMessages(ctx, arg)
}

// TODO: We need to create a ProvisionerJob resource type
func (q *querier) AcquireProvisionerJob(ctx context.Context, arg database.AcquireProvisionerJobParams) (database.ProvisionerJob, error) {
//if err := q.authorizeContext(ctx, policy.ActionUpdate, rbac.ResourceSystem); err != nil {
//return database.ProvisionerJob{}, err
//}
if err := q.authorizeContext(ctx, policy.ActionUpdate, rbac.ResourceProvisionerJobs); err != nil {
return database.ProvisionerJob{}, err
}
return q.db.AcquireProvisionerJob(ctx, arg)
}

Expand DownExpand Up@@ -1912,14 +1913,6 @@ func (q *querier) GetHealthSettings(ctx context.Context) (string, error) {
return q.db.GetHealthSettings(ctx)
}

// TODO: We need to create a ProvisionerJob resource type
func (q *querier) GetHungProvisionerJobs(ctx context.Context, hungSince time.Time) ([]database.ProvisionerJob, error) {
// if err := q.authorizeContext(ctx, policy.ActionCreate, rbac.ResourceSystem); err != nil {
// return nil, err
// }
return q.db.GetHungProvisionerJobs(ctx, hungSince)
}

func (q *querier) GetInboxNotificationByID(ctx context.Context, id uuid.UUID) (database.InboxNotification, error) {
return fetchWithAction(q.log, q.auth, policy.ActionRead, q.db.GetInboxNotificationByID)(ctx, id)
}
Expand DownExpand Up@@ -2307,6 +2300,13 @@ func (q *querier) GetProvisionerJobByID(ctx context.Context, id uuid.UUID) (data
return job, nil
}

func (q *querier) GetProvisionerJobByIDForUpdate(ctx context.Context, id uuid.UUID) (database.ProvisionerJob, error) {
if err := q.authorizeContext(ctx, policy.ActionRead, rbac.ResourceProvisionerJobs); err != nil {
return database.ProvisionerJob{}, err
}
return q.db.GetProvisionerJobByIDForUpdate(ctx, id)
}

func (q *querier) GetProvisionerJobTimingsByJobID(ctx context.Context, jobID uuid.UUID) ([]database.ProvisionerJobTiming, error) {
_, err := q.GetProvisionerJobByID(ctx, jobID)
if err != nil {
Expand All@@ -2315,31 +2315,49 @@ func (q *querier) GetProvisionerJobTimingsByJobID(ctx context.Context, jobID uui
return q.db.GetProvisionerJobTimingsByJobID(ctx, jobID)
}

// TODO: We have a ProvisionerJobs resource, but it hasn't been checked for this use-case.
func (q *querier) GetProvisionerJobsByIDs(ctx context.Context, ids []uuid.UUID) ([]database.ProvisionerJob, error) {
// if err := q.authorizeContext(ctx, policy.ActionRead, rbac.ResourceSystem); err != nil {
// return nil, err
// }
return q.db.GetProvisionerJobsByIDs(ctx, ids)
provisionerJobs, err := q.db.GetProvisionerJobsByIDs(ctx, ids)
if err != nil {
return nil, err
}
orgIDs := make(map[uuid.UUID]struct{})
for _, job := range provisionerJobs {
orgIDs[job.OrganizationID] = struct{}{}
}
for orgID := range orgIDs {
if err := q.authorizeContext(ctx, policy.ActionRead, rbac.ResourceProvisionerJobs.InOrg(orgID)); err != nil {
return nil, err
}
}
return provisionerJobs, nil
}

// TODO: We have a ProvisionerJobs resource, but it hasn't been checked for this use-case.
func (q *querier) GetProvisionerJobsByIDsWithQueuePosition(ctx context.Context, ids []uuid.UUID) ([]database.GetProvisionerJobsByIDsWithQueuePositionRow, error) {
// TODO: Remove this once we have a proper rbac check for provisioner jobs.
// Details in https://github.com/coder/coder/issues/16160
return q.db.GetProvisionerJobsByIDsWithQueuePosition(ctx, ids)
}

func (q *querier) GetProvisionerJobsByOrganizationAndStatusWithQueuePositionAndProvisioner(ctx context.Context, arg database.GetProvisionerJobsByOrganizationAndStatusWithQueuePositionAndProvisionerParams) ([]database.GetProvisionerJobsByOrganizationAndStatusWithQueuePositionAndProvisionerRow, error) {
// TODO: Remove this once we have a proper rbac check for provisioner jobs.
// Details in https://github.com/coder/coder/issues/16160
return fetchWithPostFilter(q.auth, policy.ActionRead, q.db.GetProvisionerJobsByOrganizationAndStatusWithQueuePositionAndProvisioner)(ctx, arg)
}

// TODO: We have a ProvisionerJobs resource, but it hasn't been checked for this use-case.
func (q *querier) GetProvisionerJobsCreatedAfter(ctx context.Context, createdAt time.Time) ([]database.ProvisionerJob, error) {
//if err := q.authorizeContext(ctx, policy.ActionRead, rbac.ResourceSystem); err != nil {
//return nil, err
//}
if err := q.authorizeContext(ctx, policy.ActionRead, rbac.ResourceProvisionerJobs); err != nil {
return nil, err
}
return q.db.GetProvisionerJobsCreatedAfter(ctx, createdAt)
}

func (q *querier) GetProvisionerJobsToBeReaped(ctx context.Context, arg database.GetProvisionerJobsToBeReapedParams) ([]database.ProvisionerJob, error) {
if err := q.authorizeContext(ctx, policy.ActionRead, rbac.ResourceProvisionerJobs); err != nil {
return nil, err
}
return q.db.GetProvisionerJobsToBeReaped(ctx, arg)
}

func (q *querier) GetProvisionerKeyByHashedSecret(ctx context.Context, hashedSecret []byte) (database.ProvisionerKey, error) {
return fetch(q.log, q.auth, q.db.GetProvisionerKeyByHashedSecret)(ctx, hashedSecret)
}
Expand DownExpand Up@@ -3524,27 +3542,22 @@ func (q *querier) InsertPresetParameters(ctx context.Context, arg database.Inser
return q.db.InsertPresetParameters(ctx, arg)
}

// TODO: We need to create a ProvisionerJob resource type
func (q *querier) InsertProvisionerJob(ctx context.Context, arg database.InsertProvisionerJobParams) (database.ProvisionerJob, error) {
// if err := q.authorizeContext(ctx, policy.ActionCreate, rbac.ResourceSystem); err != nil {
// return database.ProvisionerJob{}, err
// }
// TODO: Remove this once we have a proper rbac check for provisioner jobs.
// Details in https://github.com/coder/coder/issues/16160
return q.db.InsertProvisionerJob(ctx, arg)
}

// TODO: We need to create a ProvisionerJob resource type
func (q *querier) InsertProvisionerJobLogs(ctx context.Context, arg database.InsertProvisionerJobLogsParams) ([]database.ProvisionerJobLog, error) {
// if err := q.authorizeContext(ctx, policy.ActionCreate, rbac.ResourceSystem); err != nil {
// return nil, err
// }
// TODO: Remove this once we have a proper rbac check for provisioner jobs.
// Details in https://github.com/coder/coder/issues/16160
return q.db.InsertProvisionerJobLogs(ctx, arg)
}

// TODO: We need to create a ProvisionerJob resource type
func (q *querier) InsertProvisionerJobTimings(ctx context.Context, arg database.InsertProvisionerJobTimingsParams) ([]database.ProvisionerJobTiming, error) {
//if err := q.authorizeContext(ctx, policy.ActionCreate, rbac.ResourceSystem); err != nil {
//return nil, err
//}
if err := q.authorizeContext(ctx, policy.ActionUpdate, rbac.ResourceProvisionerJobs); err != nil {
return nil, err
}
return q.db.InsertProvisionerJobTimings(ctx, arg)
}

Expand DownExpand Up@@ -4167,15 +4180,17 @@ func (q *querier) UpdateProvisionerDaemonLastSeenAt(ctx context.Context, arg dat
return q.db.UpdateProvisionerDaemonLastSeenAt(ctx, arg)
}

// TODO: We need to create a ProvisionerJob resource type
func (q *querier) UpdateProvisionerJobByID(ctx context.Context, arg database.UpdateProvisionerJobByIDParams) error {
//if err := q.authorizeContext(ctx, policy.ActionUpdate, rbac.ResourceSystem); err != nil {
//return err
//}
if err := q.authorizeContext(ctx, policy.ActionUpdate, rbac.ResourceProvisionerJobs); err != nil {
return err
}
return q.db.UpdateProvisionerJobByID(ctx, arg)
}

func (q *querier) UpdateProvisionerJobWithCancelByID(ctx context.Context, arg database.UpdateProvisionerJobWithCancelByIDParams) error {
// TODO: Remove this once we have a proper rbac check for provisioner jobs.
// Details in https://github.com/coder/coder/issues/16160

job, err := q.db.GetProvisionerJobByID(ctx, arg.ID)
if err != nil {
return err
Expand DownExpand Up@@ -4242,14 +4257,20 @@ func (q *querier) UpdateProvisionerJobWithCancelByID(ctx context.Context, arg da
return q.db.UpdateProvisionerJobWithCancelByID(ctx, arg)
}

// TODO: We need to create a ProvisionerJob resource type
func (q *querier) UpdateProvisionerJobWithCompleteByID(ctx context.Context, arg database.UpdateProvisionerJobWithCompleteByIDParams) error {
//if err := q.authorizeContext(ctx, policy.ActionUpdate, rbac.ResourceSystem); err != nil {
//return err
//}
if err := q.authorizeContext(ctx, policy.ActionUpdate, rbac.ResourceProvisionerJobs); err != nil {
return err
}
return q.db.UpdateProvisionerJobWithCompleteByID(ctx, arg)
}

func (q *querier) UpdateProvisionerJobWithCompleteWithStartedAtByID(ctx context.Context, arg database.UpdateProvisionerJobWithCompleteWithStartedAtByIDParams) error {
if err := q.authorizeContext(ctx, policy.ActionUpdate, rbac.ResourceProvisionerJobs); err != nil {
return err
}
return q.db.UpdateProvisionerJobWithCompleteWithStartedAtByID(ctx, arg)
}

func (q *querier) UpdateReplica(ctx context.Context, arg database.UpdateReplicaParams) (database.Replica, error) {
if err := q.authorizeContext(ctx, policy.ActionUpdate, rbac.ResourceSystem); err != nil {
return database.Replica{}, err
Expand Down
Loading
Loading

[8]ページ先頭

©2009-2025 Movatter.jp