Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit b7bdb17

Browse files
authored
feat: add metrics to workspace agent scripts (#11132)
* push startup script metrics to agent
1 parent 41ed581 commit b7bdb17

File tree

20 files changed

+306
-127
lines changed

20 files changed

+306
-127
lines changed

‎agent/agent.go‎

Lines changed: 18 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -35,6 +35,8 @@ import (
3535
"tailscale.com/types/netlogtype"
3636

3737
"cdr.dev/slog"
38+
"github.com/coder/retry"
39+
3840
"github.com/coder/coder/v2/agent/agentproc"
3941
"github.com/coder/coder/v2/agent/agentscripts"
4042
"github.com/coder/coder/v2/agent/agentssh"
@@ -45,7 +47,6 @@ import (
4547
"github.com/coder/coder/v2/codersdk"
4648
"github.com/coder/coder/v2/codersdk/agentsdk"
4749
"github.com/coder/coder/v2/tailnet"
48-
"github.com/coder/retry"
4950
)
5051

5152
const (
@@ -222,8 +223,10 @@ type agent struct {
222223
connCountReconnectingPTY atomic.Int64
223224

224225
prometheusRegistry*prometheus.Registry
225-
metrics*agentMetrics
226-
syscaller agentproc.Syscaller
226+
// metrics are prometheus registered metrics that will be collected and
227+
// labeled in Coder with the agent + workspace.
228+
metrics*agentMetrics
229+
syscaller agentproc.Syscaller
227230

228231
// modifiedProcs is used for testing process priority management.
229232
modifiedProcschan []*agentproc.Process
@@ -252,6 +255,9 @@ func (a *agent) init(ctx context.Context) {
252255
Filesystem:a.filesystem,
253256
PatchLogs:a.client.PatchLogs,
254257
})
258+
// Register runner metrics. If the prom registry is nil, the metrics
259+
// will not report anywhere.
260+
a.scriptRunner.RegisterMetrics(a.prometheusRegistry)
255261
goa.runLoop(ctx)
256262
}
257263

@@ -745,9 +751,12 @@ func (a *agent) run(ctx context.Context) error {
745751
returnxerrors.Errorf("init script runner: %w",err)
746752
}
747753
err=a.trackConnGoroutine(func() {
754+
start:=time.Now()
748755
err:=a.scriptRunner.Execute(ctx,func(script codersdk.WorkspaceAgentScript)bool {
749756
returnscript.RunOnStart
750757
})
758+
// Measure the time immediately after the script has finished
759+
dur:=time.Since(start).Seconds()
751760
iferr!=nil {
752761
a.logger.Warn(ctx,"startup script(s) failed",slog.Error(err))
753762
iferrors.Is(err,agentscripts.ErrTimeout) {
@@ -758,6 +767,12 @@ func (a *agent) run(ctx context.Context) error {
758767
}else {
759768
a.setLifecycle(ctx,codersdk.WorkspaceAgentLifecycleReady)
760769
}
770+
771+
label:="false"
772+
iferr==nil {
773+
label="true"
774+
}
775+
a.metrics.startupScriptSeconds.WithLabelValues(label).Set(dur)
761776
a.scriptRunner.StartCron()
762777
})
763778
iferr!=nil {

‎agent/agent_test.go‎

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ import (
4646
"cdr.dev/slog"
4747
"cdr.dev/slog/sloggers/sloghuman"
4848
"cdr.dev/slog/sloggers/slogtest"
49+
4950
"github.com/coder/coder/v2/agent"
5051
"github.com/coder/coder/v2/agent/agentproc"
5152
"github.com/coder/coder/v2/agent/agentproc/agentproctest"
@@ -2235,6 +2236,17 @@ func TestAgent_Metrics_SSH(t *testing.T) {
22352236
Type:agentsdk.AgentMetricTypeCounter,
22362237
Value:0,
22372238
},
2239+
{
2240+
Name:"coderd_agentstats_startup_script_seconds",
2241+
Type:agentsdk.AgentMetricTypeGauge,
2242+
Value:0,
2243+
Labels: []agentsdk.AgentMetricLabel{
2244+
{
2245+
Name:"success",
2246+
Value:"true",
2247+
},
2248+
},
2249+
},
22382250
}
22392251

22402252
varactual []*promgo.MetricFamily

‎agent/agentscripts/agentscripts.go‎

Lines changed: 33 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,14 @@ import (
1313
"sync/atomic"
1414
"time"
1515

16+
"github.com/prometheus/client_golang/prometheus"
1617
"github.com/robfig/cron/v3"
1718
"github.com/spf13/afero"
1819
"golang.org/x/sync/errgroup"
1920
"golang.org/x/xerrors"
2021

2122
"cdr.dev/slog"
23+
2224
"github.com/coder/coder/v2/agent/agentssh"
2325
"github.com/coder/coder/v2/codersdk"
2426
"github.com/coder/coder/v2/codersdk/agentsdk"
@@ -57,6 +59,11 @@ func New(opts Options) *Runner {
5759
cronCtxCancel:cronCtxCancel,
5860
cron:cron.New(cron.WithParser(parser)),
5961
closed:make(chanstruct{}),
62+
scriptsExecuted:prometheus.NewCounterVec(prometheus.CounterOpts{
63+
Namespace:"agent",
64+
Subsystem:"scripts",
65+
Name:"executed_total",
66+
}, []string{"success"}),
6067
}
6168
}
6269

@@ -71,6 +78,19 @@ type Runner struct {
7178
cron*cron.Cron
7279
initialized atomic.Bool
7380
scripts []codersdk.WorkspaceAgentScript
81+
82+
// scriptsExecuted includes all scripts executed by the workspace agent. Agents
83+
// execute startup scripts, and scripts on a cron schedule. Both will increment
84+
// this counter.
85+
scriptsExecuted*prometheus.CounterVec
86+
}
87+
88+
func (r*Runner)RegisterMetrics(reg prometheus.Registerer) {
89+
ifreg==nil {
90+
// If no registry, do nothing.
91+
return
92+
}
93+
reg.MustRegister(r.scriptsExecuted)
7494
}
7595

7696
// Init initializes the runner with the provided scripts.
@@ -90,7 +110,7 @@ func (r *Runner) Init(scripts []codersdk.WorkspaceAgentScript) error {
90110
}
91111
script:=script
92112
_,err:=r.cron.AddFunc(script.Cron,func() {
93-
err:=r.run(r.cronCtx,script)
113+
err:=r.trackRun(r.cronCtx,script)
94114
iferr!=nil {
95115
r.Logger.Warn(context.Background(),"run agent script on schedule",slog.Error(err))
96116
}
@@ -131,7 +151,7 @@ func (r *Runner) Execute(ctx context.Context, filter func(script codersdk.Worksp
131151
}
132152
script:=script
133153
eg.Go(func()error {
134-
err:=r.run(ctx,script)
154+
err:=r.trackRun(ctx,script)
135155
iferr!=nil {
136156
returnxerrors.Errorf("run agent script %q: %w",script.LogSourceID,err)
137157
}
@@ -141,6 +161,17 @@ func (r *Runner) Execute(ctx context.Context, filter func(script codersdk.Worksp
141161
returneg.Wait()
142162
}
143163

164+
// trackRun wraps "run" with metrics.
165+
func (r*Runner)trackRun(ctx context.Context,script codersdk.WorkspaceAgentScript)error {
166+
err:=r.run(ctx,script)
167+
iferr!=nil {
168+
r.scriptsExecuted.WithLabelValues("false").Add(1)
169+
}else {
170+
r.scriptsExecuted.WithLabelValues("true").Add(1)
171+
}
172+
returnerr
173+
}
174+
144175
// run executes the provided script with the timeout.
145176
// If the timeout is exceeded, the process is sent an interrupt signal.
146177
// If the process does not exit after a few seconds, it is forcefully killed.

‎agent/metrics.go‎

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,9 @@ import (
1717
typeagentMetricsstruct {
1818
connectionsTotal prometheus.Counter
1919
reconnectingPTYErrors*prometheus.CounterVec
20+
// startupScriptSeconds is the time in seconds that the start script(s)
21+
// took to run. This is reported once per agent.
22+
startupScriptSeconds*prometheus.GaugeVec
2023
}
2124

2225
funcnewAgentMetrics(registerer prometheus.Registerer)*agentMetrics {
@@ -35,9 +38,18 @@ func newAgentMetrics(registerer prometheus.Registerer) *agentMetrics {
3538
)
3639
registerer.MustRegister(reconnectingPTYErrors)
3740

41+
startupScriptSeconds:=prometheus.NewGaugeVec(prometheus.GaugeOpts{
42+
Namespace:"coderd",
43+
Subsystem:"agentstats",
44+
Name:"startup_script_seconds",
45+
Help:"Amount of time taken to run the startup script in seconds.",
46+
}, []string{"success"})
47+
registerer.MustRegister(startupScriptSeconds)
48+
3849
return&agentMetrics{
3950
connectionsTotal:connectionsTotal,
4051
reconnectingPTYErrors:reconnectingPTYErrors,
52+
startupScriptSeconds:startupScriptSeconds,
4153
}
4254
}
4355

‎coderd/coderd.go‎

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,8 +38,10 @@ import (
3838
_"github.com/coder/coder/v2/coderd/apidoc"
3939
"github.com/coder/coder/v2/coderd/externalauth"
4040
"github.com/coder/coder/v2/coderd/healthcheck/derphealth"
41+
"github.com/coder/coder/v2/coderd/prometheusmetrics"
4142

4243
"cdr.dev/slog"
44+
4345
"github.com/coder/coder/v2/buildinfo"
4446
"github.com/coder/coder/v2/cli/clibase"
4547
"github.com/coder/coder/v2/coderd/audit"
@@ -168,7 +170,7 @@ type Options struct {
168170

169171
HTTPClient*http.Client
170172

171-
UpdateAgentMetricsfunc(ctx context.Context,username,workspaceName,agentNamestring,metrics []agentsdk.AgentMetric)
173+
UpdateAgentMetricsfunc(ctx context.Context,labels prometheusmetrics.AgentMetricLabels,metrics []agentsdk.AgentMetric)
172174
StatsBatcher*batchstats.Batcher
173175

174176
WorkspaceAppsStatsCollectorOptions workspaceapps.StatsCollectorOptions

‎coderd/database/dbauthz/dbauthz.go‎

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1918,7 +1918,7 @@ func (q *querier) GetWorkspaceBuildsCreatedAfter(ctx context.Context, createdAt
19181918
returnq.db.GetWorkspaceBuildsCreatedAfter(ctx,createdAt)
19191919
}
19201920

1921-
func (q*querier)GetWorkspaceByAgentID(ctx context.Context,agentID uuid.UUID) (database.Workspace,error) {
1921+
func (q*querier)GetWorkspaceByAgentID(ctx context.Context,agentID uuid.UUID) (database.GetWorkspaceByAgentIDRow,error) {
19221922
returnfetch(q.log,q.auth,q.db.GetWorkspaceByAgentID)(ctx,agentID)
19231923
}
19241924

‎coderd/database/dbauthz/dbauthz_test.go‎

Lines changed: 40 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1065,21 +1065,30 @@ func (s *MethodTestSuite) TestWorkspace() {
10651065
check.Args(ws.ID).Asserts(ws,rbac.ActionRead).Returns(b)
10661066
}))
10671067
s.Run("GetWorkspaceAgentByID",s.Subtest(func(db database.Store,check*expects) {
1068-
ws:=dbgen.Workspace(s.T(),db, database.Workspace{})
1068+
tpl:=dbgen.Template(s.T(),db, database.Template{})
1069+
ws:=dbgen.Workspace(s.T(),db, database.Workspace{
1070+
TemplateID:tpl.ID,
1071+
})
10691072
build:=dbgen.WorkspaceBuild(s.T(),db, database.WorkspaceBuild{WorkspaceID:ws.ID,JobID:uuid.New()})
10701073
res:=dbgen.WorkspaceResource(s.T(),db, database.WorkspaceResource{JobID:build.JobID})
10711074
agt:=dbgen.WorkspaceAgent(s.T(),db, database.WorkspaceAgent{ResourceID:res.ID})
10721075
check.Args(agt.ID).Asserts(ws,rbac.ActionRead).Returns(agt)
10731076
}))
10741077
s.Run("GetWorkspaceAgentByInstanceID",s.Subtest(func(db database.Store,check*expects) {
1075-
ws:=dbgen.Workspace(s.T(),db, database.Workspace{})
1078+
tpl:=dbgen.Template(s.T(),db, database.Template{})
1079+
ws:=dbgen.Workspace(s.T(),db, database.Workspace{
1080+
TemplateID:tpl.ID,
1081+
})
10761082
build:=dbgen.WorkspaceBuild(s.T(),db, database.WorkspaceBuild{WorkspaceID:ws.ID,JobID:uuid.New()})
10771083
res:=dbgen.WorkspaceResource(s.T(),db, database.WorkspaceResource{JobID:build.JobID})
10781084
agt:=dbgen.WorkspaceAgent(s.T(),db, database.WorkspaceAgent{ResourceID:res.ID})
10791085
check.Args(agt.AuthInstanceID.String).Asserts(ws,rbac.ActionRead).Returns(agt)
10801086
}))
10811087
s.Run("UpdateWorkspaceAgentLifecycleStateByID",s.Subtest(func(db database.Store,check*expects) {
1082-
ws:=dbgen.Workspace(s.T(),db, database.Workspace{})
1088+
tpl:=dbgen.Template(s.T(),db, database.Template{})
1089+
ws:=dbgen.Workspace(s.T(),db, database.Workspace{
1090+
TemplateID:tpl.ID,
1091+
})
10831092
build:=dbgen.WorkspaceBuild(s.T(),db, database.WorkspaceBuild{WorkspaceID:ws.ID,JobID:uuid.New()})
10841093
res:=dbgen.WorkspaceResource(s.T(),db, database.WorkspaceResource{JobID:build.JobID})
10851094
agt:=dbgen.WorkspaceAgent(s.T(),db, database.WorkspaceAgent{ResourceID:res.ID})
@@ -1089,7 +1098,10 @@ func (s *MethodTestSuite) TestWorkspace() {
10891098
}).Asserts(ws,rbac.ActionUpdate).Returns()
10901099
}))
10911100
s.Run("UpdateWorkspaceAgentLogOverflowByID",s.Subtest(func(db database.Store,check*expects) {
1092-
ws:=dbgen.Workspace(s.T(),db, database.Workspace{})
1101+
tpl:=dbgen.Template(s.T(),db, database.Template{})
1102+
ws:=dbgen.Workspace(s.T(),db, database.Workspace{
1103+
TemplateID:tpl.ID,
1104+
})
10931105
build:=dbgen.WorkspaceBuild(s.T(),db, database.WorkspaceBuild{WorkspaceID:ws.ID,JobID:uuid.New()})
10941106
res:=dbgen.WorkspaceResource(s.T(),db, database.WorkspaceResource{JobID:build.JobID})
10951107
agt:=dbgen.WorkspaceAgent(s.T(),db, database.WorkspaceAgent{ResourceID:res.ID})
@@ -1099,7 +1111,10 @@ func (s *MethodTestSuite) TestWorkspace() {
10991111
}).Asserts(ws,rbac.ActionUpdate).Returns()
11001112
}))
11011113
s.Run("UpdateWorkspaceAgentStartupByID",s.Subtest(func(db database.Store,check*expects) {
1102-
ws:=dbgen.Workspace(s.T(),db, database.Workspace{})
1114+
tpl:=dbgen.Template(s.T(),db, database.Template{})
1115+
ws:=dbgen.Workspace(s.T(),db, database.Workspace{
1116+
TemplateID:tpl.ID,
1117+
})
11031118
build:=dbgen.WorkspaceBuild(s.T(),db, database.WorkspaceBuild{WorkspaceID:ws.ID,JobID:uuid.New()})
11041119
res:=dbgen.WorkspaceResource(s.T(),db, database.WorkspaceResource{JobID:build.JobID})
11051120
agt:=dbgen.WorkspaceAgent(s.T(),db, database.WorkspaceAgent{ResourceID:res.ID})
@@ -1111,7 +1126,10 @@ func (s *MethodTestSuite) TestWorkspace() {
11111126
}).Asserts(ws,rbac.ActionUpdate).Returns()
11121127
}))
11131128
s.Run("GetWorkspaceAgentLogsAfter",s.Subtest(func(db database.Store,check*expects) {
1114-
ws:=dbgen.Workspace(s.T(),db, database.Workspace{})
1129+
tpl:=dbgen.Template(s.T(),db, database.Template{})
1130+
ws:=dbgen.Workspace(s.T(),db, database.Workspace{
1131+
TemplateID:tpl.ID,
1132+
})
11151133
build:=dbgen.WorkspaceBuild(s.T(),db, database.WorkspaceBuild{WorkspaceID:ws.ID,JobID:uuid.New()})
11161134
res:=dbgen.WorkspaceResource(s.T(),db, database.WorkspaceResource{JobID:build.JobID})
11171135
agt:=dbgen.WorkspaceAgent(s.T(),db, database.WorkspaceAgent{ResourceID:res.ID})
@@ -1120,7 +1138,10 @@ func (s *MethodTestSuite) TestWorkspace() {
11201138
}).Asserts(ws,rbac.ActionRead).Returns([]database.WorkspaceAgentLog{})
11211139
}))
11221140
s.Run("GetWorkspaceAppByAgentIDAndSlug",s.Subtest(func(db database.Store,check*expects) {
1123-
ws:=dbgen.Workspace(s.T(),db, database.Workspace{})
1141+
tpl:=dbgen.Template(s.T(),db, database.Template{})
1142+
ws:=dbgen.Workspace(s.T(),db, database.Workspace{
1143+
TemplateID:tpl.ID,
1144+
})
11241145
build:=dbgen.WorkspaceBuild(s.T(),db, database.WorkspaceBuild{WorkspaceID:ws.ID,JobID:uuid.New()})
11251146
res:=dbgen.WorkspaceResource(s.T(),db, database.WorkspaceResource{JobID:build.JobID})
11261147
agt:=dbgen.WorkspaceAgent(s.T(),db, database.WorkspaceAgent{ResourceID:res.ID})
@@ -1132,7 +1153,10 @@ func (s *MethodTestSuite) TestWorkspace() {
11321153
}).Asserts(ws,rbac.ActionRead).Returns(app)
11331154
}))
11341155
s.Run("GetWorkspaceAppsByAgentID",s.Subtest(func(db database.Store,check*expects) {
1135-
ws:=dbgen.Workspace(s.T(),db, database.Workspace{})
1156+
tpl:=dbgen.Template(s.T(),db, database.Template{})
1157+
ws:=dbgen.Workspace(s.T(),db, database.Workspace{
1158+
TemplateID:tpl.ID,
1159+
})
11361160
build:=dbgen.WorkspaceBuild(s.T(),db, database.WorkspaceBuild{WorkspaceID:ws.ID,JobID:uuid.New()})
11371161
res:=dbgen.WorkspaceResource(s.T(),db, database.WorkspaceResource{JobID:build.JobID})
11381162
agt:=dbgen.WorkspaceAgent(s.T(),db, database.WorkspaceAgent{ResourceID:res.ID})
@@ -1173,11 +1197,17 @@ func (s *MethodTestSuite) TestWorkspace() {
11731197
check.Args(database.GetWorkspaceBuildsByWorkspaceIDParams{WorkspaceID:ws.ID}).Asserts(ws,rbac.ActionRead)// ordering
11741198
}))
11751199
s.Run("GetWorkspaceByAgentID",s.Subtest(func(db database.Store,check*expects) {
1176-
ws:=dbgen.Workspace(s.T(),db, database.Workspace{})
1200+
tpl:=dbgen.Template(s.T(),db, database.Template{})
1201+
ws:=dbgen.Workspace(s.T(),db, database.Workspace{
1202+
TemplateID:tpl.ID,
1203+
})
11771204
build:=dbgen.WorkspaceBuild(s.T(),db, database.WorkspaceBuild{WorkspaceID:ws.ID,JobID:uuid.New()})
11781205
res:=dbgen.WorkspaceResource(s.T(),db, database.WorkspaceResource{JobID:build.JobID})
11791206
agt:=dbgen.WorkspaceAgent(s.T(),db, database.WorkspaceAgent{ResourceID:res.ID})
1180-
check.Args(agt.ID).Asserts(ws,rbac.ActionRead).Returns(ws)
1207+
check.Args(agt.ID).Asserts(ws,rbac.ActionRead).Returns(database.GetWorkspaceByAgentIDRow{
1208+
Workspace:ws,
1209+
TemplateName:tpl.Name,
1210+
})
11811211
}))
11821212
s.Run("GetWorkspaceByOwnerIDAndName",s.Subtest(func(db database.Store,check*expects) {
11831213
ws:=dbgen.Workspace(s.T(),db, database.Workspace{})

‎coderd/database/dbmem/dbmem.go‎

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4293,11 +4293,24 @@ func (q *FakeQuerier) GetWorkspaceBuildsCreatedAfter(_ context.Context, after ti
42934293
returnworkspaceBuilds,nil
42944294
}
42954295

4296-
func (q*FakeQuerier)GetWorkspaceByAgentID(ctx context.Context,agentID uuid.UUID) (database.Workspace,error) {
4296+
func (q*FakeQuerier)GetWorkspaceByAgentID(ctx context.Context,agentID uuid.UUID) (database.GetWorkspaceByAgentIDRow,error) {
42974297
q.mutex.RLock()
42984298
deferq.mutex.RUnlock()
42994299

4300-
returnq.getWorkspaceByAgentIDNoLock(ctx,agentID)
4300+
w,err:=q.getWorkspaceByAgentIDNoLock(ctx,agentID)
4301+
iferr!=nil {
4302+
return database.GetWorkspaceByAgentIDRow{},err
4303+
}
4304+
4305+
tpl,err:=q.getTemplateByIDNoLock(ctx,w.TemplateID)
4306+
iferr!=nil {
4307+
return database.GetWorkspaceByAgentIDRow{},err
4308+
}
4309+
4310+
return database.GetWorkspaceByAgentIDRow{
4311+
Workspace:w,
4312+
TemplateName:tpl.Name,
4313+
},nil
43014314
}
43024315

43034316
func (q*FakeQuerier)GetWorkspaceByID(ctx context.Context,id uuid.UUID) (database.Workspace,error) {

‎coderd/database/dbmetrics/dbmetrics.go‎

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

‎coderd/database/dbmock/dbmock.go‎

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp