Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 79fb8e4

Browse files
authored
feat: expose workspace statuses (with details) as a prometheus metric (#12762)
Implements #12462
1 parent 114830d, commit 79fb8e4

File tree

8 files changed

+375
-161
lines changed

8 files changed

+375
-161
lines changed

‎cli/server.go

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -209,7 +209,7 @@ func enablePrometheus(
209209
}
210210
afterCtx(ctx,closeUsersFunc)
211211

212-
closeWorkspacesFunc,err:=prometheusmetrics.Workspaces(ctx,options.PrometheusRegistry,options.Database,0)
212+
closeWorkspacesFunc,err:=prometheusmetrics.Workspaces(ctx,options.Logger.Named("workspaces_metrics"),options.PrometheusRegistry,options.Database,0)
213213
iferr!=nil {
214214
returnnil,xerrors.Errorf("register workspaces prometheus metric: %w",err)
215215
}

‎cli/server_test.go

Lines changed: 0 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -973,26 +973,20 @@ func TestServer(t *testing.T) {
973973

974974
scanner:=bufio.NewScanner(res.Body)
975975
hasActiveUsers:=false
976-
hasWorkspaces:=false
977976
forscanner.Scan() {
978977
// This metric is manually registered to be tracked in the server. That's
979978
// why we test it's tracked here.
980979
ifstrings.HasPrefix(scanner.Text(),"coderd_api_active_users_duration_hour") {
981980
hasActiveUsers=true
982981
continue
983982
}
984-
ifstrings.HasPrefix(scanner.Text(),"coderd_api_workspace_latest_build_total") {
985-
hasWorkspaces=true
986-
continue
987-
}
988983
ifstrings.HasPrefix(scanner.Text(),"coderd_db_query_latencies_seconds") {
989984
t.Fatal("db metrics should not be tracked when --prometheus-collect-db-metrics is not enabled")
990985
}
991986
t.Logf("scanned %s",scanner.Text())
992987
}
993988
require.NoError(t,scanner.Err())
994989
require.True(t,hasActiveUsers)
995-
require.True(t,hasWorkspaces)
996990
})
997991

998992
t.Run("DBMetricsEnabled",func(t*testing.T) {

‎coderd/database/dbmem/dbmem.go

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -404,6 +404,16 @@ func (q *FakeQuerier) convertToWorkspaceRowsNoLock(ctx context.Context, workspac
404404
break
405405
}
406406
}
407+
408+
ifpj,err:=q.getProvisionerJobByIDNoLock(ctx,build.JobID);err==nil {
409+
wr.LatestBuildStatus=pj.JobStatus
410+
}
411+
412+
wr.LatestBuildTransition=build.Transition
413+
}
414+
415+
ifu,err:=q.getUserByIDNoLock(w.OwnerID);err==nil {
416+
wr.Username=u.Username
407417
}
408418

409419
rows=append(rows,wr)

‎coderd/database/modelqueries.go

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -266,6 +266,7 @@ func (q *sqlQuerier) GetAuthorizedWorkspaces(ctx context.Context, arg GetWorkspa
266266
&i.LatestBuildCanceledAt,
267267
&i.LatestBuildError,
268268
&i.LatestBuildTransition,
269+
&i.LatestBuildStatus,
269270
&i.Count,
270271
);err!=nil {
271272
returnnil,err

‎coderd/database/queries.sql.go

Lines changed: 34 additions & 30 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

‎coderd/database/queries/workspaces.sql

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -96,7 +96,8 @@ SELECT
9696
latest_build.completed_atas latest_build_completed_at,
9797
latest_build.canceled_atas latest_build_canceled_at,
9898
latest_build.erroras latest_build_error,
99-
latest_build.transitionas latest_build_transition
99+
latest_build.transitionas latest_build_transition,
100+
latest_build.job_statusas latest_build_status
100101
FROM
101102
workspaces
102103
JOIN
@@ -118,7 +119,7 @@ LEFT JOIN LATERAL (
118119
provisioner_jobs.job_status
119120
FROM
120121
workspace_builds
121-
LEFTJOIN
122+
JOIN
122123
provisioner_jobs
123124
ON
124125
provisioner_jobs.id=workspace_builds.job_id
@@ -374,7 +375,8 @@ WHERE
374375
'0001-01-01 00:00:00+00'::timestamptz,-- latest_build_completed_at,
375376
'0001-01-01 00:00:00+00'::timestamptz,-- latest_build_canceled_at,
376377
'',-- latest_build_error
377-
'start'::workspace_transition-- latest_build_transition
378+
'start'::workspace_transition,-- latest_build_transition
379+
'unknown'::provisioner_job_status-- latest_build_status
378380
WHERE
379381
@with_summary ::boolean= true
380382
), total_countAS (

‎coderd/prometheusmetrics/prometheusmetrics.go

Lines changed: 65 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -24,10 +24,12 @@ import (
2424
"github.com/coder/coder/v2/tailnet"
2525
)
2626

27+
constdefaultRefreshRate=time.Minute
28+
2729
// ActiveUsers tracks the number of users that have authenticated within the past hour.
2830
funcActiveUsers(ctx context.Context,registerer prometheus.Registerer,db database.Store,duration time.Duration) (func(),error) {
2931
ifduration==0 {
30-
duration=5*time.Minute
32+
duration=defaultRefreshRate
3133
}
3234

3335
gauge:=prometheus.NewGauge(prometheus.GaugeOpts{
@@ -72,36 +74,42 @@ func ActiveUsers(ctx context.Context, registerer prometheus.Registerer, db datab
7274
}
7375

7476
// Workspaces tracks the total number of workspaces with labels on status.
75-
funcWorkspaces(ctx context.Context,registerer prometheus.Registerer,db database.Store,duration time.Duration) (func(),error) {
77+
funcWorkspaces(ctx context.Context,logger slog.Logger,registerer prometheus.Registerer,db database.Store,duration time.Duration) (func(),error) {
7678
ifduration==0 {
77-
duration=5*time.Minute
79+
duration=defaultRefreshRate
7880
}
7981

80-
gauge:=prometheus.NewGaugeVec(prometheus.GaugeOpts{
82+
workspaceLatestBuildTotals:=prometheus.NewGaugeVec(prometheus.GaugeOpts{
8183
Namespace:"coderd",
8284
Subsystem:"api",
8385
Name:"workspace_latest_build_total",
84-
Help:"Thelatestworkspace buildswith a status.",
86+
Help:"Thecurrent number ofworkspace buildsby status.",
8587
}, []string{"status"})
86-
err:=registerer.Register(gauge)
87-
iferr!=nil {
88+
iferr:=registerer.Register(workspaceLatestBuildTotals);err!=nil {
89+
returnnil,err
90+
}
91+
92+
workspaceLatestBuildStatuses:=prometheus.NewGaugeVec(prometheus.GaugeOpts{
93+
Namespace:"coderd",
94+
Name:"workspace_latest_build_status",
95+
Help:"The current workspace statuses by template, transition, and owner.",
96+
}, []string{"status","template_name","template_version","workspace_owner","workspace_transition"})
97+
iferr:=registerer.Register(workspaceLatestBuildStatuses);err!=nil {
8898
returnnil,err
8999
}
90-
// This exists so the prometheus metric exports immediately when set.
91-
// It helps with tests so they don't have to wait for a tick.
92-
gauge.WithLabelValues("pending").Set(0)
93100

94101
ctx,cancelFunc:=context.WithCancel(ctx)
95102
done:=make(chanstruct{})
96103

97-
// Use time.Nanosecond to force an initial tick. It will be reset to the
98-
// correct duration after executing once.
99-
ticker:=time.NewTicker(time.Nanosecond)
100-
doTick:=func() {
101-
deferticker.Reset(duration)
102-
104+
updateWorkspaceTotals:=func() {
103105
builds,err:=db.GetLatestWorkspaceBuilds(ctx)
104106
iferr!=nil {
107+
iferrors.Is(err,sql.ErrNoRows) {
108+
// clear all series if there are no database entries
109+
workspaceLatestBuildTotals.Reset()
110+
}
111+
112+
logger.Warn(ctx,"failed to load latest workspace builds",slog.Error(err))
105113
return
106114
}
107115
jobIDs:=make([]uuid.UUID,0,len(builds))
@@ -110,16 +118,53 @@ func Workspaces(ctx context.Context, registerer prometheus.Registerer, db databa
110118
}
111119
jobs,err:=db.GetProvisionerJobsByIDs(ctx,jobIDs)
112120
iferr!=nil {
121+
ids:=make([]string,0,len(jobIDs))
122+
for_,id:=rangejobIDs {
123+
ids=append(ids,id.String())
124+
}
125+
126+
logger.Warn(ctx,"failed to load provisioner jobs",slog.F("ids",ids),slog.Error(err))
113127
return
114128
}
115129

116-
gauge.Reset()
130+
workspaceLatestBuildTotals.Reset()
117131
for_,job:=rangejobs {
118132
status:=codersdk.ProvisionerJobStatus(job.JobStatus)
119-
gauge.WithLabelValues(string(status)).Add(1)
133+
workspaceLatestBuildTotals.WithLabelValues(string(status)).Add(1)
120134
}
121135
}
122136

137+
updateWorkspaceStatuses:=func() {
138+
ws,err:=db.GetWorkspaces(ctx, database.GetWorkspacesParams{
139+
Deleted:false,
140+
WithSummary:false,
141+
})
142+
iferr!=nil {
143+
iferrors.Is(err,sql.ErrNoRows) {
144+
// clear all series if there are no database entries
145+
workspaceLatestBuildStatuses.Reset()
146+
}
147+
148+
logger.Warn(ctx,"failed to load active workspaces",slog.Error(err))
149+
return
150+
}
151+
152+
workspaceLatestBuildStatuses.Reset()
153+
for_,w:=rangews {
154+
workspaceLatestBuildStatuses.WithLabelValues(string(w.LatestBuildStatus),w.TemplateName,w.TemplateVersionName.String,w.Username,string(w.LatestBuildTransition)).Add(1)
155+
}
156+
}
157+
158+
// Use time.Nanosecond to force an initial tick. It will be reset to the
159+
// correct duration after executing once.
160+
ticker:=time.NewTicker(time.Nanosecond)
161+
doTick:=func() {
162+
deferticker.Reset(duration)
163+
164+
updateWorkspaceTotals()
165+
updateWorkspaceStatuses()
166+
}
167+
123168
gofunc() {
124169
deferclose(done)
125170
deferticker.Stop()
@@ -141,7 +186,7 @@ func Workspaces(ctx context.Context, registerer prometheus.Registerer, db databa
141186
// Agents tracks the total number of workspaces with labels on status.
142187
funcAgents(ctx context.Context,logger slog.Logger,registerer prometheus.Registerer,db database.Store,coordinator*atomic.Pointer[tailnet.Coordinator],derpMapFnfunc()*tailcfg.DERPMap,agentInactiveDisconnectTimeout,duration time.Duration) (func(),error) {
143188
ifduration==0 {
144-
duration=1*time.Minute
189+
duration=defaultRefreshRate
145190
}
146191

147192
agentsGauge:=NewCachedGaugeVec(prometheus.NewGaugeVec(prometheus.GaugeOpts{
@@ -330,7 +375,7 @@ func Agents(ctx context.Context, logger slog.Logger, registerer prometheus.Regis
330375

331376
funcAgentStats(ctx context.Context,logger slog.Logger,registerer prometheus.Registerer,db database.Store,initialCreateAfter time.Time,duration time.Duration,aggregateByLabels []string) (func(),error) {
332377
ifduration==0 {
333-
duration=1*time.Minute
378+
duration=defaultRefreshRate
334379
}
335380

336381
iflen(aggregateByLabels)==0 {

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp