Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 0347231

Browse files
authored
feat: expose agent metrics via Prometheus endpoint (#7011)
* WIP
* WIP
* WIP
* Agents
* fix
* 1min
* fix
* WIP
* Test
* docs
* fmt
* Add timer to measure the metrics collection
* Use CachedGaugeVec
* Unit tests
* Address PR comments
1 parent dd85ea8, commit 0347231

File tree

7 files changed

+629
-48
lines changed

7 files changed

+629
-48
lines changed

‎cli/server.go‎

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -896,6 +896,15 @@ func (r *RootCmd) Server(newAPI func(context.Context, *coderd.Options) (*coderd.
896896
returnxerrors.Errorf("create coder API: %w",err)
897897
}
898898

899+
ifcfg.Prometheus.Enable {
900+
// Agent metrics require reference to the tailnet coordinator, so must be initiated after Coder API.
901+
closeAgentsFunc,err:=prometheusmetrics.Agents(ctx,logger,options.PrometheusRegistry,coderAPI.Database,&coderAPI.TailnetCoordinator,options.DERPMap,coderAPI.Options.AgentInactiveDisconnectTimeout,0)
902+
iferr!=nil {
903+
returnxerrors.Errorf("register agents prometheus metric: %w",err)
904+
}
905+
defercloseAgentsFunc()
906+
}
907+
899908
client:=codersdk.New(localURL)
900909
iflocalURL.Scheme=="https"&&isLocalhost(localURL.Hostname()) {
901910
// The certificate will likely be self-signed or for a different
Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
package prometheusmetrics
2+
3+
import (
4+
"sync"
5+
6+
"github.com/prometheus/client_golang/prometheus"
7+
)
8+
9+
// CachedGaugeVec is a wrapper for the prometheus.GaugeVec which allows
10+
// for staging changes in the metrics vector. Calling "WithLabelValues(...)"
11+
// will update the internal gauge value, but it will not be returned by
12+
// "Collect(...)" until the "Commit()" method is called. The "Commit()" method
13+
// resets the internal gauge and applies all staged changes to it.
14+
//
15+
// The Use of CachedGaugeVec is recommended for use cases when there is a risk
16+
// that the Prometheus collector receives incomplete metrics, collected
17+
// in the middle of metrics recalculation, between "Reset()" and the last
18+
// "WithLabelValues()" call.
19+
typeCachedGaugeVecstruct {
20+
m sync.Mutex
21+
22+
gaugeVec*prometheus.GaugeVec
23+
records []vectorRecord
24+
}
25+
26+
var_ prometheus.Collector=new(CachedGaugeVec)
27+
28+
// VectorOperation selects how a staged value is applied to its gauge when
// the CachedGaugeVec is committed.
type VectorOperation int

const (
	// VectorOperationAdd adds the staged value to the gauge.
	VectorOperationAdd VectorOperation = iota
	// VectorOperationSet sets the gauge to the staged value.
	VectorOperationSet
)
34+
35+
typevectorRecordstruct {
36+
operationVectorOperation
37+
valuefloat64
38+
labelValues []string
39+
}
40+
41+
funcNewCachedGaugeVec(gaugeVec*prometheus.GaugeVec)*CachedGaugeVec {
42+
return&CachedGaugeVec{
43+
gaugeVec:gaugeVec,
44+
}
45+
}
46+
47+
func (v*CachedGaugeVec)Describe(descchan<-*prometheus.Desc) {
48+
v.gaugeVec.Describe(desc)
49+
}
50+
51+
func (v*CachedGaugeVec)Collect(chchan<- prometheus.Metric) {
52+
v.m.Lock()
53+
deferv.m.Unlock()
54+
55+
v.gaugeVec.Collect(ch)
56+
}
57+
58+
func (v*CachedGaugeVec)WithLabelValues(operationVectorOperation,valuefloat64,labelValues...string) {
59+
switchoperation {
60+
caseVectorOperationAdd:
61+
caseVectorOperationSet:
62+
default:
63+
panic("unsupported vector operation")
64+
}
65+
66+
v.m.Lock()
67+
deferv.m.Unlock()
68+
69+
v.records=append(v.records,vectorRecord{
70+
operation:operation,
71+
value:value,
72+
labelValues:labelValues,
73+
})
74+
}
75+
76+
// Commit will set the internal value as the cached value to return from "Collect()".
77+
// The internal metric value is completely reset, so the caller should expect
78+
// the gauge to be empty for the next 'WithLabelValues' values.
79+
func (v*CachedGaugeVec)Commit() {
80+
v.m.Lock()
81+
deferv.m.Unlock()
82+
83+
v.gaugeVec.Reset()
84+
for_,record:=rangev.records {
85+
g:=v.gaugeVec.WithLabelValues(record.labelValues...)
86+
switchrecord.operation {
87+
caseVectorOperationAdd:
88+
g.Add(record.value)
89+
caseVectorOperationSet:
90+
g.Set(record.value)
91+
}
92+
}
93+
94+
v.records=nil
95+
}
Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
package prometheusmetrics_test
2+
3+
import (
4+
"sort"
5+
"testing"
6+
7+
"github.com/prometheus/client_golang/prometheus"
8+
dto"github.com/prometheus/client_model/go"
9+
"github.com/stretchr/testify/assert"
10+
"github.com/stretchr/testify/require"
11+
12+
"github.com/coder/coder/coderd/prometheusmetrics"
13+
)
14+
15+
funcTestCollector_Add(t*testing.T) {
16+
t.Parallel()
17+
18+
// given
19+
agentsGauge:=prometheusmetrics.NewCachedGaugeVec(prometheus.NewGaugeVec(prometheus.GaugeOpts{
20+
Namespace:"coderd",
21+
Subsystem:"agents",
22+
Name:"up",
23+
Help:"The number of active agents per workspace.",
24+
}, []string{"username","workspace_name"}))
25+
26+
// when
27+
agentsGauge.WithLabelValues(prometheusmetrics.VectorOperationAdd,7,"first user","my workspace")
28+
agentsGauge.WithLabelValues(prometheusmetrics.VectorOperationAdd,23,"second user","your workspace")
29+
agentsGauge.WithLabelValues(prometheusmetrics.VectorOperationAdd,1,"first user","my workspace")
30+
agentsGauge.WithLabelValues(prometheusmetrics.VectorOperationAdd,25,"second user","your workspace")
31+
agentsGauge.Commit()
32+
33+
// then
34+
ch:=make(chan prometheus.Metric,2)
35+
agentsGauge.Collect(ch)
36+
37+
metrics:=collectAndSortMetrics(t,agentsGauge,2)
38+
39+
assert.Equal(t,"first user",metrics[0].Label[0].GetValue())// Username
40+
assert.Equal(t,"my workspace",metrics[0].Label[1].GetValue())// Workspace name
41+
assert.Equal(t,8,int(metrics[0].Gauge.GetValue()))// Metric value
42+
43+
assert.Equal(t,"second user",metrics[1].Label[0].GetValue())// Username
44+
assert.Equal(t,"your workspace",metrics[1].Label[1].GetValue())// Workspace name
45+
assert.Equal(t,48,int(metrics[1].Gauge.GetValue()))// Metric value
46+
}
47+
48+
funcTestCollector_Set(t*testing.T) {
49+
t.Parallel()
50+
51+
// given
52+
agentsGauge:=prometheusmetrics.NewCachedGaugeVec(prometheus.NewGaugeVec(prometheus.GaugeOpts{
53+
Namespace:"coderd",
54+
Subsystem:"agents",
55+
Name:"up",
56+
Help:"The number of active agents per workspace.",
57+
}, []string{"username","workspace_name"}))
58+
59+
// when
60+
agentsGauge.WithLabelValues(prometheusmetrics.VectorOperationSet,3,"first user","my workspace")
61+
agentsGauge.WithLabelValues(prometheusmetrics.VectorOperationSet,4,"second user","your workspace")
62+
agentsGauge.WithLabelValues(prometheusmetrics.VectorOperationSet,5,"first user","my workspace")
63+
agentsGauge.WithLabelValues(prometheusmetrics.VectorOperationSet,6,"second user","your workspace")
64+
agentsGauge.Commit()
65+
66+
// then
67+
ch:=make(chan prometheus.Metric,2)
68+
agentsGauge.Collect(ch)
69+
70+
metrics:=collectAndSortMetrics(t,agentsGauge,2)
71+
72+
assert.Equal(t,"first user",metrics[0].Label[0].GetValue())// Username
73+
assert.Equal(t,"my workspace",metrics[0].Label[1].GetValue())// Workspace name
74+
assert.Equal(t,5,int(metrics[0].Gauge.GetValue()))// Metric value
75+
76+
assert.Equal(t,"second user",metrics[1].Label[0].GetValue())// Username
77+
assert.Equal(t,"your workspace",metrics[1].Label[1].GetValue())// Workspace name
78+
assert.Equal(t,6,int(metrics[1].Gauge.GetValue()))// Metric value
79+
}
80+
81+
funcTestCollector_Set_Add(t*testing.T) {
82+
t.Parallel()
83+
84+
// given
85+
agentsGauge:=prometheusmetrics.NewCachedGaugeVec(prometheus.NewGaugeVec(prometheus.GaugeOpts{
86+
Namespace:"coderd",
87+
Subsystem:"agents",
88+
Name:"up",
89+
Help:"The number of active agents per workspace.",
90+
}, []string{"username","workspace_name"}))
91+
92+
// when
93+
agentsGauge.WithLabelValues(prometheusmetrics.VectorOperationAdd,9,"first user","my workspace")
94+
agentsGauge.WithLabelValues(prometheusmetrics.VectorOperationAdd,8,"second user","your workspace")
95+
agentsGauge.WithLabelValues(prometheusmetrics.VectorOperationAdd,7,"first user","my workspace")
96+
agentsGauge.WithLabelValues(prometheusmetrics.VectorOperationAdd,6,"second user","your workspace")
97+
agentsGauge.WithLabelValues(prometheusmetrics.VectorOperationSet,5,"first user","my workspace")
98+
agentsGauge.WithLabelValues(prometheusmetrics.VectorOperationSet,4,"second user","your workspace")
99+
agentsGauge.WithLabelValues(prometheusmetrics.VectorOperationAdd,3,"first user","my workspace")
100+
agentsGauge.WithLabelValues(prometheusmetrics.VectorOperationAdd,2,"second user","your workspace")
101+
agentsGauge.Commit()
102+
103+
// then
104+
ch:=make(chan prometheus.Metric,2)
105+
agentsGauge.Collect(ch)
106+
107+
metrics:=collectAndSortMetrics(t,agentsGauge,2)
108+
109+
assert.Equal(t,"first user",metrics[0].Label[0].GetValue())// Username
110+
assert.Equal(t,"my workspace",metrics[0].Label[1].GetValue())// Workspace name
111+
assert.Equal(t,8,int(metrics[0].Gauge.GetValue()))// Metric value
112+
113+
assert.Equal(t,"second user",metrics[1].Label[0].GetValue())// Username
114+
assert.Equal(t,"your workspace",metrics[1].Label[1].GetValue())// Workspace name
115+
assert.Equal(t,6,int(metrics[1].Gauge.GetValue()))// Metric value
116+
}
117+
118+
funccollectAndSortMetrics(t*testing.T,collector prometheus.Collector,countint) []dto.Metric {
119+
ch:=make(chan prometheus.Metric,count)
120+
deferclose(ch)
121+
122+
varmetrics []dto.Metric
123+
124+
collector.Collect(ch)
125+
fori:=0;i<count;i++ {
126+
m:=<-ch
127+
128+
varmetric dto.Metric
129+
err:=m.Write(&metric)
130+
require.NoError(t,err)
131+
132+
metrics=append(metrics,metric)
133+
}
134+
135+
// Ensure always the same order of metrics
136+
sort.Slice(metrics,func(i,jint)bool {
137+
returnsort.StringsAreSorted([]string{metrics[i].Label[0].GetValue(),metrics[j].Label[1].GetValue()})
138+
})
139+
returnmetrics
140+
}

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp