Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit658e8c3

Browse files
authored
perf: improve performance of metricsAggregator path by reducing memory allocations (#20724)
Signed-off-by: Callum Styan <callumstyan@gmail.com>
1 parentca560d3 commit658e8c3

File tree

3 files changed

+240
-37
lines changed

3 files changed

+240
-37
lines changed

‎coderd/prometheusmetrics/aggregator.go‎

Lines changed: 100 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,11 @@ const (
3737

3838
varMetricLabelValueEncoder=strings.NewReplacer("\\","\\\\","|","\\|",",","\\,","=","\\=")
3939

40+
typedescCacheEntrystruct {
41+
desc*prometheus.Desc
42+
lastUsed time.Time
43+
}
44+
4045
typeMetricsAggregatorstruct {
4146
storemap[metricKey]annotatedMetric
4247

@@ -50,6 +55,8 @@ type MetricsAggregator struct {
5055
updateHistogram prometheus.Histogram
5156
cleanupHistogram prometheus.Histogram
5257
aggregateByLabels []string
58+
// per-aggregator cache of descriptors
59+
descCachemap[string]descCacheEntry
5360
}
5461

5562
typeupdateRequeststruct {
@@ -107,42 +114,6 @@ func hashKey(req *updateRequest, m *agentproto.Stats_Metric) metricKey {
107114

108115
var_ prometheus.Collector=new(MetricsAggregator)
109116

110-
func (am*annotatedMetric)asPrometheus() (prometheus.Metric,error) {
111-
var (
112-
baseLabelNames=am.aggregateByLabels
113-
baseLabelValues []string
114-
extraLabels=am.Labels
115-
)
116-
117-
for_,label:=rangebaseLabelNames {
118-
val,err:=am.getFieldByLabel(label)
119-
iferr!=nil {
120-
returnnil,err
121-
}
122-
123-
baseLabelValues=append(baseLabelValues,val)
124-
}
125-
126-
labels:=make([]string,0,len(baseLabelNames)+len(extraLabels))
127-
labelValues:=make([]string,0,len(baseLabelNames)+len(extraLabels))
128-
129-
labels=append(labels,baseLabelNames...)
130-
labelValues=append(labelValues,baseLabelValues...)
131-
132-
for_,l:=rangeextraLabels {
133-
labels=append(labels,l.Name)
134-
labelValues=append(labelValues,l.Value)
135-
}
136-
137-
desc:=prometheus.NewDesc(am.Name,metricHelpForAgent,labels,nil)
138-
valueType,err:=asPrometheusValueType(am.Type)
139-
iferr!=nil {
140-
returnnil,err
141-
}
142-
143-
returnprometheus.MustNewConstMetric(desc,valueType,am.Value,labelValues...),nil
144-
}
145-
146117
// getFieldByLabel returns the related field value for a given label
147118
func (am*annotatedMetric)getFieldByLabel(labelstring) (string,error) {
148119
varlabelValstring
@@ -364,7 +335,7 @@ func (ma *MetricsAggregator) Run(ctx context.Context) func() {
364335
}
365336

366337
for_,m:=rangeinput {
367-
promMetric,err:=m.asPrometheus()
338+
promMetric,err:=ma.asPrometheus(&m)
368339
iferr!=nil {
369340
ma.log.Error(ctx,"can't convert Prometheus value type",slog.F("name",m.Name),slog.F("type",m.Type),slog.F("value",m.Value),slog.Error(err))
370341
continue
@@ -386,6 +357,8 @@ func (ma *MetricsAggregator) Run(ctx context.Context) func() {
386357
}
387358
}
388359

360+
ma.cleanupDescCache()
361+
389362
timer.ObserveDuration()
390363
cleanupTicker.Reset(ma.metricsCleanupInterval)
391364
ma.storeSizeGauge.Set(float64(len(ma.store)))
@@ -407,6 +380,86 @@ func (ma *MetricsAggregator) Run(ctx context.Context) func() {
407380
func (*MetricsAggregator)Describe(_chan<-*prometheus.Desc) {
408381
}
409382

383+
// cacheKeyForDesc is used to determine the cache key for a set of labels/extra labels. Used with the aggregators description cache.
384+
// for strings.Builder returned errors from these functions are always nil.
385+
// nolint:revive
386+
funccacheKeyForDesc(namestring,baseLabelNames []string,extraLabels []*agentproto.Stats_Metric_Label)string {
387+
varb strings.Builder
388+
hint:=len(name)+ (len(baseLabelNames)+len(extraLabels))*8
389+
b.Grow(hint)
390+
b.WriteString(name)
391+
for_,ln:=rangebaseLabelNames {
392+
b.WriteByte('|')
393+
b.WriteString(ln)
394+
}
395+
for_,l:=rangeextraLabels {
396+
b.WriteByte('|')
397+
b.WriteString(l.Name)
398+
}
399+
returnb.String()
400+
}
401+
402+
// getOrCreateDec checks if we already have a metric description in the aggregators cache for a given combination of base
403+
// labels and extra labels. If we do not, we create a new description and cache it.
404+
func (ma*MetricsAggregator)getOrCreateDesc(namestring,helpstring,baseLabelNames []string,extraLabels []*agentproto.Stats_Metric_Label)*prometheus.Desc {
405+
ifma.descCache==nil {
406+
ma.descCache=make(map[string]descCacheEntry)
407+
}
408+
key:=cacheKeyForDesc(name,baseLabelNames,extraLabels)
409+
ifd,ok:=ma.descCache[key];ok {
410+
d.lastUsed=time.Now()
411+
ma.descCache[key]=d
412+
returnd.desc
413+
}
414+
nBase:=len(baseLabelNames)
415+
nExtra:=len(extraLabels)
416+
labels:=make([]string,nBase+nExtra)
417+
copy(labels,baseLabelNames)
418+
fori,l:=rangeextraLabels {
419+
labels[nBase+i]=l.Name
420+
}
421+
d:=prometheus.NewDesc(name,help,labels,nil)
422+
ma.descCache[key]=descCacheEntry{d,time.Now()}
423+
returnd
424+
}
425+
426+
// asPrometheus returns the annotatedMetric as a prometheus.Metric, it preallocates/fills by index, uses the aggregators
427+
// metric description cache, and a small stack buffer for values in order to reduce memory allocations.
428+
func (ma*MetricsAggregator)asPrometheus(am*annotatedMetric) (prometheus.Metric,error) {
429+
baseLabelNames:=am.aggregateByLabels
430+
extraLabels:=am.Labels
431+
432+
nBase:=len(baseLabelNames)
433+
nExtra:=len(extraLabels)
434+
nTotal:=nBase+nExtra
435+
436+
varscratch [16]string
437+
varlabelValues []string
438+
ifnTotal<=len(scratch) {
439+
labelValues=scratch[:nTotal]
440+
}else {
441+
labelValues=make([]string,nTotal)
442+
}
443+
444+
fori,label:=rangebaseLabelNames {
445+
val,err:=am.getFieldByLabel(label)
446+
iferr!=nil {
447+
returnnil,err
448+
}
449+
labelValues[i]=val
450+
}
451+
fori,l:=rangeextraLabels {
452+
labelValues[nBase+i]=l.Value
453+
}
454+
455+
desc:=ma.getOrCreateDesc(am.Name,metricHelpForAgent,baseLabelNames,extraLabels)
456+
valueType,err:=asPrometheusValueType(am.Type)
457+
iferr!=nil {
458+
returnnil,err
459+
}
460+
returnprometheus.MustNewConstMetric(desc,valueType,am.Value,labelValues...),nil
461+
}
462+
410463
vardefaultAgentMetricsLabels= []string{agentmetrics.LabelUsername,agentmetrics.LabelWorkspaceName,agentmetrics.LabelAgentName,agentmetrics.LabelTemplateName}
411464

412465
// AgentMetricLabels are the labels used to decorate an agent's metrics.
@@ -453,6 +506,16 @@ func (ma *MetricsAggregator) Update(ctx context.Context, labels AgentMetricLabel
453506
}
454507
}
455508

509+
// Move to a function for testability
510+
func (ma*MetricsAggregator)cleanupDescCache() {
511+
now:=time.Now()
512+
forkey,entry:=rangema.descCache {
513+
ifnow.Sub(entry.lastUsed)>ma.metricsCleanupInterval {
514+
delete(ma.descCache,key)
515+
}
516+
}
517+
}
518+
456519
funcasPrometheusValueType(metricType agentproto.Stats_Metric_Type) (prometheus.ValueType,error) {
457520
switchmetricType {
458521
caseagentproto.Stats_Metric_GAUGE:
Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
package prometheusmetrics
2+
3+
import (
4+
"testing"
5+
"time"
6+
7+
"github.com/prometheus/client_golang/prometheus"
8+
"github.com/stretchr/testify/require"
9+
10+
"cdr.dev/slog/sloggers/slogtest"
11+
agentproto"github.com/coder/coder/v2/agent/proto"
12+
"github.com/coder/coder/v2/coderd/agentmetrics"
13+
"github.com/coder/coder/v2/testutil"
14+
)
15+
16+
funcTestDescCache_DescExpire(t*testing.T) {
17+
const (
18+
testWorkspaceName="yogi-workspace"
19+
testUsername="yogi-bear"
20+
testAgentName="main-agent"
21+
testTemplateName="main-template"
22+
)
23+
24+
testLabels:=AgentMetricLabels{
25+
Username:testUsername,
26+
WorkspaceName:testWorkspaceName,
27+
AgentName:testAgentName,
28+
TemplateName:testTemplateName,
29+
}
30+
31+
t.Parallel()
32+
33+
// given
34+
registry:=prometheus.NewRegistry()
35+
ma,err:=NewMetricsAggregator(slogtest.Make(t,&slogtest.Options{IgnoreErrors:true}),registry,time.Millisecond,agentmetrics.LabelAll)
36+
require.NoError(t,err)
37+
38+
given:= []*agentproto.Stats_Metric{
39+
{Name:"a_counter_one",Type:agentproto.Stats_Metric_COUNTER,Value:1},
40+
}
41+
42+
_,err=ma.asPrometheus(&annotatedMetric{
43+
given[0],
44+
testLabels.Username,
45+
testLabels.WorkspaceName,
46+
testLabels.AgentName,
47+
testLabels.TemplateName,
48+
// the rest doesn't matter for this test
49+
time.Now(),
50+
[]string{},
51+
})
52+
require.NoError(t,err)
53+
54+
require.Eventually(t,func()bool {
55+
ma.cleanupDescCache()
56+
returnlen(ma.descCache)==0
57+
},testutil.WaitShort,testutil.IntervalFast)
58+
}
59+
60+
// TestDescCacheTimestampUpdate ensures that the timestamp update in getOrCreateDesc
61+
// updates the map entry because d is a copy, not a pointer.
62+
funcTestDescCacheTimestampUpdate(t*testing.T) {
63+
t.Parallel()
64+
65+
registry:=prometheus.NewRegistry()
66+
ma,err:=NewMetricsAggregator(slogtest.Make(t,nil),registry,time.Hour,nil)
67+
require.NoError(t,err)
68+
69+
baseLabelNames:= []string{"label1","label2"}
70+
extraLabels:= []*agentproto.Stats_Metric_Label{
71+
{Name:"extra1",Value:"value1"},
72+
}
73+
74+
desc1:=ma.getOrCreateDesc("test_metric","help text",baseLabelNames,extraLabels)
75+
require.NotNil(t,desc1)
76+
77+
key:=cacheKeyForDesc("test_metric",baseLabelNames,extraLabels)
78+
initialEntry:=ma.descCache[key]
79+
initialTime:=initialEntry.lastUsed
80+
81+
desc2:=ma.getOrCreateDesc("test_metric","help text",baseLabelNames,extraLabels)
82+
require.NotNil(t,desc2)
83+
84+
updatedEntry:=ma.descCache[key]
85+
updatedTime:=updatedEntry.lastUsed
86+
87+
require.NotEqual(t,initialTime,updatedTime,
88+
"Timestamp was NOT updated in map when accessing a metric description that should be cached")
89+
}

‎coderd/prometheusmetrics/prometheusmetrics_internal_test.go‎

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
11
package prometheusmetrics
22

33
import (
4+
"fmt"
45
"testing"
56

67
"github.com/stretchr/testify/require"
78

9+
agentproto"github.com/coder/coder/v2/agent/proto"
810
"github.com/coder/coder/v2/coderd/agentmetrics"
911
)
1012

@@ -36,3 +38,52 @@ func TestFilterAcceptableAgentLabels(t *testing.T) {
3638
})
3739
}
3840
}
41+
42+
funcbenchAsPrometheus(b*testing.B,base []string,extraNint) {
43+
am:=annotatedMetric{
44+
Stats_Metric:&agentproto.Stats_Metric{
45+
Name:"blink_test_metric",
46+
Type:agentproto.Stats_Metric_GAUGE,
47+
Value:1,
48+
Labels:make([]*agentproto.Stats_Metric_Label,extraN),
49+
},
50+
username:"user",
51+
workspaceName:"ws",
52+
agentName:"agent",
53+
templateName:"tmpl",
54+
aggregateByLabels:base,
55+
}
56+
fori:=0;i<extraN;i++ {
57+
am.Labels[i]=&agentproto.Stats_Metric_Label{Name:fmt.Sprintf("l%d",i),Value:"v"}
58+
}
59+
60+
ma:=&MetricsAggregator{}
61+
62+
b.ReportAllocs()
63+
b.ResetTimer()
64+
fori:=0;i<b.N;i++ {
65+
_,err:=ma.asPrometheus(&am)
66+
iferr!=nil {
67+
b.Fatal(err)
68+
}
69+
}
70+
}
71+
72+
funcBenchmark_asPrometheus(b*testing.B) {
73+
cases:= []struct {
74+
namestring
75+
base []string
76+
extraNint
77+
}{
78+
{"base4_extra0",defaultAgentMetricsLabels,0},
79+
{"base4_extra2",defaultAgentMetricsLabels,2},
80+
{"base4_extra5",defaultAgentMetricsLabels,5},
81+
{"base4_extra10",defaultAgentMetricsLabels,10},
82+
{"base2_extra5", []string{agentmetrics.LabelUsername,agentmetrics.LabelWorkspaceName},5},
83+
}
84+
for_,tc:=rangecases {
85+
b.Run(tc.name,func(b*testing.B) {
86+
benchAsPrometheus(b,tc.base,tc.extraN)
87+
})
88+
}
89+
}

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp