@@ -37,6 +37,11 @@ const (
3737
// MetricLabelValueEncoder is a strings.Replacer built from four (old, new)
// pairs whose "old" sides are a backslash, '|', ',' and '='; each replacement
// is the same character preceded by a backslash.
// NOTE(review): the string literals on this line appear whitespace-mangled by
// extraction (stray spaces inside the quotes) — confirm against the real file.
3838var MetricLabelValueEncoder = strings .NewReplacer ("\\ " ,"\\ \\ " ,"|" ,"\\ |" ,"," ,"\\ ," ,"=" ,"\\ =" )
3939
40+ type descCacheEntry struct {
41+ desc * prometheus.Desc
42+ lastUsed time.Time
43+ }
44+
4045type MetricsAggregator struct {
4146store map [metricKey ]annotatedMetric
4247
@@ -50,6 +55,8 @@ type MetricsAggregator struct {
5055updateHistogram prometheus.Histogram
5156cleanupHistogram prometheus.Histogram
5257aggregateByLabels []string
58+ // per-aggregator cache of descriptors
59+ descCache map [string ]descCacheEntry
5360}
5461
5562type updateRequest struct {
@@ -107,42 +114,6 @@ func hashKey(req *updateRequest, m *agentproto.Stats_Metric) metricKey {
107114
// Compile-time assertion that *MetricsAggregator satisfies prometheus.Collector.
108115var _ prometheus.Collector = new (MetricsAggregator )
109116
110- func (am * annotatedMetric )asPrometheus () (prometheus.Metric ,error ) {
111- var (
112- baseLabelNames = am .aggregateByLabels
113- baseLabelValues []string
114- extraLabels = am .Labels
115- )
116-
117- for _ ,label := range baseLabelNames {
118- val ,err := am .getFieldByLabel (label )
119- if err != nil {
120- return nil ,err
121- }
122-
123- baseLabelValues = append (baseLabelValues ,val )
124- }
125-
126- labels := make ([]string ,0 ,len (baseLabelNames )+ len (extraLabels ))
127- labelValues := make ([]string ,0 ,len (baseLabelNames )+ len (extraLabels ))
128-
129- labels = append (labels ,baseLabelNames ... )
130- labelValues = append (labelValues ,baseLabelValues ... )
131-
132- for _ ,l := range extraLabels {
133- labels = append (labels ,l .Name )
134- labelValues = append (labelValues ,l .Value )
135- }
136-
137- desc := prometheus .NewDesc (am .Name ,metricHelpForAgent ,labels ,nil )
138- valueType ,err := asPrometheusValueType (am .Type )
139- if err != nil {
140- return nil ,err
141- }
142-
143- return prometheus .MustNewConstMetric (desc ,valueType ,am .Value ,labelValues ... ),nil
144- }
145-
146117// getFieldByLabel returns the related field value for a given label
147118func (am * annotatedMetric )getFieldByLabel (label string ) (string ,error ) {
148119var labelVal string
@@ -364,7 +335,7 @@ func (ma *MetricsAggregator) Run(ctx context.Context) func() {
364335}
365336
366337for _ ,m := range input {
367- promMetric ,err := m .asPrometheus ()
338+ promMetric ,err := ma .asPrometheus (& m )
368339if err != nil {
369340ma .log .Error (ctx ,"can't convert Prometheus value type" ,slog .F ("name" ,m .Name ),slog .F ("type" ,m .Type ),slog .F ("value" ,m .Value ),slog .Error (err ))
370341continue
@@ -386,6 +357,8 @@ func (ma *MetricsAggregator) Run(ctx context.Context) func() {
386357}
387358}
388359
360+ ma .cleanupDescCache ()
361+
389362timer .ObserveDuration ()
390363cleanupTicker .Reset (ma .metricsCleanupInterval )
391364ma .storeSizeGauge .Set (float64 (len (ma .store )))
@@ -407,6 +380,86 @@ func (ma *MetricsAggregator) Run(ctx context.Context) func() {
// Describe is the Describe method of the prometheus.Collector interface for
// MetricsAggregator. The body is empty, so no descriptors are ever sent on the
// channel.
// NOTE(review): presumably because metric names and labels are only known at
// collection time — confirm.
407380func (* MetricsAggregator )Describe (_ chan <- * prometheus.Desc ) {
408381}
409382
383+ // cacheKeyForDesc is used to determine the cache key for a set of labels/extra labels. Used with the aggregators description cache.
384+ // for strings.Builder returned errors from these functions are always nil.
385+ // nolint:revive
386+ func cacheKeyForDesc (name string ,baseLabelNames []string ,extraLabels []* agentproto.Stats_Metric_Label )string {
387+ var b strings.Builder
388+ hint := len (name )+ (len (baseLabelNames )+ len (extraLabels ))* 8
389+ b .Grow (hint )
390+ b .WriteString (name )
391+ for _ ,ln := range baseLabelNames {
392+ b .WriteByte ('|' )
393+ b .WriteString (ln )
394+ }
395+ for _ ,l := range extraLabels {
396+ b .WriteByte ('|' )
397+ b .WriteString (l .Name )
398+ }
399+ return b .String ()
400+ }
401+
402+ // getOrCreateDec checks if we already have a metric description in the aggregators cache for a given combination of base
403+ // labels and extra labels. If we do not, we create a new description and cache it.
404+ func (ma * MetricsAggregator )getOrCreateDesc (name string ,help string ,baseLabelNames []string ,extraLabels []* agentproto.Stats_Metric_Label )* prometheus.Desc {
405+ if ma .descCache == nil {
406+ ma .descCache = make (map [string ]descCacheEntry )
407+ }
408+ key := cacheKeyForDesc (name ,baseLabelNames ,extraLabels )
409+ if d ,ok := ma .descCache [key ];ok {
410+ d .lastUsed = time .Now ()
411+ ma .descCache [key ]= d
412+ return d .desc
413+ }
414+ nBase := len (baseLabelNames )
415+ nExtra := len (extraLabels )
416+ labels := make ([]string ,nBase + nExtra )
417+ copy (labels ,baseLabelNames )
418+ for i ,l := range extraLabels {
419+ labels [nBase + i ]= l .Name
420+ }
421+ d := prometheus .NewDesc (name ,help ,labels ,nil )
422+ ma .descCache [key ]= descCacheEntry {d ,time .Now ()}
423+ return d
424+ }
425+
426+ // asPrometheus returns the annotatedMetric as a prometheus.Metric, it preallocates/fills by index, uses the aggregators
427+ // metric description cache, and a small stack buffer for values in order to reduce memory allocations.
428+ func (ma * MetricsAggregator )asPrometheus (am * annotatedMetric ) (prometheus.Metric ,error ) {
429+ baseLabelNames := am .aggregateByLabels
430+ extraLabels := am .Labels
431+
432+ nBase := len (baseLabelNames )
433+ nExtra := len (extraLabels )
434+ nTotal := nBase + nExtra
435+
436+ var scratch [16 ]string
437+ var labelValues []string
438+ if nTotal <= len (scratch ) {
439+ labelValues = scratch [:nTotal ]
440+ }else {
441+ labelValues = make ([]string ,nTotal )
442+ }
443+
444+ for i ,label := range baseLabelNames {
445+ val ,err := am .getFieldByLabel (label )
446+ if err != nil {
447+ return nil ,err
448+ }
449+ labelValues [i ]= val
450+ }
451+ for i ,l := range extraLabels {
452+ labelValues [nBase + i ]= l .Value
453+ }
454+
455+ desc := ma .getOrCreateDesc (am .Name ,metricHelpForAgent ,baseLabelNames ,extraLabels )
456+ valueType ,err := asPrometheusValueType (am .Type )
457+ if err != nil {
458+ return nil ,err
459+ }
460+ return prometheus .MustNewConstMetric (desc ,valueType ,am .Value ,labelValues ... ),nil
461+ }
462+
// defaultAgentMetricsLabels lists the agentmetrics label constants for
// username, workspace name, agent name and template name, in that order.
// NOTE(review): presumably the label set used when no aggregate-by labels are
// configured — confirm against the constructor.
410463var defaultAgentMetricsLabels = []string {agentmetrics .LabelUsername ,agentmetrics .LabelWorkspaceName ,agentmetrics .LabelAgentName ,agentmetrics .LabelTemplateName }
411464
412465// AgentMetricLabels are the labels used to decorate an agent's metrics.
@@ -453,6 +506,16 @@ func (ma *MetricsAggregator) Update(ctx context.Context, labels AgentMetricLabel
453506}
454507}
455508
509+ // Move to a function for testability
510+ func (ma * MetricsAggregator )cleanupDescCache () {
511+ now := time .Now ()
512+ for key ,entry := range ma .descCache {
513+ if now .Sub (entry .lastUsed )> ma .metricsCleanupInterval {
514+ delete (ma .descCache ,key )
515+ }
516+ }
517+ }
518+
456519func asPrometheusValueType (metricType agentproto.Stats_Metric_Type ) (prometheus.ValueType ,error ) {
457520switch metricType {
458521case agentproto .Stats_Metric_GAUGE :