Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

feat(coderd): add prometheus metrics to servertailnet #11988

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes fromall commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion coderd/coderd.go
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -472,7 +472,7 @@ func New(options *Options) *API {

api.Auditor.Store(&options.Auditor)
api.TailnetCoordinator.Store(&options.TailnetCoordinator)
api.agentProvider, err = NewServerTailnet(api.ctx,
stn, err:= NewServerTailnet(api.ctx,
options.Logger,
options.DERPServer,
api.DERPMap,
Expand All@@ -485,6 +485,10 @@ func New(options *Options) *API {
if err != nil {
panic("failed to setup server tailnet: " + err.Error())
}
api.agentProvider = stn
if options.DeploymentValues.Prometheus.Enable {
options.PrometheusRegistry.MustRegister(stn)
}
api.TailnetClientService, err = tailnet.NewClientService(
api.Logger.Named("tailnetclient"),
&api.TailnetCoordinator,
Expand Down
79 changes: 19 additions & 60 deletions coderd/database/pubsub/pubsub_test.go
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -6,7 +6,6 @@ import (
"testing"

"github.com/prometheus/client_golang/prometheus"
dto "github.com/prometheus/client_model/go"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"

Expand DownExpand Up@@ -43,8 +42,8 @@ func TestPGPubsub_Metrics(t *testing.T) {

metrics, err := registry.Gather()
require.NoError(t, err)
require.True(t,gaugeHasValue(t, metrics, 0, "coder_pubsub_current_events"))
require.True(t,gaugeHasValue(t, metrics, 0, "coder_pubsub_current_subscribers"))
require.True(t,testutil.PromGaugeHasValue(t, metrics, 0, "coder_pubsub_current_events"))
require.True(t,testutil.PromGaugeHasValue(t, metrics, 0, "coder_pubsub_current_subscribers"))

event := "test"
data := "testing"
Expand All@@ -63,14 +62,14 @@ func TestPGPubsub_Metrics(t *testing.T) {
require.Eventually(t, func() bool {
metrics, err = registry.Gather()
assert.NoError(t, err)
returngaugeHasValue(t, metrics, 1, "coder_pubsub_current_events") &&
gaugeHasValue(t, metrics, 1, "coder_pubsub_current_subscribers") &&
gaugeHasValue(t, metrics, 1, "coder_pubsub_connected") &&
counterHasValue(t, metrics, 1, "coder_pubsub_publishes_total", "true") &&
counterHasValue(t, metrics, 1, "coder_pubsub_subscribes_total", "true") &&
counterHasValue(t, metrics, 1, "coder_pubsub_messages_total", "normal") &&
counterHasValue(t, metrics, 7, "coder_pubsub_received_bytes_total") &&
counterHasValue(t, metrics, 7, "coder_pubsub_published_bytes_total")
returntestutil.PromGaugeHasValue(t, metrics, 1, "coder_pubsub_current_events") &&
testutil.PromGaugeHasValue(t, metrics, 1, "coder_pubsub_current_subscribers") &&
testutil.PromGaugeHasValue(t, metrics, 1, "coder_pubsub_connected") &&
testutil.PromCounterHasValue(t, metrics, 1, "coder_pubsub_publishes_total", "true") &&
testutil.PromCounterHasValue(t, metrics, 1, "coder_pubsub_subscribes_total", "true") &&
testutil.PromCounterHasValue(t, metrics, 1, "coder_pubsub_messages_total", "normal") &&
testutil.PromCounterHasValue(t, metrics, 7, "coder_pubsub_received_bytes_total") &&
testutil.PromCounterHasValue(t, metrics, 7, "coder_pubsub_published_bytes_total")
}, testutil.WaitShort, testutil.IntervalFast)

colossalData := make([]byte, 7600)
Expand All@@ -93,54 +92,14 @@ func TestPGPubsub_Metrics(t *testing.T) {
require.Eventually(t, func() bool {
metrics, err = registry.Gather()
assert.NoError(t, err)
returngaugeHasValue(t, metrics, 1, "coder_pubsub_current_events") &&
gaugeHasValue(t, metrics, 2, "coder_pubsub_current_subscribers") &&
gaugeHasValue(t, metrics, 1, "coder_pubsub_connected") &&
counterHasValue(t, metrics, 2, "coder_pubsub_publishes_total", "true") &&
counterHasValue(t, metrics, 2, "coder_pubsub_subscribes_total", "true") &&
counterHasValue(t, metrics, 1, "coder_pubsub_messages_total", "normal") &&
counterHasValue(t, metrics, 1, "coder_pubsub_messages_total", "colossal") &&
counterHasValue(t, metrics, 7607, "coder_pubsub_received_bytes_total") &&
counterHasValue(t, metrics, 7607, "coder_pubsub_published_bytes_total")
returntestutil.PromGaugeHasValue(t, metrics, 1, "coder_pubsub_current_events") &&
testutil.PromGaugeHasValue(t, metrics, 2, "coder_pubsub_current_subscribers") &&
testutil.PromGaugeHasValue(t, metrics, 1, "coder_pubsub_connected") &&
testutil.PromCounterHasValue(t, metrics, 2, "coder_pubsub_publishes_total", "true") &&
testutil.PromCounterHasValue(t, metrics, 2, "coder_pubsub_subscribes_total", "true") &&
testutil.PromCounterHasValue(t, metrics, 1, "coder_pubsub_messages_total", "normal") &&
testutil.PromCounterHasValue(t, metrics, 1, "coder_pubsub_messages_total", "colossal") &&
testutil.PromCounterHasValue(t, metrics, 7607, "coder_pubsub_received_bytes_total") &&
testutil.PromCounterHasValue(t, metrics, 7607, "coder_pubsub_published_bytes_total")
}, testutil.WaitShort, testutil.IntervalFast)
}

// gaugeHasValue reports whether the gathered metric families contain a gauge
// called name whose label values equal label (compared positionally) and whose
// current value equals value.
//
// Families with a different name are ignored. Within the matching family the
// first metric whose label values all match decides the result; false is
// returned when no family or metric matches. The test is failed through
// require.Equal if a metric in the matching family carries a different number
// of labels than were supplied.
func gaugeHasValue(t testing.TB, metrics []*dto.MetricFamily, value float64, name string, label ...string) bool {
	t.Helper()
	for _, family := range metrics {
		if family.GetName() != name {
			continue
		}
		ms := family.GetMetric()
	metricsLoop:
		for _, m := range ms {
			require.Equal(t, len(label), len(m.GetLabel()))
			for i, lv := range label {
				if lv != m.GetLabel()[i].GetValue() {
					// A bare continue would only advance this label loop and
					// let a mismatching metric decide the result, so move on
					// to the next metric explicitly.
					continue metricsLoop
				}
			}
			return value == m.GetGauge().GetValue()
		}
	}
	return false
}

// counterHasValue reports whether the gathered metric families contain a
// counter called name whose label values equal label (compared positionally)
// and whose current value equals value.
//
// Families with a different name are ignored. Within the matching family the
// first metric whose label values all match decides the result; false is
// returned when no family or metric matches. The test is failed through
// require.Equal if a metric in the matching family carries a different number
// of labels than were supplied.
func counterHasValue(t testing.TB, metrics []*dto.MetricFamily, value float64, name string, label ...string) bool {
	t.Helper()
	for _, family := range metrics {
		if family.GetName() != name {
			continue
		}
		ms := family.GetMetric()
	metricsLoop:
		for _, m := range ms {
			require.Equal(t, len(label), len(m.GetLabel()))
			for i, lv := range label {
				if lv != m.GetLabel()[i].GetValue() {
					// A bare continue would only advance this label loop and
					// let a mismatching metric decide the result, so move on
					// to the next metric explicitly.
					continue metricsLoop
				}
			}
			return value == m.GetCounter().GetValue()
		}
	}
	return false
}
54 changes: 53 additions & 1 deletion coderd/tailnet.go
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -14,6 +14,7 @@ import (
"time"

"github.com/google/uuid"
"github.com/prometheus/client_golang/prometheus"
"go.opentelemetry.io/otel/trace"
"golang.org/x/xerrors"
"tailscale.com/derp"
Expand DownExpand Up@@ -97,6 +98,18 @@ func NewServerTailnet(
agentConnectionTimes: map[uuid.UUID]time.Time{},
agentTickets: map[uuid.UUID]map[uuid.UUID]struct{}{},
transport: tailnetTransport.Clone(),
connsPerAgent: prometheus.NewGaugeVec(prometheus.GaugeOpts{
Namespace: "coder",
Subsystem: "servertailnet",
Name: "open_connections",
Help: "Total number of TCP connections currently open to workspace agents.",
}, []string{"network"}),
totalConns: prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: "coder",
Subsystem: "servertailnet",
Name: "connections_total",
Help: "Total number of TCP connections made to workspace agents.",
}, []string{"network"}),
}
tn.transport.DialContext = tn.dialContext
// These options are mostly just picked at random, and they can likely be
Expand DownExpand Up@@ -170,6 +183,16 @@ func NewServerTailnet(
return tn, nil
}

// Describe forwards the descriptors of the open-connections gauge vector and
// then the total-connections counter vector to descs. Together with Collect it
// allows a *ServerTailnet to be registered with a Prometheus registry.
func (s *ServerTailnet) Describe(descs chan<- *prometheus.Desc) {
	for _, vec := range []prometheus.Collector{s.connsPerAgent, s.totalConns} {
		vec.Describe(descs)
	}
}

// Collect forwards the current samples of the open-connections gauge vector
// and then the total-connections counter vector to metrics. Together with
// Describe it allows a *ServerTailnet to be registered with a Prometheus
// registry.
func (s *ServerTailnet) Collect(metrics chan<- prometheus.Metric) {
	for _, vec := range []prometheus.Collector{s.connsPerAgent, s.totalConns} {
		vec.Collect(metrics)
	}
}

func (s *ServerTailnet) expireOldAgents() {
const (
tick = 5 * time.Minute
Expand DownExpand Up@@ -304,6 +327,9 @@ type ServerTailnet struct {
agentTickets map[uuid.UUID]map[uuid.UUID]struct{}

transport *http.Transport

connsPerAgent *prometheus.GaugeVec
totalConns *prometheus.CounterVec
}

func (s *ServerTailnet) ReverseProxy(targetURL, dashboardURL *url.URL, agentID uuid.UUID) *httputil.ReverseProxy {
Expand DownExpand Up@@ -349,7 +375,18 @@ func (s *ServerTailnet) dialContext(ctx context.Context, network, addr string) (
return nil, xerrors.Errorf("no agent id attached")
}

return s.DialAgentNetConn(ctx, agentID, network, addr)
nc, err := s.DialAgentNetConn(ctx, agentID, network, addr)
if err != nil {
return nil, err
}

s.connsPerAgent.WithLabelValues("tcp").Inc()
s.totalConns.WithLabelValues("tcp").Inc()
return &instrumentedConn{
Conn: nc,
agentID: agentID,
connsPerAgent: s.connsPerAgent,
}, nil
}

func (s *ServerTailnet) ensureAgent(agentID uuid.UUID) error {
Expand DownExpand Up@@ -455,3 +492,18 @@ func (s *ServerTailnet) Close() error {
<-s.derpMapUpdaterClosed
return nil
}

// instrumentedConn wraps a net.Conn returned by ServerTailnet.dialContext so
// that the open-connections gauge can be decremented when the connection is
// closed.
type instrumentedConn struct {
// Conn is the wrapped connection; embedding promotes all of its methods.
net.Conn

// agentID is the ID of the agent the connection was dialed for. It is set by
// dialContext and not read by any method visible in this view.
agentID uuid.UUID
// closeOnce guards the gauge decrement in Close so it runs at most once,
// however many times Close is called.
closeOnce sync.Once
// connsPerAgent is the gauge vector incremented at dial time; Close
// decrements its "tcp" series.
// NOTE(review): despite the name, the only label used is "network", not an
// agent ID — confirm the intended naming.
connsPerAgent *prometheus.GaugeVec
}

func (c *instrumentedConn) Close() error {
c.closeOnce.Do(func() {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others.Learn more.

Are network connections always explicitly closed?

Copy link
ContributorAuthor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others.Learn more.

This one I'm not 100% sure on. Admittedly, I found this idea from a stackoverflow post which seemed to work for a couple other people. Was planning to get this into dev and monitor to make sure it works as intended with a lot more usage than I can reproduce myself.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others.Learn more.

I guess maybe they should be, and this gauge will tell us if there's a leak...

c.connsPerAgent.WithLabelValues("tcp").Dec()
})
return c.Conn.Close()
}
38 changes: 38 additions & 0 deletions coderd/tailnet_test.go
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -13,6 +13,7 @@ import (
"testing"

"github.com/google/uuid"
"github.com/prometheus/client_golang/prometheus"
"github.com/spf13/afero"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
Expand DownExpand Up@@ -79,6 +80,43 @@ func TestServerTailnet_ReverseProxy(t *testing.T) {
assert.Equal(t, http.StatusOK, res.StatusCode)
})

t.Run("Metrics", func(t *testing.T) {
t.Parallel()

ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong)
defer cancel()

agents, serverTailnet := setupServerTailnetAgent(t, 1)
a := agents[0]

registry := prometheus.NewRegistry()
require.NoError(t, registry.Register(serverTailnet))

u, err := url.Parse(fmt.Sprintf("http://127.0.0.1:%d", codersdk.WorkspaceAgentHTTPAPIServerPort))
require.NoError(t, err)

rp := serverTailnet.ReverseProxy(u, u, a.id)

rw := httptest.NewRecorder()
req := httptest.NewRequest(
http.MethodGet,
u.String(),
nil,
).WithContext(ctx)

rp.ServeHTTP(rw, req)
res := rw.Result()
defer res.Body.Close()

assert.Equal(t, http.StatusOK, res.StatusCode)
require.Eventually(t, func() bool {
metrics, err := registry.Gather()
assert.NoError(t, err)
return testutil.PromCounterHasValue(t, metrics, 1, "coder_servertailnet_connections_total", "tcp") &&
testutil.PromGaugeHasValue(t, metrics, 1, "coder_servertailnet_open_connections", "tcp")
}, testutil.WaitShort, testutil.IntervalFast)
})

t.Run("HostRewrite", func(t *testing.T) {
t.Parallel()

Expand Down
50 changes: 50 additions & 0 deletions testutil/prometheus.go
View file
Open in desktop
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
package testutil

import (
"testing"

dto "github.com/prometheus/client_model/go"
"github.com/stretchr/testify/require"
)

// PromGaugeHasValue reports whether metrics contains a gauge family called
// name with a metric whose label values equal label (compared positionally)
// and whose gauge value equals value.
//
// The first metric whose label values all match decides the result; false is
// returned when nothing matches. The test is failed through require.Equal if a
// metric in the named family has a different number of labels than supplied.
func PromGaugeHasValue(t testing.TB, metrics []*dto.MetricFamily, value float64, name string, label ...string) bool {
	t.Helper()
	for _, fam := range metrics {
		if fam.GetName() == name {
			for _, metric := range fam.GetMetric() {
				pairs := metric.GetLabel()
				require.Equal(t, len(label), len(pairs))

				// Compare every expected label value against the metric's
				// label at the same position.
				matched := true
				for idx := range label {
					if pairs[idx].GetValue() != label[idx] {
						matched = false
						break
					}
				}
				if matched {
					return metric.GetGauge().GetValue() == value
				}
			}
		}
	}
	return false
}

// PromCounterHasValue reports whether metrics contains a counter family called
// name with a metric whose label values equal label (compared positionally)
// and whose counter value equals value.
//
// The first metric whose label values all match decides the result; false is
// returned when nothing matches. The test is failed through require.Equal if a
// metric in the named family has a different number of labels than supplied.
func PromCounterHasValue(t testing.TB, metrics []*dto.MetricFamily, value float64, name string, label ...string) bool {
	t.Helper()
	for _, fam := range metrics {
		if fam.GetName() != name {
			continue
		}
		for _, metric := range fam.GetMetric() {
			pairs := metric.GetLabel()
			require.Equal(t, len(label), len(pairs))

			// Look for the first expected label value that differs from the
			// metric's label at the same position.
			mismatch := false
			for idx := 0; idx < len(label) && !mismatch; idx++ {
				mismatch = pairs[idx].GetValue() != label[idx]
			}
			if mismatch {
				continue
			}
			return metric.GetCounter().GetValue() == value
		}
	}
	return false
}

[8]ページ先頭

©2009-2025 Movatter.jp