Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

fix: reduce excessive logging when database is unreachable#17363

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to ourterms of service andprivacy statement. We’ll occasionally send you account related emails.

Already on GitHub?Sign in to your account

Merged
dannykopping merged 8 commits intomainfromdk/17045
Apr 15, 2025
Merged
Show file tree
Hide file tree
Changes fromall commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletionscoderd/coderd.go
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -675,10 +675,11 @@ func New(options *Options) *API {
api.Auditor.Store(&options.Auditor)
api.TailnetCoordinator.Store(&options.TailnetCoordinator)
dialer := &InmemTailnetDialer{
CoordPtr: &api.TailnetCoordinator,
DERPFn: api.DERPMap,
Logger: options.Logger,
ClientID: uuid.New(),
CoordPtr: &api.TailnetCoordinator,
DERPFn: api.DERPMap,
Logger: options.Logger,
ClientID: uuid.New(),
DatabaseHealthCheck: api.Database,
}
stn, err := NewServerTailnet(api.ctx,
options.Logger,
Expand Down
16 changes: 15 additions & 1 deletioncoderd/tailnet.go
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -24,9 +24,11 @@ import (
"tailscale.com/tailcfg"

"cdr.dev/slog"

"github.com/coder/coder/v2/coderd/tracing"
"github.com/coder/coder/v2/coderd/workspaceapps"
"github.com/coder/coder/v2/coderd/workspaceapps/appurl"
"github.com/coder/coder/v2/codersdk"
"github.com/coder/coder/v2/codersdk/workspacesdk"
"github.com/coder/coder/v2/site"
"github.com/coder/coder/v2/tailnet"
Expand DownExpand Up@@ -534,16 +536,28 @@ func NewMultiAgentController(ctx context.Context, logger slog.Logger, tracer tra
return m
}

type Pinger interface {
Ping(context.Context) (time.Duration, error)
}

// InmemTailnetDialer is a tailnet.ControlProtocolDialer that connects to a Coordinator and DERPMap
// service running in the same memory space.
type InmemTailnetDialer struct {
CoordPtr *atomic.Pointer[tailnet.Coordinator]
DERPFn func() *tailcfg.DERPMap
Logger slog.Logger
ClientID uuid.UUID
// DatabaseHealthCheck is used to validate that the store is reachable.
DatabaseHealthCheck Pinger
}

func (a *InmemTailnetDialer) Dial(_ context.Context, _ tailnet.ResumeTokenController) (tailnet.ControlProtocolClients, error) {
func (a *InmemTailnetDialer) Dial(ctx context.Context, _ tailnet.ResumeTokenController) (tailnet.ControlProtocolClients, error) {
if a.DatabaseHealthCheck != nil {
if _, err := a.DatabaseHealthCheck.Ping(ctx); err != nil {
return tailnet.ControlProtocolClients{}, xerrors.Errorf("%w: %v", codersdk.ErrDatabaseNotReachable, err)
}
}

coord := a.CoordPtr.Load()
if coord == nil {
return tailnet.ControlProtocolClients{}, xerrors.Errorf("tailnet coordinator not initialized")
Expand Down
41 changes: 41 additions & 0 deletionscoderd/tailnet_test.go
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -11,20 +11,23 @@ import (
"strconv"
"sync/atomic"
"testing"
"time"

"github.com/google/uuid"
"github.com/prometheus/client_golang/prometheus"
"github.com/spf13/afero"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"go.opentelemetry.io/otel/trace"
"golang.org/x/xerrors"
"tailscale.com/tailcfg"

"github.com/coder/coder/v2/agent"
"github.com/coder/coder/v2/agent/agenttest"
"github.com/coder/coder/v2/agent/proto"
"github.com/coder/coder/v2/coderd"
"github.com/coder/coder/v2/coderd/workspaceapps/appurl"
"github.com/coder/coder/v2/codersdk"
"github.com/coder/coder/v2/codersdk/agentsdk"
"github.com/coder/coder/v2/codersdk/workspacesdk"
"github.com/coder/coder/v2/tailnet"
Expand DownExpand Up@@ -365,6 +368,44 @@ func TestServerTailnet_ReverseProxy(t *testing.T) {
})
}

func TestDialFailure(t *testing.T) {
t.Parallel()

// Setup.
ctx := testutil.Context(t, testutil.WaitShort)
logger := testutil.Logger(t)

// Given: a tailnet coordinator.
coord := tailnet.NewCoordinator(logger)
t.Cleanup(func() {
_ = coord.Close()
})
coordPtr := atomic.Pointer[tailnet.Coordinator]{}
coordPtr.Store(&coord)

// Given: a fake DB healthchecker which will always fail.
fch := &failingHealthcheck{}

// When: dialing the in-memory coordinator.
dialer := &coderd.InmemTailnetDialer{
CoordPtr: &coordPtr,
Logger: logger,
ClientID: uuid.UUID{5},
DatabaseHealthCheck: fch,
}
_, err := dialer.Dial(ctx, nil)

// Then: the error returned reflects the database has failed its healthcheck.
require.ErrorIs(t, err, codersdk.ErrDatabaseNotReachable)
}

type failingHealthcheck struct{}

func (failingHealthcheck) Ping(context.Context) (time.Duration, error) {
// Simulate a database connection error.
return 0, xerrors.New("oops")
}

type wrappedListener struct {
net.Listener
dials int32
Expand Down
10 changes: 10 additions & 0 deletionscoderd/workspaceagents.go
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -997,6 +997,16 @@ func (api *API) derpMapUpdates(rw http.ResponseWriter, r *http.Request) {
func (api *API) workspaceAgentClientCoordinate(rw http.ResponseWriter, r *http.Request) {
ctx := r.Context()

// Ensure the database is reachable before proceeding.
_, err := api.Database.Ping(ctx)
if err != nil {
httpapi.Write(ctx, rw, http.StatusInternalServerError, codersdk.Response{
Message: codersdk.DatabaseNotReachable,
Detail: err.Error(),
})
return
}

// This route accepts user API key auth and workspace proxy auth. The moon actor has
// full permissions so should be able to pass this authz check.
workspace := httpmw.WorkspaceParam(r)
Expand Down
7 changes: 7 additions & 0 deletionscodersdk/database.go
View file
Open in desktop
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
package codersdk

import "golang.org/x/xerrors"

const DatabaseNotReachable = "database not reachable"

var ErrDatabaseNotReachable = xerrors.New(DatabaseNotReachable)
15 changes: 11 additions & 4 deletionscodersdk/workspacesdk/dialer.go
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -11,17 +11,19 @@ import (
"golang.org/x/xerrors"

"cdr.dev/slog"
"github.com/coder/websocket"

"github.com/coder/coder/v2/buildinfo"
"github.com/coder/coder/v2/codersdk"
"github.com/coder/coder/v2/tailnet"
"github.com/coder/coder/v2/tailnet/proto"
"github.com/coder/websocket"
)

var permanentErrorStatuses = []int{
http.StatusConflict, // returned if client/agent connections disabled (browser only)
http.StatusBadRequest, // returned if API mismatch
http.StatusNotFound, // returned if user doesn't have permission or agent doesn't exist
http.StatusConflict, // returned if client/agent connections disabled (browser only)
http.StatusBadRequest, // returned if API mismatch
http.StatusNotFound, // returned if user doesn't have permission or agent doesn't exist
http.StatusInternalServerError, // returned if database is not reachable,
}

type WebsocketDialer struct {
Expand DownExpand Up@@ -89,6 +91,11 @@ func (w *WebsocketDialer) Dial(ctx context.Context, r tailnet.ResumeTokenControl
"Ensure your client release version (%s, different than the API version) matches the server release version",
buildinfo.Version())
}

if sdkErr.Message == codersdk.DatabaseNotReachable &&
sdkErr.StatusCode() == http.StatusInternalServerError {
err = xerrors.Errorf("%w: %v", codersdk.ErrDatabaseNotReachable, err)
}
}
w.connected <- err
return tailnet.ControlProtocolClients{}, err
Expand Down
35 changes: 35 additions & 0 deletionscodersdk/workspacesdk/workspacesdk_test.go
View file
Open in desktop
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,21 @@
package workspacesdk_test

import (
"net/http"
"net/http/httptest"
"net/url"
"testing"

"github.com/stretchr/testify/require"
"tailscale.com/tailcfg"

"github.com/coder/websocket"

"github.com/coder/coder/v2/coderd/httpapi"
"github.com/coder/coder/v2/codersdk"
"github.com/coder/coder/v2/codersdk/agentsdk"
"github.com/coder/coder/v2/codersdk/workspacesdk"
"github.com/coder/coder/v2/testutil"
)

func TestWorkspaceRewriteDERPMap(t *testing.T) {
Expand DownExpand Up@@ -37,3 +45,30 @@ func TestWorkspaceRewriteDERPMap(t *testing.T) {
require.Equal(t, "coconuts.org", node.HostName)
require.Equal(t, 44558, node.DERPPort)
}

func TestWorkspaceDialerFailure(t *testing.T) {
t.Parallel()

// Setup.
ctx := testutil.Context(t, testutil.WaitShort)
logger := testutil.Logger(t)

// Given: a mock HTTP server which mimicks an unreachable database when calling the coordination endpoint.
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
httpapi.Write(ctx, w, http.StatusInternalServerError, codersdk.Response{
Message: codersdk.DatabaseNotReachable,
Detail: "oops",
})
}))
t.Cleanup(srv.Close)

u, err := url.Parse(srv.URL)
require.NoError(t, err)

// When: calling the coordination endpoint.
dialer := workspacesdk.NewWebsocketDialer(logger, u, &websocket.DialOptions{})
_, err = dialer.Dial(ctx, nil)

// Then: an error indicating a database issue is returned, to conditionalize the behavior of the caller.
require.ErrorIs(t, err, codersdk.ErrDatabaseNotReachable)
}
30 changes: 21 additions & 9 deletionsprovisionerd/provisionerd.go
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -20,12 +20,13 @@ import (
"golang.org/x/xerrors"

"cdr.dev/slog"
"github.com/coder/retry"

"github.com/coder/coder/v2/coderd/tracing"
"github.com/coder/coder/v2/codersdk"
"github.com/coder/coder/v2/provisionerd/proto"
"github.com/coder/coder/v2/provisionerd/runner"
sdkproto "github.com/coder/coder/v2/provisionersdk/proto"
"github.com/coder/retry"
)

// Dialer represents the function to create a daemon client connection.
Expand DownExpand Up@@ -290,7 +291,7 @@ func (p *Server) acquireLoop() {
defer p.wg.Done()
defer func() { close(p.acquireDoneCh) }()
ctx := p.closeContext
for {
forretrier := retry.New(10*time.Millisecond, 1*time.Second); retrier.Wait(ctx);{
if p.acquireExit() {
return
}
Expand All@@ -299,7 +300,17 @@ func (p *Server) acquireLoop() {
p.opts.Logger.Debug(ctx, "shut down before client (re) connected")
return
}
p.acquireAndRunOne(client)
err := p.acquireAndRunOne(client)
if err != nil && ctx.Err() == nil { // Only log if context is not done.
// Short-circuit: don't wait for the retry delay to exit, if required.
if p.acquireExit() {
return
}
p.opts.Logger.Warn(ctx, "failed to acquire job, retrying", slog.F("delay", fmt.Sprintf("%vms", retrier.Delay.Milliseconds())), slog.Error(err))
} else {
// Reset the retrier after each successful acquisition.
retrier.Reset()
}
}
}

Expand All@@ -318,7 +329,7 @@ func (p *Server) acquireExit() bool {
return false
}

func (p *Server) acquireAndRunOne(client proto.DRPCProvisionerDaemonClient) {
func (p *Server) acquireAndRunOne(client proto.DRPCProvisionerDaemonClient)error{
ctx := p.closeContext
p.opts.Logger.Debug(ctx, "start of acquireAndRunOne")
job, err := p.acquireGraceful(client)
Expand All@@ -327,15 +338,15 @@ func (p *Server) acquireAndRunOne(client proto.DRPCProvisionerDaemonClient) {
if errors.Is(err, context.Canceled) ||
errors.Is(err, yamux.ErrSessionShutdown) ||
errors.Is(err, fasthttputil.ErrInmemoryListenerClosed) {
return
return err
}

p.opts.Logger.Warn(ctx, "provisionerd was unable to acquire job", slog.Error(err))
return
return xerrors.Errorf("failed to acquire job: %w", err)
}
if job.JobId == "" {
p.opts.Logger.Debug(ctx, "acquire job successfully canceled")
return
return nil
}

if len(job.TraceMetadata) > 0 {
Expand DownExpand Up@@ -390,9 +401,9 @@ func (p *Server) acquireAndRunOne(client proto.DRPCProvisionerDaemonClient) {
Error: fmt.Sprintf("failed to connect to provisioner: %s", resp.Error),
})
if err != nil {
p.opts.Logger.Error(ctx, "provisioner job failed", slog.F("job_id", job.JobId), slog.Error(err))
p.opts.Logger.Error(ctx, "failed to reportprovisioner job failed", slog.F("job_id", job.JobId), slog.Error(err))
}
return
return xerrors.Errorf("failed to report provisioner job failed: %w", err)
}

p.mutex.Lock()
Expand All@@ -416,6 +427,7 @@ func (p *Server) acquireAndRunOne(client proto.DRPCProvisionerDaemonClient) {
p.mutex.Lock()
p.activeJob = nil
p.mutex.Unlock()
return nil
}

// acquireGraceful attempts to acquire a job from the server, handling canceling the acquisition if we gracefully shut
Expand Down
3 changes: 3 additions & 0 deletionssite/src/api/typesGenerated.ts
View file
Open in desktop

Some generated files are not rendered by default. Learn more abouthow customized files appear on GitHub.

14 changes: 12 additions & 2 deletionstailnet/controllers.go
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -2,6 +2,7 @@ package tailnet

import (
"context"
"errors"
"fmt"
"io"
"maps"
Expand All@@ -21,11 +22,12 @@ import (
"tailscale.com/util/dnsname"

"cdr.dev/slog"
"github.com/coder/quartz"
"github.com/coder/retry"

"github.com/coder/coder/v2/coderd/util/ptr"
"github.com/coder/coder/v2/codersdk"
"github.com/coder/coder/v2/tailnet/proto"
"github.com/coder/quartz"
"github.com/coder/retry"
)

// A Controller connects to the tailnet control plane, and then uses the control protocols to
Expand DownExpand Up@@ -1381,6 +1383,14 @@ func (c *Controller) Run(ctx context.Context) {
if xerrors.Is(err, context.Canceled) || xerrors.Is(err, context.DeadlineExceeded) {
return
}

// If the database is unreachable by the control plane, there's not much we can do, so we'll just retry later.
if errors.Is(err, codersdk.ErrDatabaseNotReachable) {
c.logger.Warn(c.ctx, "control plane lost connection to database, retrying",
slog.Error(err), slog.F("delay", fmt.Sprintf("%vms", retrier.Delay.Milliseconds())))
continue
}

errF := slog.Error(err)
var sdkErr *codersdk.Error
if xerrors.As(err, &sdkErr) {
Expand Down
Loading

[8]ページ先頭

©2009-2025 Movatter.jp