@@ -10,6 +10,10 @@ import (
10
10
"testing"
11
11
"time"
12
12
13
+ "github.com/coder/coder/v2/coderd/database/pubsub"
14
+ agpl"github.com/coder/coder/v2/tailnet"
15
+ "golang.org/x/xerrors"
16
+
13
17
"github.com/google/uuid"
14
18
"github.com/stretchr/testify/require"
15
19
"go.uber.org/mock/gomock"
@@ -291,3 +295,51 @@ func TestGetDebug(t *testing.T) {
291
295
require .Equal (t ,peerID ,debug .Tunnels [0 ].SrcID )
292
296
require .Equal (t ,dstID ,debug .Tunnels [0 ].DstID )
293
297
}
298
+
299
+ // TestPGCoordinatorUnhealthy tests that when the coordinator fails to send heartbeats and is
300
+ // unhealthy it disconnects any peers and does not send any extraneous database queries.
301
+ func TestPGCoordinatorUnhealthy (t * testing.T ) {
302
+ t .Parallel ()
303
+ ctx := testutil .Context (t ,testutil .WaitShort )
304
+ logger := slogtest .Make (t ,& slogtest.Options {IgnoreErrors :true }).Leveled (slog .LevelDebug )
305
+
306
+ ctrl := gomock .NewController (t )
307
+ mStore := dbmock .NewMockStore (ctrl )
308
+ ps := pubsub .NewInMemory ()
309
+
310
+ // after 3 failed heartbeats, the coordinator is unhealthy
311
+ mStore .EXPECT ().
312
+ UpsertTailnetCoordinator (gomock .Any (),gomock .Any ()).
313
+ MinTimes (3 ).
314
+ Return (database.TailnetCoordinator {},xerrors .New ("badness" ))
315
+ mStore .EXPECT ().
316
+ DeleteCoordinator (gomock .Any (),gomock .Any ()).
317
+ Times (1 ).
318
+ Return (nil )
319
+ // But, in particular we DO NOT want the coordinator to call DeleteTailnetPeer, as this is
320
+ // unnecessary and can spam the database. c.f. https://github.com/coder/coder/issues/12923
321
+
322
+ // these cleanup queries run, but we don't care for this test
323
+ mStore .EXPECT ().CleanTailnetCoordinators (gomock .Any ()).AnyTimes ().Return (nil )
324
+ mStore .EXPECT ().CleanTailnetLostPeers (gomock .Any ()).AnyTimes ().Return (nil )
325
+ mStore .EXPECT ().CleanTailnetTunnels (gomock .Any ()).AnyTimes ().Return (nil )
326
+
327
+ coordinator ,err := newPGCoordInternal (ctx ,logger ,ps ,mStore )
328
+ require .NoError (t ,err )
329
+
330
+ require .Eventually (t ,func ()bool {
331
+ return ! coordinator .querier .isHealthy ()
332
+ },testutil .WaitShort ,testutil .IntervalFast )
333
+
334
+ pID := uuid.UUID {5 }
335
+ _ ,resps := coordinator .Coordinate (ctx ,pID ,"test" , agpl.AgentCoordinateeAuth {ID :pID })
336
+ resp := testutil .RequireRecvCtx (ctx ,t ,resps )
337
+ require .Nil (t ,resp ,"channel should be closed" )
338
+
339
+ // give the coordinator some time to process any pending work. We are
340
+ // testing here that a database call is absent, so we don't want to race to
341
+ // shut down the test.
342
+ time .Sleep (testutil .IntervalMedium )
343
+ _ = coordinator .Close ()
344
+ require .Eventually (t ,ctrl .Satisfied ,testutil .WaitShort ,testutil .IntervalFast )
345
+ }