@@ -12,14 +12,17 @@ import (
1212"time"
1313
1414"github.com/google/uuid"
15+ "github.com/prometheus/client_golang/prometheus"
1516"github.com/stretchr/testify/assert"
1617"github.com/stretchr/testify/require"
1718"go.uber.org/goleak"
1819"go.uber.org/mock/gomock"
20+ "golang.org/x/xerrors"
1921
2022"cdr.dev/slog"
2123"cdr.dev/slog/sloggers/slogtest"
2224
25+ "github.com/coder/coder/v2/coderd/coderdtest/promhelp"
2326"github.com/coder/coder/v2/coderd/database"
2427"github.com/coder/coder/v2/coderd/database/dbgen"
2528"github.com/coder/coder/v2/coderd/database/dbmock"
@@ -52,11 +55,114 @@ func TestPurge(t *testing.T) {
5255done := awaitDoTick (ctx ,t ,clk )
5356mDB := dbmock .NewMockStore (gomock .NewController (t ))
5457mDB .EXPECT ().InTx (gomock .Any (),database .DefaultTXOptions ().WithID ("db_purge" )).Return (nil ).Times (2 )
55- purger := dbpurge .New (context .Background (),testutil .Logger (t ),mDB ,& codersdk.DeploymentValues {},clk )
58+ purger := dbpurge .New (context .Background (),testutil .Logger (t ),mDB ,& codersdk.DeploymentValues {},clk , prometheus . NewRegistry () )
5659<- done // wait for doTick() to run.
5760require .NoError (t ,purger .Close ())
5861}
5962
63+ //nolint:paralleltest // It uses LockIDDBPurge.
64+ func TestMetrics (t * testing.T ) {
65+ t .Run ("SuccessfulIteration" ,func (t * testing.T ) {
66+ ctx ,cancel := context .WithTimeout (context .Background (),testutil .WaitShort )
67+ defer cancel ()
68+
69+ reg := prometheus .NewRegistry ()
70+ clk := quartz .NewMock (t )
71+ now := time .Date (2025 ,1 ,15 ,7 ,30 ,0 ,0 ,time .UTC )
72+ clk .Set (now ).MustWait (ctx )
73+
74+ db ,_ := dbtestutil .NewDB (t )
75+ logger := slogtest .Make (t ,& slogtest.Options {IgnoreErrors :true })
76+ user := dbgen .User (t ,db , database.User {})
77+
78+ oldExpiredKey ,_ := dbgen .APIKey (t ,db , database.APIKey {
79+ UserID :user .ID ,
80+ ExpiresAt :now .Add (- 8 * 24 * time .Hour ),// Expired 8 days ago
81+ TokenName :"old-expired-key" ,
82+ })
83+
84+ _ ,err := db .GetAPIKeyByID (ctx ,oldExpiredKey .ID )
85+ require .NoError (t ,err ,"key should exist before purge" )
86+
87+ done := awaitDoTick (ctx ,t ,clk )
88+ closer := dbpurge .New (ctx ,logger ,db ,& codersdk.DeploymentValues {
89+ Retention : codersdk.RetentionConfig {
90+ APIKeys :serpent .Duration (7 * 24 * time .Hour ),// 7 days retention
91+ },
92+ },clk ,reg )
93+ defer closer .Close ()
94+ testutil .TryReceive (ctx ,t ,done )
95+
96+ hist := promhelp .HistogramValue (t ,reg ,"coderd_dbpurge_iteration_duration_seconds" , prometheus.Labels {
97+ "success" :"true" ,
98+ })
99+ require .NotNil (t ,hist )
100+ require .Greater (t ,hist .GetSampleCount (),uint64 (0 ),"should have at least one sample" )
101+
102+ expiredAPIKeys := promhelp .CounterValue (t ,reg ,"coderd_dbpurge_records_purged_total" , prometheus.Labels {
103+ "record_type" :"expired_api_keys" ,
104+ })
105+ require .Greater (t ,expiredAPIKeys ,0 ,"should have deleted at least one expired API key" )
106+
107+ _ ,err = db .GetAPIKeyByID (ctx ,oldExpiredKey .ID )
108+ require .Error (t ,err ,"key should be deleted after purge" )
109+
110+ workspaceAgentLogs := promhelp .CounterValue (t ,reg ,"coderd_dbpurge_records_purged_total" , prometheus.Labels {
111+ "record_type" :"workspace_agent_logs" ,
112+ })
113+ require .GreaterOrEqual (t ,workspaceAgentLogs ,0 )
114+
115+ aibridgeRecords := promhelp .CounterValue (t ,reg ,"coderd_dbpurge_records_purged_total" , prometheus.Labels {
116+ "record_type" :"aibridge_records" ,
117+ })
118+ require .GreaterOrEqual (t ,aibridgeRecords ,0 )
119+
120+ connectionLogs := promhelp .CounterValue (t ,reg ,"coderd_dbpurge_records_purged_total" , prometheus.Labels {
121+ "record_type" :"connection_logs" ,
122+ })
123+ require .GreaterOrEqual (t ,connectionLogs ,0 )
124+
125+ auditLogs := promhelp .CounterValue (t ,reg ,"coderd_dbpurge_records_purged_total" , prometheus.Labels {
126+ "record_type" :"audit_logs" ,
127+ })
128+ require .GreaterOrEqual (t ,auditLogs ,0 )
129+ })
130+
131+ t .Run ("FailedIteration" ,func (t * testing.T ) {
132+ ctx ,cancel := context .WithTimeout (context .Background (),testutil .WaitShort )
133+ defer cancel ()
134+
135+ reg := prometheus .NewRegistry ()
136+ clk := quartz .NewMock (t )
137+ now := clk .Now ()
138+ clk .Set (now ).MustWait (ctx )
139+
140+ ctrl := gomock .NewController (t )
141+ mDB := dbmock .NewMockStore (ctrl )
142+ mDB .EXPECT ().InTx (gomock .Any (),database .DefaultTXOptions ().WithID ("db_purge" )).
143+ Return (xerrors .New ("simulated database error" )).
144+ MinTimes (1 )
145+
146+ logger := slogtest .Make (t ,& slogtest.Options {IgnoreErrors :true })
147+
148+ done := awaitDoTick (ctx ,t ,clk )
149+ closer := dbpurge .New (ctx ,logger ,mDB ,& codersdk.DeploymentValues {},clk ,reg )
150+ defer closer .Close ()
151+ testutil .TryReceive (ctx ,t ,done )
152+
153+ hist := promhelp .HistogramValue (t ,reg ,"coderd_dbpurge_iteration_duration_seconds" , prometheus.Labels {
154+ "success" :"false" ,
155+ })
156+ require .NotNil (t ,hist )
157+ require .Greater (t ,hist .GetSampleCount (),uint64 (0 ),"should have at least one sample" )
158+
159+ successHist := promhelp .MetricValue (t ,reg ,"coderd_dbpurge_iteration_duration_seconds" , prometheus.Labels {
160+ "success" :"true" ,
161+ })
162+ require .Nil (t ,successHist ,"should not have success=true metric on failure" )
163+ })
164+ }
165+
60166//nolint:paralleltest // It uses LockIDDBPurge.
61167func TestDeleteOldWorkspaceAgentStats (t * testing.T ) {
62168ctx ,cancel := context .WithTimeout (context .Background (),testutil .WaitLong )
@@ -130,7 +236,7 @@ func TestDeleteOldWorkspaceAgentStats(t *testing.T) {
130236})
131237
132238// when
133- closer := dbpurge .New (ctx ,logger ,db ,& codersdk.DeploymentValues {},clk )
239+ closer := dbpurge .New (ctx ,logger ,db ,& codersdk.DeploymentValues {},clk , prometheus . NewRegistry () )
134240defer closer .Close ()
135241
136242// then
@@ -155,7 +261,7 @@ func TestDeleteOldWorkspaceAgentStats(t *testing.T) {
155261
156262// Start a new purger to immediately trigger delete after rollup.
157263_ = closer .Close ()
158- closer = dbpurge .New (ctx ,logger ,db ,& codersdk.DeploymentValues {},clk )
264+ closer = dbpurge .New (ctx ,logger ,db ,& codersdk.DeploymentValues {},clk , prometheus . NewRegistry () )
159265defer closer .Close ()
160266
161267// then
@@ -250,7 +356,8 @@ func TestDeleteOldWorkspaceAgentLogs(t *testing.T) {
250356Retention : codersdk.RetentionConfig {
251357WorkspaceAgentLogs :serpent .Duration (7 * 24 * time .Hour ),
252358},
253- },clk )
359+ },clk ,prometheus .NewRegistry ())
360+
254361defer closer .Close ()
255362<- done // doTick() has now run.
256363
@@ -464,7 +571,7 @@ func TestDeleteOldWorkspaceAgentLogsRetention(t *testing.T) {
464571done := awaitDoTick (ctx ,t ,clk )
465572closer := dbpurge .New (ctx ,logger ,db ,& codersdk.DeploymentValues {
466573Retention :tc .retentionConfig ,
467- },clk )
574+ },clk , prometheus . NewRegistry () )
468575defer closer .Close ()
469576testutil .TryReceive (ctx ,t ,done )
470577
@@ -555,7 +662,7 @@ func TestDeleteOldProvisionerDaemons(t *testing.T) {
555662require .NoError (t ,err )
556663
557664// when
558- closer := dbpurge .New (ctx ,logger ,db ,& codersdk.DeploymentValues {},clk )
665+ closer := dbpurge .New (ctx ,logger ,db ,& codersdk.DeploymentValues {},clk , prometheus . NewRegistry () )
559666defer closer .Close ()
560667
561668// then
@@ -659,7 +766,7 @@ func TestDeleteOldAuditLogConnectionEvents(t *testing.T) {
659766
660767// Run the purge
661768done := awaitDoTick (ctx ,t ,clk )
662- closer := dbpurge .New (ctx ,logger ,db ,& codersdk.DeploymentValues {},clk )
769+ closer := dbpurge .New (ctx ,logger ,db ,& codersdk.DeploymentValues {},clk , prometheus . NewRegistry () )
663770defer closer .Close ()
664771// Wait for tick
665772testutil .TryReceive (ctx ,t ,done )
@@ -822,7 +929,7 @@ func TestDeleteOldTelemetryHeartbeats(t *testing.T) {
822929require .NoError (t ,err )
823930
824931done := awaitDoTick (ctx ,t ,clk )
825- closer := dbpurge .New (ctx ,logger ,db ,& codersdk.DeploymentValues {},clk )
932+ closer := dbpurge .New (ctx ,logger ,db ,& codersdk.DeploymentValues {},clk , prometheus . NewRegistry () )
826933defer closer .Close ()
827934<- done // doTick() has now run.
828935
@@ -941,7 +1048,7 @@ func TestDeleteOldConnectionLogs(t *testing.T) {
9411048done := awaitDoTick (ctx ,t ,clk )
9421049closer := dbpurge .New (ctx ,logger ,db ,& codersdk.DeploymentValues {
9431050Retention :tc .retentionConfig ,
944- },clk )
1051+ },clk , prometheus . NewRegistry () )
9451052defer closer .Close ()
9461053testutil .TryReceive (ctx ,t ,done )
9471054
@@ -1197,7 +1304,7 @@ func TestDeleteOldAIBridgeRecords(t *testing.T) {
11971304Retention :serpent .Duration (tc .retention ),
11981305},
11991306},
1200- },clk )
1307+ },clk , prometheus . NewRegistry () )
12011308defer closer .Close ()
12021309testutil .TryReceive (ctx ,t ,done )
12031310
@@ -1284,7 +1391,7 @@ func TestDeleteOldAuditLogs(t *testing.T) {
12841391done := awaitDoTick (ctx ,t ,clk )
12851392closer := dbpurge .New (ctx ,logger ,db ,& codersdk.DeploymentValues {
12861393Retention :tc .retentionConfig ,
1287- },clk )
1394+ },clk , prometheus . NewRegistry () )
12881395defer closer .Close ()
12891396testutil .TryReceive (ctx ,t ,done )
12901397
@@ -1374,7 +1481,7 @@ func TestDeleteOldAuditLogs(t *testing.T) {
13741481Retention : codersdk.RetentionConfig {
13751482AuditLogs :serpent .Duration (retentionPeriod ),
13761483},
1377- },clk )
1484+ },clk , prometheus . NewRegistry () )
13781485defer closer .Close ()
13791486testutil .TryReceive (ctx ,t ,done )
13801487
@@ -1494,7 +1601,7 @@ func TestDeleteExpiredAPIKeys(t *testing.T) {
14941601done := awaitDoTick (ctx ,t ,clk )
14951602closer := dbpurge .New (ctx ,logger ,db ,& codersdk.DeploymentValues {
14961603Retention :tc .retentionConfig ,
1497- },clk )
1604+ },clk , prometheus . NewRegistry () )
14981605defer closer .Close ()
14991606testutil .TryReceive (ctx ,t ,done )
15001607