@@ -29,6 +29,7 @@ import (
29
29
30
30
"github.com/coder/coder/v2/cli/cliui"
31
31
"github.com/coder/coder/v2/coderd/httpapi"
32
+ notificationsLib"github.com/coder/coder/v2/coderd/notifications"
32
33
"github.com/coder/coder/v2/coderd/tracing"
33
34
"github.com/coder/coder/v2/codersdk"
34
35
"github.com/coder/coder/v2/codersdk/workspacesdk"
@@ -39,6 +40,7 @@ import (
39
40
"github.com/coder/coder/v2/scaletest/dashboard"
40
41
"github.com/coder/coder/v2/scaletest/harness"
41
42
"github.com/coder/coder/v2/scaletest/loadtestutil"
43
+ "github.com/coder/coder/v2/scaletest/notifications"
42
44
"github.com/coder/coder/v2/scaletest/reconnectingpty"
43
45
"github.com/coder/coder/v2/scaletest/workspacebuild"
44
46
"github.com/coder/coder/v2/scaletest/workspacetraffic"
@@ -62,6 +64,7 @@ func (r *RootCmd) scaletestCmd() *serpent.Command {
62
64
r .scaletestWorkspaceUpdates (),
63
65
r .scaletestWorkspaceTraffic (),
64
66
r .scaletestAutostart (),
67
+ r .scaletestNotifications (),
65
68
},
66
69
}
67
70
@@ -1917,6 +1920,259 @@ func (r *RootCmd) scaletestAutostart() *serpent.Command {
1917
1920
return cmd
1918
1921
}
1919
1922
1923
+ func (r * RootCmd )scaletestNotifications ()* serpent.Command {
1924
+ var (
1925
+ userCount int64
1926
+ ownerUserPercentage float64
1927
+ notificationTimeout time.Duration
1928
+ dialTimeout time.Duration
1929
+ noCleanup bool
1930
+
1931
+ tracingFlags = & scaletestTracingFlags {}
1932
+
1933
+ // This test requires unlimited concurrency.
1934
+ timeoutStrategy = & timeoutFlags {}
1935
+ cleanupStrategy = newScaletestCleanupStrategy ()
1936
+ output = & scaletestOutputFlags {}
1937
+ prometheusFlags = & scaletestPrometheusFlags {}
1938
+ )
1939
+
1940
+ cmd := & serpent.Command {
1941
+ Use :"notifications" ,
1942
+ Short :"Simulate notification delivery by creating many users listening to notifications." ,
1943
+ Handler :func (inv * serpent.Invocation )error {
1944
+ ctx := inv .Context ()
1945
+ client ,err := r .InitClient (inv )
1946
+ if err != nil {
1947
+ return err
1948
+ }
1949
+
1950
+ notifyCtx ,stop := signal .NotifyContext (ctx ,StopSignals ... )
1951
+ defer stop ()
1952
+ ctx = notifyCtx
1953
+
1954
+ me ,err := requireAdmin (ctx ,client )
1955
+ if err != nil {
1956
+ return err
1957
+ }
1958
+
1959
+ client .HTTPClient = & http.Client {
1960
+ Transport :& codersdk.HeaderTransport {
1961
+ Transport :http .DefaultTransport ,
1962
+ Header :map [string ][]string {
1963
+ codersdk .BypassRatelimitHeader : {"true" },
1964
+ },
1965
+ },
1966
+ }
1967
+
1968
+ if userCount <= 0 {
1969
+ return xerrors .Errorf ("--user-count must be greater than 0" )
1970
+ }
1971
+
1972
+ if ownerUserPercentage < 0 || ownerUserPercentage > 100 {
1973
+ return xerrors .Errorf ("--owner-user-percentage must be between 0 and 100" )
1974
+ }
1975
+
1976
+ ownerUserCount := int64 (float64 (userCount )* ownerUserPercentage / 100 )
1977
+ if ownerUserCount == 0 && ownerUserPercentage > 0 {
1978
+ ownerUserCount = 1
1979
+ }
1980
+ regularUserCount := userCount - ownerUserCount
1981
+
1982
+ _ ,_ = fmt .Fprintf (inv .Stderr ,"Distribution plan:\n " )
1983
+ _ ,_ = fmt .Fprintf (inv .Stderr ," Total users: %d\n " ,userCount )
1984
+ _ ,_ = fmt .Fprintf (inv .Stderr ," Owner users: %d (%.1f%%)\n " ,ownerUserCount ,ownerUserPercentage )
1985
+ _ ,_ = fmt .Fprintf (inv .Stderr ," Regular users: %d (%.1f%%)\n " ,regularUserCount ,100.0 - ownerUserPercentage )
1986
+
1987
+ outputs ,err := output .parse ()
1988
+ if err != nil {
1989
+ return xerrors .Errorf ("could not parse --output flags" )
1990
+ }
1991
+
1992
+ tracerProvider ,closeTracing ,tracingEnabled ,err := tracingFlags .provider (ctx )
1993
+ if err != nil {
1994
+ return xerrors .Errorf ("create tracer provider: %w" ,err )
1995
+ }
1996
+ tracer := tracerProvider .Tracer (scaletestTracerName )
1997
+
1998
+ reg := prometheus .NewRegistry ()
1999
+ metrics := notifications .NewMetrics (reg )
2000
+
2001
+ logger := inv .Logger
2002
+ prometheusSrvClose := ServeHandler (ctx ,logger ,promhttp .HandlerFor (reg , promhttp.HandlerOpts {}),prometheusFlags .Address ,"prometheus" )
2003
+ defer prometheusSrvClose ()
2004
+
2005
+ defer func () {
2006
+ _ ,_ = fmt .Fprintln (inv .Stderr ,"\n Uploading traces..." )
2007
+ if err := closeTracing (ctx );err != nil {
2008
+ _ ,_ = fmt .Fprintf (inv .Stderr ,"\n Error uploading traces: %+v\n " ,err )
2009
+ }
2010
+ // Wait for prometheus metrics to be scraped
2011
+ _ ,_ = fmt .Fprintf (inv .Stderr ,"Waiting %s for prometheus metrics to be scraped\n " ,prometheusFlags .Wait )
2012
+ <- time .After (prometheusFlags .Wait )
2013
+ }()
2014
+
2015
+ _ ,_ = fmt .Fprintln (inv .Stderr ,"Creating users..." )
2016
+
2017
+ dialBarrier := & sync.WaitGroup {}
2018
+ ownerWatchBarrier := & sync.WaitGroup {}
2019
+ dialBarrier .Add (int (userCount ))
2020
+ ownerWatchBarrier .Add (int (ownerUserCount ))
2021
+
2022
+ expectedNotifications := map [uuid.UUID ]chan time.Time {
2023
+ notificationsLib .TemplateUserAccountCreated :make (chan time.Time ,1 ),
2024
+ notificationsLib .TemplateUserAccountDeleted :make (chan time.Time ,1 ),
2025
+ }
2026
+
2027
+ configs := make ([]notifications.Config ,0 ,userCount )
2028
+ for range ownerUserCount {
2029
+ config := notifications.Config {
2030
+ User : createusers.Config {
2031
+ OrganizationID :me .OrganizationIDs [0 ],
2032
+ },
2033
+ Roles : []string {codersdk .RoleOwner },
2034
+ NotificationTimeout :notificationTimeout ,
2035
+ DialTimeout :dialTimeout ,
2036
+ DialBarrier :dialBarrier ,
2037
+ ReceivingWatchBarrier :ownerWatchBarrier ,
2038
+ ExpectedNotifications :expectedNotifications ,
2039
+ Metrics :metrics ,
2040
+ }
2041
+ if err := config .Validate ();err != nil {
2042
+ return xerrors .Errorf ("validate config: %w" ,err )
2043
+ }
2044
+ configs = append (configs ,config )
2045
+ }
2046
+ for range regularUserCount {
2047
+ config := notifications.Config {
2048
+ User : createusers.Config {
2049
+ OrganizationID :me .OrganizationIDs [0 ],
2050
+ },
2051
+ Roles : []string {},
2052
+ NotificationTimeout :notificationTimeout ,
2053
+ DialTimeout :dialTimeout ,
2054
+ DialBarrier :dialBarrier ,
2055
+ ReceivingWatchBarrier :ownerWatchBarrier ,
2056
+ Metrics :metrics ,
2057
+ }
2058
+ if err := config .Validate ();err != nil {
2059
+ return xerrors .Errorf ("validate config: %w" ,err )
2060
+ }
2061
+ configs = append (configs ,config )
2062
+ }
2063
+
2064
+ go triggerUserNotifications (
2065
+ ctx ,
2066
+ logger ,
2067
+ client ,
2068
+ me .OrganizationIDs [0 ],
2069
+ dialBarrier ,
2070
+ dialTimeout ,
2071
+ expectedNotifications ,
2072
+ )
2073
+
2074
+ th := harness .NewTestHarness (timeoutStrategy .wrapStrategy (harness.ConcurrentExecutionStrategy {}),cleanupStrategy .toStrategy ())
2075
+
2076
+ for i ,config := range configs {
2077
+ id := strconv .Itoa (i )
2078
+ name := fmt .Sprintf ("notifications-%s" ,id )
2079
+ var runner harness.Runnable = notifications .NewRunner (client ,config )
2080
+ if tracingEnabled {
2081
+ runner = & runnableTraceWrapper {
2082
+ tracer :tracer ,
2083
+ spanName :name ,
2084
+ runner :runner ,
2085
+ }
2086
+ }
2087
+
2088
+ th .AddRun (name ,id ,runner )
2089
+ }
2090
+
2091
+ _ ,_ = fmt .Fprintln (inv .Stderr ,"Running notification delivery scaletest..." )
2092
+ testCtx ,testCancel := timeoutStrategy .toContext (ctx )
2093
+ defer testCancel ()
2094
+ err = th .Run (testCtx )
2095
+ if err != nil {
2096
+ return xerrors .Errorf ("run test harness (harness failure, not a test failure): %w" ,err )
2097
+ }
2098
+
2099
+ // If the command was interrupted, skip stats.
2100
+ if notifyCtx .Err ()!= nil {
2101
+ return notifyCtx .Err ()
2102
+ }
2103
+
2104
+ res := th .Results ()
2105
+ for _ ,o := range outputs {
2106
+ err = o .write (res ,inv .Stdout )
2107
+ if err != nil {
2108
+ return xerrors .Errorf ("write output %q to %q: %w" ,o .format ,o .path ,err )
2109
+ }
2110
+ }
2111
+
2112
+ if ! noCleanup {
2113
+ _ ,_ = fmt .Fprintln (inv .Stderr ,"\n Cleaning up..." )
2114
+ cleanupCtx ,cleanupCancel := cleanupStrategy .toContext (ctx )
2115
+ defer cleanupCancel ()
2116
+ err = th .Cleanup (cleanupCtx )
2117
+ if err != nil {
2118
+ return xerrors .Errorf ("cleanup tests: %w" ,err )
2119
+ }
2120
+ }
2121
+
2122
+ if res .TotalFail > 0 {
2123
+ return xerrors .New ("load test failed, see above for more details" )
2124
+ }
2125
+
2126
+ return nil
2127
+ },
2128
+ }
2129
+
2130
+ cmd .Options = serpent.OptionSet {
2131
+ {
2132
+ Flag :"user-count" ,
2133
+ FlagShorthand :"c" ,
2134
+ Env :"CODER_SCALETEST_NOTIFICATION_USER_COUNT" ,
2135
+ Description :"Required: Total number of users to create." ,
2136
+ Value :serpent .Int64Of (& userCount ),
2137
+ Required :true ,
2138
+ },
2139
+ {
2140
+ Flag :"owner-user-percentage" ,
2141
+ Env :"CODER_SCALETEST_NOTIFICATION_OWNER_USER_PERCENTAGE" ,
2142
+ Default :"20.0" ,
2143
+ Description :"Percentage of users to assign Owner role to (0-100)." ,
2144
+ Value :serpent .Float64Of (& ownerUserPercentage ),
2145
+ },
2146
+ {
2147
+ Flag :"notification-timeout" ,
2148
+ Env :"CODER_SCALETEST_NOTIFICATION_TIMEOUT" ,
2149
+ Default :"5m" ,
2150
+ Description :"How long to wait for notifications after triggering." ,
2151
+ Value :serpent .DurationOf (& notificationTimeout ),
2152
+ },
2153
+ {
2154
+ Flag :"dial-timeout" ,
2155
+ Env :"CODER_SCALETEST_DIAL_TIMEOUT" ,
2156
+ Default :"2m" ,
2157
+ Description :"Timeout for dialing the notification websocket endpoint." ,
2158
+ Value :serpent .DurationOf (& dialTimeout ),
2159
+ },
2160
+ {
2161
+ Flag :"no-cleanup" ,
2162
+ Env :"CODER_SCALETEST_NO_CLEANUP" ,
2163
+ Description :"Do not clean up resources after the test completes." ,
2164
+ Value :serpent .BoolOf (& noCleanup ),
2165
+ },
2166
+ }
2167
+
2168
+ tracingFlags .attach (& cmd .Options )
2169
+ timeoutStrategy .attach (& cmd .Options )
2170
+ cleanupStrategy .attach (& cmd .Options )
2171
+ output .attach (& cmd .Options )
2172
+ prometheusFlags .attach (& cmd .Options )
2173
+ return cmd
2174
+ }
2175
+
1920
2176
type runnableTraceWrapper struct {
1921
2177
tracer trace.Tracer
1922
2178
spanName string
@@ -2117,6 +2373,73 @@ func parseTargetRange(name, targets string) (start, end int, err error) {
2117
2373
return start ,end ,nil
2118
2374
}
2119
2375
2376
+ // triggerUserNotifications waits for all test users to connect,
2377
+ // then creates and deletes a test user to trigger notification events for testing.
2378
+ func triggerUserNotifications (
2379
+ ctx context.Context ,
2380
+ logger slog.Logger ,
2381
+ client * codersdk.Client ,
2382
+ orgID uuid.UUID ,
2383
+ dialBarrier * sync.WaitGroup ,
2384
+ dialTimeout time.Duration ,
2385
+ expectedNotifications map [uuid.UUID ]chan time.Time ,
2386
+ ) {
2387
+ logger .Info (ctx ,"waiting for all users to connect" )
2388
+
2389
+ // Wait for all users to connect
2390
+ waitCtx ,cancel := context .WithTimeout (ctx ,dialTimeout + 30 * time .Second )
2391
+ defer cancel ()
2392
+
2393
+ done := make (chan struct {})
2394
+ go func () {
2395
+ dialBarrier .Wait ()
2396
+ close (done )
2397
+ }()
2398
+
2399
+ select {
2400
+ case <- done :
2401
+ logger .Info (ctx ,"all users connected" )
2402
+ case <- waitCtx .Done ():
2403
+ if waitCtx .Err ()== context .DeadlineExceeded {
2404
+ logger .Error (ctx ,"timeout waiting for users to connect" )
2405
+ }else {
2406
+ logger .Info (ctx ,"context canceled while waiting for users" )
2407
+ }
2408
+ return
2409
+ }
2410
+
2411
+ const (
2412
+ triggerUsername = "scaletest-trigger-user"
2413
+ triggerEmail = "scaletest-trigger@example.com"
2414
+ )
2415
+
2416
+ logger .Info (ctx ,"creating test user to test notifications" ,
2417
+ slog .F ("username" ,triggerUsername ),
2418
+ slog .F ("email" ,triggerEmail ),
2419
+ slog .F ("org_id" ,orgID ))
2420
+
2421
+ testUser ,err := client .CreateUserWithOrgs (ctx , codersdk.CreateUserRequestWithOrgs {
2422
+ OrganizationIDs : []uuid.UUID {orgID },
2423
+ Username :triggerUsername ,
2424
+ Email :triggerEmail ,
2425
+ Password :"test-password-123" ,
2426
+ })
2427
+ if err != nil {
2428
+ logger .Error (ctx ,"create test user" ,slog .Error (err ))
2429
+ return
2430
+ }
2431
+ expectedNotifications [notificationsLib .TemplateUserAccountCreated ]<- time .Now ()
2432
+
2433
+ err = client .DeleteUser (ctx ,testUser .ID )
2434
+ if err != nil {
2435
+ logger .Error (ctx ,"delete test user" ,slog .Error (err ))
2436
+ return
2437
+ }
2438
+ expectedNotifications [notificationsLib .TemplateUserAccountDeleted ]<- time .Now ()
2439
+ close (expectedNotifications [notificationsLib .TemplateUserAccountCreated ])
2440
+ close (expectedNotifications [notificationsLib .TemplateUserAccountDeleted ])
2441
+ }
2442
+
2120
2443
func createWorkspaceAppConfig (client * codersdk.Client ,appHost ,app string ,workspace codersdk.Workspace ,agent codersdk.WorkspaceAgent ) (workspacetraffic.AppConfig ,error ) {
2121
2444
if app == "" {
2122
2445
return workspacetraffic.AppConfig {},nil