@@ -39,6 +39,7 @@ import (
39
39
"github.com/coder/coder/v2/scaletest/dashboard"
40
40
"github.com/coder/coder/v2/scaletest/harness"
41
41
"github.com/coder/coder/v2/scaletest/loadtestutil"
42
+ "github.com/coder/coder/v2/scaletest/notifications"
42
43
"github.com/coder/coder/v2/scaletest/reconnectingpty"
43
44
"github.com/coder/coder/v2/scaletest/workspacebuild"
44
45
"github.com/coder/coder/v2/scaletest/workspacetraffic"
@@ -62,6 +63,7 @@ func (r *RootCmd) scaletestCmd() *serpent.Command {
62
63
r .scaletestWorkspaceUpdates (),
63
64
r .scaletestWorkspaceTraffic (),
64
65
r .scaletestAutostart (),
66
+ r .scaletestNotifications (),
65
67
},
66
68
}
67
69
@@ -1917,6 +1919,295 @@ func (r *RootCmd) scaletestAutostart() *serpent.Command {
1917
1919
return cmd
1918
1920
}
1919
1921
1922
+ func (r * RootCmd )scaletestNotifications ()* serpent.Command {
1923
+ var (
1924
+ userCount int64
1925
+ ownerUserPercentage float64
1926
+ notificationTimeout time.Duration
1927
+ dialTimeout time.Duration
1928
+ noCleanup bool
1929
+
1930
+ tracingFlags = & scaletestTracingFlags {}
1931
+
1932
+ // This test requires unlimited concurrency.
1933
+ timeoutStrategy = & timeoutFlags {}
1934
+ cleanupStrategy = newScaletestCleanupStrategy ()
1935
+ output = & scaletestOutputFlags {}
1936
+ prometheusFlags = & scaletestPrometheusFlags {}
1937
+ )
1938
+
1939
+ cmd := & serpent.Command {
1940
+ Use :"notifications" ,
1941
+ Short :"Simulate notification delivery by creating many users listening to notifications." ,
1942
+ Handler :func (inv * serpent.Invocation )error {
1943
+ ctx := inv .Context ()
1944
+ client ,err := r .TryInitClient (inv )
1945
+ if err != nil {
1946
+ return err
1947
+ }
1948
+
1949
+ notifyCtx ,stop := signal .NotifyContext (ctx ,StopSignals ... )
1950
+ defer stop ()
1951
+ ctx = notifyCtx
1952
+
1953
+ me ,err := requireAdmin (ctx ,client )
1954
+ if err != nil {
1955
+ return err
1956
+ }
1957
+
1958
+ client .HTTPClient = & http.Client {
1959
+ Transport :& codersdk.HeaderTransport {
1960
+ Transport :http .DefaultTransport ,
1961
+ Header :map [string ][]string {
1962
+ codersdk .BypassRatelimitHeader : {"true" },
1963
+ },
1964
+ },
1965
+ }
1966
+
1967
+ if userCount <= 0 {
1968
+ return xerrors .Errorf ("--user-count must be greater than 0" )
1969
+ }
1970
+
1971
+ if ownerUserPercentage < 0 || ownerUserPercentage > 100 {
1972
+ return xerrors .Errorf ("--owner-user-percentage must be between 0 and 100" )
1973
+ }
1974
+
1975
+ ownerUserCount := int64 (float64 (userCount )* ownerUserPercentage / 100 )
1976
+ if ownerUserCount == 0 && ownerUserPercentage > 0 {
1977
+ ownerUserCount = 1
1978
+ }
1979
+ regularUserCount := userCount - ownerUserCount
1980
+
1981
+ _ ,_ = fmt .Fprintf (inv .Stderr ,"Distribution plan:\n " )
1982
+ _ ,_ = fmt .Fprintf (inv .Stderr ," Total users: %d\n " ,userCount )
1983
+ _ ,_ = fmt .Fprintf (inv .Stderr ," Owner users: %d (%.1f%%)\n " ,ownerUserCount ,ownerUserPercentage )
1984
+ _ ,_ = fmt .Fprintf (inv .Stderr ," Regular users: %d (%.1f%%)\n " ,regularUserCount ,100.0 - ownerUserPercentage )
1985
+
1986
+ outputs ,err := output .parse ()
1987
+ if err != nil {
1988
+ return xerrors .Errorf ("could not parse --output flags" )
1989
+ }
1990
+
1991
+ tracerProvider ,closeTracing ,tracingEnabled ,err := tracingFlags .provider (ctx )
1992
+ if err != nil {
1993
+ return xerrors .Errorf ("create tracer provider: %w" ,err )
1994
+ }
1995
+ tracer := tracerProvider .Tracer (scaletestTracerName )
1996
+
1997
+ reg := prometheus .NewRegistry ()
1998
+ metrics := notifications .NewMetrics (reg )
1999
+
2000
+ logger := inv .Logger
2001
+ prometheusSrvClose := ServeHandler (ctx ,logger ,promhttp .HandlerFor (reg , promhttp.HandlerOpts {}),prometheusFlags .Address ,"prometheus" )
2002
+ defer prometheusSrvClose ()
2003
+
2004
+ defer func () {
2005
+ _ ,_ = fmt .Fprintln (inv .Stderr ,"\n Uploading traces..." )
2006
+ if err := closeTracing (ctx );err != nil {
2007
+ _ ,_ = fmt .Fprintf (inv .Stderr ,"\n Error uploading traces: %+v\n " ,err )
2008
+ }
2009
+ // Wait for prometheus metrics to be scraped
2010
+ _ ,_ = fmt .Fprintf (inv .Stderr ,"Waiting %s for prometheus metrics to be scraped\n " ,prometheusFlags .Wait )
2011
+ <- time .After (prometheusFlags .Wait )
2012
+ }()
2013
+
2014
+ _ ,_ = fmt .Fprintln (inv .Stderr ,"Creating users..." )
2015
+
2016
+ dialBarrier := & sync.WaitGroup {}
2017
+ dialBarrier .Add (int (userCount ))
2018
+
2019
+ configs := make ([]notifications.Config ,0 ,userCount )
2020
+ for range ownerUserCount {
2021
+ config := notifications.Config {
2022
+ User : createusers.Config {
2023
+ OrganizationID :me .OrganizationIDs [0 ],
2024
+ },
2025
+ IsOwner :true ,
2026
+ NotificationTimeout :notificationTimeout ,
2027
+ DialTimeout :dialTimeout ,
2028
+ DialBarrier :dialBarrier ,
2029
+ Metrics :metrics ,
2030
+ }
2031
+ if err := config .Validate ();err != nil {
2032
+ return xerrors .Errorf ("validate config: %w" ,err )
2033
+ }
2034
+ configs = append (configs ,config )
2035
+ }
2036
+ for range regularUserCount {
2037
+ config := notifications.Config {
2038
+ User : createusers.Config {
2039
+ OrganizationID :me .OrganizationIDs [0 ],
2040
+ },
2041
+ IsOwner :false ,
2042
+ NotificationTimeout :notificationTimeout ,
2043
+ DialTimeout :dialTimeout ,
2044
+ DialBarrier :dialBarrier ,
2045
+ Metrics :metrics ,
2046
+ }
2047
+ if err := config .Validate ();err != nil {
2048
+ return xerrors .Errorf ("validate config: %w" ,err )
2049
+ }
2050
+ configs = append (configs ,config )
2051
+ }
2052
+
2053
+ go func () {
2054
+ logger .Info (ctx ,"waiting for all users to connect" )
2055
+
2056
+ // Create timeout context for the entire trigger operation
2057
+ waitCtx ,cancel := context .WithTimeout (ctx ,dialTimeout + 30 * time .Second )
2058
+ defer cancel ()
2059
+
2060
+ // Wait for all runners to reach the barrier
2061
+ done := make (chan struct {})
2062
+ go func () {
2063
+ dialBarrier .Wait ()
2064
+ close (done )
2065
+ }()
2066
+
2067
+ select {
2068
+ case <- done :
2069
+ logger .Info (ctx ,"all users connected" )
2070
+ case <- waitCtx .Done ():
2071
+ if waitCtx .Err ()== context .DeadlineExceeded {
2072
+ logger .Error (ctx ,"timeout waiting for all users to connect" )
2073
+ }else {
2074
+ logger .Info (ctx ,"context canceled while waiting for users" )
2075
+ }
2076
+ return
2077
+ }
2078
+
2079
+ const (
2080
+ triggerUsername = "scaletest-trigger-user"
2081
+ triggerEmail = "scaletest-trigger@example.com"
2082
+ )
2083
+
2084
+ logger .Info (ctx ,"creating test user to test notifications" ,
2085
+ slog .F ("username" ,triggerUsername ),
2086
+ slog .F ("email" ,triggerEmail ),
2087
+ slog .F ("org_id" ,me .OrganizationIDs [0 ]))
2088
+
2089
+ testUser ,err := client .CreateUserWithOrgs (ctx , codersdk.CreateUserRequestWithOrgs {
2090
+ OrganizationIDs : []uuid.UUID {me .OrganizationIDs [0 ]},
2091
+ Username :triggerUsername ,
2092
+ Email :triggerEmail ,
2093
+ Password :"test-password-123" ,
2094
+ })
2095
+ if err != nil {
2096
+ logger .Error (ctx ,"create test user" ,slog .Error (err ))
2097
+ return
2098
+ }
2099
+
2100
+ err = client .DeleteUser (ctx ,testUser .ID )
2101
+ if err != nil {
2102
+ logger .Error (ctx ,"delete test user" ,slog .Error (err ))
2103
+ return
2104
+ }
2105
+
2106
+ logger .Info (ctx ,"test user created and deleted successfully" )
2107
+ }()
2108
+
2109
+ th := harness .NewTestHarness (timeoutStrategy .wrapStrategy (harness.ConcurrentExecutionStrategy {}),cleanupStrategy .toStrategy ())
2110
+
2111
+ for i ,config := range configs {
2112
+ id := strconv .Itoa (i )
2113
+ name := fmt .Sprintf ("notifications-%s" ,id )
2114
+ var runner harness.Runnable = notifications .NewRunner (client ,config )
2115
+ if tracingEnabled {
2116
+ runner = & runnableTraceWrapper {
2117
+ tracer :tracer ,
2118
+ spanName :name ,
2119
+ runner :runner ,
2120
+ }
2121
+ }
2122
+
2123
+ th .AddRun (name ,id ,runner )
2124
+ }
2125
+
2126
+ _ ,_ = fmt .Fprintln (inv .Stderr ,"Running notification delivery scaletest..." )
2127
+ testCtx ,testCancel := timeoutStrategy .toContext (ctx )
2128
+ defer testCancel ()
2129
+ err = th .Run (testCtx )
2130
+ if err != nil {
2131
+ return xerrors .Errorf ("run test harness (harness failure, not a test failure): %w" ,err )
2132
+ }
2133
+
2134
+ // If the command was interrupted, skip stats.
2135
+ if notifyCtx .Err ()!= nil {
2136
+ return notifyCtx .Err ()
2137
+ }
2138
+
2139
+ res := th .Results ()
2140
+ for _ ,o := range outputs {
2141
+ err = o .write (res ,inv .Stdout )
2142
+ if err != nil {
2143
+ return xerrors .Errorf ("write output %q to %q: %w" ,o .format ,o .path ,err )
2144
+ }
2145
+ }
2146
+
2147
+ if ! noCleanup {
2148
+ _ ,_ = fmt .Fprintln (inv .Stderr ,"\n Cleaning up..." )
2149
+ cleanupCtx ,cleanupCancel := cleanupStrategy .toContext (ctx )
2150
+ defer cleanupCancel ()
2151
+ err = th .Cleanup (cleanupCtx )
2152
+ if err != nil {
2153
+ return xerrors .Errorf ("cleanup tests: %w" ,err )
2154
+ }
2155
+ }
2156
+
2157
+ if res .TotalFail > 0 {
2158
+ return xerrors .New ("load test failed, see above for more details" )
2159
+ }
2160
+
2161
+ return nil
2162
+ },
2163
+ }
2164
+
2165
+ cmd .Options = serpent.OptionSet {
2166
+ {
2167
+ Flag :"user-count" ,
2168
+ FlagShorthand :"c" ,
2169
+ Env :"CODER_SCALETEST_NOTIFICATION_USER_COUNT" ,
2170
+ Description :"Required: Total number of users to create." ,
2171
+ Value :serpent .Int64Of (& userCount ),
2172
+ Required :true ,
2173
+ },
2174
+ {
2175
+ Flag :"owner-user-percentage" ,
2176
+ Env :"CODER_SCALETEST_NOTIFICATION_OWNER_USER_PERCENTAGE" ,
2177
+ Default :"20.0" ,
2178
+ Description :"Percentage of users to assign Owner role to (0-100)." ,
2179
+ Value :serpent .Float64Of (& ownerUserPercentage ),
2180
+ },
2181
+ {
2182
+ Flag :"notification-timeout" ,
2183
+ Env :"CODER_SCALETEST_NOTIFICATION_TIMEOUT" ,
2184
+ Default :"5m" ,
2185
+ Description :"How long to wait for notifications after triggering." ,
2186
+ Value :serpent .DurationOf (& notificationTimeout ),
2187
+ },
2188
+ {
2189
+ Flag :"dial-timeout" ,
2190
+ Env :"CODER_SCALETEST_DIAL_TIMEOUT" ,
2191
+ Default :"2m" ,
2192
+ Description :"Timeout for dialing the notification websocket endpoint." ,
2193
+ Value :serpent .DurationOf (& dialTimeout ),
2194
+ },
2195
+ {
2196
+ Flag :"no-cleanup" ,
2197
+ Env :"CODER_SCALETEST_NO_CLEANUP" ,
2198
+ Description :"Do not clean up resources after the test completes." ,
2199
+ Value :serpent .BoolOf (& noCleanup ),
2200
+ },
2201
+ }
2202
+
2203
+ tracingFlags .attach (& cmd .Options )
2204
+ timeoutStrategy .attach (& cmd .Options )
2205
+ cleanupStrategy .attach (& cmd .Options )
2206
+ output .attach (& cmd .Options )
2207
+ prometheusFlags .attach (& cmd .Options )
2208
+ return cmd
2209
+ }
2210
+
1920
2211
type runnableTraceWrapper struct {
1921
2212
tracer trace.Tracer
1922
2213
spanName string