@@ -38,6 +38,7 @@ import (
38
38
"github.com/coder/coder/v2/scaletest/dashboard"
39
39
"github.com/coder/coder/v2/scaletest/harness"
40
40
"github.com/coder/coder/v2/scaletest/loadtestutil"
41
+ "github.com/coder/coder/v2/scaletest/notifications"
41
42
"github.com/coder/coder/v2/scaletest/reconnectingpty"
42
43
"github.com/coder/coder/v2/scaletest/workspacebuild"
43
44
"github.com/coder/coder/v2/scaletest/workspacetraffic"
@@ -60,6 +61,7 @@ func (r *RootCmd) scaletestCmd() *serpent.Command {
60
61
r .scaletestCreateWorkspaces (),
61
62
r .scaletestWorkspaceUpdates (),
62
63
r .scaletestWorkspaceTraffic (),
64
+ r .scaletestNotifications (),
63
65
},
64
66
}
65
67
@@ -1682,6 +1684,295 @@ func (r *RootCmd) scaletestDashboard() *serpent.Command {
1682
1684
return cmd
1683
1685
}
1684
1686
1687
+ func (r * RootCmd )scaletestNotifications ()* serpent.Command {
1688
+ var (
1689
+ userCount int64
1690
+ ownerUserPercentage float64
1691
+ notificationTimeout time.Duration
1692
+ dialTimeout time.Duration
1693
+ noCleanup bool
1694
+
1695
+ tracingFlags = & scaletestTracingFlags {}
1696
+
1697
+ // This test requires unlimited concurrency.
1698
+ timeoutStrategy = & timeoutFlags {}
1699
+ cleanupStrategy = newScaletestCleanupStrategy ()
1700
+ output = & scaletestOutputFlags {}
1701
+ prometheusFlags = & scaletestPrometheusFlags {}
1702
+ )
1703
+
1704
+ cmd := & serpent.Command {
1705
+ Use :"notifications" ,
1706
+ Short :"Simulate notification delivery by creating many users listening to notifications." ,
1707
+ Handler :func (inv * serpent.Invocation )error {
1708
+ ctx := inv .Context ()
1709
+ client ,err := r .TryInitClient (inv )
1710
+ if err != nil {
1711
+ return err
1712
+ }
1713
+
1714
+ notifyCtx ,stop := signal .NotifyContext (ctx ,StopSignals ... )
1715
+ defer stop ()
1716
+ ctx = notifyCtx
1717
+
1718
+ me ,err := requireAdmin (ctx ,client )
1719
+ if err != nil {
1720
+ return err
1721
+ }
1722
+
1723
+ client .HTTPClient = & http.Client {
1724
+ Transport :& codersdk.HeaderTransport {
1725
+ Transport :http .DefaultTransport ,
1726
+ Header :map [string ][]string {
1727
+ codersdk .BypassRatelimitHeader : {"true" },
1728
+ },
1729
+ },
1730
+ }
1731
+
1732
+ if userCount <= 0 {
1733
+ return xerrors .Errorf ("--user-count must be greater than 0" )
1734
+ }
1735
+
1736
+ if ownerUserPercentage < 0 || ownerUserPercentage > 100 {
1737
+ return xerrors .Errorf ("--owner-user-percentage must be between 0 and 100" )
1738
+ }
1739
+
1740
+ ownerUserCount := int64 (float64 (userCount )* ownerUserPercentage / 100 )
1741
+ if ownerUserCount == 0 && ownerUserPercentage > 0 {
1742
+ ownerUserCount = 1
1743
+ }
1744
+ regularUserCount := userCount - ownerUserCount
1745
+
1746
+ _ ,_ = fmt .Fprintf (inv .Stderr ,"Distribution plan:\n " )
1747
+ _ ,_ = fmt .Fprintf (inv .Stderr ," Total users: %d\n " ,userCount )
1748
+ _ ,_ = fmt .Fprintf (inv .Stderr ," Owner users: %d (%.1f%%)\n " ,ownerUserCount ,ownerUserPercentage )
1749
+ _ ,_ = fmt .Fprintf (inv .Stderr ," Regular users: %d (%.1f%%)\n " ,regularUserCount ,100.0 - ownerUserPercentage )
1750
+
1751
+ outputs ,err := output .parse ()
1752
+ if err != nil {
1753
+ return xerrors .Errorf ("could not parse --output flags" )
1754
+ }
1755
+
1756
+ tracerProvider ,closeTracing ,tracingEnabled ,err := tracingFlags .provider (ctx )
1757
+ if err != nil {
1758
+ return xerrors .Errorf ("create tracer provider: %w" ,err )
1759
+ }
1760
+ tracer := tracerProvider .Tracer (scaletestTracerName )
1761
+
1762
+ reg := prometheus .NewRegistry ()
1763
+ metrics := notifications .NewMetrics (reg )
1764
+
1765
+ logger := inv .Logger
1766
+ prometheusSrvClose := ServeHandler (ctx ,logger ,promhttp .HandlerFor (reg , promhttp.HandlerOpts {}),prometheusFlags .Address ,"prometheus" )
1767
+ defer prometheusSrvClose ()
1768
+
1769
+ defer func () {
1770
+ _ ,_ = fmt .Fprintln (inv .Stderr ,"\n Uploading traces..." )
1771
+ if err := closeTracing (ctx );err != nil {
1772
+ _ ,_ = fmt .Fprintf (inv .Stderr ,"\n Error uploading traces: %+v\n " ,err )
1773
+ }
1774
+ // Wait for prometheus metrics to be scraped
1775
+ _ ,_ = fmt .Fprintf (inv .Stderr ,"Waiting %s for prometheus metrics to be scraped\n " ,prometheusFlags .Wait )
1776
+ <- time .After (prometheusFlags .Wait )
1777
+ }()
1778
+
1779
+ _ ,_ = fmt .Fprintln (inv .Stderr ,"Creating users..." )
1780
+
1781
+ dialBarrier := & sync.WaitGroup {}
1782
+ dialBarrier .Add (int (userCount ))
1783
+
1784
+ configs := make ([]notifications.Config ,0 ,userCount )
1785
+ for range ownerUserCount {
1786
+ config := notifications.Config {
1787
+ User : createusers.Config {
1788
+ OrganizationID :me .OrganizationIDs [0 ],
1789
+ },
1790
+ IsOwner :true ,
1791
+ NotificationTimeout :notificationTimeout ,
1792
+ DialTimeout :dialTimeout ,
1793
+ DialBarrier :dialBarrier ,
1794
+ Metrics :metrics ,
1795
+ }
1796
+ if err := config .Validate ();err != nil {
1797
+ return xerrors .Errorf ("validate config: %w" ,err )
1798
+ }
1799
+ configs = append (configs ,config )
1800
+ }
1801
+ for range regularUserCount {
1802
+ config := notifications.Config {
1803
+ User : createusers.Config {
1804
+ OrganizationID :me .OrganizationIDs [0 ],
1805
+ },
1806
+ IsOwner :false ,
1807
+ NotificationTimeout :notificationTimeout ,
1808
+ DialTimeout :dialTimeout ,
1809
+ DialBarrier :dialBarrier ,
1810
+ Metrics :metrics ,
1811
+ }
1812
+ if err := config .Validate ();err != nil {
1813
+ return xerrors .Errorf ("validate config: %w" ,err )
1814
+ }
1815
+ configs = append (configs ,config )
1816
+ }
1817
+
1818
+ go func () {
1819
+ logger .Info (ctx ,"waiting for all users to connect" )
1820
+
1821
+ // Create timeout context for the entire trigger operation
1822
+ waitCtx ,cancel := context .WithTimeout (ctx ,dialTimeout + 30 * time .Second )
1823
+ defer cancel ()
1824
+
1825
+ // Wait for all runners to reach the barrier
1826
+ done := make (chan struct {})
1827
+ go func () {
1828
+ dialBarrier .Wait ()
1829
+ close (done )
1830
+ }()
1831
+
1832
+ select {
1833
+ case <- done :
1834
+ logger .Info (ctx ,"all users connected" )
1835
+ case <- waitCtx .Done ():
1836
+ if waitCtx .Err ()== context .DeadlineExceeded {
1837
+ logger .Error (ctx ,"timeout waiting for all users to connect" )
1838
+ }else {
1839
+ logger .Info (ctx ,"context canceled while waiting for users" )
1840
+ }
1841
+ return
1842
+ }
1843
+
1844
+ const (
1845
+ triggerUsername = "scaletest-trigger-user"
1846
+ triggerEmail = "scaletest-trigger@example.com"
1847
+ )
1848
+
1849
+ logger .Info (ctx ,"creating test user to test notifications" ,
1850
+ slog .F ("username" ,triggerUsername ),
1851
+ slog .F ("email" ,triggerEmail ),
1852
+ slog .F ("org_id" ,me .OrganizationIDs [0 ]))
1853
+
1854
+ testUser ,err := client .CreateUserWithOrgs (ctx , codersdk.CreateUserRequestWithOrgs {
1855
+ OrganizationIDs : []uuid.UUID {me .OrganizationIDs [0 ]},
1856
+ Username :triggerUsername ,
1857
+ Email :triggerEmail ,
1858
+ Password :"test-password-123" ,
1859
+ })
1860
+ if err != nil {
1861
+ logger .Error (ctx ,"create test user" ,slog .Error (err ))
1862
+ return
1863
+ }
1864
+
1865
+ err = client .DeleteUser (ctx ,testUser .ID )
1866
+ if err != nil {
1867
+ logger .Error (ctx ,"delete test user" ,slog .Error (err ))
1868
+ return
1869
+ }
1870
+
1871
+ logger .Info (ctx ,"test user created and deleted successfully" )
1872
+ }()
1873
+
1874
+ th := harness .NewTestHarness (timeoutStrategy .wrapStrategy (harness.ConcurrentExecutionStrategy {}),cleanupStrategy .toStrategy ())
1875
+
1876
+ for i ,config := range configs {
1877
+ id := strconv .Itoa (i )
1878
+ name := fmt .Sprintf ("notifications-%s" ,id )
1879
+ var runner harness.Runnable = notifications .NewRunner (client ,config )
1880
+ if tracingEnabled {
1881
+ runner = & runnableTraceWrapper {
1882
+ tracer :tracer ,
1883
+ spanName :name ,
1884
+ runner :runner ,
1885
+ }
1886
+ }
1887
+
1888
+ th .AddRun (name ,id ,runner )
1889
+ }
1890
+
1891
+ _ ,_ = fmt .Fprintln (inv .Stderr ,"Running notification delivery scaletest..." )
1892
+ testCtx ,testCancel := timeoutStrategy .toContext (ctx )
1893
+ defer testCancel ()
1894
+ err = th .Run (testCtx )
1895
+ if err != nil {
1896
+ return xerrors .Errorf ("run test harness (harness failure, not a test failure): %w" ,err )
1897
+ }
1898
+
1899
+ // If the command was interrupted, skip stats.
1900
+ if notifyCtx .Err ()!= nil {
1901
+ return notifyCtx .Err ()
1902
+ }
1903
+
1904
+ res := th .Results ()
1905
+ for _ ,o := range outputs {
1906
+ err = o .write (res ,inv .Stdout )
1907
+ if err != nil {
1908
+ return xerrors .Errorf ("write output %q to %q: %w" ,o .format ,o .path ,err )
1909
+ }
1910
+ }
1911
+
1912
+ if ! noCleanup {
1913
+ _ ,_ = fmt .Fprintln (inv .Stderr ,"\n Cleaning up..." )
1914
+ cleanupCtx ,cleanupCancel := cleanupStrategy .toContext (ctx )
1915
+ defer cleanupCancel ()
1916
+ err = th .Cleanup (cleanupCtx )
1917
+ if err != nil {
1918
+ return xerrors .Errorf ("cleanup tests: %w" ,err )
1919
+ }
1920
+ }
1921
+
1922
+ if res .TotalFail > 0 {
1923
+ return xerrors .New ("load test failed, see above for more details" )
1924
+ }
1925
+
1926
+ return nil
1927
+ },
1928
+ }
1929
+
1930
+ cmd .Options = serpent.OptionSet {
1931
+ {
1932
+ Flag :"user-count" ,
1933
+ FlagShorthand :"c" ,
1934
+ Env :"CODER_SCALETEST_NOTIFICATION_USER_COUNT" ,
1935
+ Description :"Required: Total number of users to create." ,
1936
+ Value :serpent .Int64Of (& userCount ),
1937
+ Required :true ,
1938
+ },
1939
+ {
1940
+ Flag :"owner-user-percentage" ,
1941
+ Env :"CODER_SCALETEST_NOTIFICATION_OWNER_USER_PERCENTAGE" ,
1942
+ Default :"20.0" ,
1943
+ Description :"Percentage of users to assign Owner role to (0-100)." ,
1944
+ Value :serpent .Float64Of (& ownerUserPercentage ),
1945
+ },
1946
+ {
1947
+ Flag :"notification-timeout" ,
1948
+ Env :"CODER_SCALETEST_NOTIFICATION_TIMEOUT" ,
1949
+ Default :"5m" ,
1950
+ Description :"How long to wait for notifications after triggering." ,
1951
+ Value :serpent .DurationOf (& notificationTimeout ),
1952
+ },
1953
+ {
1954
+ Flag :"dial-timeout" ,
1955
+ Env :"CODER_SCALETEST_DIAL_TIMEOUT" ,
1956
+ Default :"2m" ,
1957
+ Description :"Timeout for dialing the notification websocket endpoint." ,
1958
+ Value :serpent .DurationOf (& dialTimeout ),
1959
+ },
1960
+ {
1961
+ Flag :"no-cleanup" ,
1962
+ Env :"CODER_SCALETEST_NO_CLEANUP" ,
1963
+ Description :"Do not clean up resources after the test completes." ,
1964
+ Value :serpent .BoolOf (& noCleanup ),
1965
+ },
1966
+ }
1967
+
1968
+ tracingFlags .attach (& cmd .Options )
1969
+ timeoutStrategy .attach (& cmd .Options )
1970
+ cleanupStrategy .attach (& cmd .Options )
1971
+ output .attach (& cmd .Options )
1972
+ prometheusFlags .attach (& cmd .Options )
1973
+ return cmd
1974
+ }
1975
+
1685
1976
type runnableTraceWrapper struct {
1686
1977
tracer trace.Tracer
1687
1978
spanName string