Commit d8515f0

feat(scaletest): add grafana annotations and slack reporting (#9852)

Fixes #9575, fixes #9576

1 parent 4e44204 · commit d8515f0
8 files changed: +495 −78 lines changed

scaletest/templates/scaletest-runner/main.tf

Lines changed: 192 additions & 23 deletions
@@ -35,14 +35,18 @@ resource "null_resource" "permission_check" {
 }
 
 locals {
-  workspace_pod_name     = "coder-scaletest-runner-${lower(data.coder_workspace.me.owner)}-${lower(data.coder_workspace.me.name)}"
-  workspace_pod_instance = "coder-workspace-${lower(data.coder_workspace.me.owner)}-${lower(data.coder_workspace.me.name)}"
-  service_account_name   = "scaletest-sa"
-  cpu                    = 2
-  memory                 = 2
-  home_disk_size         = 10
-  scaletest_run_id       = "scaletest-${time_static.start_time.rfc3339}"
-  scaletest_run_dir      = "/home/coder/${local.scaletest_run_id}"
+  workspace_pod_name                             = "coder-scaletest-runner-${lower(data.coder_workspace.me.owner)}-${lower(data.coder_workspace.me.name)}"
+  workspace_pod_instance                         = "coder-workspace-${lower(data.coder_workspace.me.owner)}-${lower(data.coder_workspace.me.name)}"
+  workspace_pod_termination_grace_period_seconds = 7200 # 2 hours (cleanup timeout).
+  service_account_name                           = "scaletest-sa"
+  cpu                                            = 16
+  memory                                         = 64
+  home_disk_size                                 = 10
+  scaletest_run_id                               = "scaletest-${time_static.start_time.rfc3339}"
+  scaletest_run_dir                              = "/home/coder/${local.scaletest_run_id}"
+  grafana_url                                    = "https://stats.dev.c8s.io"
+  grafana_dashboard_uid                          = "qLVSTR-Vz"
+  grafana_dashboard_name                         = "coderv2-loadtest-dashboard"
 }
 
 data "coder_provisioner" "me" {
@@ -91,15 +95,14 @@ data "coder_parameter" "job_concurrency" {
   order       = 11
   type        = "number"
   name        = "Job concurrency"
-  default     = 10
+  default     = 0
   description = "The number of concurrent jobs (e.g. when producing workspace traffic)."
   mutable     = true
 
   # Setting zero = unlimited, but perhaps not a good idea,
   # we can raise this limit instead.
   validation {
-    min = 1
-    max = 100
+    min = 0
   }
 }
 
@@ -197,6 +200,121 @@ data "coder_parameter" "num_workspaces" {
   }
 }
 
+
+data "coder_parameter" "load_scenarios" {
+  order       = 22
+  name        = "Load Scenarios"
+  type        = "list(string)"
+  description = "The load scenarios to run."
+  mutable     = true
+  ephemeral   = true
+  default = jsonencode([
+    "SSH Traffic",
+    "Web Terminal Traffic",
+    "Dashboard Traffic",
+  ])
+}
+
+data "coder_parameter" "load_scenario_ssh_traffic_duration" {
+  order       = 23
+  name        = "SSH Traffic Duration"
+  type        = "number"
+  description = "The duration of the SSH traffic load scenario in minutes."
+  mutable     = true
+  default     = 30
+  validation {
+    min = 1
+    max = 1440 // 24 hours.
+  }
+}
+
+data "coder_parameter" "load_scenario_ssh_bytes_per_tick" {
+  order       = 24
+  name        = "SSH Bytes Per Tick"
+  type        = "number"
+  description = "The number of bytes to send per tick in the SSH traffic load scenario."
+  mutable     = true
+  default     = 1024
+  validation {
+    min = 1
+  }
+}
+
+data "coder_parameter" "load_scenario_ssh_tick_interval" {
+  order       = 25
+  name        = "SSH Tick Interval"
+  type        = "number"
+  description = "The number of milliseconds between each tick in the SSH traffic load scenario."
+  mutable     = true
+  default     = 100
+  validation {
+    min = 1
+  }
+}
+
+data "coder_parameter" "load_scenario_web_terminal_traffic_duration" {
+  order       = 26
+  name        = "Web Terminal Traffic Duration"
+  type        = "number"
+  description = "The duration of the web terminal traffic load scenario in minutes."
+  mutable     = true
+  default     = 30
+  validation {
+    min = 1
+    max = 1440 // 24 hours.
+  }
+}
+
+data "coder_parameter" "load_scenario_web_terminal_bytes_per_tick" {
+  order       = 27
+  name        = "Web Terminal Bytes Per Tick"
+  type        = "number"
+  description = "The number of bytes to send per tick in the web terminal traffic load scenario."
+  mutable     = true
+  default     = 1024
+  validation {
+    min = 1
+  }
+}
+
+data "coder_parameter" "load_scenario_web_terminal_tick_interval" {
+  order       = 28
+  name        = "Web Terminal Tick Interval"
+  type        = "number"
+  description = "The number of milliseconds between each tick in the web terminal traffic load scenario."
+  mutable     = true
+  default     = 100
+  validation {
+    min = 1
+  }
+}
+
+data "coder_parameter" "load_scenario_dashboard_traffic_duration" {
+  order       = 29
+  name        = "Dashboard Traffic Duration"
+  type        = "number"
+  description = "The duration of the dashboard traffic load scenario in minutes."
+  mutable     = true
+  default     = 30
+  validation {
+    min = 1
+    max = 1440 // 24 hours.
+  }
+}
+
+data "coder_parameter" "load_scenario_baseline_duration" {
+  order       = 26
+  name        = "Baseline Wait Duration"
+  type        = "number"
+  description = "The duration to wait before starting a load scenario in minutes."
+  mutable     = true
+  default     = 5
+  validation {
+    min = 0
+    max = 60
+  }
+}
+
 data "coder_parameter" "namespace" {
   order = 999
   type  = "string"
@@ -221,21 +339,38 @@ resource "coder_agent" "main" {
     CODER_CONFIG_DIR : "/home/coder/.config/coderv2",
     CODER_USER_TOKEN : data.coder_workspace.me.owner_session_token,
     CODER_URL : data.coder_workspace.me.access_url,
+    CODER_USER : data.coder_workspace.me.owner,
+    CODER_WORKSPACE : data.coder_workspace.me.name,
 
     # Global scaletest envs that may affect each `coder exp scaletest` invocation.
     CODER_SCALETEST_PROMETHEUS_ADDRESS : "0.0.0.0:21112",
     CODER_SCALETEST_PROMETHEUS_WAIT : "60s",
     CODER_SCALETEST_CONCURRENCY : "${data.coder_parameter.job_concurrency.value}",
     CODER_SCALETEST_CLEANUP_CONCURRENCY : "${data.coder_parameter.cleanup_concurrency.value}",
 
+    # Expose as params as well, for reporting (TODO(mafredri): refactor, only have one).
+    SCALETEST_PARAM_SCALETEST_CONCURRENCY : "${data.coder_parameter.job_concurrency.value}",
+    SCALETEST_PARAM_SCALETEST_CLEANUP_CONCURRENCY : "${data.coder_parameter.cleanup_concurrency.value}",
+
     # Local envs passed as arguments to `coder exp scaletest` invocations.
     SCALETEST_RUN_ID : local.scaletest_run_id,
     SCALETEST_RUN_DIR : local.scaletest_run_dir,
-    SCALETEST_TEMPLATE : data.coder_parameter.workspace_template.value,
-    SCALETEST_SKIP_CLEANUP : "1",
-    SCALETEST_NUM_WORKSPACES : data.coder_parameter.num_workspaces.value,
-    SCALETEST_CREATE_CONCURRENCY : "${data.coder_parameter.create_concurrency.value}",
-    SCALETEST_CLEANUP_STRATEGY : data.coder_parameter.cleanup_strategy.value,
+
+    SCALETEST_PARAM_TEMPLATE : data.coder_parameter.workspace_template.value,
+    SCALETEST_PARAM_NUM_WORKSPACES : data.coder_parameter.num_workspaces.value,
+    SCALETEST_PARAM_CREATE_CONCURRENCY : "${data.coder_parameter.create_concurrency.value}",
+    SCALETEST_PARAM_CLEANUP_STRATEGY : data.coder_parameter.cleanup_strategy.value,
+    SCALETEST_PARAM_LOAD_SCENARIOS : data.coder_parameter.load_scenarios.value,
+    SCALETEST_PARAM_LOAD_SCENARIO_SSH_TRAFFIC_DURATION : "${data.coder_parameter.load_scenario_ssh_traffic_duration.value}",
+    SCALETEST_PARAM_LOAD_SCENARIO_SSH_TRAFFIC_BYTES_PER_TICK : "${data.coder_parameter.load_scenario_ssh_bytes_per_tick.value}",
+    SCALETEST_PARAM_LOAD_SCENARIO_SSH_TRAFFIC_TICK_INTERVAL : "${data.coder_parameter.load_scenario_ssh_tick_interval.value}",
+    SCALETEST_PARAM_LOAD_SCENARIO_WEB_TERMINAL_TRAFFIC_DURATION : "${data.coder_parameter.load_scenario_web_terminal_traffic_duration.value}",
+    SCALETEST_PARAM_LOAD_SCENARIO_WEB_TERMINAL_TRAFFIC_BYTES_PER_TICK : "${data.coder_parameter.load_scenario_web_terminal_bytes_per_tick.value}",
+    SCALETEST_PARAM_LOAD_SCENARIO_WEB_TERMINAL_TRAFFIC_TICK_INTERVAL : "${data.coder_parameter.load_scenario_web_terminal_tick_interval.value}",
+    SCALETEST_PARAM_LOAD_SCENARIO_DASHBOARD_TRAFFIC_DURATION : "${data.coder_parameter.load_scenario_dashboard_traffic_duration.value}",
+    SCALETEST_PARAM_LOAD_SCENARIO_BASELINE_DURATION : "${data.coder_parameter.load_scenario_baseline_duration.value}",
+
+    GRAFANA_URL : local.grafana_url,
 
     SCRIPTS_ZIP : filebase64(data.archive_file.scripts_zip.output_path),
     SCRIPTS_DIR : "/tmp/scripts",
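
Since `load_scenarios` is a `list(string)` parameter with a `jsonencode`d default, `SCALETEST_PARAM_LOAD_SCENARIOS` reaches the agent as a JSON array string. A minimal sketch of how a runner script could decode it, assuming `jq` is available in the image (the loop body is a hypothetical dispatch point, not part of this commit):

  # SCALETEST_PARAM_LOAD_SCENARIOS holds a JSON array, e.g.
  # ["SSH Traffic","Web Terminal Traffic","Dashboard Traffic"].
  echo "${SCALETEST_PARAM_LOAD_SCENARIOS}" | jq -r '.[]' | while read -r scenario; do
    echo "Running scenario: ${scenario}" # hypothetical dispatch point
  done
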
@@ -244,12 +379,13 @@ resource "coder_agent" "main" {
     vscode     = false
     ssh_helper = false
   }
-  startup_script_timeout  = 3600
-  shutdown_script_timeout = 1800
+  startup_script_timeout  = 86400
+  shutdown_script_timeout = 7200
   startup_script_behavior = "blocking"
   startup_script          = file("startup.sh")
   shutdown_script         = file("shutdown.sh")
 
+  # IDEA(mafredri): It would be pretty cool to define metadata to expect JSON output, each field/item could become a separate metadata item.
   # Scaletest metadata.
   metadata {
     display_name = "Scaletest status"
@@ -332,7 +468,7 @@ resource "coder_app" "grafana" {
   agent_id     = coder_agent.main.id
   slug         = "00-grafana"
   display_name = "Grafana"
-  url          = "https://stats.dev.c8s.io/d/qLVSTR-Vz/coderv2-loadtest-dashboard?orgId=1&from=${time_static.start_time.unix * 1000}&to=now"
+  url          = "${local.grafana_url}/d/${local.grafana_dashboard_uid}/${local.grafana_dashboard_name}?orgId=1&from=${time_static.start_time.unix * 1000}&to=now"
   icon         = "https://grafana.com/static/assets/img/fav32.png"
   external     = true
 }
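
With the locals above, this interpolation reproduces the previous hard-coded URL. The `time_static.start_time.unix * 1000` term converts the workspace start time from epoch seconds to the epoch milliseconds Grafana expects in the `from` query parameter; a quick sketch with a hypothetical start time:

  start_unix=1695038400 # hypothetical time_static.start_time.unix value
  echo "https://stats.dev.c8s.io/d/qLVSTR-Vz/coderv2-loadtest-dashboard?orgId=1&from=$((start_unix * 1000))&to=now"
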
@@ -409,7 +545,7 @@ resource "kubernetes_pod" "main" {
   }
   # Set the pod delete timeout to termination_grace_period_seconds + 1m.
   timeouts {
-    delete = "32m"
+    delete = "${(local.workspace_pod_termination_grace_period_seconds + 120) / 60}s"
   }
   spec {
     security_context {
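
As a quick check of the interpolation above, using the 7200-second grace period from `locals` (note that the rendered value keeps the `s` suffix):

  echo "$(( (7200 + 120) / 60 ))s" # prints: 122s
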
@@ -421,8 +557,9 @@ resource "kubernetes_pod" "main" {
     service_account_name = local.service_account_name
 
     # Allow the coder agent to perform graceful shutdown and cleanup of
-    # scaletest resources, 30 minutes (cleanup timeout) + 1 minute.
-    termination_grace_period_seconds = 1860
+    # scaletest resources. We add an extra minute so ensure work
+    # completion is prioritized over timeout.
+    termination_grace_period_seconds = local.workspace_pod_termination_grace_period_seconds + 60
 
     container {
       name = "dev"
@@ -440,6 +577,24 @@ resource "kubernetes_pod" "main" {
       name  = "CODER_AGENT_LOG_DIR"
       value = "${local.scaletest_run_dir}/logs"
     }
+    env {
+      name = "GRAFANA_API_TOKEN"
+      value_from {
+        secret_key_ref {
+          name = data.kubernetes_secret.grafana_editor_api_token.metadata[0].name
+          key  = "token"
+        }
+      }
+    }
+    env {
+      name = "SLACK_WEBHOOK_URL"
+      value_from {
+        secret_key_ref {
+          name = data.kubernetes_secret.slack_scaletest_notifications_webhook_url.metadata[0].name
+          key  = "url"
+        }
+      }
+    }
     resources {
       # Set requests and limits values such that we can do performant
       # execution of `coder scaletest` commands.
@@ -496,7 +651,7 @@ resource "kubernetes_pod" "main" {
         match_expressions {
           key      = "cloud.google.com/gke-nodepool"
           operator = "In"
-          values   = ["big-misc"] # Avoid placing on the same nodes as scaletest workspaces.
+          values   = ["big-workspacetraffic"] # Avoid placing on the same nodes as scaletest workspaces.
         }
       }
     }
@@ -505,6 +660,20 @@ resource "kubernetes_pod" "main" {
   }
 }
 
+data "kubernetes_secret" "grafana_editor_api_token" {
+  metadata {
+    name      = "grafana-editor-api-token"
+    namespace = data.coder_parameter.namespace.value
+  }
+}
+
+data "kubernetes_secret" "slack_scaletest_notifications_webhook_url" {
+  metadata {
+    name      = "slack-scaletest-notifications-webhook-url"
+    namespace = data.coder_parameter.namespace.value
+  }
+}
+
 resource "kubernetes_manifest" "pod_monitor" {
   count    = data.coder_workspace.me.start_count
   manifest = {
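
The two `kubernetes_secret` data sources above back the `GRAFANA_API_TOKEN` and `SLACK_WEBHOOK_URL` env vars added to the pod earlier. A minimal sketch, not part of this commit, of how the runner scripts could use them: Grafana region annotations are created through its HTTP annotations API, and Slack incoming webhooks accept a plain JSON POST. The payload fields shown are assumptions.

  #!/usr/bin/env bash
  set -euo pipefail

  now_ms=$(($(date +%s) * 1000))

  # Annotate the load-test dashboard (dashboardUID matches local.grafana_dashboard_uid).
  curl -sSL -X POST \
    -H "Authorization: Bearer ${GRAFANA_API_TOKEN}" \
    -H "Content-Type: application/json" \
    -d "{\"dashboardUID\":\"qLVSTR-Vz\",\"time\":${now_ms},\"tags\":[\"scaletest\"],\"text\":\"Scaletest started\"}" \
    "${GRAFANA_URL}/api/annotations"

  # Post a status message to the Slack channel behind the webhook.
  curl -sSL -X POST \
    -H "Content-Type: application/json" \
    -d '{"text":"Scaletest run started"}' \
    "${SLACK_WEBHOOK_URL}"
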

scaletest/templates/scaletest-runner/scripts/cleanup.sh

Lines changed: 1 addition & 1 deletion
@@ -24,7 +24,7 @@ fi
 start_phase "Cleanup (${event})"
 coder exp scaletest cleanup \
   --cleanup-job-timeout 15m \
-  --cleanup-timeout 30m |
+  --cleanup-timeout 2h |
   tee "${SCALETEST_RESULTS_DIR}/cleanup-${event}.txt"
 end_phase
