Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

feat: add prebuild timing metrics to Prometheus#19503

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub?Sign in to your account

Merged
ssncferreira merged 13 commits into main from ssncferreira/prebuild_metrics
Aug 28, 2025
Merged
Show file tree
Hide file tree
Changes fromall commits
Commits
Show all changes
13 commits
Select commitHold shift + click to select a range
1cd4417
feat: add prebuild timing metrics to Prometheus
ssncferreiraAug 22, 2025
73af00e
Merge remote-tracking branch 'origin/main' into ssncferreira/prebuild…
ssncferreiraAug 25, 2025
ea19217
refactor: improve metrics description and labels
ssncferreiraAug 26, 2025
11d7ed3
chore: address comments
ssncferreiraAug 27, 2025
3cde491
Merge remote-tracking branch 'origin/main' into ssncferreira/prebuild…
ssncferreiraAug 27, 2025
b59ebad
chore: add provisionerdserver metrics
ssncferreiraAug 27, 2025
20f6493
chore: add tests
ssncferreiraAug 28, 2025
2c325cc
fix: improve provisionerdserver metrics update logic
ssncferreiraAug 28, 2025
fc13c44
docs: add metrics documentation
ssncferreiraAug 28, 2025
0426f3a
Merge remote-tracking branch 'origin/main' into ssncferreira/prebuild…
ssncferreiraAug 28, 2025
b888c8f
fix: update metricsdocgen metrics file with introduced metrics
ssncferreiraAug 28, 2025
2d4a1f4
chore: address comments
ssncferreiraAug 28, 2025
b18fbe4
Merge remote-tracking branch 'origin/main' into ssncferreira/prebuild…
ssncferreiraAug 28, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 12 additions & 6 deletionscli/server.go
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -62,12 +62,6 @@ import (
"github.com/coder/serpent"
"github.com/coder/wgtunnel/tunnelsdk"

"github.com/coder/coder/v2/coderd/entitlements"
"github.com/coder/coder/v2/coderd/notifications/reports"
"github.com/coder/coder/v2/coderd/runtimeconfig"
"github.com/coder/coder/v2/coderd/webpush"
"github.com/coder/coder/v2/codersdk/drpcsdk"

"github.com/coder/coder/v2/buildinfo"
"github.com/coder/coder/v2/cli/clilog"
"github.com/coder/coder/v2/cli/cliui"
Expand All@@ -83,25 +77,31 @@ import (
"github.com/coder/coder/v2/coderd/database/migrations"
"github.com/coder/coder/v2/coderd/database/pubsub"
"github.com/coder/coder/v2/coderd/devtunnel"
"github.com/coder/coder/v2/coderd/entitlements"
"github.com/coder/coder/v2/coderd/externalauth"
"github.com/coder/coder/v2/coderd/gitsshkey"
"github.com/coder/coder/v2/coderd/httpmw"
"github.com/coder/coder/v2/coderd/jobreaper"
"github.com/coder/coder/v2/coderd/notifications"
"github.com/coder/coder/v2/coderd/notifications/reports"
"github.com/coder/coder/v2/coderd/oauthpki"
"github.com/coder/coder/v2/coderd/prometheusmetrics"
"github.com/coder/coder/v2/coderd/prometheusmetrics/insights"
"github.com/coder/coder/v2/coderd/promoauth"
"github.com/coder/coder/v2/coderd/provisionerdserver"
"github.com/coder/coder/v2/coderd/runtimeconfig"
"github.com/coder/coder/v2/coderd/schedule"
"github.com/coder/coder/v2/coderd/telemetry"
"github.com/coder/coder/v2/coderd/tracing"
"github.com/coder/coder/v2/coderd/updatecheck"
"github.com/coder/coder/v2/coderd/util/ptr"
"github.com/coder/coder/v2/coderd/util/slice"
stringutil "github.com/coder/coder/v2/coderd/util/strings"
"github.com/coder/coder/v2/coderd/webpush"
"github.com/coder/coder/v2/coderd/workspaceapps/appurl"
"github.com/coder/coder/v2/coderd/workspacestats"
"github.com/coder/coder/v2/codersdk"
"github.com/coder/coder/v2/codersdk/drpcsdk"
"github.com/coder/coder/v2/cryptorand"
"github.com/coder/coder/v2/provisioner/echo"
"github.com/coder/coder/v2/provisioner/terraform"
Expand DownExpand Up@@ -280,6 +280,12 @@ func enablePrometheus(
}
}

provisionerdserverMetrics := provisionerdserver.NewMetrics(logger)
if err := provisionerdserverMetrics.Register(options.PrometheusRegistry); err != nil {
return nil, xerrors.Errorf("failed to register provisionerd_server metrics: %w", err)
}
options.ProvisionerdServerMetrics = provisionerdserverMetrics

//nolint:revive
return ServeHandler(
ctx, logger, promhttp.InstrumentMetricHandler(
Expand Down
3 changes: 3 additions & 0 deletionscoderd/coderd.go
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -241,6 +241,8 @@ type Options struct {
UpdateAgentMetricsfunc(ctx context.Context,labels prometheusmetrics.AgentMetricLabels,metrics []*agentproto.Stats_Metric)
StatsBatcher workspacestats.Batcher

ProvisionerdServerMetrics*provisionerdserver.Metrics

// WorkspaceAppAuditSessionTimeout allows changing the timeout for audit
// sessions. Raising or lowering this value will directly affect the write
// load of the audit log table. This is used for testing. Default 1 hour.
Expand DownExpand Up@@ -1930,6 +1932,7 @@ func (api *API) CreateInMemoryTaggedProvisionerDaemon(dialCtx context.Context, n
},
api.NotificationsEnqueuer,
&api.PrebuildsReconciler,
api.ProvisionerdServerMetrics,
)
iferr!=nil {
returnnil,err
Expand Down
3 changes: 3 additions & 0 deletionscoderd/coderdtest/coderdtest.go
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -184,6 +184,8 @@ type Options struct {
OIDCConvertKeyCache cryptokeys.SigningKeycache
Clock quartz.Clock
TelemetryReporter telemetry.Reporter

ProvisionerdServerMetrics *provisionerdserver.Metrics
}

// New constructs a codersdk client connected to an in-memory API instance.
Expand DownExpand Up@@ -604,6 +606,7 @@ func NewOptions(t testing.TB, options *Options) (func(http.Handler), context.Can
Clock: options.Clock,
AppEncryptionKeyCache: options.APIKeyEncryptionCache,
OIDCConvertKeyCache: options.OIDCConvertKeyCache,
ProvisionerdServerMetrics: options.ProvisionerdServerMetrics,
}
}

Expand Down
7 changes: 7 additions & 0 deletionscoderd/database/dbauthz/dbauthz.go
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -2699,6 +2699,13 @@ func (q *querier) GetQuotaConsumedForUser(ctx context.Context, params database.G
return q.db.GetQuotaConsumedForUser(ctx, params)
}

// GetRegularWorkspaceCreateMetrics checks that the caller in ctx may read all
// workspaces and, if so, forwards the call to the wrapped store. When the
// authorization check fails, its error is returned and the store is not queried.
func (q *querier) GetRegularWorkspaceCreateMetrics(ctx context.Context) ([]database.GetRegularWorkspaceCreateMetricsRow, error) {
	authErr := q.authorizeContext(ctx, policy.ActionRead, rbac.ResourceWorkspace.All())
	if authErr != nil {
		return nil, authErr
	}

	return q.db.GetRegularWorkspaceCreateMetrics(ctx)
}

func (q *querier) GetReplicaByID(ctx context.Context, id uuid.UUID) (database.Replica, error) {
if err := q.authorizeContext(ctx, policy.ActionRead, rbac.ResourceSystem); err != nil {
return database.Replica{}, err
Expand Down
4 changes: 4 additions & 0 deletionscoderd/database/dbauthz/dbauthz_test.go
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -2177,6 +2177,10 @@ func (s *MethodTestSuite) TestWorkspace() {
dbm.EXPECT().GetWorkspaceAgentDevcontainersByAgentID(gomock.Any(), agt.ID).Return([]database.WorkspaceAgentDevcontainer{d}, nil).AnyTimes()
check.Args(agt.ID).Asserts(w, policy.ActionRead).Returns([]database.WorkspaceAgentDevcontainer{d})
}))
s.Run("GetRegularWorkspaceCreateMetrics", s.Subtest(func(_ database.Store, check *expects) {
check.Args().
Asserts(rbac.ResourceWorkspace.All(), policy.ActionRead)
}))
}

func (s *MethodTestSuite) TestWorkspacePortSharing() {
Expand Down
7 changes: 7 additions & 0 deletionscoderd/database/dbmetrics/querymetrics.go
View file
Open in desktop

Some generated files are not rendered by default. Learn more abouthow customized files appear on GitHub.

15 changes: 15 additions & 0 deletionscoderd/database/dbmock/dbmock.go
View file
Open in desktop

Some generated files are not rendered by default. Learn more abouthow customized files appear on GitHub.

3 changes: 3 additions & 0 deletionscoderd/database/querier.go
View file
Open in desktop

Some generated files are not rendered by default. Learn more abouthow customized files appear on GitHub.

71 changes: 70 additions & 1 deletioncoderd/database/queries.sql.go
View file
Open in desktop

Some generated files are not rendered by default. Learn more abouthow customized files appear on GitHub.

2 changes: 1 addition & 1 deletioncoderd/database/queries/prebuilds.sql
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -230,7 +230,7 @@ HAVING COUNT(*) = @hard_limit::bigint;
SELECT
t.nameas template_name,
tvp.nameas preset_name,
o.nameas organization_name,
o.nameas organization_name,
COUNT(*)as created_count,
COUNT(*) FILTER (WHEREpj.job_status='failed'::provisioner_job_status)as failed_count,
COUNT(*) FILTER (
Expand Down
33 changes: 33 additions & 0 deletionscoderd/database/queries/workspaces.sql
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -923,3 +923,36 @@ SET
user_acl = @user_acl
WHERE
id = @id;

-- name: GetRegularWorkspaceCreateMetrics :many
-- Count regular workspaces: only those whose first successful 'start' build
-- was not initiated by the prebuild system user.
-- Returns one row per (template, preset, organization) combination, with the
-- number of matching workspaces in created_count.
WITH first_success_build AS (
-- Earliest successful 'start' build per workspace.
-- DISTINCT ON keeps the first row per workspace_id in ORDER BY order, i.e.
-- the build with the lowest build_number (wb.id breaks ties).
SELECT DISTINCT ON (wb.workspace_id)
wb.workspace_id,
wb.template_version_preset_id,
wb.initiator_id
FROM workspace_builds wb
JOIN provisioner_jobs pj ON pj.id = wb.job_id
WHERE
wb.transition = 'start'::workspace_transition
AND pj.job_status = 'succeeded'::provisioner_job_status
ORDER BY wb.workspace_id, wb.build_number, wb.id
)
SELECT
t.name AS template_name,
-- The preset is optional (LEFT JOIN below); builds without one are reported
-- with an empty preset_name.
COALESCE(tvp.name, '') AS preset_name,
o.name AS organization_name,
COUNT(*) AS created_count
FROM first_success_build fsb
JOIN workspaces w ON w.id = fsb.workspace_id
JOIN templates t ON t.id = w.template_id
LEFT JOIN template_version_presets tvp ON tvp.id = fsb.template_version_preset_id
JOIN organizations o ON o.id = w.organization_id
WHERE
-- Workspaces belonging to deleted templates are not counted.
NOT t.deleted
-- Exclude workspaces whose first successful start was the prebuilds system user
AND fsb.initiator_id != 'c42fdf75-3097-471c-8c33-fb52454d81c0'::uuid
GROUP BY t.name, COALESCE(tvp.name, ''), o.name
ORDER BY t.name, preset_name, o.name;
Copy link
ContributorAuthor

@ssncferreirassncferreiraAug 28, 2025
edited
Loading

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others.Learn more.

This SQL query follows a similar pattern to GetPrebuildMetrics.

This query (in combination with others in prometheusmetrics) will be executed every defaultRefreshRate, which is currently set to 1 minute. An EXPLAIN was run in dogfood's database and the results are here: https://explain.dalibo.com/plan/6f96bfac3baac3d6

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others.Learn more.

Is it necessary to look at all workspace builds ever, or do we need to look at builds in a certain time window?

The explain output shows a number of seq scans which we probably want to turn into at least index scans.

Copy link
ContributorAuthor

@ssncferreirassncferreiraAug 28, 2025
edited
Loading

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others.Learn more.

Is it necessary to look at all workspace builds ever, or do we need to look at builds in a certain time window?

This is to account for the edge case where a workspace was not successfully provisioned on the first workspace build; that is why we are getting the first successful start workspace build. I would say that in most cases the first workspace build will be successful, so maybe we don't need this level of detail for the counter and we can just look at the first workspace build (like we did in the initial version of the query).

The explain output shows a number of seq scans which we probably want to turn into at least index scans.

Yes, that is the main issue here 😕 I can try to create some indexes to improve the performance here.
(edit: actually it seems the biggest bottleneck here is the ORDER BY wb.workspace_id, wb.build_number, wb.id)

33 changes: 33 additions & 0 deletionscoderd/prometheusmetrics/prometheusmetrics.go
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -165,6 +165,18 @@ func Workspaces(ctx context.Context, logger slog.Logger, registerer prometheus.R
return nil, err
}

workspaceCreationTotal := prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: "coderd",
Name: "workspace_creation_total",
Help: "Total regular (non-prebuilt) workspace creations by organization, template, and preset.",
},
[]string{"organization_name", "template_name", "preset_name"},
)
if err := registerer.Register(workspaceCreationTotal); err != nil {
return nil, err
}

ctx, cancelFunc := context.WithCancel(ctx)
done := make(chan struct{})

Expand DownExpand Up@@ -200,6 +212,27 @@ func Workspaces(ctx context.Context, logger slog.Logger, registerer prometheus.R
string(w.LatestBuildTransition),
).Add(1)
}

// Update regular workspaces (without a prebuild transition) creation counter
regularWorkspaces, err := db.GetRegularWorkspaceCreateMetrics(ctx)
if err != nil {
if errors.Is(err, sql.ErrNoRows) {
workspaceCreationTotal.Reset()
} else {
logger.Warn(ctx, "failed to load regular workspaces for metrics", slog.Error(err))
}
return
}

workspaceCreationTotal.Reset()

for _, regularWorkspace := range regularWorkspaces {
workspaceCreationTotal.WithLabelValues(
regularWorkspace.OrganizationName,
regularWorkspace.TemplateName,
regularWorkspace.PresetName,
).Add(float64(regularWorkspace.CreatedCount))
}
}

// Use time.Nanosecond to force an initial tick. It will be reset to the
Expand Down
Loading
Loading

[8]ページ先頭

©2009-2025 Movatter.jp