Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 426e9f2

Browse files
authored
feat: support adjusting child proc oom scores (#12655)
1 parent ac8d1c6 commit 426e9f2

File tree

7 files changed

+320
-39
lines changed

7 files changed

+320
-39
lines changed

‎agent/agent.go

Lines changed: 148 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -62,7 +62,10 @@ const (
6262

6363
// EnvProcPrioMgmt determines whether we attempt to manage
6464
// process CPU and OOM Killer priority.
65-
constEnvProcPrioMgmt="CODER_PROC_PRIO_MGMT"
// Environment variables that configure how the agent manages the priority
// of other processes.
const (
	// EnvProcPrioMgmt determines whether we attempt to manage
	// process CPU and OOM Killer priority.
	EnvProcPrioMgmt = "CODER_PROC_PRIO_MGMT"
	// EnvProcOOMScore, when set to an integer in [-1000, 1000], is the
	// oom_score_adj written for managed processes. Values outside that
	// range (or non-integers) are rejected and logged as invalid.
	EnvProcOOMScore = "CODER_PROC_OOM_SCORE"
)
6669

6770
typeOptionsstruct {
6871
Filesystem afero.Fs
@@ -1575,10 +1578,31 @@ func (a *agent) manageProcessPriorityUntilGracefulShutdown() {
15751578
a.processManagementTick=ticker.C
15761579
}
15771580

1581+
oomScore:=unsetOOMScore
1582+
ifscoreStr,ok:=a.environmentVariables[EnvProcOOMScore];ok {
1583+
score,err:=strconv.Atoi(strings.TrimSpace(scoreStr))
1584+
iferr==nil&&score>=-1000&&score<=1000 {
1585+
oomScore=score
1586+
}else {
1587+
a.logger.Error(ctx,"invalid oom score",
1588+
slog.F("min_value",-1000),
1589+
slog.F("max_value",1000),
1590+
slog.F("value",scoreStr),
1591+
)
1592+
}
1593+
}
1594+
1595+
debouncer:=&logDebouncer{
1596+
logger:a.logger,
1597+
messages:map[string]time.Time{},
1598+
interval:time.Minute,
1599+
}
1600+
15781601
for {
1579-
procs,err:=a.manageProcessPriority(ctx)
1602+
procs,err:=a.manageProcessPriority(ctx,debouncer,oomScore)
1603+
// Avoid spamming the logs too often.
15801604
iferr!=nil {
1581-
a.logger.Error(ctx,"manage process priority",
1605+
debouncer.Error(ctx,"manage process priority",
15821606
slog.Error(err),
15831607
)
15841608
}
@@ -1594,42 +1618,51 @@ func (a *agent) manageProcessPriorityUntilGracefulShutdown() {
15941618
}
15951619
}
15961620

1597-
func (a*agent)manageProcessPriority(ctx context.Context) ([]*agentproc.Process,error) {
// unsetOOMScore is set to an invalid OOM score to imply an unset value.
// Accepted scores are limited to [-1000, 1000], so 1001 can never collide
// with a score that was explicitly configured.
const unsetOOMScore = 1001
1623+
1624+
func (a*agent)manageProcessPriority(ctx context.Context,debouncer*logDebouncer,oomScoreint) ([]*agentproc.Process,error) {
15981625
const (
15991626
niceness=10
16001627
)
16011628

1629+
// We fetch the agent score each time because it's possible someone updates the
1630+
// value after it is started.
1631+
agentScore,err:=a.getAgentOOMScore()
1632+
iferr!=nil {
1633+
agentScore=unsetOOMScore
1634+
}
1635+
ifoomScore==unsetOOMScore&&agentScore!=unsetOOMScore {
1636+
// If the child score has not been explicitly specified we should
1637+
// set it to a score relative to the agent score.
1638+
oomScore=childOOMScore(agentScore)
1639+
}
1640+
16021641
procs,err:=agentproc.List(a.filesystem,a.syscaller)
16031642
iferr!=nil {
16041643
returnnil,xerrors.Errorf("list: %w",err)
16051644
}
16061645

1607-
var (
1608-
modProcs= []*agentproc.Process{}
1609-
logger slog.Logger
1610-
)
1646+
modProcs:= []*agentproc.Process{}
16111647

16121648
for_,proc:=rangeprocs {
1613-
logger=a.logger.With(
1614-
slog.F("cmd",proc.Cmd()),
1615-
slog.F("pid",proc.PID),
1616-
)
1617-
16181649
containsFn:=func(estring)bool {
16191650
contains:=strings.Contains(proc.Cmd(),e)
16201651
returncontains
16211652
}
16221653

16231654
// If the process is prioritized we skip it entirely, leaving both
16241655
// its oom_score_adj and its niceness untouched.
1625-
ifslices.ContainsFunc[[]string,string](prioritizedProcs,containsFn) {
1656+
ifslices.ContainsFunc(prioritizedProcs,containsFn) {
16261657
continue
16271658
}
16281659

1629-
score,err:=proc.Niceness(a.syscaller)
1630-
iferr!=nil {
1631-
logger.Warn(ctx,"unable to get proc niceness",
1632-
slog.Error(err),
1660+
score,niceErr:=proc.Niceness(a.syscaller)
1661+
ifniceErr!=nil&&!xerrors.Is(niceErr,os.ErrPermission) {
1662+
debouncer.Warn(ctx,"unable to get proc niceness",
1663+
slog.F("cmd",proc.Cmd()),
1664+
slog.F("pid",proc.PID),
1665+
slog.Error(niceErr),
16331666
)
16341667
continue
16351668
}
@@ -1643,15 +1676,31 @@ func (a *agent) manageProcessPriority(ctx context.Context) ([]*agentproc.Process
16431676
continue
16441677
}
16451678

1646-
err=proc.SetNiceness(a.syscaller,niceness)
1647-
iferr!=nil {
1648-
logger.Warn(ctx,"unable to set proc niceness",
1649-
slog.F("niceness",niceness),
1650-
slog.Error(err),
1651-
)
1652-
continue
1679+
ifniceErr==nil {
1680+
err:=proc.SetNiceness(a.syscaller,niceness)
1681+
iferr!=nil&&!xerrors.Is(err,os.ErrPermission) {
1682+
debouncer.Warn(ctx,"unable to set proc niceness",
1683+
slog.F("cmd",proc.Cmd()),
1684+
slog.F("pid",proc.PID),
1685+
slog.F("niceness",niceness),
1686+
slog.Error(err),
1687+
)
1688+
}
16531689
}
16541690

1691+
// If the oom score is valid and it's not already set and isn't a custom value set by another process then it's ok to update it.
1692+
ifoomScore!=unsetOOMScore&&oomScore!=proc.OOMScoreAdj&&!isCustomOOMScore(agentScore,proc) {
1693+
oomScoreStr:=strconv.Itoa(oomScore)
1694+
err:=afero.WriteFile(a.filesystem,fmt.Sprintf("/proc/%d/oom_score_adj",proc.PID), []byte(oomScoreStr),0o644)
1695+
iferr!=nil&&!xerrors.Is(err,os.ErrPermission) {
1696+
debouncer.Warn(ctx,"unable to set oom_score_adj",
1697+
slog.F("cmd",proc.Cmd()),
1698+
slog.F("pid",proc.PID),
1699+
slog.F("score",oomScoreStr),
1700+
slog.Error(err),
1701+
)
1702+
}
1703+
}
16551704
modProcs=append(modProcs,proc)
16561705
}
16571706
returnmodProcs,nil
@@ -2005,3 +2054,77 @@ func PrometheusMetricsHandler(prometheusRegistry *prometheus.Registry, logger sl
20052054
}
20062055
})
20072056
}
2057+
// childOOMScore returns the oom_score_adj for a child process. It is based
// on the oom_score_adj of the agent process:
//
//   - agentScore < 0:    0, so the child is treated like every other process.
//   - agentScore >= 998: 1000, because the agent is already almost at the
//     maximum.
//   - otherwise:         998, slightly less than the maximum.
//
// If users want a different score they set it directly.
func childOOMScore(agentScore int) int {
	const (
		// maxOOMScore is the maximum oom_score_adj value.
		maxOOMScore = 1000
		// defaultChildOOMScore is slightly less than the maximum.
		defaultChildOOMScore = 998
	)

	switch {
	case agentScore < 0:
		return 0
	case agentScore >= defaultChildOOMScore:
		return maxOOMScore
	default:
		return defaultChildOOMScore
	}
}
2077+
2078+
func (a*agent)getAgentOOMScore() (int,error) {
2079+
scoreStr,err:=afero.ReadFile(a.filesystem,"/proc/self/oom_score_adj")
2080+
iferr!=nil {
2081+
return0,xerrors.Errorf("read file: %w",err)
2082+
}
2083+
2084+
score,err:=strconv.Atoi(strings.TrimSpace(string(scoreStr)))
2085+
iferr!=nil {
2086+
return0,xerrors.Errorf("parse int: %w",err)
2087+
}
2088+
2089+
returnscore,nil
2090+
}
2091+
2092+
// isCustomOOMScore checks to see if the oom_score_adj is not a value that would
2093+
// originate from an agent-spawned process.
2094+
funcisCustomOOMScore(agentScoreint,process*agentproc.Process)bool {
2095+
score:=process.OOMScoreAdj
2096+
returnagentScore!=score&&score!=1000&&score!=0&&score!=998
2097+
}
2098+
2099+
// logDebouncer skips writing a log for a particular message if
2100+
// it's been emitted within the given interval duration.
2101+
// It's a shoddy implementation used in one spot that should be replaced at
2102+
// some point.
2103+
typelogDebouncerstruct {
2104+
logger slog.Logger
2105+
messagesmap[string]time.Time
2106+
interval time.Duration
2107+
}
2108+
2109+
func (l*logDebouncer)Warn(ctx context.Context,msgstring,fields...any) {
2110+
l.log(ctx,slog.LevelWarn,msg,fields...)
2111+
}
2112+
2113+
func (l*logDebouncer)Error(ctx context.Context,msgstring,fields...any) {
2114+
l.log(ctx,slog.LevelError,msg,fields...)
2115+
}
2116+
2117+
func (l*logDebouncer)log(ctx context.Context,level slog.Level,msgstring,fields...any) {
2118+
// This (bad) implementation assumes you wouldn't reuse the same msg
2119+
// for different levels.
2120+
iflast,ok:=l.messages[msg];ok&&time.Since(last)<l.interval {
2121+
return
2122+
}
2123+
switchlevel {
2124+
caseslog.LevelWarn:
2125+
l.logger.Warn(ctx,msg,fields...)
2126+
caseslog.LevelError:
2127+
l.logger.Error(ctx,msg,fields...)
2128+
}
2129+
l.messages[msg]=time.Now()
2130+
}

‎agent/agent_test.go

Lines changed: 78 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -2529,11 +2529,11 @@ func TestAgent_ManageProcessPriority(t *testing.T) {
25292529
logger=slog.Make(sloghuman.Sink(io.Discard))
25302530
)
25312531

2532+
requireFileWrite(t,fs,"/proc/self/oom_score_adj","-500")
2533+
25322534
// Create some processes.
25332535
fori:=0;i<4;i++ {
2534-
// Create a prioritized process. This process should
2535-
// have it's oom_score_adj set to -500 and its nice
2536-
// score should be untouched.
2536+
// Create a prioritized process.
25372537
varproc agentproc.Process
25382538
ifi==0 {
25392539
proc=agentproctest.GenerateProcess(t,fs,
@@ -2551,8 +2551,8 @@ func TestAgent_ManageProcessPriority(t *testing.T) {
25512551
},
25522552
)
25532553

2554-
syscaller.EXPECT().SetPriority(proc.PID,10).Return(nil)
25552554
syscaller.EXPECT().GetPriority(proc.PID).Return(20,nil)
2555+
syscaller.EXPECT().SetPriority(proc.PID,10).Return(nil)
25562556
}
25572557
syscaller.EXPECT().
25582558
Kill(proc.PID,syscall.Signal(0)).
@@ -2571,6 +2571,9 @@ func TestAgent_ManageProcessPriority(t *testing.T) {
25712571
})
25722572
actualProcs:=<-modProcs
25732573
require.Len(t,actualProcs,len(expectedProcs)-1)
2574+
for_,proc:=rangeactualProcs {
2575+
requireFileEquals(t,fs,fmt.Sprintf("/proc/%d/oom_score_adj",proc.PID),"0")
2576+
}
25742577
})
25752578

25762579
t.Run("IgnoreCustomNice",func(t*testing.T) {
@@ -2589,8 +2592,11 @@ func TestAgent_ManageProcessPriority(t *testing.T) {
25892592
logger=slog.Make(sloghuman.Sink(io.Discard))
25902593
)
25912594

2595+
err:=afero.WriteFile(fs,"/proc/self/oom_score_adj", []byte("0"),0o644)
2596+
require.NoError(t,err)
2597+
25922598
// Create some processes.
2593-
fori:=0;i<2;i++ {
2599+
fori:=0;i<3;i++ {
25942600
proc:=agentproctest.GenerateProcess(t,fs)
25952601
syscaller.EXPECT().
25962602
Kill(proc.PID,syscall.Signal(0)).
@@ -2618,7 +2624,59 @@ func TestAgent_ManageProcessPriority(t *testing.T) {
26182624
})
26192625
actualProcs:=<-modProcs
26202626
// We should ignore the process with a custom nice score.
2621-
require.Len(t,actualProcs,1)
2627+
require.Len(t,actualProcs,2)
2628+
for_,proc:=rangeactualProcs {
2629+
_,ok:=expectedProcs[proc.PID]
2630+
require.True(t,ok)
2631+
requireFileEquals(t,fs,fmt.Sprintf("/proc/%d/oom_score_adj",proc.PID),"998")
2632+
}
2633+
})
2634+
2635+
t.Run("CustomOOMScore",func(t*testing.T) {
2636+
t.Parallel()
2637+
2638+
ifruntime.GOOS!="linux" {
2639+
t.Skip("Skipping non-linux environment")
2640+
}
2641+
2642+
var (
2643+
fs=afero.NewMemMapFs()
2644+
ticker=make(chan time.Time)
2645+
syscaller=agentproctest.NewMockSyscaller(gomock.NewController(t))
2646+
modProcs=make(chan []*agentproc.Process)
2647+
logger=slog.Make(sloghuman.Sink(io.Discard))
2648+
)
2649+
2650+
err:=afero.WriteFile(fs,"/proc/self/oom_score_adj", []byte("0"),0o644)
2651+
require.NoError(t,err)
2652+
2653+
// Create some processes.
2654+
fori:=0;i<3;i++ {
2655+
proc:=agentproctest.GenerateProcess(t,fs)
2656+
syscaller.EXPECT().
2657+
Kill(proc.PID,syscall.Signal(0)).
2658+
Return(nil)
2659+
syscaller.EXPECT().GetPriority(proc.PID).Return(20,nil)
2660+
syscaller.EXPECT().SetPriority(proc.PID,10).Return(nil)
2661+
}
2662+
2663+
_,_,_,_,_=setupAgent(t, agentsdk.Manifest{},0,func(c*agenttest.Client,o*agent.Options) {
2664+
o.Syscaller=syscaller
2665+
o.ModifiedProcesses=modProcs
2666+
o.EnvironmentVariables=map[string]string{
2667+
agent.EnvProcPrioMgmt:"1",
2668+
agent.EnvProcOOMScore:"-567",
2669+
}
2670+
o.Filesystem=fs
2671+
o.Logger=logger
2672+
o.ProcessManagementTick=ticker
2673+
})
2674+
actualProcs:=<-modProcs
2675+
// All three processes should be modified and given the configured OOM score.
2676+
require.Len(t,actualProcs,3)
2677+
for_,proc:=rangeactualProcs {
2678+
requireFileEquals(t,fs,fmt.Sprintf("/proc/%d/oom_score_adj",proc.PID),"-567")
2679+
}
26222680
})
26232681

26242682
t.Run("DisabledByDefault",func(t*testing.T) {
@@ -2739,3 +2797,17 @@ func requireEcho(t *testing.T, conn net.Conn) {
27392797
require.NoError(t,err)
27402798
require.Equal(t,"test",string(b))
27412799
}
2800+
2801+
funcrequireFileWrite(t testing.TB,fs afero.Fs,fp,datastring) {
2802+
t.Helper()
2803+
err:=afero.WriteFile(fs,fp, []byte(data),0o600)
2804+
require.NoError(t,err)
2805+
}
2806+
2807+
funcrequireFileEquals(t testing.TB,fs afero.Fs,fp,expectstring) {
2808+
t.Helper()
2809+
actual,err:=afero.ReadFile(fs,fp)
2810+
require.NoError(t,err)
2811+
2812+
require.Equal(t,expect,string(actual))
2813+
}

‎agent/agentproc/agentproctest/proc.go

Lines changed: 8 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,7 @@ package agentproctest
22

33
import (
44
"fmt"
5+
"strconv"
56
"testing"
67

78
"github.com/spf13/afero"
@@ -29,8 +30,9 @@ func GenerateProcess(t *testing.T, fs afero.Fs, muts ...func(*agentproc.Process)
2930
cmdline:=fmt.Sprintf("%s\x00%s\x00%s",arg1,arg2,arg3)
3031

3132
process:= agentproc.Process{
32-
CmdLine:cmdline,
33-
PID:int32(pid),
33+
CmdLine:cmdline,
34+
PID:int32(pid),
35+
OOMScoreAdj:0,
3436
}
3537

3638
for_,mut:=rangemuts {
@@ -45,5 +47,9 @@ func GenerateProcess(t *testing.T, fs afero.Fs, muts ...func(*agentproc.Process)
4547
err=afero.WriteFile(fs,fmt.Sprintf("%s/cmdline",process.Dir), []byte(process.CmdLine),0o444)
4648
require.NoError(t,err)
4749

50+
score:=strconv.Itoa(process.OOMScoreAdj)
51+
err=afero.WriteFile(fs,fmt.Sprintf("%s/oom_score_adj",process.Dir), []byte(score),0o444)
52+
require.NoError(t,err)
53+
4854
returnprocess
4955
}

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp