|
| 1 | +package clistat |
| 2 | + |
| 3 | +import ( |
| 4 | +"bufio" |
| 5 | +"bytes" |
| 6 | +"strconv" |
| 7 | +"strings" |
| 8 | + |
| 9 | +"github.com/spf13/afero" |
| 10 | +"golang.org/x/xerrors" |
| 11 | +"tailscale.com/types/ptr" |
| 12 | +) |
| 13 | + |
// Paths for CGroupV1.
// Ref: https://www.kernel.org/doc/Documentation/cgroup-v1/cpuacct.txt
// Ref: https://www.kernel.org/doc/Documentation/cgroup-v1/memory.txt
const (
	// CPU usage of all tasks in cgroup in nanoseconds.
	cgroupV1CPUAcctUsage = "/sys/fs/cgroup/cpu/cpuacct.usage"
	// Alternate path for the same counter, used when the cpu and cpuacct
	// controllers are mounted together.
	cgroupV1CPUAcctUsageAlt = "/sys/fs/cgroup/cpu,cpuacct/cpuacct.usage"
	// CFS quota and period for cgroup in MICROseconds
	cgroupV1CFSQuotaUs  = "/sys/fs/cgroup/cpu,cpuacct/cpu.cfs_quota_us"
	cgroupV1CFSPeriodUs = "/sys/fs/cgroup/cpu,cpuacct/cpu.cfs_period_us"
	// Maximum memory usable by cgroup in bytes.
	// This must be memory.limit_in_bytes: memory.max_usage_in_bytes is the
	// high-water mark of recorded usage, not the configured limit.
	cgroupV1MemoryMaxUsageBytes = "/sys/fs/cgroup/memory/memory.limit_in_bytes"
	// Current memory usage of cgroup in bytes
	cgroupV1MemoryUsageBytes = "/sys/fs/cgroup/memory/memory.usage_in_bytes"
	// Other memory stats - we are interested in total_inactive_file
	cgroupV1MemoryStat = "/sys/fs/cgroup/memory/memory.stat"
)
| 31 | + |
// Paths for CGroupV2.
// Ref: https://docs.kernel.org/admin-guide/cgroup-v2.html
const (
	// cpu.max: the quota followed by the period, both in microseconds,
	// separated by a single space.
	cgroupV2CPUMax = "/sys/fs/cgroup/cpu.max"
	// cpu.stat: key/value lines; the usage_usec entry is the one read here.
	cgroupV2CPUStat = "/sys/fs/cgroup/cpu.stat"
	// memory.current: memory currently used by the cgroup, in bytes.
	cgroupV2MemoryUsageBytes = "/sys/fs/cgroup/memory.current"
	// memory.max: memory limit of the cgroup, in bytes.
	cgroupV2MemoryMaxBytes = "/sys/fs/cgroup/memory.max"
	// memory.stat: key/value lines; the inactive_file entry is the one
	// read here.
	cgroupV2MemoryStat = "/sys/fs/cgroup/memory.stat"
)
| 46 | + |
| 47 | +// ContainerCPU returns the CPU usage of the container cgroup. |
| 48 | +// This is calculated as difference of two samples of the |
| 49 | +// CPU usage of the container cgroup. |
| 50 | +// The total is read from the relevant path in /sys/fs/cgroup. |
| 51 | +// If there is no limit set, the total is assumed to be the |
| 52 | +// number of host cores multiplied by the CFS period. |
| 53 | +// If the system is not containerized, this always returns nil. |
| 54 | +func (s*Statter)ContainerCPU() (*Result,error) { |
| 55 | +// Firstly, check if we are containerized. |
| 56 | +ifok,err:=IsContainerized(s.fs);err!=nil||!ok { |
| 57 | +returnnil,nil//nolint: nilnil |
| 58 | +} |
| 59 | + |
| 60 | +total,err:=s.cGroupCPUTotal() |
| 61 | +iferr!=nil { |
| 62 | +returnnil,xerrors.Errorf("get total cpu: %w",err) |
| 63 | +} |
| 64 | + |
| 65 | +used1,err:=s.cGroupCPUUsed() |
| 66 | +iferr!=nil { |
| 67 | +returnnil,xerrors.Errorf("get cgroup CPU usage: %w",err) |
| 68 | +} |
| 69 | + |
| 70 | +// The measurements in /sys/fs/cgroup are counters. |
| 71 | +// We need to wait for a bit to get a difference. |
| 72 | +// Note that someone could reset the counter in the meantime. |
| 73 | +// We can't do anything about that. |
| 74 | +s.wait(s.sampleInterval) |
| 75 | + |
| 76 | +used2,err:=s.cGroupCPUUsed() |
| 77 | +iferr!=nil { |
| 78 | +returnnil,xerrors.Errorf("get cgroup CPU usage: %w",err) |
| 79 | +} |
| 80 | + |
| 81 | +ifused2<used1 { |
| 82 | +// Someone reset the counter. Best we can do is count from zero. |
| 83 | +used1=0 |
| 84 | +} |
| 85 | + |
| 86 | +r:=&Result{ |
| 87 | +Unit:"cores", |
| 88 | +Used:used2-used1, |
| 89 | +Total:ptr.To(total), |
| 90 | +} |
| 91 | +returnr,nil |
| 92 | +} |
| 93 | + |
| 94 | +func (s*Statter)cGroupCPUTotal() (usedfloat64,errerror) { |
| 95 | +ifs.isCGroupV2() { |
| 96 | +returns.cGroupV2CPUTotal() |
| 97 | +} |
| 98 | + |
| 99 | +// Fall back to CGroupv1 |
| 100 | +returns.cGroupV1CPUTotal() |
| 101 | +} |
| 102 | + |
| 103 | +func (s*Statter)cGroupCPUUsed() (usedfloat64,errerror) { |
| 104 | +ifs.isCGroupV2() { |
| 105 | +returns.cGroupV2CPUUsed() |
| 106 | +} |
| 107 | + |
| 108 | +returns.cGroupV1CPUUsed() |
| 109 | +} |
| 110 | + |
| 111 | +func (s*Statter)isCGroupV2()bool { |
| 112 | +// Check for the presence of /sys/fs/cgroup/cpu.max |
| 113 | +_,err:=s.fs.Stat(cgroupV2CPUMax) |
| 114 | +returnerr==nil |
| 115 | +} |
| 116 | + |
| 117 | +func (s*Statter)cGroupV2CPUUsed() (usedfloat64,errerror) { |
| 118 | +usageUs,err:=readInt64Prefix(s.fs,cgroupV2CPUStat,"usage_usec") |
| 119 | +iferr!=nil { |
| 120 | +return0,xerrors.Errorf("get cgroupv2 cpu used: %w",err) |
| 121 | +} |
| 122 | +periodUs,err:=readInt64SepIdx(s.fs,cgroupV2CPUMax," ",1) |
| 123 | +iferr!=nil { |
| 124 | +return0,xerrors.Errorf("get cpu period: %w",err) |
| 125 | +} |
| 126 | + |
| 127 | +returnfloat64(usageUs)/float64(periodUs),nil |
| 128 | +} |
| 129 | + |
| 130 | +func (s*Statter)cGroupV2CPUTotal() (totalfloat64,errerror) { |
| 131 | +varquotaUs,periodUsint64 |
| 132 | +periodUs,err=readInt64SepIdx(s.fs,cgroupV2CPUMax," ",1) |
| 133 | +iferr!=nil { |
| 134 | +return0,xerrors.Errorf("get cpu period: %w",err) |
| 135 | +} |
| 136 | + |
| 137 | +quotaUs,err=readInt64SepIdx(s.fs,cgroupV2CPUMax," ",0) |
| 138 | +iferr!=nil { |
| 139 | +// Fall back to number of cores |
| 140 | +quotaUs=int64(s.nproc)*periodUs |
| 141 | +} |
| 142 | + |
| 143 | +returnfloat64(quotaUs)/float64(periodUs),nil |
| 144 | +} |
| 145 | + |
| 146 | +func (s*Statter)cGroupV1CPUTotal() (float64,error) { |
| 147 | +periodUs,err:=readInt64(s.fs,cgroupV1CFSPeriodUs) |
| 148 | +iferr!=nil { |
| 149 | +return0,xerrors.Errorf("read cpu period: %w",err) |
| 150 | +} |
| 151 | + |
| 152 | +quotaUs,err:=readInt64(s.fs,cgroupV1CFSQuotaUs) |
| 153 | +iferr!=nil { |
| 154 | +return0,xerrors.Errorf("read cpu quota: %w",err) |
| 155 | +} |
| 156 | + |
| 157 | +ifquotaUs<0 { |
| 158 | +// Fall back to the number of cores |
| 159 | +quotaUs=int64(s.nproc)*periodUs |
| 160 | +} |
| 161 | + |
| 162 | +returnfloat64(quotaUs)/float64(periodUs),nil |
| 163 | +} |
| 164 | + |
| 165 | +func (s*Statter)cGroupV1CPUUsed() (float64,error) { |
| 166 | +usageNs,err:=readInt64(s.fs,cgroupV1CPUAcctUsage) |
| 167 | +iferr!=nil { |
| 168 | +// try alternate path |
| 169 | +usageNs,err=readInt64(s.fs,cgroupV1CPUAcctUsageAlt) |
| 170 | +iferr!=nil { |
| 171 | +return0,xerrors.Errorf("read cpu used: %w",err) |
| 172 | +} |
| 173 | +} |
| 174 | + |
| 175 | +// usage is in ns, convert to us |
| 176 | +usageNs/=1000 |
| 177 | +periodUs,err:=readInt64(s.fs,cgroupV1CFSPeriodUs) |
| 178 | +iferr!=nil { |
| 179 | +return0,xerrors.Errorf("get cpu period: %w",err) |
| 180 | +} |
| 181 | + |
| 182 | +returnfloat64(usageNs)/float64(periodUs),nil |
| 183 | +} |
| 184 | + |
| 185 | +// ContainerMemory returns the memory usage of the container cgroup. |
| 186 | +// If the system is not containerized, this always returns nil. |
| 187 | +func (s*Statter)ContainerMemory() (*Result,error) { |
| 188 | +ifok,err:=IsContainerized(s.fs);err!=nil||!ok { |
| 189 | +returnnil,nil//nolint:nilnil |
| 190 | +} |
| 191 | + |
| 192 | +ifs.isCGroupV2() { |
| 193 | +returns.cGroupV2Memory() |
| 194 | +} |
| 195 | + |
| 196 | +// Fall back to CGroupv1 |
| 197 | +returns.cGroupV1Memory() |
| 198 | +} |
| 199 | + |
| 200 | +func (s*Statter)cGroupV2Memory() (*Result,error) { |
| 201 | +maxUsageBytes,err:=readInt64(s.fs,cgroupV2MemoryMaxBytes) |
| 202 | +iferr!=nil { |
| 203 | +returnnil,xerrors.Errorf("read memory total: %w",err) |
| 204 | +} |
| 205 | + |
| 206 | +currUsageBytes,err:=readInt64(s.fs,cgroupV2MemoryUsageBytes) |
| 207 | +iferr!=nil { |
| 208 | +returnnil,xerrors.Errorf("read memory usage: %w",err) |
| 209 | +} |
| 210 | + |
| 211 | +inactiveFileBytes,err:=readInt64Prefix(s.fs,cgroupV2MemoryStat,"inactive_file") |
| 212 | +iferr!=nil { |
| 213 | +returnnil,xerrors.Errorf("read memory stats: %w",err) |
| 214 | +} |
| 215 | + |
| 216 | +return&Result{ |
| 217 | +Total:ptr.To(float64(maxUsageBytes)), |
| 218 | +Used:float64(currUsageBytes-inactiveFileBytes), |
| 219 | +Unit:"B", |
| 220 | +},nil |
| 221 | +} |
| 222 | + |
| 223 | +func (s*Statter)cGroupV1Memory() (*Result,error) { |
| 224 | +maxUsageBytes,err:=readInt64(s.fs,cgroupV1MemoryMaxUsageBytes) |
| 225 | +iferr!=nil { |
| 226 | +returnnil,xerrors.Errorf("read memory total: %w",err) |
| 227 | +} |
| 228 | + |
| 229 | +// need a space after total_rss so we don't hit something else |
| 230 | +usageBytes,err:=readInt64(s.fs,cgroupV1MemoryUsageBytes) |
| 231 | +iferr!=nil { |
| 232 | +returnnil,xerrors.Errorf("read memory usage: %w",err) |
| 233 | +} |
| 234 | + |
| 235 | +totalInactiveFileBytes,err:=readInt64Prefix(s.fs,cgroupV1MemoryStat,"total_inactive_file") |
| 236 | +iferr!=nil { |
| 237 | +returnnil,xerrors.Errorf("read memory stats: %w",err) |
| 238 | +} |
| 239 | + |
| 240 | +// Total memory used is usage - total_inactive_file |
| 241 | +return&Result{ |
| 242 | +Total:ptr.To(float64(maxUsageBytes)), |
| 243 | +Used:float64(usageBytes-totalInactiveFileBytes), |
| 244 | +Unit:"B", |
| 245 | +},nil |
| 246 | +} |
| 247 | + |
| 248 | +// read an int64 value from path |
| 249 | +funcreadInt64(fs afero.Fs,pathstring) (int64,error) { |
| 250 | +data,err:=afero.ReadFile(fs,path) |
| 251 | +iferr!=nil { |
| 252 | +return0,xerrors.Errorf("read %s: %w",path,err) |
| 253 | +} |
| 254 | + |
| 255 | +val,err:=strconv.ParseInt(string(bytes.TrimSpace(data)),10,64) |
| 256 | +iferr!=nil { |
| 257 | +return0,xerrors.Errorf("parse %s: %w",path,err) |
| 258 | +} |
| 259 | + |
| 260 | +returnval,nil |
| 261 | +} |
| 262 | + |
| 263 | +// read an int64 value from path at field idx separated by sep |
| 264 | +funcreadInt64SepIdx(fs afero.Fs,path,sepstring,idxint) (int64,error) { |
| 265 | +data,err:=afero.ReadFile(fs,path) |
| 266 | +iferr!=nil { |
| 267 | +return0,xerrors.Errorf("read %s: %w",path,err) |
| 268 | +} |
| 269 | + |
| 270 | +parts:=strings.Split(string(data),sep) |
| 271 | +iflen(parts)<idx { |
| 272 | +return0,xerrors.Errorf("expected line %q to have at least %d parts",string(data),idx+1) |
| 273 | +} |
| 274 | + |
| 275 | +val,err:=strconv.ParseInt(strings.TrimSpace(parts[idx]),10,64) |
| 276 | +iferr!=nil { |
| 277 | +return0,xerrors.Errorf("parse %s: %w",path,err) |
| 278 | +} |
| 279 | + |
| 280 | +returnval,nil |
| 281 | +} |
| 282 | + |
| 283 | +// read the first int64 value from path prefixed with prefix |
| 284 | +funcreadInt64Prefix(fs afero.Fs,path,prefixstring) (int64,error) { |
| 285 | +data,err:=afero.ReadFile(fs,path) |
| 286 | +iferr!=nil { |
| 287 | +return0,xerrors.Errorf("read %s: %w",path,err) |
| 288 | +} |
| 289 | + |
| 290 | +scn:=bufio.NewScanner(bytes.NewReader(data)) |
| 291 | +forscn.Scan() { |
| 292 | +line:=scn.Text() |
| 293 | +if!strings.HasPrefix(line,prefix) { |
| 294 | +continue |
| 295 | +} |
| 296 | + |
| 297 | +parts:=strings.Fields(line) |
| 298 | +iflen(parts)!=2 { |
| 299 | +return0,xerrors.Errorf("parse %s: expected two fields but got %s",path,line) |
| 300 | +} |
| 301 | + |
| 302 | +val,err:=strconv.ParseInt(strings.TrimSpace(parts[1]),10,64) |
| 303 | +iferr!=nil { |
| 304 | +return0,xerrors.Errorf("parse %s: %w",path,err) |
| 305 | +} |
| 306 | + |
| 307 | +returnval,nil |
| 308 | +} |
| 309 | + |
| 310 | +return0,xerrors.Errorf("parse %s: did not find line with prefix %s",path,prefix) |
| 311 | +} |