Commit 4a80270

Merge pull request #133012 from tallclair/mem-lim-decrease

[FG:InPlacePodVerticalScaling] Support reducing memory limits

2 parents 8434145 + 75ae2d7, commit 4a80270
File tree

8 files changed (+527, -74 lines)

pkg/apis/core/validation/validation.go

Lines changed: 0 additions & 25 deletions
@@ -5845,31 +5845,6 @@ func validateContainerResize(newRequirements, oldRequirements *core.ResourceRequ
 		allErrs = append(allErrs, field.Forbidden(fldPath.Child("limits"), "resource limits cannot be removed"))
 	}
 
-	// Special case: memory limits may not be decreased if resize policy is NotRequired.
-	var memRestartPolicy core.ResourceResizeRestartPolicy
-	for _, policy := range resizePolicies {
-		if policy.ResourceName == core.ResourceMemory {
-			memRestartPolicy = policy.RestartPolicy
-			break
-		}
-	}
-	if memRestartPolicy == core.NotRequired || memRestartPolicy == "" {
-		newLimit, hasNewLimit := newRequirements.Limits[core.ResourceMemory]
-		oldLimit, hasOldLimit := oldRequirements.Limits[core.ResourceMemory]
-		if hasNewLimit && hasOldLimit {
-			if newLimit.Cmp(oldLimit) < 0 {
-				allErrs = append(allErrs, field.Forbidden(
-					fldPath.Child("limits").Key(core.ResourceMemory.String()),
-					fmt.Sprintf("memory limits cannot be decreased unless resizePolicy is %s", core.RestartContainer)))
-			}
-		} else if hasNewLimit && !hasOldLimit {
-			// Adding a memory limit is implicitly decreasing the memory limit (from 'max')
-			allErrs = append(allErrs, field.Forbidden(
-				fldPath.Child("limits").Key(core.ResourceMemory.String()),
-				fmt.Sprintf("memory limits cannot be added unless resizePolicy is %s", core.RestartContainer)))
-		}
-	}
-
 	// TODO(tallclair): Move resizable resource checks here.
 
 	return allErrs
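With the special case above removed, the apiserver no longer rejects a memory-limit decrease based on the container's resizePolicy; feasibility is instead checked by the kubelet at resize time (see kuberuntime_manager.go below). A minimal standalone sketch (not part of the commit) of the quantity comparison that used to gate this, using only k8s.io/apimachinery resource quantities:

package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/api/resource"
)

func main() {
	oldLimit := resource.MustParse("200Mi")
	newLimit := resource.MustParse("100Mi")
	// Cmp < 0 means the requested limit is lower than the current one.
	if newLimit.Cmp(oldLimit) < 0 {
		// Before this commit: rejected by validation unless resizePolicy was RestartContainer.
		// After this commit: accepted; the kubelet verifies feasibility against actual usage.
		fmt.Println("memory limit decrease requested")
	}
}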

pkg/apis/core/validation/validation_test.go

Lines changed: 3 additions & 13 deletions
@@ -26882,12 +26882,12 @@ func TestValidatePodResize(t *testing.T) {
 		test: "no restart policy: memory limit decrease",
 		old:  mkPod(core.ResourceList{}, getResources("100m", "200Mi", "", "")),
 		new:  mkPod(core.ResourceList{}, getResources("100m", "100Mi", "", "")),
-		err:  "memory limits cannot be decreased",
+		err:  "",
 	}, {
 		test: "restart NotRequired: memory limit decrease",
 		old:  mkPod(core.ResourceList{}, getResources("100m", "200Mi", "", ""), resizePolicy("memory", core.NotRequired)),
 		new:  mkPod(core.ResourceList{}, getResources("100m", "100Mi", "", ""), resizePolicy("memory", core.NotRequired)),
-		err:  "memory limits cannot be decreased",
+		err:  "",
 	}, {
 		test: "RestartContainer: memory limit decrease",
 		old:  mkPod(core.ResourceList{}, getResources("100m", "200Mi", "", ""), resizePolicy("memory", core.RestartContainer)),
@@ -27106,19 +27106,9 @@ func TestValidatePodResize(t *testing.T) {
 		new:  mkPodWithInitContainers(core.ResourceList{}, getResources("100m", "200Mi", "", ""), core.ContainerRestartPolicyAlways),
 		err:  "",
 	}, {
-		test: "memory limit decrease for sidecar containers, no resize policy",
+		test: "memory limit decrease for sidecar containers",
 		old:  mkPodWithInitContainers(core.ResourceList{}, getResources("100m", "200Mi", "", ""), core.ContainerRestartPolicyAlways),
 		new:  mkPodWithInitContainers(core.ResourceList{}, getResources("100m", "100Mi", "", ""), core.ContainerRestartPolicyAlways),
-		err:  "memory limits cannot be decreased",
-	}, {
-		test: "memory limit decrease for sidecar containers, resize policy NotRequired",
-		old:  mkPodWithInitContainers(core.ResourceList{}, getResources("100m", "200Mi", "", ""), core.ContainerRestartPolicyAlways, resizePolicy(core.ResourceMemory, core.NotRequired)),
-		new:  mkPodWithInitContainers(core.ResourceList{}, getResources("100m", "100Mi", "", ""), core.ContainerRestartPolicyAlways, resizePolicy(core.ResourceMemory, core.NotRequired)),
-		err:  "memory limits cannot be decreased",
-	}, {
-		test: "memory limit decrease for sidecar containers, resize policy RestartContainer",
-		old:  mkPodWithInitContainers(core.ResourceList{}, getResources("100m", "200Mi", "", ""), core.ContainerRestartPolicyAlways, resizePolicy(core.ResourceMemory, core.RestartContainer)),
-		new:  mkPodWithInitContainers(core.ResourceList{}, getResources("100m", "100Mi", "", ""), core.ContainerRestartPolicyAlways, resizePolicy(core.ResourceMemory, core.RestartContainer)),
 		err:  "",
 	}, {
 		test: "storage limit change for sidecar containers",

pkg/kubelet/container/helpers.go

Lines changed: 4 additions & 0 deletions
@@ -31,6 +31,7 @@ import (
 	"k8s.io/apimachinery/pkg/util/sets"
 	"k8s.io/client-go/tools/record"
 	runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
+	statsapi "k8s.io/kubelet/pkg/apis/stats/v1alpha1"
 	podutil "k8s.io/kubernetes/pkg/api/v1/pod"
 	sc "k8s.io/kubernetes/pkg/securitycontext"
 	hashutil "k8s.io/kubernetes/pkg/util/hash"
@@ -69,6 +70,9 @@ type RuntimeHelper interface {
 
 	// SetPodWatchCondition flags a pod to be inspected until the condition is met.
 	SetPodWatchCondition(types.UID, string, func(*PodStatus) bool)
+
+	// PodCPUAndMemoryStats reads the latest CPU & memory usage stats.
+	PodCPUAndMemoryStats(context.Context, *v1.Pod, *PodStatus) (*statsapi.PodStats, error)
 }
 
 // ShouldContainerBeRestarted checks whether a container needs to be restarted.
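The new interface method gives the runtime manager a way to read a pod's current CPU and memory usage through its RuntimeHelper. A minimal sketch of a caller, assuming a hypothetical helper name (currentPodMemoryUsage); only the PodCPUAndMemoryStats call itself comes from this commit:

package example

import (
	"context"
	"fmt"

	v1 "k8s.io/api/core/v1"
	kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
)

// currentPodMemoryUsage is a hypothetical helper that returns the pod's reported
// memory usage in bytes, or an error if usage is unavailable.
func currentPodMemoryUsage(ctx context.Context, helper kubecontainer.RuntimeHelper, pod *v1.Pod, status *kubecontainer.PodStatus) (uint64, error) {
	stats, err := helper.PodCPUAndMemoryStats(ctx, pod, status)
	if err != nil {
		return 0, err
	}
	if stats.Memory == nil || stats.Memory.UsageBytes == nil {
		return 0, fmt.Errorf("memory usage not reported for pod %q", pod.Name)
	}
	return *stats.Memory.UsageBytes, nil
}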

pkg/kubelet/container/testing/fake_runtime_helper.go

Lines changed: 9 additions & 0 deletions
@@ -24,6 +24,7 @@ import (
 	kubetypes "k8s.io/apimachinery/pkg/types"
 	utilfeature "k8s.io/apiserver/pkg/util/feature"
 	runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
+	statsapi "k8s.io/kubelet/pkg/apis/stats/v1alpha1"
 	"k8s.io/kubernetes/pkg/features"
 	kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
 )
@@ -38,6 +39,7 @@ type FakeRuntimeHelper struct {
 	PodContainerDir string
 	RuntimeHandlers map[string]kubecontainer.RuntimeHandler
 	Err             error
+	PodStats        map[kubetypes.UID]*statsapi.PodStats
 }
 
 func (f *FakeRuntimeHelper) GenerateRunContainerOptions(_ context.Context, pod *v1.Pod, container *v1.Container, podIP string, podIPs []string, imageVolumes kubecontainer.ImageVolumes) (*kubecontainer.RunContainerOptions, func(), error) {
@@ -118,3 +120,10 @@ func (f *FakeRuntimeHelper) UnprepareDynamicResources(ctx context.Context, pod *
 func (f *FakeRuntimeHelper) SetPodWatchCondition(_ kubetypes.UID, _ string, _ func(*kubecontainer.PodStatus) bool) {
 	// Not implemented.
 }
+
+func (f *FakeRuntimeHelper) PodCPUAndMemoryStats(_ context.Context, pod *v1.Pod, _ *kubecontainer.PodStatus) (*statsapi.PodStats, error) {
+	if stats, ok := f.PodStats[pod.UID]; ok {
+		return stats, nil
+	}
+	return nil, fmt.Errorf("stats for pod %q not found", pod.UID)
+}
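Because the fake serves canned stats keyed by pod UID, a test can seed whatever usage it wants the resize validation to observe. A small sketch of that setup (the UID and the 150Mi figure are hypothetical; ptr.To is from k8s.io/utils/ptr):

package example

import (
	kubetypes "k8s.io/apimachinery/pkg/types"
	statsapi "k8s.io/kubelet/pkg/apis/stats/v1alpha1"
	containertesting "k8s.io/kubernetes/pkg/kubelet/container/testing"
	"k8s.io/utils/ptr"
)

// newFakeHelperWithStats seeds the fake with memory usage for one pod.
func newFakeHelperWithStats() *containertesting.FakeRuntimeHelper {
	return &containertesting.FakeRuntimeHelper{
		PodStats: map[kubetypes.UID]*statsapi.PodStats{
			// Hypothetical pod UID reporting ~150Mi of memory in use.
			"pod-uid-1": {
				Memory: &statsapi.MemoryStats{UsageBytes: ptr.To(uint64(150 * 1024 * 1024))},
			},
		},
	}
}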

pkg/kubelet/kubelet.go

Lines changed: 5 additions & 0 deletions
@@ -1446,6 +1446,11 @@ func (kl *Kubelet) ListPodCPUAndMemoryStats(ctx context.Context) ([]statsapi.Pod
 	return kl.StatsProvider.ListPodCPUAndMemoryStats(ctx)
 }
 
+// PodCPUAndMemoryStats is delegated to StatsProvider
+func (kl *Kubelet) PodCPUAndMemoryStats(ctx context.Context, pod *v1.Pod, podStatus *kubecontainer.PodStatus) (*statsapi.PodStats, error) {
+	return kl.StatsProvider.PodCPUAndMemoryStats(ctx, pod, podStatus)
+}
+
 // ListPodStatsAndUpdateCPUNanoCoreUsage is delegated to StatsProvider, which implements stats.Provider interface
 func (kl *Kubelet) ListPodStatsAndUpdateCPUNanoCoreUsage(ctx context.Context) ([]statsapi.PodStats, error) {
 	return kl.StatsProvider.ListPodStatsAndUpdateCPUNanoCoreUsage(ctx)

pkg/kubelet/kuberuntime/kuberuntime_manager.go

Lines changed: 91 additions & 18 deletions
@@ -34,6 +34,7 @@ import (
 	"k8s.io/apimachinery/pkg/api/resource"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	kubetypes "k8s.io/apimachinery/pkg/types"
+	utilerrors "k8s.io/apimachinery/pkg/util/errors"
 	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
 	utilversion "k8s.io/apimachinery/pkg/util/version"
 	utilfeature "k8s.io/apiserver/pkg/util/feature"
@@ -696,7 +697,7 @@ func (m *kubeGenericRuntimeManager) computePodResizeAction(pod *v1.Pod, containe
 	return true
 }
 
-func (m *kubeGenericRuntimeManager) doPodResizeAction(pod *v1.Pod, podContainerChanges podActions) *kubecontainer.SyncResult {
+func (m *kubeGenericRuntimeManager) doPodResizeAction(ctx context.Context, pod *v1.Pod, podStatus *kubecontainer.PodStatus, podContainerChanges podActions) *kubecontainer.SyncResult {
 	resizeResult := kubecontainer.NewSyncResult(kubecontainer.ResizePodInPlace, format.Pod(pod))
 	pcm := m.containerManager.NewPodContainerManager()
 	//TODO(vinaykul,InPlacePodVerticalScaling): Figure out best way to get enforceMemoryQoS value (parameter #4 below) in platform-agnostic way
@@ -708,26 +709,34 @@ func (m *kubeGenericRuntimeManager) doPodResizeAction(pod *v1.Pod, podContainerC
 	podResources := cm.ResourceConfigForPod(pod, enforceCPULimits, uint64((m.cpuCFSQuotaPeriod.Duration)/time.Microsecond), false)
 	if podResources == nil {
 		klog.ErrorS(nil, "Unable to get resource configuration", "pod", klog.KObj(pod))
-		resizeResult.Fail(kubecontainer.ErrResizePodInPlace, fmt.Sprintf("unable to get resource configuration processing resize for pod %s", pod.Name))
+		resizeResult.Fail(kubecontainer.ErrResizePodInPlace, fmt.Sprintf("unable to get resource configuration processing resize for pod %q", format.Pod(pod)))
 		return resizeResult
 	}
 	currentPodMemoryConfig, err := pcm.GetPodCgroupConfig(pod, v1.ResourceMemory)
 	if err != nil {
 		klog.ErrorS(nil, "Unable to get pod cgroup memory config", "pod", klog.KObj(pod))
-		resizeResult.Fail(kubecontainer.ErrResizePodInPlace, fmt.Sprintf("unable to get pod cgroup memory config for pod %s", pod.Name))
+		resizeResult.Fail(kubecontainer.ErrResizePodInPlace, fmt.Sprintf("unable to get pod cgroup memory config for pod %q", format.Pod(pod)))
 		return resizeResult
 	}
 	currentPodCPUConfig, err := pcm.GetPodCgroupConfig(pod, v1.ResourceCPU)
 	if err != nil {
 		klog.ErrorS(nil, "Unable to get pod cgroup cpu config", "pod", klog.KObj(pod))
-		resizeResult.Fail(kubecontainer.ErrResizePodInPlace, fmt.Sprintf("unable to get pod cgroup cpu config for pod %s", pod.Name))
+		resizeResult.Fail(kubecontainer.ErrResizePodInPlace, fmt.Sprintf("unable to get pod cgroup cpu config for pod %q", format.Pod(pod)))
 		return resizeResult
 	}
 
 	currentPodResources := podResources
 	currentPodResources = mergeResourceConfig(currentPodResources, currentPodMemoryConfig)
 	currentPodResources = mergeResourceConfig(currentPodResources, currentPodCPUConfig)
 
+	// Before proceeding with the resize, perform a best-effort check to catch potential resize
+	// errors in order to avoid a partial-resize state.
+	if err := m.validatePodResizeAction(ctx, pod, podStatus, currentPodResources, podResources, podContainerChanges); err != nil {
+		klog.ErrorS(err, "Allocated pod resize is not currently feasible", "pod", klog.KObj(pod))
+		resizeResult.Fail(kubecontainer.ErrResizePodInPlace, err.Error())
+		return resizeResult
+	}
+
 	setPodCgroupConfig := func(rName v1.ResourceName, setLimitValue bool) error {
 		var err error
 		resizedResources := &cm.ResourceConfig{}
@@ -800,19 +809,7 @@ func (m *kubeGenericRuntimeManager) doPodResizeAction(pod *v1.Pod, podContainerC
 	defer m.runtimeHelper.SetPodWatchCondition(pod.UID, "doPodResizeAction", func(*kubecontainer.PodStatus) bool { return true })
 
 	if len(podContainerChanges.ContainersToUpdate[v1.ResourceMemory]) > 0 || podContainerChanges.UpdatePodResources {
-		if podResources.Memory != nil {
-			currentPodMemoryUsage, err := pcm.GetPodCgroupMemoryUsage(pod)
-			if err != nil {
-				klog.ErrorS(err, "GetPodCgroupMemoryUsage failed", "pod", pod.Name)
-				resizeResult.Fail(kubecontainer.ErrResizePodInPlace, err.Error())
-				return resizeResult
-			}
-			if currentPodMemoryUsage >= uint64(*podResources.Memory) {
-				klog.ErrorS(nil, "Aborting attempt to set pod memory limit less than current memory usage", "pod", pod.Name)
-				resizeResult.Fail(kubecontainer.ErrResizePodInPlace, fmt.Sprintf("aborting attempt to set pod memory limit less than current memory usage for pod %s", pod.Name))
-				return resizeResult
-			}
-		} else {
+		if podResources.Memory == nil {
 			// Default pod memory limit to the current memory limit if unset to prevent it from updating.
 			// TODO(#128675): This does not support removing limits.
 			podResources.Memory = currentPodMemoryConfig.Memory
@@ -845,6 +842,82 @@ func (m *kubeGenericRuntimeManager) doPodResizeAction(pod *v1.Pod, podContainerC
 	return resizeResult
 }
 
+// validatePodResizeAction checks whether the proposed resize actions are currently viable.
+func (m *kubeGenericRuntimeManager) validatePodResizeAction(
+	ctx context.Context,
+	pod *v1.Pod,
+	podStatus *kubecontainer.PodStatus,
+	currentPodResources, desiredPodResources *cm.ResourceConfig,
+	podContainerChanges podActions,
+) error {
+	if len(podContainerChanges.ContainersToUpdate[v1.ResourceMemory]) > 0 || podContainerChanges.UpdatePodResources {
+		return m.validateMemoryResizeAction(ctx, pod, podStatus, currentPodResources, desiredPodResources, podContainerChanges)
+	}
+
+	return nil
+}
+
+func (m *kubeGenericRuntimeManager) validateMemoryResizeAction(
+	ctx context.Context,
+	pod *v1.Pod,
+	podStatus *kubecontainer.PodStatus,
+	currentPodResources, desiredPodResources *cm.ResourceConfig,
+	podContainerChanges podActions,
+) error {
+	// Determine which memory limits are decreasing.
+	podLimitDecreasing := desiredPodResources.Memory != nil &&
+		(currentPodResources.Memory == nil || // Pod memory limit added
+			*desiredPodResources.Memory < *currentPodResources.Memory) // Pod memory limit decreasing
+
+	decreasingContainerLimits := map[string]int64{} // Map of container name to desired memory limit.
+	for _, cUpdate := range podContainerChanges.ContainersToUpdate[v1.ResourceMemory] {
+		if cUpdate.desiredContainerResources.memoryLimit != 0 {
+			if cUpdate.currentContainerResources == nil || cUpdate.currentContainerResources.memoryLimit == 0 || // Limit added
+				cUpdate.desiredContainerResources.memoryLimit < cUpdate.currentContainerResources.memoryLimit { // Limit decreasing
+				decreasingContainerLimits[cUpdate.container.Name] = cUpdate.desiredContainerResources.memoryLimit
+			}
+		}
+	}
+
+	if !podLimitDecreasing && len(decreasingContainerLimits) == 0 {
+		// No memory limits are decreasing: nothing else to check here.
+		return nil
+	}
+
+	// Check whether any of the new memory limits are below current memory usage.
+	podUsageStats, err := m.runtimeHelper.PodCPUAndMemoryStats(ctx, pod, podStatus)
+	if err != nil {
+		return fmt.Errorf("unable to read memory usage for pod %q", format.Pod(pod))
+	}
+
+	var errs []error
+	if podLimitDecreasing {
+		if podUsageStats.Memory == nil || podUsageStats.Memory.UsageBytes == nil {
+			errs = append(errs, fmt.Errorf("missing pod memory usage"))
+		} else if *podUsageStats.Memory.UsageBytes >= uint64(*desiredPodResources.Memory) {
+			errs = append(errs, fmt.Errorf("attempting to set pod memory limit (%d) below current usage (%d)",
+				*desiredPodResources.Memory, *podUsageStats.Memory.UsageBytes))
+		}
+	}
+	for _, cStats := range podUsageStats.Containers {
+		if desiredLimit, ok := decreasingContainerLimits[cStats.Name]; ok {
+			if cStats.Memory == nil || cStats.Memory.UsageBytes == nil {
+				errs = append(errs, fmt.Errorf("missing container %q memory usage", cStats.Name))
+			} else if *cStats.Memory.UsageBytes >= uint64(desiredLimit) {
+				errs = append(errs, fmt.Errorf("attempting to set container %q memory limit (%d) below current usage (%d)",
+					cStats.Name, desiredLimit, *podUsageStats.Memory.UsageBytes))
+			}
+		}
+	}
+
+	if len(errs) > 0 {
+		agg := utilerrors.NewAggregate(errs)
+		return fmt.Errorf("cannot decrease memory limits: %w", agg)
+	}
+
+	return nil
+}
+
 func (m *kubeGenericRuntimeManager) updatePodContainerResources(pod *v1.Pod, resourceName v1.ResourceName, containersToUpdate []containerToUpdateInfo) error {
 	klog.V(5).InfoS("Updating container resources", "pod", klog.KObj(pod))
 
@@ -1406,7 +1479,7 @@ func (m *kubeGenericRuntimeManager) SyncPod(ctx context.Context, pod *v1.Pod, po
 	// Step 7: For containers in podContainerChanges.ContainersToUpdate[CPU,Memory] list, invoke UpdateContainerResources
 	if resizable, _ := allocation.IsInPlacePodVerticalScalingAllowed(pod); resizable {
 		if len(podContainerChanges.ContainersToUpdate) > 0 || podContainerChanges.UpdatePodResources {
-			result.SyncResults = append(result.SyncResults, m.doPodResizeAction(pod, podContainerChanges))
+			result.SyncResults = append(result.SyncResults, m.doPodResizeAction(ctx, pod, podStatus, podContainerChanges))
 		}
 	}
 
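The core of the new kubelet-side check above is a comparison of each decreasing limit against the usage reported by PodCPUAndMemoryStats. A standalone sketch of that decision using the same statsapi types (the helper name limitBelowUsage is hypothetical, not part of the commit):

package example

import (
	"fmt"

	statsapi "k8s.io/kubelet/pkg/apis/stats/v1alpha1"
)

// limitBelowUsage mirrors the comparison performed by validateMemoryResizeAction above:
// a resize is rejected when the desired memory limit does not exceed reported usage.
func limitBelowUsage(desiredLimit int64, mem *statsapi.MemoryStats) error {
	if mem == nil || mem.UsageBytes == nil {
		return fmt.Errorf("missing memory usage")
	}
	if *mem.UsageBytes >= uint64(desiredLimit) {
		return fmt.Errorf("attempting to set memory limit (%d) below current usage (%d)", desiredLimit, *mem.UsageBytes)
	}
	return nil
}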