@@ -34,6 +34,7 @@ import (
 	"k8s.io/apimachinery/pkg/api/resource"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	kubetypes "k8s.io/apimachinery/pkg/types"
+	utilerrors "k8s.io/apimachinery/pkg/util/errors"
 	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
 	utilversion "k8s.io/apimachinery/pkg/util/version"
 	utilfeature "k8s.io/apiserver/pkg/util/feature"
@@ -696,7 +697,7 @@ func (m *kubeGenericRuntimeManager) computePodResizeAction(pod *v1.Pod, containe
 	return true
 }
 
-func (m *kubeGenericRuntimeManager) doPodResizeAction(pod *v1.Pod, podContainerChanges podActions) *kubecontainer.SyncResult {
+func (m *kubeGenericRuntimeManager) doPodResizeAction(ctx context.Context, pod *v1.Pod, podStatus *kubecontainer.PodStatus, podContainerChanges podActions) *kubecontainer.SyncResult {
 	resizeResult := kubecontainer.NewSyncResult(kubecontainer.ResizePodInPlace, format.Pod(pod))
 	pcm := m.containerManager.NewPodContainerManager()
 	//TODO(vinaykul,InPlacePodVerticalScaling): Figure out best way to get enforceMemoryQoS value (parameter #4 below) in platform-agnostic way
@@ -708,26 +709,34 @@ func (m *kubeGenericRuntimeManager) doPodResizeAction(pod *v1.Pod, podContainerC
 	podResources := cm.ResourceConfigForPod(pod, enforceCPULimits, uint64((m.cpuCFSQuotaPeriod.Duration)/time.Microsecond), false)
 	if podResources == nil {
 		klog.ErrorS(nil, "Unable to get resource configuration", "pod", klog.KObj(pod))
-		resizeResult.Fail(kubecontainer.ErrResizePodInPlace, fmt.Sprintf("unable to get resource configuration processing resize for pod %s", pod.Name))
+		resizeResult.Fail(kubecontainer.ErrResizePodInPlace, fmt.Sprintf("unable to get resource configuration processing resize for pod %q", format.Pod(pod)))
 		return resizeResult
 	}
 	currentPodMemoryConfig, err := pcm.GetPodCgroupConfig(pod, v1.ResourceMemory)
 	if err != nil {
 		klog.ErrorS(nil, "Unable to get pod cgroup memory config", "pod", klog.KObj(pod))
-		resizeResult.Fail(kubecontainer.ErrResizePodInPlace, fmt.Sprintf("unable to get pod cgroup memory config for pod %s", pod.Name))
+		resizeResult.Fail(kubecontainer.ErrResizePodInPlace, fmt.Sprintf("unable to get pod cgroup memory config for pod %q", format.Pod(pod)))
 		return resizeResult
 	}
 	currentPodCPUConfig, err := pcm.GetPodCgroupConfig(pod, v1.ResourceCPU)
 	if err != nil {
 		klog.ErrorS(nil, "Unable to get pod cgroup cpu config", "pod", klog.KObj(pod))
-		resizeResult.Fail(kubecontainer.ErrResizePodInPlace, fmt.Sprintf("unable to get pod cgroup cpu config for pod %s", pod.Name))
+		resizeResult.Fail(kubecontainer.ErrResizePodInPlace, fmt.Sprintf("unable to get pod cgroup cpu config for pod %q", format.Pod(pod)))
 		return resizeResult
 	}
 
 	currentPodResources := podResources
 	currentPodResources = mergeResourceConfig(currentPodResources, currentPodMemoryConfig)
 	currentPodResources = mergeResourceConfig(currentPodResources, currentPodCPUConfig)
 
+	// Before proceeding with the resize, perform a best-effort check to catch potential resize
+	// errors in order to avoid a partial-resize state.
+	if err := m.validatePodResizeAction(ctx, pod, podStatus, currentPodResources, podResources, podContainerChanges); err != nil {
+		klog.ErrorS(err, "Allocated pod resize is not currently feasible", "pod", klog.KObj(pod))
+		resizeResult.Fail(kubecontainer.ErrResizePodInPlace, err.Error())
+		return resizeResult
+	}
+
 	setPodCgroupConfig := func(rName v1.ResourceName, setLimitValue bool) error {
 		var err error
 		resizedResources := &cm.ResourceConfig{}
@@ -800,19 +809,7 @@ func (m *kubeGenericRuntimeManager) doPodResizeAction(pod *v1.Pod, podContainerC
 	defer m.runtimeHelper.SetPodWatchCondition(pod.UID, "doPodResizeAction", func(*kubecontainer.PodStatus) bool { return true })
 
 	if len(podContainerChanges.ContainersToUpdate[v1.ResourceMemory]) > 0 || podContainerChanges.UpdatePodResources {
-		if podResources.Memory != nil {
-			currentPodMemoryUsage, err := pcm.GetPodCgroupMemoryUsage(pod)
-			if err != nil {
-				klog.ErrorS(err, "GetPodCgroupMemoryUsage failed", "pod", pod.Name)
-				resizeResult.Fail(kubecontainer.ErrResizePodInPlace, err.Error())
-				return resizeResult
-			}
-			if currentPodMemoryUsage >= uint64(*podResources.Memory) {
-				klog.ErrorS(nil, "Aborting attempt to set pod memory limit less than current memory usage", "pod", pod.Name)
-				resizeResult.Fail(kubecontainer.ErrResizePodInPlace, fmt.Sprintf("aborting attempt to set pod memory limit less than current memory usage for pod %s", pod.Name))
-				return resizeResult
-			}
-		} else {
+		if podResources.Memory == nil {
 			// Default pod memory limit to the current memory limit if unset to prevent it from updating.
 			// TODO(#128675): This does not support removing limits.
 			podResources.Memory = currentPodMemoryConfig.Memory
@@ -845,6 +842,82 @@ func (m *kubeGenericRuntimeManager) doPodResizeAction(pod *v1.Pod, podContainerC
 	return resizeResult
 }
 
+// validatePodResizeAction checks whether the proposed resize actions are currently viable.
+func (m *kubeGenericRuntimeManager) validatePodResizeAction(
+	ctx context.Context,
+	pod *v1.Pod,
+	podStatus *kubecontainer.PodStatus,
+	currentPodResources, desiredPodResources *cm.ResourceConfig,
+	podContainerChanges podActions,
+) error {
+	if len(podContainerChanges.ContainersToUpdate[v1.ResourceMemory]) > 0 || podContainerChanges.UpdatePodResources {
+		return m.validateMemoryResizeAction(ctx, pod, podStatus, currentPodResources, desiredPodResources, podContainerChanges)
+	}
+
+	return nil
+}
+
+func (m *kubeGenericRuntimeManager) validateMemoryResizeAction(
+	ctx context.Context,
+	pod *v1.Pod,
+	podStatus *kubecontainer.PodStatus,
+	currentPodResources, desiredPodResources *cm.ResourceConfig,
+	podContainerChanges podActions,
+) error {
+	// Determine which memory limits are decreasing.
+	podLimitDecreasing := desiredPodResources.Memory != nil &&
+		(currentPodResources.Memory == nil || // Pod memory limit added
+			*desiredPodResources.Memory < *currentPodResources.Memory) // Pod memory limit decreasing
+
+	decreasingContainerLimits := map[string]int64{} // Map of container name to desired memory limit.
+	for _, cUpdate := range podContainerChanges.ContainersToUpdate[v1.ResourceMemory] {
+		if cUpdate.desiredContainerResources.memoryLimit != 0 {
+			if cUpdate.currentContainerResources == nil || cUpdate.currentContainerResources.memoryLimit == 0 || // Limit added
+				cUpdate.desiredContainerResources.memoryLimit < cUpdate.currentContainerResources.memoryLimit { // Limit decreasing
+				decreasingContainerLimits[cUpdate.container.Name] = cUpdate.desiredContainerResources.memoryLimit
+			}
+		}
+	}
+
+	if !podLimitDecreasing && len(decreasingContainerLimits) == 0 {
+		// No memory limits are decreasing: nothing else to check here.
+		return nil
+	}
+
+	// Check whether any of the new memory limits are below current memory usage.
+	podUsageStats, err := m.runtimeHelper.PodCPUAndMemoryStats(ctx, pod, podStatus)
+	if err != nil {
+		return fmt.Errorf("unable to read memory usage for pod %q", format.Pod(pod))
+	}
+
+	var errs []error
+	if podLimitDecreasing {
+		if podUsageStats.Memory == nil || podUsageStats.Memory.UsageBytes == nil {
+			errs = append(errs, fmt.Errorf("missing pod memory usage"))
+		} else if *podUsageStats.Memory.UsageBytes >= uint64(*desiredPodResources.Memory) {
+			errs = append(errs, fmt.Errorf("attempting to set pod memory limit (%d) below current usage (%d)",
+				*desiredPodResources.Memory, *podUsageStats.Memory.UsageBytes))
+		}
+	}
+	for _, cStats := range podUsageStats.Containers {
+		if desiredLimit, ok := decreasingContainerLimits[cStats.Name]; ok {
+			if cStats.Memory == nil || cStats.Memory.UsageBytes == nil {
+				errs = append(errs, fmt.Errorf("missing container %q memory usage", cStats.Name))
+			} else if *cStats.Memory.UsageBytes >= uint64(desiredLimit) {
+				errs = append(errs, fmt.Errorf("attempting to set container %q memory limit (%d) below current usage (%d)",
+					cStats.Name, desiredLimit, *podUsageStats.Memory.UsageBytes))
+			}
+		}
+	}
+
+	if len(errs) > 0 {
+		agg := utilerrors.NewAggregate(errs)
+		return fmt.Errorf("cannot decrease memory limits: %w", agg)
+	}
+
+	return nil
+}
+
 func (m *kubeGenericRuntimeManager) updatePodContainerResources(pod *v1.Pod, resourceName v1.ResourceName, containersToUpdate []containerToUpdateInfo) error {
 	klog.V(5).InfoS("Updating container resources", "pod", klog.KObj(pod))
 
@@ -1406,7 +1479,7 @@ func (m *kubeGenericRuntimeManager) SyncPod(ctx context.Context, pod *v1.Pod, po
 	// Step 7: For containers in podContainerChanges.ContainersToUpdate[CPU,Memory] list, invoke UpdateContainerResources
 	if resizable, _ := allocation.IsInPlacePodVerticalScalingAllowed(pod); resizable {
 		if len(podContainerChanges.ContainersToUpdate) > 0 || podContainerChanges.UpdatePodResources {
-			result.SyncResults = append(result.SyncResults, m.doPodResizeAction(pod, podContainerChanges))
+			result.SyncResults = append(result.SyncResults, m.doPodResizeAction(ctx, pod, podStatus, podContainerChanges))
 		}
 	}
 