@@ -367,41 +367,84 @@ def version(self):
367367return self ._pg_version
368368
def _try_shutdown(self, max_attempts, with_force=False):
    """Try to stop the node gracefully and, optionally, kill it.

    ``self.stop()`` is called up to ``max_attempts`` times. An
    ``ExecUtilException`` triggers another attempt; any other exception
    is reported through ``eprint`` and ends the graceful phase.

    When every graceful attempt failed and ``with_force`` is true, the
    process identified by ``self.pid`` is looked up with ``ps`` and, if
    it is still present, sent ``SIGKILL``. The outcome is reported
    through ``eprint``.

    Args:
        max_attempts: positive ``int``, number of graceful attempts.
        with_force: ``bool``, allow the ``SIGKILL`` fallback.

    Returns:
        ``False`` when the graceful phase failed and ``with_force`` is
        false; ``None`` in every other case.

    Raises:
        RuntimeError: ``ps`` printed something that is neither empty
            nor the PID of the node.
    """
    assert type(max_attempts) == int  # noqa: E721
    assert type(with_force) == bool  # noqa: E721
    assert max_attempts > 0

    # try stopping server N times
    for _ in range(max_attempts):
        try:
            self.stop()
        except ExecUtilException:
            continue  # one more time
        except Exception:
            eprint('cannot stop node {}'.format(self.name))
            break

        return  # OK

    # Force stopping is attempted only when it is explicitly enabled
    if not with_force:
        return False

    node_pid = self.pid
    assert node_pid is not None
    assert type(node_pid) == int  # noqa: E721

    if node_pid == 0:
        return

    # TODO: [2025-02-28] It is really the old ugly code. We have to rewrite it!

    # The list form is kept for diagnostics only. The command is executed
    # with shell=True, so it has to be passed as one string: with a
    # sequence, subprocess would hand everything after 'ps' to the shell
    # itself instead of to ps.
    ps_command = ['ps', '-o', 'pid=', '-p', str(node_pid)]
    ps_command_line = ' '.join(ps_command)

    def query_ps():
        # Print only column pid without headers. ps pads the column and
        # terminates the line, so the text is stripped before it is
        # compared with the PID.
        raw_output = self.os_ops.exec_command(
            cmd=ps_command_line,
            shell=True,
            ignore_errors=True)
        return raw_output.decode('utf-8').strip()

    ps_output = query_ps()

    if ps_output == "":
        # There is no such process any more
        return

    if ps_output != str(node_pid):
        __class__._throw_bugcheck__unexpected_result_of_ps(
            ps_output,
            ps_command)

    try:
        eprint('Force stopping node {0} with PID {1}'.format(self.name, node_pid))
        self.os_ops.kill(node_pid, signal.SIGKILL, expect_error=False)
    except Exception:
        # Best effort: the node may have already stopped
        pass

    # Check that node stopped
    ps_output = query_ps()

    if ps_output == "":
        eprint('Node {0} has been stopped successfully.'.format(self.name))
        return

    if ps_output == str(node_pid):
        eprint('Failed to stop node {0}.'.format(self.name))
        return

    __class__._throw_bugcheck__unexpected_result_of_ps(
        ps_output,
        ps_command)
437+
@staticmethod
def _throw_bugcheck__unexpected_result_of_ps(result, cmd):
    """Raise RuntimeError reporting an unexpected result of ``ps``.

    Args:
        result: ``str``, the text that ``ps`` produced.
        cmd: ``list``, the command line that was executed.

    Raises:
        RuntimeError: always; the message contains ``result`` and ``cmd``.
    """
    assert type(result) == str  # noqa: E721
    assert type(cmd) == list  # noqa: E721

    report = "\n ".join([
        "[BUG CHECK] Unexpected result of command ps:",
        result,
        "-----",
        "Command line is {0}".format(cmd),
    ])
    raise RuntimeError(report)
405448
406449def _assign_master (self ,master ):
407450"""NOTE: this is a private method!"""