Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commitade2d61

Browse files
committed
Improve detection of child-process SIGPIPE failures.
Commitffa4cbd added logic to detect SIGPIPE failure of a COPY childprocess, but it only worked correctly if the SIGPIPE occurred in theimmediate child process. Depending on the shell in use and thecomplexity of the shell command string, we might instead get backan exit code of 128 + SIGPIPE, representing a shell error exitreporting SIGPIPE in the child process.We could just hack up ClosePipeToProgram() to add the extra case,but it seems like this is a fairly general issue deserving a moregeneral and better-documented solution. I chose to add a coupleof functions in src/common/wait_error.c, which is a natural placeto know about wait-result encodings, that will test for either aspecific child-process signal type or any child-process signal failure.Then, adjust other places that were doing ad-hoc tests of this typeto use the common functions.In RestoreArchivedFile, this fixes a race condition affecting whetherthe process will report an error or just silently proc_exit(1): before,that depended on whether the intermediate shell got SIGTERM'd itselfor reported a child process failing on SIGTERM.Like the previous patch, back-patch to v10; we could go furtherbut there seems no real need to.Per report from Erik Rijkers.Discussion:https://postgr.es/m/f3683f87ab1701bea5d86a7742b22432@xs4all.nl
1 parent5e09280 commitade2d61

File tree

5 files changed

+55
-23
lines changed

5 files changed

+55
-23
lines changed

‎src/backend/access/transam/xlogarchive.c

Lines changed: 5 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,6 @@ RestoreArchivedFile(char *path, const char *xlogfname,
5959
char*endp;
6060
constchar*sp;
6161
intrc;
62-
boolsignaled;
6362
structstatstat_buf;
6463
XLogSegNorestartSegNo;
6564
XLogRecPtrrestartRedoPtr;
@@ -289,17 +288,12 @@ RestoreArchivedFile(char *path, const char *xlogfname,
289288
* will perform an immediate shutdown when it sees us exiting
290289
* unexpectedly.
291290
*
292-
* Per the Single Unix Spec, shells report exit status > 128 when a called
293-
* command died on a signal. Also, 126 and 127 are used to report
294-
* problems such as an unfindable command; treat those as fatal errors
295-
* too.
291+
* We treat hard shell errors such as "command not found" as fatal, too.
296292
*/
297-
if (WIFSIGNALED(rc)&&WTERMSIG(rc)==SIGTERM)
293+
if (wait_result_is_signal(rc,SIGTERM))
298294
proc_exit(1);
299295

300-
signaled=WIFSIGNALED(rc)||WEXITSTATUS(rc)>125;
301-
302-
ereport(signaled ?FATAL :DEBUG2,
296+
ereport(wait_result_is_any_signal(rc, true) ?FATAL :DEBUG2,
303297
(errmsg("could not restore file \"%s\" from archive: %s",
304298
xlogfname,wait_result_to_str(rc))));
305299

@@ -335,7 +329,6 @@ ExecuteRecoveryCommand(const char *command, const char *commandName, bool failOn
335329
char*endp;
336330
constchar*sp;
337331
intrc;
338-
boolsignaled;
339332
XLogSegNorestartSegNo;
340333
XLogRecPtrrestartRedoPtr;
341334
TimeLineIDrestartTli;
@@ -403,12 +396,9 @@ ExecuteRecoveryCommand(const char *command, const char *commandName, bool failOn
403396
{
404397
/*
405398
* If the failure was due to any sort of signal, it's best to punt and
406-
* abort recovery. See also detailed comments on signals in
407-
* RestoreArchivedFile().
399+
* abort recovery. See comments in RestoreArchivedFile().
408400
*/
409-
signaled=WIFSIGNALED(rc)||WEXITSTATUS(rc)>125;
410-
411-
ereport((signaled&&failOnSignal) ?FATAL :WARNING,
401+
ereport((failOnSignal&&wait_result_is_any_signal(rc, true)) ?FATAL :WARNING,
412402
/*------
413403
translator: First %s represents a postgresql.conf parameter name like
414404
"recovery_end_command", the 2nd is the value of that parameter, the

‎src/backend/commands/copy.c

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717
#include<ctype.h>
1818
#include<unistd.h>
1919
#include<sys/stat.h>
20-
#include<sys/wait.h>
2120

2221
#include"access/heapam.h"
2322
#include"access/htup_details.h"
@@ -1718,7 +1717,7 @@ ClosePipeToProgram(CopyState cstate)
17181717
* problem.
17191718
*/
17201719
if (cstate->is_copy_from&& !cstate->reached_eof&&
1721-
WIFSIGNALED(pclose_rc)&&WTERMSIG(pclose_rc)==SIGPIPE)
1720+
wait_result_is_signal(pclose_rc,SIGPIPE))
17221721
return;
17231722

17241723
ereport(ERROR,

‎src/backend/postmaster/pgarch.c

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -627,13 +627,11 @@ pgarch_archiveXlog(char *xlog)
627627
* If either the shell itself, or a called command, died on a signal,
628628
* abort the archiver. We do this because system() ignores SIGINT and
629629
* SIGQUIT while waiting; so a signal is very likely something that
630-
* should have interrupted us too. If we overreact it's no big deal,
631-
* the postmaster will just start the archiver again.
632-
*
633-
* Per the Single Unix Spec, shells report exit status > 128 when a
634-
* called command died on a signal.
630+
* should have interrupted us too. Also die if the shell got a hard
631+
* "command not found" type of error. If we overreact it's no big
632+
* deal, the postmaster will just start the archiver again.
635633
*/
636-
intlev=(WIFSIGNALED(rc)||WEXITSTATUS(rc)>128) ?FATAL :LOG;
634+
intlev=wait_result_is_any_signal(rc, true) ?FATAL :LOG;
637635

638636
if (WIFEXITED(rc))
639637
{

‎src/common/wait_error.c

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,3 +82,46 @@ wait_result_to_str(int exitstatus)
8282

8383
returnpstrdup(str);
8484
}
85+
86+
/*
87+
* Return true if a wait(2) result indicates that the child process
88+
* died due to the specified signal.
89+
*
90+
* The reason this is worth having a wrapper function for is that
91+
* there are two cases: the signal might have been received by our
92+
* immediate child process, or there might've been a shell process
93+
* between us and the child that died. The shell will, per POSIX,
94+
* report the child death using exit code 128 + signal number.
95+
*
96+
* If there is no possibility of an intermediate shell, this function
97+
* need not (and probably should not) be used.
98+
*/
99+
bool
100+
wait_result_is_signal(intexit_status,intsignum)
101+
{
102+
if (WIFSIGNALED(exit_status)&&WTERMSIG(exit_status)==signum)
103+
return true;
104+
if (WIFEXITED(exit_status)&&WEXITSTATUS(exit_status)==128+signum)
105+
return true;
106+
return false;
107+
}
108+
109+
/*
110+
* Return true if a wait(2) result indicates that the child process
111+
* died due to any signal. We consider either direct child death
112+
* or a shell report of child process death as matching the condition.
113+
*
114+
* If include_command_not_found is true, also return true for shell
115+
* exit codes indicating "command not found" and the like
116+
* (specifically, exit codes 126 and 127; see above).
117+
*/
118+
bool
119+
wait_result_is_any_signal(intexit_status,boolinclude_command_not_found)
120+
{
121+
if (WIFSIGNALED(exit_status))
122+
return true;
123+
if (WIFEXITED(exit_status)&&
124+
WEXITSTATUS(exit_status)> (include_command_not_found ?125 :128))
125+
return true;
126+
return false;
127+
}

‎src/include/port.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -519,5 +519,7 @@ extern char *escape_single_quotes_ascii(const char *src);
519519

520520
/* port/wait_error.c */
521521
externchar*wait_result_to_str(intexit_status);
522+
externboolwait_result_is_signal(intexit_status,intsignum);
523+
externboolwait_result_is_any_signal(intexit_status,boolinclude_command_not_found);
522524

523525
#endif/* PG_PORT_H */

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp