13
13
#include <dirent.h>
14
14
#include <fcntl.h>
15
15
#include <libgen.h>
16
+ #include <signal.h>
16
17
#include <sys/file.h>
17
18
#include <sys/stat.h>
18
19
#include <sys/types.h>
@@ -26,42 +27,185 @@ static pgBackup *read_backup_from_file(const char *path);
26
27
27
28
/*
 * Map a truth value to the literal string "true" or "false".
 *
 * There must be no whitespace between the macro name and its parameter list:
 * with a space, the preprocessor defines an object-like macro instead and
 * every use site expands to code that references an undeclared "val".
 */
#define BOOL_TO_STR(val) ((val) ? "true" : "false")
28
29
29
- static int lock_fd = -1 ;
30
+ static bool exit_hook_registered = false;
31
+ static char lock_file [MAXPGPATH ];
32
+
33
+ static void
34
+ unlink_lock_atexit (void )
35
+ {
36
+ int res ;
37
+ res = unlink (lock_file );
38
+ if (res != 0 && res != ENOENT )
39
+ elog (WARNING ,"%s: %s" ,lock_file ,strerror (errno ));
40
+ }
30
41
31
42
/*
32
- * Lock of the catalog with pg_probackup.conf file and return 0.
33
- * If the lock is held by another one, return 1 immediately.
43
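+ * Create a lock file in the backup catalog that records our PID, replacing any stale lock file left behind by an earlier run.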
34
44
*/
35
45
int
36
46
catalog_lock (bool check_catalog )
37
47
{
38
- int ret ;
39
- char id_path [MAXPGPATH ];
40
-
41
- join_path_components (id_path ,backup_path ,BACKUP_CATALOG_CONF_FILE );
42
- lock_fd = open (id_path ,O_RDWR );
43
- if (lock_fd == -1 )
44
- elog (errno == ENOENT ?ERROR :ERROR ,
45
- "cannot open file \"%s\": %s" ,id_path ,strerror (errno ));
46
- #ifdef __IBMC__
47
- ret = lockf (lock_fd ,LOCK_EX |LOCK_NB ,0 );/* non-blocking */
48
+ int fd ;
49
+ char buffer [MAXPGPATH * 2 + 256 ];
50
+ int ntries ;
51
+ int len ;
52
+ int encoded_pid ;
53
+ pid_t my_pid ,
54
+ my_p_pid ;
55
+
56
+ join_path_components (lock_file ,backup_path ,BACKUP_CATALOG_PID );
57
+
58
+ /*
59
+ * If the PID in the lockfile is our own PID or our parent's or
60
+ * grandparent's PID, then the file must be stale (probably left over from
61
+ * a previous system boot cycle). We need to check this because of the
62
+ * likelihood that a reboot will assign exactly the same PID as we had in
63
+ * the previous reboot, or one that's only one or two counts larger and
64
+ * hence the lockfile's PID now refers to an ancestor shell process. We
65
+ * allow pg_ctl to pass down its parent shell PID (our grandparent PID)
66
+ * via the environment variable PG_GRANDPARENT_PID; this is so that
67
+ * launching the postmaster via pg_ctl can be just as reliable as
68
+ * launching it directly. There is no provision for detecting
69
+ * further-removed ancestor processes, but if the init script is written
70
+ * carefully then all but the immediate parent shell will be root-owned
71
+ * processes and so the kill test will fail with EPERM. Note that we
72
+ * cannot get a false negative this way, because an existing postmaster
73
+ * would surely never launch a competing postmaster or pg_ctl process
74
+ * directly.
75
+ */
76
+ my_pid = getpid ();
77
+ #ifndef WIN32
78
+ my_p_pid = getppid ();
48
79
#else
49
- ret = flock (lock_fd ,LOCK_EX |LOCK_NB );/* non-blocking */
80
+
81
+ /*
82
+ * Windows hasn't got getppid(), but doesn't need it since it's not using
83
+ * real kill() either...
84
+ */
85
+ my_p_pid = 0 ;
50
86
#endif
51
- if (ret == -1 )
87
+
88
+ /*
89
+ * We need a loop here because of race conditions. But don't loop forever
90
+ * (for example, a non-writable $backup_path directory might cause a failure
91
+ * that won't go away). 100 tries seems like plenty.
92
+ */
93
+ for (ntries = 0 ;;ntries ++ )
52
94
{
53
- if (errno == EWOULDBLOCK )
95
+ /*
96
+ * Try to create the lock file --- O_EXCL makes this atomic.
97
+ *
98
+ * Think not to make the file protection weaker than 0600. See
99
+ * comments below.
100
+ */
101
+ fd = open (lock_file ,O_RDWR |O_CREAT |O_EXCL ,0600 );
102
+ if (fd >=0 )
103
+ break ;/* Success; exit the retry loop */
104
+
105
+ /*
106
+ * Couldn't create the pid file. Probably it already exists.
107
+ */
108
+ if ((errno != EEXIST && errno != EACCES )|| ntries > 100 )
109
+ elog (ERROR ,"could not create lock file \"%s\": %s" ,
110
+ lock_file ,strerror (errno ));
111
+
112
+ /*
113
+ * Read the file to get the old owner's PID. Note race condition
114
+ * here: file might have been deleted since we tried to create it.
115
+ */
116
+ fd = open (lock_file ,O_RDONLY ,0600 );
117
+ if (fd < 0 )
54
118
{
55
- close (lock_fd );
56
- return 1 ;
119
+ if (errno == ENOENT )
120
+ continue ;/* race condition; try again */
121
+ elog (ERROR ,"could not open lock file \"%s\": %s" ,
122
+ lock_file ,strerror (errno ));
57
123
}
58
- else
124
+ if ((len = read (fd ,buffer ,sizeof (buffer )- 1 ))< 0 )
125
+ elog (ERROR ,"could not read lock file \"%s\": %s" ,
126
+ lock_file ,strerror (errno ));
127
+ close (fd );
128
+
129
+ if (len == 0 )
130
+ elog (ERROR ,"lock file \"%s\" is empty" ,lock_file );
131
+
132
+ buffer [len ]= '\0' ;
133
+ encoded_pid = atoi (buffer );
134
+
135
+ if (encoded_pid <=0 )
136
+ elog (ERROR ,"bogus data in lock file \"%s\": \"%s\"" ,
137
+ lock_file ,buffer );
138
+
139
+ /*
140
+ * Check to see if the other process still exists
141
+ *
142
+ * Per discussion above, my_pid, my_p_pid can be
143
+ * ignored as false matches.
144
+ *
145
+ * Normally kill() will fail with ESRCH if the given PID doesn't
146
+ * exist.
147
+ */
148
+ if (encoded_pid != my_pid && encoded_pid != my_p_pid )
59
149
{
60
- int errno_tmp = errno ;
61
- close (lock_fd );
62
- elog (ERROR ,"cannot lock file \"%s\": %s" ,id_path ,
63
- strerror (errno_tmp ));
150
+ if (kill (encoded_pid ,0 )== 0 ||
151
+ (errno != ESRCH && errno != EPERM ))
152
+ elog (ERROR ,"lock file \"%s\" already exists" ,lock_file );
64
153
}
154
+
155
+ /*
156
+ * Looks like nobody's home. Unlink the file and try again to create
157
+ * it. Need a loop because of possible race condition against other
158
+ * would-be creators.
159
+ */
160
+ if (unlink (lock_file )< 0 )
161
+ elog (ERROR ,"could not remove old lock file \"%s\": %s" ,
162
+ lock_file ,strerror (errno ));
163
+ }
164
+
165
+ /*
166
+ * Successfully created the file, now fill it.
167
+ */
168
+ snprintf (buffer ,sizeof (buffer ),"%d\n" ,my_pid );
169
+
170
+ errno = 0 ;
171
+ if (write (fd ,buffer ,strlen (buffer ))!= strlen (buffer ))
172
+ {
173
+ int save_errno = errno ;
174
+
175
+ close (fd );
176
+ unlink (lock_file );
177
+ /* if write didn't set errno, assume problem is no disk space */
178
+ errno = save_errno ?save_errno :ENOSPC ;
179
+ elog (ERROR ,"could not write lock file \"%s\": %s" ,
180
+ lock_file ,strerror (errno ));
181
+ }
182
+ if (fsync (fd )!= 0 )
183
+ {
184
+ int save_errno = errno ;
185
+
186
+ close (fd );
187
+ unlink (lock_file );
188
+ errno = save_errno ;
189
+ elog (ERROR ,"could not write lock file \"%s\": %s" ,
190
+ lock_file ,strerror (errno ));
191
+ }
192
+ if (close (fd )!= 0 )
193
+ {
194
+ int save_errno = errno ;
195
+
196
+ unlink (lock_file );
197
+ errno = save_errno ;
198
+ elog (ERROR ,"could not write lock file \"%s\": %s" ,
199
+ lock_file ,strerror (errno ));
200
+ }
201
+
202
+ /*
203
+ * Arrange to unlink the lock file(s) at proc_exit.
204
+ */
205
+ if (!exit_hook_registered )
206
+ {
207
+ atexit (unlink_lock_atexit );
208
+ exit_hook_registered = true;
65
209
}
66
210
67
211
if (check_catalog )
@@ -80,16 +224,6 @@ catalog_lock(bool check_catalog)
80
224
return 0 ;
81
225
}
82
226
83
- /*
84
- * Release catalog lock.
85
- */
86
- void
87
- catalog_unlock (void )
88
- {
89
- close (lock_fd );
90
- lock_fd = -1 ;
91
- }
92
-
93
227
/*
94
228
* Create a pgBackup which taken at timestamp.
95
229
* If no backup matches, return NULL.