|
| 1 | +/*------------------------------------------------------------------------- |
| 2 | + * |
| 3 | + * backup_manifest.c |
| 4 | + * code for generating and sending a backup manifest |
| 5 | + * |
| 6 | + * Portions Copyright (c) 2010-2020, PostgreSQL Global Development Group |
| 7 | + * |
| 8 | + * IDENTIFICATION |
| 9 | + * src/backend/replication/backup_manifest.c |
| 10 | + * |
| 11 | + *------------------------------------------------------------------------- |
| 12 | + */ |
| 13 | +#include"postgres.h" |
| 14 | + |
| 15 | +#include"access/timeline.h" |
| 16 | +#include"libpq/libpq.h" |
| 17 | +#include"libpq/pqformat.h" |
| 18 | +#include"mb/pg_wchar.h" |
| 19 | +#include"replication/backup_manifest.h" |
| 20 | +#include"utils/builtins.h" |
| 21 | +#include"utils/json.h" |
| 22 | + |
| 23 | +/* |
| 24 | + * Does the user want a backup manifest? |
| 25 | + * |
| 26 | + * It's simplest to always have a manifest_info object, so that we don't need |
| 27 | + * checks for NULL pointers in too many places. However, if the user doesn't |
| 28 | + * want a manifest, we set manifest->buffile to NULL. |
| 29 | + */ |
| 30 | +staticinlinebool |
| 31 | +IsManifestEnabled(manifest_info*manifest) |
| 32 | +{ |
| 33 | +return (manifest->buffile!=NULL); |
| 34 | +} |
| 35 | + |
/*
 * Convenience macro for appending printf-style formatted data to the backup
 * manifest.
 *
 * The arguments following "manifest" are handed to psprintf(); the resulting
 * string is passed to AppendStringToManifest() and then released with pfree().
 *
 * The body is wrapped in do { ... } while (0) so that an invocation followed
 * by a semicolon forms exactly one statement.  That keeps the macro safe to
 * use as the unbraced body of an if that has an else branch.
 */
#define AppendToManifest(manifest, ...) \
	do { \
		char *_manifest_s = psprintf(__VA_ARGS__); \
		AppendStringToManifest(manifest, _manifest_s); \
		pfree(_manifest_s); \
	} while (0)
| 45 | + |
| 46 | +/* |
| 47 | + * Initialize state so that we can construct a backup manifest. |
| 48 | + * |
| 49 | + * NB: Although the checksum type for the data files is configurable, the |
| 50 | + * checksum for the manifest itself always uses SHA-256. See comments in |
| 51 | + * SendBackupManifest. |
| 52 | + */ |
| 53 | +void |
| 54 | +InitializeManifest(manifest_info*manifest,manifest_optionwant_manifest, |
| 55 | +pg_checksum_typemanifest_checksum_type) |
| 56 | +{ |
| 57 | +if (want_manifest==MANIFEST_OPTION_NO) |
| 58 | +manifest->buffile=NULL; |
| 59 | +else |
| 60 | +manifest->buffile=BufFileCreateTemp(false); |
| 61 | +manifest->checksum_type=manifest_checksum_type; |
| 62 | +pg_sha256_init(&manifest->manifest_ctx); |
| 63 | +manifest->manifest_size=UINT64CONST(0); |
| 64 | +manifest->force_encode= (want_manifest==MANIFEST_OPTION_FORCE_ENCODE); |
| 65 | +manifest->first_file= true; |
| 66 | +manifest->still_checksumming= true; |
| 67 | + |
| 68 | +if (want_manifest!=MANIFEST_OPTION_NO) |
| 69 | +AppendToManifest(manifest, |
| 70 | +"{ \"PostgreSQL-Backup-Manifest-Version\": 1,\n" |
| 71 | +"\"Files\": ["); |
| 72 | +} |
| 73 | + |
| 74 | +/* |
| 75 | + * Append a cstring to the manifest. |
| 76 | + */ |
| 77 | +void |
| 78 | +AppendStringToManifest(manifest_info*manifest,char*s) |
| 79 | +{ |
| 80 | +intlen=strlen(s); |
| 81 | +size_twritten; |
| 82 | + |
| 83 | +Assert(manifest!=NULL); |
| 84 | +if (manifest->still_checksumming) |
| 85 | +pg_sha256_update(&manifest->manifest_ctx, (uint8*)s,len); |
| 86 | +written=BufFileWrite(manifest->buffile,s,len); |
| 87 | +if (written!=len) |
| 88 | +ereport(ERROR, |
| 89 | +(errcode_for_file_access(), |
| 90 | +errmsg("could not write to temporary file: %m"))); |
| 91 | +manifest->manifest_size+=len; |
| 92 | +} |
| 93 | + |
| 94 | +/* |
| 95 | + * Add an entry to the backup manifest for a file. |
| 96 | + */ |
| 97 | +void |
| 98 | +AddFileToManifest(manifest_info*manifest,constchar*spcoid, |
| 99 | +constchar*pathname,size_tsize,pg_time_tmtime, |
| 100 | +pg_checksum_context*checksum_ctx) |
| 101 | +{ |
| 102 | +charpathbuf[MAXPGPATH]; |
| 103 | +intpathlen; |
| 104 | +StringInfoDatabuf; |
| 105 | + |
| 106 | +if (!IsManifestEnabled(manifest)) |
| 107 | +return; |
| 108 | + |
| 109 | +/* |
| 110 | + * If this file is part of a tablespace, the pathname passed to this |
| 111 | + * function will be relative to the tar file that contains it. We want the |
| 112 | + * pathname relative to the data directory (ignoring the intermediate |
| 113 | + * symlink traversal). |
| 114 | + */ |
| 115 | +if (spcoid!=NULL) |
| 116 | +{ |
| 117 | +snprintf(pathbuf,sizeof(pathbuf),"pg_tblspc/%s/%s",spcoid, |
| 118 | +pathname); |
| 119 | +pathname=pathbuf; |
| 120 | +} |
| 121 | + |
| 122 | +/* |
| 123 | + * Each file's entry needs to be separated from any entry that follows by a |
| 124 | + * comma, but there's no comma before the first one or after the last one. |
| 125 | + * To make that work, adding a file to the manifest starts by terminating |
| 126 | + * the most recently added line, with a comma if appropriate, but does not |
| 127 | + * terminate the line inserted for this file. |
| 128 | + */ |
| 129 | +initStringInfo(&buf); |
| 130 | +if (manifest->first_file) |
| 131 | +{ |
| 132 | +appendStringInfoString(&buf,"\n"); |
| 133 | +manifest->first_file= false; |
| 134 | +} |
| 135 | +else |
| 136 | +appendStringInfoString(&buf,",\n"); |
| 137 | + |
| 138 | +/* |
| 139 | + * Write the relative pathname to this file out to the manifest. The |
| 140 | + * manifest is always stored in UTF-8, so we have to encode paths that are |
| 141 | + * not valid in that encoding. |
| 142 | + */ |
| 143 | +pathlen=strlen(pathname); |
| 144 | +if (!manifest->force_encode&& |
| 145 | +pg_verify_mbstr(PG_UTF8,pathname,pathlen, true)) |
| 146 | +{ |
| 147 | +appendStringInfoString(&buf,"{ \"Path\": "); |
| 148 | +escape_json(&buf,pathname); |
| 149 | +appendStringInfoString(&buf,", "); |
| 150 | +} |
| 151 | +else |
| 152 | +{ |
| 153 | +appendStringInfoString(&buf,"{ \"Encoded-Path\": \""); |
| 154 | +enlargeStringInfo(&buf,2*pathlen); |
| 155 | +buf.len+=hex_encode((char*)pathname,pathlen, |
| 156 | +&buf.data[buf.len]); |
| 157 | +appendStringInfoString(&buf,"\", "); |
| 158 | +} |
| 159 | + |
| 160 | +appendStringInfo(&buf,"\"Size\": %zu, ",size); |
| 161 | + |
| 162 | +/* |
| 163 | + * Convert last modification time to a string and append it to the |
| 164 | + * manifest. Since it's not clear what time zone to use and since time |
| 165 | + * zone definitions can change, possibly causing confusion, use GMT |
| 166 | + * always. |
| 167 | + */ |
| 168 | +appendStringInfoString(&buf,"\"Last-Modified\": \""); |
| 169 | +enlargeStringInfo(&buf,128); |
| 170 | +buf.len+=pg_strftime(&buf.data[buf.len],128,"%Y-%m-%d %H:%M:%S %Z", |
| 171 | +pg_gmtime(&mtime)); |
| 172 | +appendStringInfoString(&buf,"\""); |
| 173 | + |
| 174 | +/* Add checksum information. */ |
| 175 | +if (checksum_ctx->type!=CHECKSUM_TYPE_NONE) |
| 176 | +{ |
| 177 | +uint8checksumbuf[PG_CHECKSUM_MAX_LENGTH]; |
| 178 | +intchecksumlen; |
| 179 | + |
| 180 | +checksumlen=pg_checksum_final(checksum_ctx,checksumbuf); |
| 181 | + |
| 182 | +appendStringInfo(&buf, |
| 183 | +", \"Checksum-Algorithm\": \"%s\", \"Checksum\": \"", |
| 184 | +pg_checksum_type_name(checksum_ctx->type)); |
| 185 | +enlargeStringInfo(&buf,2*checksumlen); |
| 186 | +buf.len+=hex_encode((char*)checksumbuf,checksumlen, |
| 187 | +&buf.data[buf.len]); |
| 188 | +appendStringInfoString(&buf,"\""); |
| 189 | +} |
| 190 | + |
| 191 | +/* Close out the object. */ |
| 192 | +appendStringInfoString(&buf," }"); |
| 193 | + |
| 194 | +/* OK, add it to the manifest. */ |
| 195 | +AppendStringToManifest(manifest,buf.data); |
| 196 | + |
| 197 | +/* Avoid leaking memory. */ |
| 198 | +pfree(buf.data); |
| 199 | +} |
| 200 | + |
| 201 | +/* |
| 202 | + * Add information about the WAL that will need to be replayed when restoring |
| 203 | + * this backup to the manifest. |
| 204 | + */ |
| 205 | +void |
| 206 | +AddWALInfoToManifest(manifest_info*manifest,XLogRecPtrstartptr, |
| 207 | +TimeLineIDstarttli,XLogRecPtrendptr,TimeLineIDendtli) |
| 208 | +{ |
| 209 | +List*timelines; |
| 210 | +ListCell*lc; |
| 211 | +boolfirst_wal_range= true; |
| 212 | +boolfound_start_timeline= false; |
| 213 | + |
| 214 | +if (!IsManifestEnabled(manifest)) |
| 215 | +return; |
| 216 | + |
| 217 | +/* Terminate the list of files. */ |
| 218 | +AppendStringToManifest(manifest,"\n],\n"); |
| 219 | + |
| 220 | +/* Read the timeline history for the ending timeline. */ |
| 221 | +timelines=readTimeLineHistory(endtli); |
| 222 | + |
| 223 | +/* Start a list of LSN ranges. */ |
| 224 | +AppendStringToManifest(manifest,"\"WAL-Ranges\": [\n"); |
| 225 | + |
| 226 | +foreach(lc,timelines) |
| 227 | +{ |
| 228 | +TimeLineHistoryEntry*entry=lfirst(lc); |
| 229 | +XLogRecPtrtl_beginptr; |
| 230 | + |
| 231 | +/* |
| 232 | + * We only care about timelines that were active during the backup. |
| 233 | + * Skip any that ended before the backup started. (Note that if |
| 234 | + * entry->end is InvalidXLogRecPtr, it means that the timeline has not |
| 235 | + * yet ended.) |
| 236 | + */ |
| 237 | +if (!XLogRecPtrIsInvalid(entry->end)&&entry->end<startptr) |
| 238 | +continue; |
| 239 | + |
| 240 | +/* |
| 241 | + * Because the timeline history file lists newer timelines before |
| 242 | + * older ones, the first timeline we encounter that is new enough to |
| 243 | + * matter ought to match the ending timeline of the backup. |
| 244 | + */ |
| 245 | +if (first_wal_range&&endtli!=entry->tli) |
| 246 | +ereport(ERROR, |
| 247 | +errmsg("expected end timeline %u but found timeline %u", |
| 248 | +starttli,entry->tli)); |
| 249 | + |
| 250 | +if (!XLogRecPtrIsInvalid(entry->begin)) |
| 251 | +tl_beginptr=entry->begin; |
| 252 | +else |
| 253 | +{ |
| 254 | +tl_beginptr=startptr; |
| 255 | + |
| 256 | +/* |
| 257 | + * If we reach a TLI that has no valid beginning LSN, there can't |
| 258 | + * be any more timelines in the history after this point, so we'd |
| 259 | + * better have arrived at the expected starting TLI. If not, |
| 260 | + * something's gone horribly wrong. |
| 261 | + */ |
| 262 | +if (starttli!=entry->tli) |
| 263 | +ereport(ERROR, |
| 264 | +errmsg("expected start timeline %u but found timeline %u", |
| 265 | +starttli,entry->tli)); |
| 266 | +} |
| 267 | + |
| 268 | +AppendToManifest(manifest, |
| 269 | +"%s{ \"Timeline\": %u, \"Start-LSN\": \"%X/%X\", \"End-LSN\": \"%X/%X\" }", |
| 270 | +first_wal_range ?"" :",\n", |
| 271 | +entry->tli, |
| 272 | + (uint32) (tl_beginptr >>32), (uint32)tl_beginptr, |
| 273 | + (uint32) (endptr >>32), (uint32)endptr); |
| 274 | + |
| 275 | +if (starttli==entry->tli) |
| 276 | +{ |
| 277 | +found_start_timeline= true; |
| 278 | +break; |
| 279 | +} |
| 280 | + |
| 281 | +endptr=entry->begin; |
| 282 | +first_wal_range= false; |
| 283 | +} |
| 284 | + |
| 285 | +/* |
| 286 | + * The last entry in the timeline history for the ending timeline should |
| 287 | + * be the ending timeline itself. Verify that this is what we observed. |
| 288 | + */ |
| 289 | +if (!found_start_timeline) |
| 290 | +ereport(ERROR, |
| 291 | +errmsg("start timeline %u not found history of timeline %u", |
| 292 | +starttli,endtli)); |
| 293 | + |
| 294 | +/* Terminate the list of WAL ranges. */ |
| 295 | +AppendStringToManifest(manifest,"\n],\n"); |
| 296 | +} |
| 297 | + |
| 298 | +/* |
| 299 | + * Finalize the backup manifest, and send it to the client. |
| 300 | + */ |
| 301 | +void |
| 302 | +SendBackupManifest(manifest_info*manifest) |
| 303 | +{ |
| 304 | +StringInfoDataprotobuf; |
| 305 | +uint8checksumbuf[PG_SHA256_DIGEST_LENGTH]; |
| 306 | +charchecksumstringbuf[PG_SHA256_DIGEST_STRING_LENGTH]; |
| 307 | +size_tmanifest_bytes_done=0; |
| 308 | + |
| 309 | +if (!IsManifestEnabled(manifest)) |
| 310 | +return; |
| 311 | + |
| 312 | +/* |
| 313 | + * Append manifest checksum, so that the problems with the manifest itself |
| 314 | + * can be detected. |
| 315 | + * |
| 316 | + * We always use SHA-256 for this, regardless of what algorithm is chosen |
| 317 | + * for checksumming the files. If we ever want to make the checksum |
| 318 | + * algorithm used for the manifest file variable, the client will need a |
| 319 | + * way to figure out which algorithm to use as close to the beginning of |
| 320 | + * the manifest file as possible, to avoid having to read the whole thing |
| 321 | + * twice. |
| 322 | + */ |
| 323 | +manifest->still_checksumming= false; |
| 324 | +pg_sha256_final(&manifest->manifest_ctx,checksumbuf); |
| 325 | +AppendStringToManifest(manifest,"\"Manifest-Checksum\": \""); |
| 326 | +hex_encode((char*)checksumbuf,sizeofchecksumbuf,checksumstringbuf); |
| 327 | +checksumstringbuf[PG_SHA256_DIGEST_STRING_LENGTH-1]='\0'; |
| 328 | +AppendStringToManifest(manifest,checksumstringbuf); |
| 329 | +AppendStringToManifest(manifest,"\"}\n"); |
| 330 | + |
| 331 | +/* |
| 332 | + * We've written all the data to the manifest file. Rewind the file so |
| 333 | + * that we can read it all back. |
| 334 | + */ |
| 335 | +if (BufFileSeek(manifest->buffile,0,0L,SEEK_SET)) |
| 336 | +ereport(ERROR, |
| 337 | +(errcode_for_file_access(), |
| 338 | +errmsg("could not rewind temporary file: %m"))); |
| 339 | + |
| 340 | +/* Send CopyOutResponse message */ |
| 341 | +pq_beginmessage(&protobuf,'H'); |
| 342 | +pq_sendbyte(&protobuf,0);/* overall format */ |
| 343 | +pq_sendint16(&protobuf,0);/* natts */ |
| 344 | +pq_endmessage(&protobuf); |
| 345 | + |
| 346 | +/* |
| 347 | + * Send CopyData messages. |
| 348 | + * |
| 349 | + * We choose to read back the data from the temporary file in chunks of |
| 350 | + * size BLCKSZ; this isn't necessary, but buffile.c uses that as the I/O |
| 351 | + * size, so it seems to make sense to match that value here. |
| 352 | + */ |
| 353 | +while (manifest_bytes_done<manifest->manifest_size) |
| 354 | +{ |
| 355 | +charmanifestbuf[BLCKSZ]; |
| 356 | +size_tbytes_to_read; |
| 357 | +size_trc; |
| 358 | + |
| 359 | +bytes_to_read=Min(sizeof(manifestbuf), |
| 360 | +manifest->manifest_size-manifest_bytes_done); |
| 361 | +rc=BufFileRead(manifest->buffile,manifestbuf,bytes_to_read); |
| 362 | +if (rc!=bytes_to_read) |
| 363 | +ereport(ERROR, |
| 364 | +(errcode_for_file_access(), |
| 365 | +errmsg("could not read from temporary file: %m"))); |
| 366 | +pq_putmessage('d',manifestbuf,bytes_to_read); |
| 367 | +manifest_bytes_done+=bytes_to_read; |
| 368 | +} |
| 369 | + |
| 370 | +/* No more data, so send CopyDone message */ |
| 371 | +pq_putemptymessage('c'); |
| 372 | + |
| 373 | +/* Release resources */ |
| 374 | +BufFileClose(manifest->buffile); |
| 375 | +} |