@@ -6326,6 +6326,8 @@ heap_inplace_update_and_unlock(Relation relation,
6326
6326
HeapTupleHeader htup = oldtup -> t_data ;
6327
6327
uint32 oldlen ;
6328
6328
uint32 newlen ;
6329
+ char * dst ;
6330
+ char * src ;
6329
6331
int nmsgs = 0 ;
6330
6332
SharedInvalidationMessage * invalMessages = NULL ;
6331
6333
bool RelcacheInitFileInval = false;
@@ -6336,6 +6338,9 @@ heap_inplace_update_and_unlock(Relation relation,
6336
6338
if (oldlen != newlen || htup -> t_hoff != tuple -> t_data -> t_hoff )
6337
6339
elog (ERROR ,"wrong tuple length" );
6338
6340
6341
+ dst = (char * )htup + htup -> t_hoff ;
6342
+ src = (char * )tuple -> t_data + tuple -> t_data -> t_hoff ;
6343
+
6339
6344
/*
6340
6345
* Construct shared cache inval if necessary. Note that because we only
6341
6346
* pass the new version of the tuple, this mustn't be used for any
@@ -6359,15 +6364,15 @@ heap_inplace_update_and_unlock(Relation relation,
6359
6364
*/
6360
6365
PreInplace_Inval ();
6361
6366
6362
- /* NO EREPORT(ERROR) from here till changes are logged */
6363
- START_CRIT_SECTION ();
6364
-
6365
- memcpy ((char * )htup + htup -> t_hoff ,
6366
- (char * )tuple -> t_data + tuple -> t_data -> t_hoff ,
6367
- newlen );
6368
-
6369
6367
/*----------
6370
- * XXX A crash here can allow datfrozenxid() to get ahead of relfrozenxid:
6368
+ * NO EREPORT(ERROR) from here till changes are complete
6369
+ *
6370
+ * Our buffer lock won't stop a reader having already pinned and checked
6371
+ * visibility for this tuple. Hence, we write WAL first, then mutate the
6372
+ * buffer. Like in MarkBufferDirtyHint() or RecordTransactionCommit(),
6373
+ * checkpoint delay makes that acceptable. With the usual order of
6374
+ * changes, a crash after memcpy() and before XLogInsert() could allow
6375
+ * datfrozenxid to overtake relfrozenxid:
6371
6376
*
6372
6377
* ["D" is a VACUUM (ONLY_DATABASE_STATS)]
6373
6378
* ["R" is a VACUUM tbl]
@@ -6377,14 +6382,28 @@ heap_inplace_update_and_unlock(Relation relation,
6377
6382
* D: raise pg_database.datfrozenxid, XLogInsert(), finish
6378
6383
* [crash]
6379
6384
* [recovery restores datfrozenxid w/o relfrozenxid]
6385
+ *
6386
+ * Like in MarkBufferDirtyHint() subroutine XLogSaveBufferForHint(), copy
6387
+ * the buffer to the stack before logging. Here, that facilitates a FPI
6388
+ * of the post-mutation block before we accept other sessions seeing it.
6380
6389
*/
6381
-
6382
- MarkBufferDirty (buffer );
6390
+ Assert ((MyProc -> delayChkptFlags & DELAY_CHKPT_START )== 0 );
6391
+ START_CRIT_SECTION ();
6392
+ MyProc -> delayChkptFlags |=DELAY_CHKPT_START ;
6383
6393
6384
6394
/* XLOG stuff */
6385
6395
if (RelationNeedsWAL (relation ))
6386
6396
{
6387
6397
xl_heap_inplace xlrec ;
6398
+ PGAlignedBlock copied_buffer ;
6399
+ char * origdata = (char * )BufferGetBlock (buffer );
6400
+ Page page = BufferGetPage (buffer );
6401
+ uint16 lower = ((PageHeader )page )-> pd_lower ;
6402
+ uint16 upper = ((PageHeader )page )-> pd_upper ;
6403
+ uintptr_t dst_offset_in_block ;
6404
+ RelFileLocator rlocator ;
6405
+ ForkNumber forkno ;
6406
+ BlockNumber blkno ;
6388
6407
XLogRecPtr recptr ;
6389
6408
6390
6409
xlrec .offnum = ItemPointerGetOffsetNumber (& tuple -> t_self );
@@ -6399,16 +6418,28 @@ heap_inplace_update_and_unlock(Relation relation,
6399
6418
XLogRegisterData ((char * )invalMessages ,
6400
6419
nmsgs * sizeof (SharedInvalidationMessage ));
6401
6420
6402
- XLogRegisterBuffer (0 ,buffer ,REGBUF_STANDARD );
6403
- XLogRegisterBufData (0 , (char * )htup + htup -> t_hoff ,newlen );
6421
+ /* register block matching what buffer will look like after changes */
6422
+ memcpy (copied_buffer .data ,origdata ,lower );
6423
+ memcpy (copied_buffer .data + upper ,origdata + upper ,BLCKSZ - upper );
6424
+ dst_offset_in_block = dst - origdata ;
6425
+ memcpy (copied_buffer .data + dst_offset_in_block ,src ,newlen );
6426
+ BufferGetTag (buffer ,& rlocator ,& forkno ,& blkno );
6427
+ Assert (forkno == MAIN_FORKNUM );
6428
+ XLogRegisterBlock (0 ,& rlocator ,forkno ,blkno ,copied_buffer .data ,
6429
+ REGBUF_STANDARD );
6430
+ XLogRegisterBufData (0 ,src ,newlen );
6404
6431
6405
6432
/* inplace updates aren't decoded atm, don't log the origin */
6406
6433
6407
6434
recptr = XLogInsert (RM_HEAP_ID ,XLOG_HEAP_INPLACE );
6408
6435
6409
- PageSetLSN (BufferGetPage ( buffer ) ,recptr );
6436
+ PageSetLSN (page ,recptr );
6410
6437
}
6411
6438
6439
+ memcpy (dst ,src ,newlen );
6440
+
6441
+ MarkBufferDirty (buffer );
6442
+
6412
6443
LockBuffer (buffer ,BUFFER_LOCK_UNLOCK );
6413
6444
6414
6445
/*
@@ -6421,6 +6452,7 @@ heap_inplace_update_and_unlock(Relation relation,
6421
6452
*/
6422
6453
AtInplace_Inval ();
6423
6454
6455
+ MyProc -> delayChkptFlags &= ~DELAY_CHKPT_START ;
6424
6456
END_CRIT_SECTION ();
6425
6457
UnlockTuple (relation ,& tuple -> t_self ,InplaceUpdateTupleLock );
6426
6458