@@ -6184,13 +6184,18 @@ heap_inplace_update_and_unlock(Relation relation,
61846184HeapTupleHeader htup = oldtup -> t_data ;
61856185uint32 oldlen ;
61866186uint32 newlen ;
6187+ char * dst ;
6188+ char * src ;
61876189
61886190Assert (ItemPointerEquals (& oldtup -> t_self ,& tuple -> t_self ));
61896191oldlen = oldtup -> t_len - htup -> t_hoff ;
61906192newlen = tuple -> t_len - tuple -> t_data -> t_hoff ;
61916193if (oldlen != newlen || htup -> t_hoff != tuple -> t_data -> t_hoff )
61926194elog (ERROR ,"wrong tuple length" );
61936195
6196+ dst = (char * )htup + htup -> t_hoff ;
6197+ src = (char * )tuple -> t_data + tuple -> t_data -> t_hoff ;
6198+
61946199/*
61956200 * Construct shared cache inval if necessary. Note that because we only
61966201 * pass the new version of the tuple, this mustn't be used for any
@@ -6209,15 +6214,15 @@ heap_inplace_update_and_unlock(Relation relation,
62096214 */
62106215PreInplace_Inval ();
62116216
6212- /* NO EREPORT(ERROR) from here till changes are logged */
6213- START_CRIT_SECTION ();
6214-
6215- memcpy ((char * )htup + htup -> t_hoff ,
6216- (char * )tuple -> t_data + tuple -> t_data -> t_hoff ,
6217- newlen );
6218-
62196217/*----------
6220- * XXX A crash here can allow datfrozenxid() to get ahead of relfrozenxid:
6218+ * NO EREPORT(ERROR) from here till changes are complete
6219+ *
6220+ * Our buffer lock won't stop a reader having already pinned and checked
6221+ * visibility for this tuple. Hence, we write WAL first, then mutate the
6222+ * buffer. Like in MarkBufferDirtyHint() or RecordTransactionCommit(),
6223+ * checkpoint delay makes that acceptable. With the usual order of
6224+ * changes, a crash after memcpy() and before XLogInsert() could allow
6225+ * datfrozenxid to overtake relfrozenxid:
62216226 *
62226227 * ["D" is a VACUUM (ONLY_DATABASE_STATS)]
62236228 * ["R" is a VACUUM tbl]
@@ -6227,31 +6232,57 @@ heap_inplace_update_and_unlock(Relation relation,
62276232 * D: raise pg_database.datfrozenxid, XLogInsert(), finish
62286233 * [crash]
62296234 * [recovery restores datfrozenxid w/o relfrozenxid]
6235+ *
6236+ * Like in MarkBufferDirtyHint() subroutine XLogSaveBufferForHint(), copy
6237+ * the buffer to the stack before logging. Here, that facilitates an FPI
6238+ * of the post-mutation block before we accept other sessions seeing it.
62306239 */
6231-
6232- MarkBufferDirty (buffer );
6240+ Assert ((MyProc -> delayChkptFlags & DELAY_CHKPT_START )== 0 );
6241+ START_CRIT_SECTION ();
6242+ MyProc -> delayChkptFlags |=DELAY_CHKPT_START ;
62336243
62346244/* XLOG stuff */
62356245if (RelationNeedsWAL (relation ))
62366246{
62376247xl_heap_inplace xlrec ;
6248+ PGAlignedBlock copied_buffer ;
6249+ char * origdata = (char * )BufferGetBlock (buffer );
6250+ Page page = BufferGetPage (buffer );
6251+ uint16 lower = ((PageHeader )page )-> pd_lower ;
6252+ uint16 upper = ((PageHeader )page )-> pd_upper ;
6253+ uintptr_t dst_offset_in_block ;
6254+ RelFileLocator rlocator ;
6255+ ForkNumber forkno ;
6256+ BlockNumber blkno ;
62386257XLogRecPtr recptr ;
62396258
62406259xlrec .offnum = ItemPointerGetOffsetNumber (& tuple -> t_self );
62416260
62426261XLogBeginInsert ();
62436262XLogRegisterData ((char * )& xlrec ,SizeOfHeapInplace );
62446263
6245- XLogRegisterBuffer (0 ,buffer ,REGBUF_STANDARD );
6246- XLogRegisterBufData (0 , (char * )htup + htup -> t_hoff ,newlen );
6264+ /* register block matching what buffer will look like after changes */
6265+ memcpy (copied_buffer .data ,origdata ,lower );
6266+ memcpy (copied_buffer .data + upper ,origdata + upper ,BLCKSZ - upper );
6267+ dst_offset_in_block = dst - origdata ;
6268+ memcpy (copied_buffer .data + dst_offset_in_block ,src ,newlen );
6269+ BufferGetTag (buffer ,& rlocator ,& forkno ,& blkno );
6270+ Assert (forkno == MAIN_FORKNUM );
6271+ XLogRegisterBlock (0 ,& rlocator ,forkno ,blkno ,copied_buffer .data ,
6272+ REGBUF_STANDARD );
6273+ XLogRegisterBufData (0 ,src ,newlen );
62476274
62486275/* inplace updates aren't decoded atm, don't log the origin */
62496276
62506277recptr = XLogInsert (RM_HEAP_ID ,XLOG_HEAP_INPLACE );
62516278
6252- PageSetLSN (BufferGetPage ( buffer ) ,recptr );
6279+ PageSetLSN (page ,recptr );
62536280}
62546281
6282+ memcpy (dst ,src ,newlen );
6283+
6284+ MarkBufferDirty (buffer );
6285+
62556286LockBuffer (buffer ,BUFFER_LOCK_UNLOCK );
62566287
62576288/*
@@ -6264,6 +6295,7 @@ heap_inplace_update_and_unlock(Relation relation,
62646295 */
62656296AtInplace_Inval ();
62666297
6298+ MyProc -> delayChkptFlags &= ~DELAY_CHKPT_START ;
62676299END_CRIT_SECTION ();
62686300UnlockTuple (relation ,& tuple -> t_self ,InplaceUpdateTupleLock );
62696301