@@ -6184,13 +6184,18 @@ heap_inplace_update_and_unlock(Relation relation,
6184
6184
HeapTupleHeader htup = oldtup -> t_data ;
6185
6185
uint32 oldlen ;
6186
6186
uint32 newlen ;
6187
+ char * dst ;
6188
+ char * src ;
6187
6189
6188
6190
Assert (ItemPointerEquals (& oldtup -> t_self ,& tuple -> t_self ));
6189
6191
oldlen = oldtup -> t_len - htup -> t_hoff ;
6190
6192
newlen = tuple -> t_len - tuple -> t_data -> t_hoff ;
6191
6193
if (oldlen != newlen || htup -> t_hoff != tuple -> t_data -> t_hoff )
6192
6194
elog (ERROR ,"wrong tuple length" );
6193
6195
6196
+ dst = (char * )htup + htup -> t_hoff ;
6197
+ src = (char * )tuple -> t_data + tuple -> t_data -> t_hoff ;
6198
+
6194
6199
/*
6195
6200
* Construct shared cache inval if necessary. Note that because we only
6196
6201
* pass the new version of the tuple, this mustn't be used for any
@@ -6209,15 +6214,15 @@ heap_inplace_update_and_unlock(Relation relation,
6209
6214
*/
6210
6215
PreInplace_Inval ();
6211
6216
6212
- /* NO EREPORT(ERROR) from here till changes are logged */
6213
- START_CRIT_SECTION ();
6214
-
6215
- memcpy ((char * )htup + htup -> t_hoff ,
6216
- (char * )tuple -> t_data + tuple -> t_data -> t_hoff ,
6217
- newlen );
6218
-
6219
6217
/*----------
6220
- * XXX A crash here can allow datfrozenxid() to get ahead of relfrozenxid:
6218
+ * NO EREPORT(ERROR) from here till changes are complete
6219
+ *
6220
+ * Our buffer lock won't stop a reader having already pinned and checked
6221
+ * visibility for this tuple. Hence, we write WAL first, then mutate the
6222
+ * buffer. Like in MarkBufferDirtyHint() or RecordTransactionCommit(),
6223
+ * checkpoint delay makes that acceptable. With the usual order of
6224
+ * changes, a crash after memcpy() and before XLogInsert() could allow
6225
+ * datfrozenxid to overtake relfrozenxid:
6221
6226
*
6222
6227
* ["D" is a VACUUM (ONLY_DATABASE_STATS)]
6223
6228
* ["R" is a VACUUM tbl]
@@ -6227,31 +6232,57 @@ heap_inplace_update_and_unlock(Relation relation,
6227
6232
* D: raise pg_database.datfrozenxid, XLogInsert(), finish
6228
6233
* [crash]
6229
6234
* [recovery restores datfrozenxid w/o relfrozenxid]
6235
+ *
6236
+ * Like in MarkBufferDirtyHint() subroutine XLogSaveBufferForHint(), copy
6237
+ * the buffer to the stack before logging. Here, that facilitates a FPI
6238
+ * of the post-mutation block before we accept other sessions seeing it.
6230
6239
*/
6231
-
6232
- MarkBufferDirty (buffer );
6240
+ Assert ((MyProc -> delayChkptFlags & DELAY_CHKPT_START )== 0 );
6241
+ START_CRIT_SECTION ();
6242
+ MyProc -> delayChkptFlags |=DELAY_CHKPT_START ;
6233
6243
6234
6244
/* XLOG stuff */
6235
6245
if (RelationNeedsWAL (relation ))
6236
6246
{
6237
6247
xl_heap_inplace xlrec ;
6248
+ PGAlignedBlock copied_buffer ;
6249
+ char * origdata = (char * )BufferGetBlock (buffer );
6250
+ Page page = BufferGetPage (buffer );
6251
+ uint16 lower = ((PageHeader )page )-> pd_lower ;
6252
+ uint16 upper = ((PageHeader )page )-> pd_upper ;
6253
+ uintptr_t dst_offset_in_block ;
6254
+ RelFileLocator rlocator ;
6255
+ ForkNumber forkno ;
6256
+ BlockNumber blkno ;
6238
6257
XLogRecPtr recptr ;
6239
6258
6240
6259
xlrec .offnum = ItemPointerGetOffsetNumber (& tuple -> t_self );
6241
6260
6242
6261
XLogBeginInsert ();
6243
6262
XLogRegisterData ((char * )& xlrec ,SizeOfHeapInplace );
6244
6263
6245
- XLogRegisterBuffer (0 ,buffer ,REGBUF_STANDARD );
6246
- XLogRegisterBufData (0 , (char * )htup + htup -> t_hoff ,newlen );
6264
+ /* register block matching what buffer will look like after changes */
6265
+ memcpy (copied_buffer .data ,origdata ,lower );
6266
+ memcpy (copied_buffer .data + upper ,origdata + upper ,BLCKSZ - upper );
6267
+ dst_offset_in_block = dst - origdata ;
6268
+ memcpy (copied_buffer .data + dst_offset_in_block ,src ,newlen );
6269
+ BufferGetTag (buffer ,& rlocator ,& forkno ,& blkno );
6270
+ Assert (forkno == MAIN_FORKNUM );
6271
+ XLogRegisterBlock (0 ,& rlocator ,forkno ,blkno ,copied_buffer .data ,
6272
+ REGBUF_STANDARD );
6273
+ XLogRegisterBufData (0 ,src ,newlen );
6247
6274
6248
6275
/* inplace updates aren't decoded atm, don't log the origin */
6249
6276
6250
6277
recptr = XLogInsert (RM_HEAP_ID ,XLOG_HEAP_INPLACE );
6251
6278
6252
- PageSetLSN (BufferGetPage ( buffer ) ,recptr );
6279
+ PageSetLSN (page ,recptr );
6253
6280
}
6254
6281
6282
+ memcpy (dst ,src ,newlen );
6283
+
6284
+ MarkBufferDirty (buffer );
6285
+
6255
6286
LockBuffer (buffer ,BUFFER_LOCK_UNLOCK );
6256
6287
6257
6288
/*
@@ -6264,6 +6295,7 @@ heap_inplace_update_and_unlock(Relation relation,
6264
6295
*/
6265
6296
AtInplace_Inval ();
6266
6297
6298
+ MyProc -> delayChkptFlags &= ~DELAY_CHKPT_START ;
6267
6299
END_CRIT_SECTION ();
6268
6300
UnlockTuple (relation ,& tuple -> t_self ,InplaceUpdateTupleLock );
6269
6301