#include "storage/procarray.h"
#include "storage/standby.h"
#include "utils/datum.h"
- #include "utils/injection_point.h"
#include "utils/inval.h"
#include "utils/relcache.h"
#include "utils/snapmgr.h"
@@ -6041,23 +6040,245 @@ heap_abort_speculative(Relation relation, ItemPointer tid)
}

/*
- * heap_inplace_update - update a tuple "in place" (ie, overwrite it)
- *
- * Overwriting violates both MVCC and transactional safety, so the uses
- * of this function in Postgres are extremely limited. Nonetheless we
- * find some places to use it.
- *
- * The tuple cannot change size, and therefore it's reasonable to assume
- * that its null bitmap (if any) doesn't change either. So we just
- * overwrite the data portion of the tuple without touching the null
- * bitmap or any of the header fields.
+ * heap_inplace_lock - protect inplace update from concurrent heap_update()
+ *
+ * Evaluate whether the tuple's state is compatible with a no-key update.
+ * Current transaction rowmarks are fine, as is KEY SHARE from any
+ * transaction. If compatible, return true with the buffer exclusive-locked,
+ * and the caller must release that by calling
+ * heap_inplace_update_and_unlock(), calling heap_inplace_unlock(), or raising
+ * an error. Otherwise, return false after blocking transactions, if any,
+ * have ended.
+ *
+ * Since this is intended for system catalogs and SERIALIZABLE doesn't cover
+ * DDL, this doesn't guarantee any particular predicate locking.
+ *
6056
+ * One could modify this to return true for tuples with delete in progress,
6057
+ * All inplace updaters take a lock that conflicts with DROP. If explicit
6058
+ * "DELETE FROM pg_class" is in progress, we'll wait for it like we would an
6059
+ * update.
+ *
+ * Readers of inplace-updated fields expect changes to those fields are
+ * durable. For example, vac_truncate_clog() reads datfrozenxid from
+ * pg_database tuples via catalog snapshots. A future snapshot must not
+ * return a lower datfrozenxid for the same database OID (lower in the
+ * FullTransactionIdPrecedes() sense). We achieve that since no update of a
+ * tuple can start while we hold a lock on its buffer. In cases like
+ * BEGIN;GRANT;CREATE INDEX;COMMIT we're inplace-updating a tuple visible only
+ * to this transaction. ROLLBACK then is one case where it's okay to lose
+ * inplace updates. (Restoring relhasindex=false on ROLLBACK is fine, since
+ * any concurrent CREATE INDEX would have blocked, then inplace-updated the
+ * committed tuple.)
+ *
+ * In principle, we could avoid waiting by overwriting every tuple in the
+ * updated tuple chain. Reader expectations permit updating a tuple only if
+ * it's aborted, is the tail of the chain, or we already updated the tuple
+ * referenced in its t_ctid. Hence, we would need to overwrite the tuples in
+ * order from tail to head. That would imply either (a) mutating all tuples
+ * in one critical section or (b) accepting a chance of partial completion.
+ * Partial completion of a relfrozenxid update would have the weird
+ * consequence that the table's next VACUUM could see the table's relfrozenxid
+ * move forward between vacuum_get_cutoffs() and finishing.
+ */
+ bool
+ heap_inplace_lock(Relation relation,
+                   HeapTuple oldtup_ptr, Buffer buffer)
+ {
+     HeapTupleData oldtup = *oldtup_ptr; /* minimize diff vs. heap_update() */
+     TM_Result   result;
+     bool        ret;
+
+     Assert(BufferIsValid(buffer));
+
+     LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
+
+     /*----------
+      * Interpret HeapTupleSatisfiesUpdate() like heap_update() does, except:
+      *
+      * - wait unconditionally
+      * - no tuple locks
+      * - don't recheck header after wait: simpler to defer to next iteration
+      * - don't try to continue even if the updater aborts: likewise
+      * - no crosscheck
+      */
+     result = HeapTupleSatisfiesUpdate(&oldtup, GetCurrentCommandId(false),
+                                       buffer);
+
+     if (result == TM_Invisible)
+     {
+         /* no known way this can happen */
+         ereport(ERROR,
+                 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+                  errmsg_internal("attempted to overwrite invisible tuple")));
+     }
+     else if (result == TM_SelfModified)
+     {
+         /*
+          * CREATE INDEX might reach this if an expression is silly enough to
+          * call e.g. SELECT ... FROM pg_class FOR SHARE. C code of other SQL
+          * statements might get here after a heap_update() of the same row, in
+          * the absence of an intervening CommandCounterIncrement().
+          */
+         ereport(ERROR,
+                 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+                  errmsg("tuple to be updated was already modified by an operation triggered by the current command")));
+     }
+     else if (result == TM_BeingModified)
+     {
+         TransactionId xwait;
+         uint16      infomask;
+
+         xwait = HeapTupleHeaderGetRawXmax(oldtup.t_data);
+         infomask = oldtup.t_data->t_infomask;
+
+         if (infomask & HEAP_XMAX_IS_MULTI)
+         {
+             LockTupleMode lockmode = LockTupleNoKeyExclusive;
+             MultiXactStatus mxact_status = MultiXactStatusNoKeyUpdate;
+             int         remain;
+             bool        current_is_member;
+
+             if (DoesMultiXactIdConflict((MultiXactId) xwait, infomask,
+                                         lockmode, &current_is_member))
+             {
+                 LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
+                 ret = false;
+                 MultiXactIdWait((MultiXactId) xwait, mxact_status, infomask,
+                                 relation, &oldtup.t_self, XLTW_Update,
+                                 &remain);
+             }
+             else
+                 ret = true;
+         }
+         else if (TransactionIdIsCurrentTransactionId(xwait))
+             ret = true;
+         else if (HEAP_XMAX_IS_KEYSHR_LOCKED(infomask))
+             ret = true;
+         else
+         {
+             LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
+             ret = false;
+             XactLockTableWait(xwait, relation, &oldtup.t_self,
+                               XLTW_Update);
+         }
+     }
+     else
+     {
+         ret = (result == TM_Ok);
+         if (!ret)
+         {
+             LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
+         }
+     }
+
+     /*
+      * GetCatalogSnapshot() relies on invalidation messages to know when to
+      * take a new snapshot. COMMIT of xwait is responsible for sending the
+      * invalidation. We're not acquiring heavyweight locks sufficient to
+      * block if not yet sent, so we must take a new snapshot to ensure a later
+      * attempt has a fair chance. While we don't need this if xwait aborted,
+      * don't bother optimizing that.
+      */
+     if (!ret)
+         InvalidateCatalogSnapshot();
+     return ret;
+ }
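
As a rough illustration of the caller contract described above (not part of this patch): a caller that pins the buffer itself might loop as sketched here, restarting its catalog scan whenever heap_inplace_lock() returns false. fetch_pg_class_tuple() is a hypothetical placeholder for whatever lookup the caller already performs.

    /* Hedged sketch only; fetch_pg_class_tuple() is a hypothetical helper. */
    for (;;)
    {
        HeapTupleData tup;
        Buffer      buf;

        /* locate the target tuple and pin its buffer (hypothetical helper) */
        if (!fetch_pg_class_tuple(relation, reloid, &tup, &buf))
            elog(ERROR, "cache lookup failed");

        if (heap_inplace_lock(relation, &tup, buf))
            break;              /* buffer now exclusive-locked; go modify it */

        /* lock refused: buffer lock already released, drop our pin and retry */
        ReleaseBuffer(buf);
    }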
+
+ /*
+ * heap_inplace_update_and_unlock - core of systable_inplace_update_finish
 *
- * tuple is an in-memory tuple structure containing the data to be written
- * over the target tuple. Also, tuple->t_self identifies the target tuple.
+ * The tuple cannot change size, and therefore its header fields and null
+ * bitmap (if any) don't change either.
+ */
+ void
+ heap_inplace_update_and_unlock(Relation relation,
+                                HeapTuple oldtup, HeapTuple tuple,
+                                Buffer buffer)
+ {
+     HeapTupleHeader htup = oldtup->t_data;
+     uint32      oldlen;
+     uint32      newlen;
+
+     Assert(ItemPointerEquals(&oldtup->t_self, &tuple->t_self));
+     oldlen = oldtup->t_len - htup->t_hoff;
+     newlen = tuple->t_len - tuple->t_data->t_hoff;
+     if (oldlen != newlen || htup->t_hoff != tuple->t_data->t_hoff)
+         elog(ERROR, "wrong tuple length");
+
+     /* NO EREPORT(ERROR) from here till changes are logged */
+     START_CRIT_SECTION();
+
+     memcpy((char *) htup + htup->t_hoff,
+            (char *) tuple->t_data + tuple->t_data->t_hoff,
+            newlen);
+
+     /*----------
+      * XXX A crash here can allow datfrozenxid to get ahead of relfrozenxid:
+      *
+      * ["D" is a VACUUM (ONLY_DATABASE_STATS)]
+      * ["R" is a VACUUM tbl]
+      * D: vac_update_datfrozenxid() -> systable_beginscan(pg_class)
+      * D: systable_getnext() returns pg_class tuple of tbl
+      * R: memcpy() into pg_class tuple of tbl
+      * D: raise pg_database.datfrozenxid, XLogInsert(), finish
+      * [crash]
+      * [recovery restores datfrozenxid w/o relfrozenxid]
+      */
+
+     MarkBufferDirty(buffer);
+
+     /* XLOG stuff */
+     if (RelationNeedsWAL(relation))
+     {
+         xl_heap_inplace xlrec;
+         XLogRecPtr  recptr;
+
+         xlrec.offnum = ItemPointerGetOffsetNumber(&tuple->t_self);
+
+         XLogBeginInsert();
+         XLogRegisterData((char *) &xlrec, SizeOfHeapInplace);
+
+         XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
+         XLogRegisterBufData(0, (char *) htup + htup->t_hoff, newlen);
+
+         /* inplace updates aren't decoded atm, don't log the origin */
+
+         recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_INPLACE);
+
+         PageSetLSN(BufferGetPage(buffer), recptr);
+     }
+
+     END_CRIT_SECTION();
+
+     heap_inplace_unlock(relation, oldtup, buffer);
+
+     /*
+      * Send out shared cache inval if necessary. Note that because we only
+      * pass the new version of the tuple, this mustn't be used for any
+      * operations that could change catcache lookup keys. But we aren't
+      * bothering with index updates either, so that's true a fortiori.
+      *
+      * XXX ROLLBACK discards the invalidation. See test inplace-inval.spec.
+      */
+     if (!IsBootstrapProcessingMode())
+         CacheInvalidateHeapTuple(relation, tuple, NULL);
+ }
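
Continuing the earlier sketch (still illustrative, not part of this patch): once heap_inplace_lock() has succeeded on tup/buf, a caller builds a same-length copy, mutates only fixed-width fields, and hands both tuples to heap_inplace_update_and_unlock(). The pg_class relhasindex change is an assumed example.

    HeapTuple   newtup = heap_copytuple(&tup); /* same length as on-page tuple */
    Form_pg_class classform = (Form_pg_class) GETSTRUCT(newtup);

    classform->relhasindex = true;  /* fixed-width field, so the size check passes */

    /* overwrites the on-page data, WAL-logs it, and drops the buffer lock */
    heap_inplace_update_and_unlock(relation, &tup, newtup, buf);
    ReleaseBuffer(buf);
    heap_freetuple(newtup);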
+
+ /*
+ * heap_inplace_unlock - reverse of heap_inplace_lock
+ */
+ void
+ heap_inplace_unlock(Relation relation,
+                     HeapTuple oldtup, Buffer buffer)
+ {
+     LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
+ }
+
+ /*
+ * heap_inplace_update - deprecated
 *
- * Note that the tuple updated here had better not come directly from the
- * syscache if the relation has a toast relation as this tuple could
- * include toast values that have been expanded, causing a failure here.
+ * This exists only to keep modules working in back branches. Affected
+ * modules should migrate to systable_inplace_update_begin().
 */
void
heap_inplace_update(Relation relation, HeapTuple tuple)
@@ -6081,7 +6302,6 @@ heap_inplace_update(Relation relation, HeapTuple tuple)
                (errcode(ERRCODE_INVALID_TRANSACTION_STATE),
                 errmsg("cannot update tuples during a parallel operation")));

-     INJECTION_POINT("inplace-before-pin");
    buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(&(tuple->t_self)));
    LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
    page = (Page) BufferGetPage(buffer);
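
For extension code still calling the deprecated heap_inplace_update(), a migration might look roughly like the sketch below. It assumes the systable_inplace_update_begin()/_finish()/_cancel() interface added alongside this patch in genam.c; treat the exact signature, the pg_class scan key, and the relhasindex change as illustrative assumptions rather than a verbatim API reference. pg_class_rel and reloid are assumed to be caller-provided.

    HeapTuple   ctup;
    void       *inplace_state;
    ScanKeyData key[1];
    Form_pg_class classform;

    ScanKeyInit(&key[0], Anum_pg_class_oid,
                BTEqualStrategyNumber, F_OIDEQ, ObjectIdGetDatum(reloid));
    systable_inplace_update_begin(pg_class_rel, ClassOidIndexId, true,
                                  NULL, 1, key, &ctup, &inplace_state);
    if (!HeapTupleIsValid(ctup))
        elog(ERROR, "pg_class entry for relid %u vanished", reloid);

    classform = (Form_pg_class) GETSTRUCT(ctup);
    if (!classform->relhasindex)
    {
        classform->relhasindex = true;      /* fixed-width field only */
        systable_inplace_update_finish(inplace_state, ctup);
    }
    else
        systable_inplace_update_cancel(inplace_state);
    heap_freetuple(ctup);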