88 *
99 *
1010 * IDENTIFICATION
11- * $PostgreSQL: pgsql/src/backend/access/heap/hio.c,v 1.73 2008/09/30 10:52:10 heikki Exp $
11+ * $PostgreSQL: pgsql/src/backend/access/heap/hio.c,v 1.74 2008/11/06 20:51:14 tgl Exp $
1212 *
1313 *-------------------------------------------------------------------------
1414 */
1515
1616#include "postgres.h"
1717
18+ #include "access/heapam.h"
1819#include "access/hio.h"
1920#include "storage/bufmgr.h"
2021#include "storage/freespace.h"
@@ -56,6 +57,43 @@ RelationPutHeapTuple(Relation relation,
5657((HeapTupleHeader )item )-> t_ctid = tuple -> t_self ;
5758}
5859
60+ /*
61+ * Read in a buffer, using bulk-insert strategy if bistate isn't NULL.
62+ */
63+ static Buffer
64+ ReadBufferBI (Relation relation ,BlockNumber targetBlock ,
65+ BulkInsertState bistate )
66+ {
67+ Buffer buffer ;
68+
69+ /* If not bulk-insert, exactly like ReadBuffer */
70+ if (!bistate )
71+ return ReadBuffer (relation ,targetBlock );
72+
73+ /* If we have the desired block already pinned, re-pin and return it */
74+ if (bistate -> current_buf != InvalidBuffer )
75+ {
76+ if (BufferGetBlockNumber (bistate -> current_buf )== targetBlock )
77+ {
78+ IncrBufferRefCount (bistate -> current_buf );
79+ return bistate -> current_buf ;
80+ }
81+ /* ... else drop the old buffer */
82+ ReleaseBuffer (bistate -> current_buf );
83+ bistate -> current_buf = InvalidBuffer ;
84+ }
85+
86+ /* Perform a read using the buffer strategy */
87+ buffer = ReadBufferExtended (relation ,MAIN_FORKNUM ,targetBlock ,
88+ RBM_NORMAL ,bistate -> strategy );
89+
90+ /* Save the selected block as target for future inserts */
91+ IncrBufferRefCount (buffer );
92+ bistate -> current_buf = buffer ;
93+
94+ return buffer ;
95+ }
96+
5997/*
6098 * RelationGetBufferForTuple
6199 *
@@ -80,20 +118,26 @@ RelationPutHeapTuple(Relation relation,
80118 *happen if space is freed in that page after heap_update finds there's not
81119 *enough there).In that case, the page will be pinned and locked only once.
82120 *
83- *If use_fsm is true (the normal case), we use FSM to help us find free
84- *space.If use_fsm isfalse , wealways append a new empty page to the
85- *end of the relation if the tuple won't fit on the current target page.
121+ *We normally use FSM to help us find free space. However,
122+ *if HEAP_INSERT_SKIP_FSM is specified, we just append a new empty page to
123+ *the end of the relation if the tuple won't fit on the current target page.
86124 *This can save some cycles when we know the relation is new and doesn't
87125 *contain useful amounts of free space.
88126 *
89- *The use_fsm = false case is also useful for non-WAL-logged additions to a
127+ *HEAP_INSERT_SKIP_FSM is also useful for non-WAL-logged additions to a
90128 *relation, if the caller holds exclusive lock and is careful to invalidate
91129 *relation->rd_targblock before the first insertion --- that ensures that
92130 *all insertions will occur into newly added pages and not be intermixed
93131 *with tuples from other transactions. That way, a crash can't risk losing
94132 *any committed data of other transactions. (See heap_insert's comments
95133 *for additional constraints needed for safe usage of this behavior.)
96134 *
135+ *The caller can also provide a BulkInsertState object to optimize many
136+ *insertions into the same relation. This keeps a pin on the current
137+ *insertion target page (to save pin/unpin cycles) and also passes a
138+ *BULKWRITE buffer selection strategy object to the buffer manager.
139+ *Passing NULL for bistate selects the default behavior.
140+ *
97141 *We always try to avoid filling existing pages further than the fillfactor.
98142 *This is OK since this routine is not consulted when updating a tuple and
99143 *keeping it on the same page, which is the scenario fillfactor is meant
@@ -104,8 +148,10 @@ RelationPutHeapTuple(Relation relation,
104148 */
105149Buffer
106150RelationGetBufferForTuple (Relation relation ,Size len ,
107- Buffer otherBuffer ,bool use_fsm )
151+ Buffer otherBuffer ,int options ,
152+ struct BulkInsertStateData * bistate )
108153{
154+ bool use_fsm = !(options & HEAP_INSERT_SKIP_FSM );
109155Buffer buffer = InvalidBuffer ;
110156Page page ;
111157Size pageFreeSpace ,
@@ -116,6 +162,9 @@ RelationGetBufferForTuple(Relation relation, Size len,
116162
117163len = MAXALIGN (len );/* be conservative */
118164
165+ /* Bulk insert is not supported for updates, only inserts. */
166+ Assert (otherBuffer == InvalidBuffer || !bistate );
167+
119168/*
120169 * If we're gonna fail for oversize tuple, do it right away
121170 */
@@ -137,25 +186,27 @@ RelationGetBufferForTuple(Relation relation, Size len,
137186
138187/*
139188 * We first try to put the tuple on the same page we last inserted a tuple
140- * on, as cached in the relcache entry. If that doesn't work, we ask the
141- *shared Free Space Map to locate a suitable page. Since the FSM's info
142- * might be out of date, we have to be prepared to loop around and retry
143- * multiple times. (To insure this isn't an infinite loop, we must update
144- * the FSM with the correct amount of free space on each page that proves
145- * not to be suitable.) If the FSM has no record of a page with enough
146- * free space, we give up and extend the relation.
189+ * on, as cached in the BulkInsertState or relcache entry. If that
190+ *doesn't work, we ask the Free Space Map to locate a suitable page.
191+ *Since the FSM's info might be out of date, we have to be prepared to
192+ *loop around and retry multiple times. (To insure this isn't an infinite
193+ *loop, we must update the FSM with the correct amount of free space on
194+ *each page that proves not to be suitable.) If the FSM has no record of
195+ *a page with enough free space, we give up and extend the relation.
147196 *
148197 * When use_fsm is false, we either put the tuple onto the existing target
149198 * page or extend the relation.
150199 */
151- if (len + saveFreeSpace <=MaxHeapTupleSize )
152- targetBlock = relation -> rd_targblock ;
153- else
200+ if (len + saveFreeSpace > MaxHeapTupleSize )
154201{
155- /* can't fit, don'tscrew up FSM request tracking by trying */
202+ /* can't fit, don't bother asking FSM */
156203targetBlock = InvalidBlockNumber ;
157204use_fsm = false;
158205}
206+ else if (bistate && bistate -> current_buf != InvalidBuffer )
207+ targetBlock = BufferGetBlockNumber (bistate -> current_buf );
208+ else
209+ targetBlock = relation -> rd_targblock ;
159210
160211if (targetBlock == InvalidBlockNumber && use_fsm )
161212{
@@ -189,7 +240,7 @@ RelationGetBufferForTuple(Relation relation, Size len,
189240if (otherBuffer == InvalidBuffer )
190241{
191242/* easy case */
192- buffer = ReadBuffer (relation ,targetBlock );
243+ buffer = ReadBufferBI (relation ,targetBlock , bistate );
193244LockBuffer (buffer ,BUFFER_LOCK_EXCLUSIVE );
194245}
195246else if (otherBlock == targetBlock )
@@ -274,7 +325,7 @@ RelationGetBufferForTuple(Relation relation, Size len,
274325 * it worth keeping an accurate file length in shared memory someplace,
275326 * rather than relying on the kernel to do it for us?
276327 */
277- buffer = ReadBuffer (relation ,P_NEW );
328+ buffer = ReadBufferBI (relation ,P_NEW , bistate );
278329
279330/*
280331 * We can be certain that locking the otherBuffer first is OK, since it