88 * Portions Copyright (c) 1994, Regents of the University of California
99 *
1010 * IDENTIFICATION
11- * $PostgreSQL: pgsql/src/backend/storage/freespace/fsmpage.c,v 1.1 2008/09/30 10:52:13 heikki Exp $
11+ * $PostgreSQL: pgsql/src/backend/storage/freespace/fsmpage.c,v 1.2 2008/10/07 21: 10:11 tgl Exp $
1212 *
1313 * NOTES:
1414 *
1515 * The public functions in this file form an API that hides the internal
1616 * structure of a FSM page. This allows freespace.c to treat each FSM page
1717 * as a black box with SlotsPerPage "slots". fsm_set_avail() and
18- * fsm_get_avail() let's you get/set the value of a slot, and
19- * fsm_search_avail()let's you search for a slot with value >= X.
18+ * fsm_get_avail() let you get/set the value of a slot, and
19+ * fsm_search_avail()lets you search for a slot with value >= X.
2020 *
2121 *-------------------------------------------------------------------------
2222 */
2525#include "storage/bufmgr.h"
2626#include "storage/fsm_internals.h"
2727
28- /*macros to navigate the tree within a page. */
28+ /*Macros to navigate the tree within a page. Root has index zero . */
2929#define leftchild (x )(2 * (x) + 1)
3030#define rightchild (x )(2 * (x) + 2)
3131#define parentof (x )(((x) - 1) / 2)
3232
33- /* returns right sibling of x, wrapping around within the level */
33+ /*
34+ * Find right neighbor of x, wrapping around within the level
35+ */
3436static int
35- rightsibling (int x )
37+ rightneighbor (int x )
3638{
3739/*
3840 * Move right. This might wrap around, stepping to the leftmost node at
@@ -42,8 +44,9 @@ rightsibling(int x)
4244
4345/*
4446 * Check if we stepped to the leftmost node at next level, and correct
45- * if so. The leftmost nodes at each level are of form x = 2^level - 1, so
46- * check if (x + 1) is a power of two.
47+ * if so. The leftmost nodes at each level are numbered x = 2^level - 1,
48+ * so check if (x + 1) is a power of two, using a standard
49+ * twos-complement-arithmetic trick.
4750 */
4851if (((x + 1 )& x )== 0 )
4952x = parentof (x );
@@ -52,8 +55,7 @@ rightsibling(int x)
5255}
5356
5457/*
55- * Sets the value of a slot on page. Returns true if the page was
56- * modified.
58+ * Sets the value of a slot on page. Returns true if the page was modified.
5759 *
5860 * The caller must hold an exclusive lock on the page.
5961 */
@@ -101,8 +103,8 @@ fsm_set_avail(Page page, int slot, uint8 value)
101103}while (nodeno > 0 );
102104
103105/*
104- * sanity check: if the new valuevalue is higher than the value
105- * at the top, the tree is corrupt.
106+ * sanity check: if the new valueis (still) higher than the value
107+ * at the top, the tree is corrupt. If so, rebuild.
106108 */
107109if (value > fsmpage -> fp_nodes [0 ])
108110fsm_rebuild_page (page );
@@ -121,32 +123,36 @@ fsm_get_avail(Page page, int slot)
121123{
122124FSMPage fsmpage = (FSMPage )PageGetContents (page );
123125
126+ Assert (slot < LeafNodesPerPage );
127+
124128return fsmpage -> fp_nodes [NonLeafNodesPerPage + slot ];
125129}
126130
127131/*
128132 * Returns the value at the root of a page.
133+ *
129134 * Since this is just a read-only access of a single byte, the page doesn't
130135 * need to be locked.
131136 */
132137uint8
133138fsm_get_max_avail (Page page )
134139{
135140FSMPage fsmpage = (FSMPage )PageGetContents (page );
141+
136142return fsmpage -> fp_nodes [0 ];
137143}
138144
139145/*
140- * Searches for a slot withmin. category. Returns slot number, or -1 if
141- * none found.
146+ * Searches for a slot with category at least minvalue.
147+ *Returns slot number, or -1 if none found.
142148 *
143149 * The caller must hold at least a shared lock on the page, and this
144150 * function can unlock and lock the page again in exclusive mode if it
145151 * needs to be updated. exclusive_lock_held should be set to true if the
146152 * caller is already holding an exclusive lock, to avoid extra work.
147153 *
148154 * If advancenext is false, fp_next_slot is set to point to the returned
149- * slot, and if it's true, to the slotnext to the returned slot.
155+ * slot, and if it's true, to the slotafter the returned slot.
150156 */
151157int
152158fsm_search_avail (Buffer buf ,uint8 minvalue ,bool advancenext ,
@@ -160,33 +166,44 @@ fsm_search_avail(Buffer buf, uint8 minvalue, bool advancenext,
160166
161167restart :
162168/*
163- * Check the root first, and exit quickly if there's nopage with
169+ * Check the root first, and exit quickly if there's noleaf with
164170 * enough free space
165171 */
166172if (fsmpage -> fp_nodes [0 ]< minvalue )
167173return -1 ;
168174
169-
170- /* fp_next_slot is just a hint, so check that it's sane */
175+ /*
176+ * Start search using fp_next_slot. It's just a hint, so check that it's
177+ * sane. (This also handles wrapping around when the prior call returned
178+ * the last slot on the page.)
179+ */
171180target = fsmpage -> fp_next_slot ;
172181if (target < 0 || target >=LeafNodesPerPage )
173182target = 0 ;
174183target += NonLeafNodesPerPage ;
175184
176- /*
177- * Start the search from the target slot. At every step, move one
178- * node to the right,and climb up to the parent. Stop when we reach a
179- * node with enough free space. (note that moving to theright only
180- *makes a difference if we're on the right child of the parent)
185+ /*----------
186+ * Start the search from the target slot. At every step, move one
187+ * node to the right,then climb up to the parent. Stop when we reach
188+ *a node with enough free space (as we must, since theroot has enough
189+ *space).
181190 *
182- * The idea is to graduall expand our "search triangle", that is, all
183- * nodes covered by the current node. In the beginning, just the target
184- * node is included, and more nodes to the right of the target node,
185- * taking wrap-around into account, is included at each step. Nodes are
186- * added to the search triangle in left-to-right order, starting from
187- * the target node. This ensures that we'll find the first suitable node
188- * to the right of the target node, and not some other node with enough
189- * free space.
191+ * The idea is to gradually expand our "search triangle", that is, all
192+ * nodes covered by the current node, and to be sure we search to the
193+ * right from the start point. At the first step, only the target slot
194+ * is examined. When we move up from a left child to its parent, we are
195+ * adding the right-hand subtree of that parent to the search triangle.
196+ * When we move right then up from a right child, we are dropping the
197+ * current search triangle (which we know doesn't contain any suitable
198+ * page) and instead looking at the next-larger-size triangle to its
199+ * right. So we never look left from our original start point, and at
200+ * each step the size of the search triangle doubles, ensuring it takes
201+ * only log2(N) work to search N pages.
202+ *
203+ * The "move right" operation will wrap around if it hits the right edge
204+ * of the tree, so the behavior is still good if we start near the right.
205+ * Note also that the move-and-climb behavior ensures that we can't end
206+ * up on one of the missing nodes at the right of the leaf level.
190207 *
191208 * For example, consider this tree:
192209 *
@@ -196,25 +213,27 @@ fsm_search_avail(Buffer buf, uint8 minvalue, bool advancenext,
196213 * 4 5 5 7 2 6 5 2
197214 * T
198215 *
199- * Imagine that target node is the node indicated by the letter T, and
200- * we're searching for a node with value of 6 or higher. The search
201- * begins at T. At first iteration, we move to the right, and to the
202- * parent, arriving the rightmost 5. At the 2nd iteration, we move to the
203- * right, wrapping around, and climb up, arriving at the 7 at the 2nd
204- * level. 7 satisfies our search, so we descend down to the bottom,
205- * following the path of sevens.
216+ * Assume that the target node is the node indicated by the letter T,
217+ * and we're searching for a node with value of 6 or higher. The search
218+ * begins at T. At the first iteration, we move to the right, then to the
219+ * parent, arriving at the rightmost 5. At the second iteration, we move
220+ * to the right, wrapping around, then climb up, arriving at the 7 on the
221+ * third level. 7 satisfies our search, so we descend down to the bottom,
222+ * following the path of sevens. This is in fact the first suitable page
223+ * to the right of (allowing for wraparound) our start point.
224+ *----------
206225 */
207226nodeno = target ;
208227while (nodeno > 0 )
209228{
210229if (fsmpage -> fp_nodes [nodeno ] >=minvalue )
211230break ;
212-
231+
213232/*
214- * Move to the right, wrapping aroundat the level if necessary, and
215- * climb up.
233+ * Move to the right, wrapping aroundon same level if necessary,
234+ *then climb up.
216235 */
217- nodeno = parentof (rightsibling (nodeno ));
236+ nodeno = parentof (rightneighbor (nodeno ));
218237}
219238
220239/*
@@ -271,10 +290,10 @@ fsm_search_avail(Buffer buf, uint8 minvalue, bool advancenext,
271290slot = nodeno - NonLeafNodesPerPage ;
272291
273292/*
274- * Update the next slot pointer. Note that we do this even if we're only
293+ * Update the next-target pointer. Note that we do this even if we're only
275294 * holding a shared lock, on the grounds that it's better to use a shared
276295 * lock and get a garbled next pointer every now and then, than take the
277- * concurrency hit of anexlusive lock.
296+ * concurrency hit of anexclusive lock.
278297 *
279298 * Wrap-around is handled at the beginning of this function.
280299 */
@@ -324,7 +343,7 @@ fsm_rebuild_page(Page page)
324343int nodeno ;
325344
326345/*
327- * Start from the lowest non-leaflevel , at last node, working our way
346+ * Start from the lowest non-leaf level , at last node, working our way
328347 * backwards, through all non-leaf nodes at all levels, up to the root.
329348 */
330349for (nodeno = NonLeafNodesPerPage - 1 ;nodeno >=0 ;nodeno -- )
@@ -333,6 +352,7 @@ fsm_rebuild_page(Page page)
333352int rchild = lchild + 1 ;
334353uint8 newvalue = 0 ;
335354
355+ /* The first few nodes we examine might have zero or one child. */
336356if (lchild < NodesPerPage )
337357newvalue = fsmpage -> fp_nodes [lchild ];
338358
@@ -349,4 +369,3 @@ fsm_rebuild_page(Page page)
349369
350370return changed ;
351371}
352-