Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit d68efb3

Browse files
committed
Repair problems with hash indexes that span multiple segments: the hash code's
preference for filling pages out-of-order tends to confuse the sanity checks in md.c, as per report from Balazs Nagy in bug #2737. The fix is to ensure that the smgr-level code always has the same idea of the logical EOF as the hash index code does, by using ReadBuffer(P_NEW) where we are adding a single page to the end of the index, and using smgrextend() to reserve a large batch of pages when creating a new splitpoint. The patch is a bit ugly because it avoids making any changes in md.c, which seems the most prudent approach for a backpatchable beta-period fix. After 8.3 development opens, I'll take a look at a cleaner but more invasive patch, in particular getting rid of the now unnecessary hack to allow reading beyond EOF in mdread(). Backpatch as far as 7.4. The bug likely exists in 7.3 as well, but because of the magnitude of the 7.3-to-7.4 changes in hash, the later-version patch doesn't even begin to apply. Given the other known bugs in the 7.3-era hash code, it does not seem worth trying to develop a separate patch for 7.3.
1 parent fa3d622 commit d68efb3

File tree

2 files changed

+184
-44
lines changed

2 files changed

+184
-44
lines changed

‎src/backend/access/hash/hashovfl.c

Lines changed: 52 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/access/hash/hashovfl.c,v 1.52 2006/03/31 23:32:05 tgl Exp $
11+
* $PostgreSQL: pgsql/src/backend/access/hash/hashovfl.c,v 1.53 2006/11/19 21:33:22 tgl Exp $
1212
*
1313
* NOTES
1414
* Overflow pages look like ordinary relation pages.
@@ -20,7 +20,7 @@
2020
#include"access/hash.h"
2121

2222

23-
staticBlockNumber_hash_getovflpage(Relationrel,Buffermetabuf);
23+
staticBuffer_hash_getovflpage(Relationrel,Buffermetabuf);
2424
staticuint32_hash_firstfreebit(uint32map);
2525

2626

@@ -99,18 +99,14 @@ blkno_to_bitno(HashMetaPage metap, BlockNumber ovflblkno)
9999
Buffer
100100
_hash_addovflpage(Relationrel,Buffermetabuf,Bufferbuf)
101101
{
102-
BlockNumberovflblkno;
103102
Bufferovflbuf;
104103
Pagepage;
105104
Pageovflpage;
106105
HashPageOpaquepageopaque;
107106
HashPageOpaqueovflopaque;
108107

109-
/* allocate an empty overflow page */
110-
ovflblkno=_hash_getovflpage(rel,metabuf);
111-
112-
/* lock the overflow page */
113-
ovflbuf=_hash_getbuf(rel,ovflblkno,HASH_WRITE);
108+
/* allocate and lock an empty overflow page */
109+
ovflbuf=_hash_getovflpage(rel,metabuf);
114110
ovflpage=BufferGetPage(ovflbuf);
115111

116112
/*
@@ -150,7 +146,7 @@ _hash_addovflpage(Relation rel, Buffer metabuf, Buffer buf)
150146
MarkBufferDirty(ovflbuf);
151147

152148
/* logically chain overflow page to previous page */
153-
pageopaque->hasho_nextblkno=ovflblkno;
149+
pageopaque->hasho_nextblkno=BufferGetBlockNumber(ovflbuf);
154150
_hash_wrtbuf(rel,buf);
155151

156152
returnovflbuf;
@@ -159,16 +155,18 @@ _hash_addovflpage(Relation rel, Buffer metabuf, Buffer buf)
159155
/*
160156
*_hash_getovflpage()
161157
*
162-
*Find an available overflow page and return its block number.
158+
*Find an available overflow page and return it. The returned buffer
159+
*is pinned and write-locked, but its contents are not initialized.
163160
*
164161
* The caller must hold a pin, but no lock, on the metapage buffer.
165-
*The buffer isreturned in the same state.
162+
*That buffer isleft in the same state at exit.
166163
*/
167-
staticBlockNumber
164+
staticBuffer
168165
_hash_getovflpage(Relationrel,Buffermetabuf)
169166
{
170167
HashMetaPagemetap;
171168
Buffermapbuf=0;
169+
Buffernewbuf;
172170
BlockNumberblkno;
173171
uint32orig_firstfree;
174172
uint32splitnum;
@@ -243,11 +241,10 @@ _hash_getovflpage(Relation rel, Buffer metabuf)
243241
_hash_chgbufaccess(rel,metabuf,HASH_NOLOCK,HASH_WRITE);
244242
}
245243

246-
/* No Free Page Found - have to allocate a new page */
247-
bit=metap->hashm_spares[splitnum];
248-
metap->hashm_spares[splitnum]++;
249-
250-
/* Check if we need to allocate a new bitmap page */
244+
/*
245+
* No free pages --- have to extend the relation to add an overflow page.
246+
* First, check to see if we have to add a new bitmap page too.
247+
*/
251248
if (last_bit== (uint32) (BMPGSZ_BIT(metap)-1))
252249
{
253250
/*
@@ -258,22 +255,39 @@ _hash_getovflpage(Relation rel, Buffer metabuf)
258255
* marked "in use". Subsequent pages do not exist yet, but it is
259256
* convenient to pre-mark them as "in use" too.
260257
*/
261-
_hash_initbitmap(rel,metap,bitno_to_blkno(metap,bit));
262-
263258
bit=metap->hashm_spares[splitnum];
259+
_hash_initbitmap(rel,metap,bitno_to_blkno(metap,bit));
264260
metap->hashm_spares[splitnum]++;
265261
}
266262
else
267263
{
268264
/*
269-
* Nothing to do here; since the pagewaspast the last used page, we
270-
* know its bitmap bit was preinitialized to "in use".
265+
* Nothing to do here; since the pagewill bepast the last used page,
266+
*weknow its bitmap bit was preinitialized to "in use".
271267
*/
272268
}
273269

274270
/* Calculate address of the new overflow page */
271+
bit=metap->hashm_spares[splitnum];
275272
blkno=bitno_to_blkno(metap,bit);
276273

274+
/*
275+
* We have to fetch the page with P_NEW to ensure smgr's idea of the
276+
* relation length stays in sync with ours. XXX It's annoying to do this
277+
* with metapage write lock held; would be better to use a lock that
278+
* doesn't block incoming searches. Best way to fix it would be to stop
279+
* maintaining hashm_spares[hashm_ovflpoint] and rely entirely on the
280+
* smgr relation length to track where new overflow pages come from;
281+
* then we could release the metapage before we do the smgrextend.
282+
* FIXME later (not in beta...)
283+
*/
284+
newbuf=_hash_getbuf(rel,P_NEW,HASH_WRITE);
285+
if (BufferGetBlockNumber(newbuf)!=blkno)
286+
elog(ERROR,"unexpected hash relation size: %u, should be %u",
287+
BufferGetBlockNumber(newbuf),blkno);
288+
289+
metap->hashm_spares[splitnum]++;
290+
277291
/*
278292
* Adjust hashm_firstfree to avoid redundant searches.But don't risk
279293
* changing it if someone moved it while we were searching bitmap pages.
@@ -284,7 +298,7 @@ _hash_getovflpage(Relation rel, Buffer metabuf)
284298
/* Write updated metapage and release lock, but not pin */
285299
_hash_chgbufaccess(rel,metabuf,HASH_WRITE,HASH_NOLOCK);
286300

287-
returnblkno;
301+
returnnewbuf;
288302

289303
found:
290304
/* convert bit to bit number within page */
@@ -300,7 +314,7 @@ _hash_getovflpage(Relation rel, Buffer metabuf)
300314
/* convert bit to absolute bit number */
301315
bit+= (i <<BMPG_SHIFT(metap));
302316

303-
/* Calculate address of thenew overflow page */
317+
/* Calculate address of therecycled overflow page */
304318
blkno=bitno_to_blkno(metap,bit);
305319

306320
/*
@@ -320,7 +334,8 @@ _hash_getovflpage(Relation rel, Buffer metabuf)
320334
_hash_chgbufaccess(rel,metabuf,HASH_READ,HASH_NOLOCK);
321335
}
322336

323-
returnblkno;
337+
/* Fetch and return the recycled page */
338+
return_hash_getbuf(rel,blkno,HASH_WRITE);
324339
}
325340

326341
/*
@@ -388,7 +403,11 @@ _hash_freeovflpage(Relation rel, Buffer ovflbuf)
388403
prevblkno=ovflopaque->hasho_prevblkno;
389404
bucket=ovflopaque->hasho_bucket;
390405

391-
/* Zero the page for debugging's sake; then write and release it */
406+
/*
407+
* Zero the page for debugging's sake; then write and release it.
408+
* (Note: if we failed to zero the page here, we'd have problems
409+
* with the Assert in _hash_pageinit() when the page is reused.)
410+
*/
392411
MemSet(ovflpage,0,BufferGetPageSize(ovflbuf));
393412
_hash_wrtbuf(rel,ovflbuf);
394413

@@ -488,12 +507,19 @@ _hash_initbitmap(Relation rel, HashMetaPage metap, BlockNumber blkno)
488507
/*
489508
* It is okay to write-lock the new bitmap page while holding metapage
490509
* write lock, because no one else could be contending for the new page.
510+
* Also, the metapage lock makes it safe to extend the index using P_NEW,
511+
* which we want to do to ensure the smgr's idea of the relation size
512+
* stays in step with ours.
491513
*
492514
* There is some loss of concurrency in possibly doing I/O for the new
493515
* page while holding the metapage lock, but this path is taken so seldom
494516
* that it's not worth worrying about.
495517
*/
496-
buf=_hash_getbuf(rel,blkno,HASH_WRITE);
518+
buf=_hash_getbuf(rel,P_NEW,HASH_WRITE);
519+
if (BufferGetBlockNumber(buf)!=blkno)
520+
elog(ERROR,"unexpected hash relation size: %u, should be %u",
521+
BufferGetBlockNumber(buf),blkno);
522+
497523
pg=BufferGetPage(buf);
498524

499525
/* initialize the page */

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp