Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commitc02fdc9

Browse files
committed
Logical Tape Set: use min heap for freelist.
Previously, the freelist of blocks was tracked as anoccasionally-sorted array. A min heap is more resilient to largerfreelists or more frequent changes between reading and writing.Discussion:https://postgr.es/m/97c46a59c27f3c38e486ca170fcbc618d97ab049.camel%40j-davis.com
1 parentcac8ce4 commitc02fdc9

File tree

1 file changed

+104
-56
lines changed

1 file changed

+104
-56
lines changed

‎src/backend/utils/sort/logtape.c

Lines changed: 104 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -49,12 +49,8 @@
4949
* when reading, and read multiple blocks from the same tape in one go,
5050
* whenever the buffer becomes empty.
5151
*
52-
* To support the above policy of writing to the lowest free block,
53-
* ltsGetFreeBlock sorts the list of free block numbers into decreasing
54-
* order each time it is asked for a block and the list isn't currently
55-
* sorted. This is an efficient way to handle it because we expect cycles
56-
* of releasing many blocks followed by re-using many blocks, due to
57-
* the larger read buffer.
52+
* To support the above policy of writing to the lowest free block, the
53+
* freelist is a min heap.
5854
*
5955
* Since all the bookkeeping and buffer memory is allocated with palloc(),
6056
* and the underlying file(s) are made with OpenTemporaryFile, all resources
@@ -170,7 +166,7 @@ struct LogicalTapeSet
170166
/*
171167
* File size tracking. nBlocksWritten is the size of the underlying file,
172168
* in BLCKSZ blocks. nBlocksAllocated is the number of blocks allocated
173-
* byltsGetFreeBlock(), and it is always greater than or equal to
169+
* byltsReleaseBlock(), and it is always greater than or equal to
174170
* nBlocksWritten. Blocks between nBlocksAllocated and nBlocksWritten are
175171
* blocks that have been allocated for a tape, but have not been written
176172
* to the underlying file yet. nHoleBlocks tracks the total number of
@@ -188,17 +184,11 @@ struct LogicalTapeSet
188184
* If forgetFreeSpace is true then any freed blocks are simply forgotten
189185
* rather than being remembered in freeBlocks[]. See notes for
190186
* LogicalTapeSetForgetFreeSpace().
191-
*
192-
* If blocksSorted is true then the block numbers in freeBlocks are in
193-
* *decreasing* order, so that removing the last entry gives us the lowest
194-
* free block. We re-sort the blocks whenever a block is demanded; this
195-
* should be reasonably efficient given the expected usage pattern.
196187
*/
197188
boolforgetFreeSpace;/* are we remembering free blocks? */
198-
boolblocksSorted;/* is freeBlocks[] currently in order? */
199-
long*freeBlocks;/* resizable array */
200-
intnFreeBlocks;/* # of currently free blocks */
201-
intfreeBlocksLen;/* current allocated length of freeBlocks[] */
189+
long*freeBlocks;/* resizable array holding minheap */
190+
longnFreeBlocks;/* # of currently free blocks */
191+
SizefreeBlocksLen;/* current allocated length of freeBlocks[] */
202192

203193
/* The array of logical tapes. */
204194
intnTapes;/* # of logical tapes in set */
@@ -321,46 +311,88 @@ ltsReadFillBuffer(LogicalTapeSet *lts, LogicalTape *lt)
321311
return (lt->nbytes>0);
322312
}
323313

324-
/*
325-
* qsort comparator for sorting freeBlocks[] into decreasing order.
326-
*/
327-
staticint
328-
freeBlocks_cmp(constvoid*a,constvoid*b)
314+
staticinlinevoid
315+
swap_nodes(long*heap,unsigned longa,unsigned longb)
316+
{
317+
unsigned longswap;
318+
319+
swap=heap[a];
320+
heap[a]=heap[b];
321+
heap[b]=swap;
322+
}
323+
324+
staticinlineunsigned long
325+
left_offset(unsigned longi)
326+
{
327+
return2*i+1;
328+
}
329+
330+
staticinlineunsigned long
331+
right_offset(unsignedi)
332+
{
333+
return2*i+2;
334+
}
335+
336+
staticinlineunsigned long
337+
parent_offset(unsigned longi)
329338
{
330-
longablk=*((constlong*)a);
331-
longbblk=*((constlong*)b);
332-
333-
/* can't just subtract because long might be wider than int */
334-
if (ablk<bblk)
335-
return1;
336-
if (ablk>bblk)
337-
return-1;
338-
return0;
339+
return (i-1) /2;
339340
}
340341

341342
/*
342-
* Select a currently unused block for writing to.
343+
* Select the lowest currently unused block by taking the first element from
344+
* the freelist min heap.
343345
*/
344346
staticlong
345347
ltsGetFreeBlock(LogicalTapeSet*lts)
346348
{
347-
/*
348-
* If there are multiple free blocks, we select the one appearing last in
349-
* freeBlocks[] (after sorting the array if needed). If there are none,
350-
* assign the next block at the end of the file.
351-
*/
352-
if (lts->nFreeBlocks>0)
349+
long*heap=lts->freeBlocks;
350+
longblocknum;
351+
intheapsize;
352+
unsigned longpos;
353+
354+
/* freelist empty; allocate a new block */
355+
if (lts->nFreeBlocks==0)
356+
returnlts->nBlocksAllocated++;
357+
358+
if (lts->nFreeBlocks==1)
353359
{
354-
if (!lts->blocksSorted)
355-
{
356-
qsort((void*)lts->freeBlocks,lts->nFreeBlocks,
357-
sizeof(long),freeBlocks_cmp);
358-
lts->blocksSorted= true;
359-
}
360-
returnlts->freeBlocks[--lts->nFreeBlocks];
360+
lts->nFreeBlocks--;
361+
returnlts->freeBlocks[0];
361362
}
362-
else
363-
returnlts->nBlocksAllocated++;
363+
364+
/* take top of minheap */
365+
blocknum=heap[0];
366+
367+
/* replace with end of minheap array */
368+
heap[0]=heap[--lts->nFreeBlocks];
369+
370+
/* sift down */
371+
pos=0;
372+
heapsize=lts->nFreeBlocks;
373+
while (true)
374+
{
375+
unsigned longleft=left_offset(pos);
376+
unsigned longright=right_offset(pos);
377+
unsigned longmin_child;
378+
379+
if (left<heapsize&&right<heapsize)
380+
min_child= (heap[left]<heap[right]) ?left :right;
381+
elseif (left<heapsize)
382+
min_child=left;
383+
elseif (right<heapsize)
384+
min_child=right;
385+
else
386+
break;
387+
388+
if (heap[min_child] >=heap[pos])
389+
break;
390+
391+
swap_nodes(heap,min_child,pos);
392+
pos=min_child;
393+
}
394+
395+
returnblocknum;
364396
}
365397

366398
/*
@@ -369,7 +401,8 @@ ltsGetFreeBlock(LogicalTapeSet *lts)
369401
staticvoid
370402
ltsReleaseBlock(LogicalTapeSet*lts,longblocknum)
371403
{
372-
intndx;
404+
long*heap;
405+
unsigned longpos;
373406

374407
/*
375408
* Do nothing if we're no longer interested in remembering free space.
@@ -382,19 +415,35 @@ ltsReleaseBlock(LogicalTapeSet *lts, long blocknum)
382415
*/
383416
if (lts->nFreeBlocks >=lts->freeBlocksLen)
384417
{
418+
/*
419+
* If the freelist becomes very large, just return and leak this free
420+
* block.
421+
*/
422+
if (lts->freeBlocksLen*2>MaxAllocSize)
423+
return;
424+
385425
lts->freeBlocksLen *=2;
386426
lts->freeBlocks= (long*)repalloc(lts->freeBlocks,
387427
lts->freeBlocksLen*sizeof(long));
388428
}
389429

390-
/*
391-
* Add blocknum to array, and mark the array unsorted if it's no longer in
392-
* decreasing order.
393-
*/
394-
ndx=lts->nFreeBlocks++;
395-
lts->freeBlocks[ndx]=blocknum;
396-
if (ndx>0&&lts->freeBlocks[ndx-1]<blocknum)
397-
lts->blocksSorted= false;
430+
heap=lts->freeBlocks;
431+
pos=lts->nFreeBlocks;
432+
433+
/* place entry at end of minheap array */
434+
heap[pos]=blocknum;
435+
lts->nFreeBlocks++;
436+
437+
/* sift up */
438+
while (pos!=0)
439+
{
440+
unsigned longparent=parent_offset(pos);
441+
if (heap[parent]<heap[pos])
442+
break;
443+
444+
swap_nodes(heap,parent,pos);
445+
pos=parent;
446+
}
398447
}
399448

400449
/*
@@ -524,7 +573,6 @@ LogicalTapeSetCreate(int ntapes, TapeShare *shared, SharedFileSet *fileset,
524573
lts->nBlocksWritten=0L;
525574
lts->nHoleBlocks=0L;
526575
lts->forgetFreeSpace= false;
527-
lts->blocksSorted= true;/* a zero-length array is sorted ... */
528576
lts->freeBlocksLen=32;/* reasonable initial guess */
529577
lts->freeBlocks= (long*)palloc(lts->freeBlocksLen*sizeof(long));
530578
lts->nFreeBlocks=0;

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp