Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit1aba62e

Browse files
committed
Allow per-tablespace effective_io_concurrency
Per discussion, nowadays it is possible to have tablespaces that have wildly different I/O characteristics from others. Setting different effective_io_concurrency parameters for those has been measured to improve performance. Author: Julien Rouhaud. Reviewed by: Andres Freund.
1 parent665a00c commit1aba62e

File tree

12 files changed

+145
-63
lines changed

12 files changed

+145
-63
lines changed

‎doc/src/sgml/config.sgml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1901,7 +1901,10 @@ include_dir 'conf.d'
19011901
</para>
19021902

19031903
<para>
1904-
The default is 1 on supported systems, otherwise 0.
1904+
The default is 1 on supported systems, otherwise 0. This value can
1905+
be overridden for tables in a particular tablespace by setting the
1906+
tablespace parameter of the same name (see
1907+
<xref linkend="sql-altertablespace">).
19051908
</para>
19061909
</listitem>
19071910
</varlistentry>

‎doc/src/sgml/ref/create_tablespace.sgml

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -104,14 +104,15 @@ CREATE TABLESPACE <replaceable class="parameter">tablespace_name</replaceable>
104104
<listitem>
105105
<para>
106106
A tablespace parameter to be set or reset. Currently, the only
107-
available parameters are <varname>seq_page_cost</> and
108-
<varname>random_page_cost</>. Setting either value for a particular
109-
tablespace will override the planner's usual estimate of the cost of
110-
reading pages from tables in that tablespace, as established by
111-
the configuration parameters of the same name (see
112-
<xref linkend="guc-seq-page-cost">,
113-
<xref linkend="guc-random-page-cost">). This may be useful if one
114-
tablespace is located on a disk which is faster or slower than the
107+
available parameters are <varname>seq_page_cost</>,
108+
<varname>random_page_cost</> and <varname>effective_io_concurrency</>.
109+
Setting any of these values for a particular tablespace will override the
110+
planner's usual estimate of the cost of reading pages from tables in
111+
that tablespace, as established by the configuration parameters of the
112+
same name (see <xref linkend="guc-seq-page-cost">,
113+
<xref linkend="guc-random-page-cost">,
114+
<xref linkend="guc-effective-io-concurrency">). This may be useful if
115+
one tablespace is located on a disk which is faster or slower than the
115116
remainder of the I/O subsystem.
116117
</para>
117118
</listitem>

‎src/backend/access/common/reloptions.c

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -254,6 +254,19 @@ static relopt_int intRelOpts[] =
254254
},
255255
-1,64,MAX_KILOBYTES
256256
},
257+
{
258+
{
259+
"effective_io_concurrency",
260+
"Number of simultaneous requests that can be handled efficiently by the disk subsystem.",
261+
RELOPT_KIND_TABLESPACE,
262+
AccessExclusiveLock
263+
},
264+
#ifdefUSE_PREFETCH
265+
-1,0,MAX_IO_CONCURRENCY
266+
#else
267+
0,0,0
268+
#endif
269+
},
257270

258271
/* list terminator */
259272
{{NULL}}
@@ -1438,7 +1451,8 @@ tablespace_reloptions(Datum reloptions, bool validate)
14381451
intnumoptions;
14391452
staticconstrelopt_parse_elttab[]= {
14401453
{"random_page_cost",RELOPT_TYPE_REAL, offsetof(TableSpaceOpts,random_page_cost)},
1441-
{"seq_page_cost",RELOPT_TYPE_REAL, offsetof(TableSpaceOpts,seq_page_cost)}
1454+
{"seq_page_cost",RELOPT_TYPE_REAL, offsetof(TableSpaceOpts,seq_page_cost)},
1455+
{"effective_io_concurrency",RELOPT_TYPE_INT, offsetof(TableSpaceOpts,effective_io_concurrency)}
14421456
};
14431457

14441458
options=parseRelOptions(reloptions,validate,RELOPT_KIND_TABLESPACE,

‎src/backend/executor/nodeBitmapHeapscan.c

Lines changed: 27 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444
#include"storage/predicate.h"
4545
#include"utils/memutils.h"
4646
#include"utils/rel.h"
47+
#include"utils/spccache.h"
4748
#include"utils/snapmgr.h"
4849
#include"utils/tqual.h"
4950

@@ -95,9 +96,8 @@ BitmapHeapNext(BitmapHeapScanState *node)
9596
* prefetching. node->prefetch_pages tracks exactly how many pages ahead
9697
* the prefetch iterator is. Also, node->prefetch_target tracks the
9798
* desired prefetch distance, which starts small and increases up to the
98-
* GUC-controlled maximum, target_prefetch_pages. This is to avoid doing
99-
* a lot of prefetching in a scan that stops after a few tuples because of
100-
* a LIMIT.
99+
* node->prefetch_maximum. This is to avoid doing a lot of prefetching in
100+
* a scan that stops after a few tuples because of a LIMIT.
101101
*/
102102
if (tbm==NULL)
103103
{
@@ -111,7 +111,7 @@ BitmapHeapNext(BitmapHeapScanState *node)
111111
node->tbmres=tbmres=NULL;
112112

113113
#ifdefUSE_PREFETCH
114-
if (target_prefetch_pages>0)
114+
if (node->prefetch_maximum>0)
115115
{
116116
node->prefetch_iterator=prefetch_iterator=tbm_begin_iterate(tbm);
117117
node->prefetch_pages=0;
@@ -188,10 +188,10 @@ BitmapHeapNext(BitmapHeapScanState *node)
188188
* page/tuple, then to one after the second tuple is fetched, then
189189
* it doubles as later pages are fetched.
190190
*/
191-
if (node->prefetch_target >=target_prefetch_pages)
191+
if (node->prefetch_target >=node->prefetch_maximum)
192192
/* don't increase any further */ ;
193-
elseif (node->prefetch_target >=target_prefetch_pages /2)
194-
node->prefetch_target=target_prefetch_pages;
193+
elseif (node->prefetch_target >=node->prefetch_maximum /2)
194+
node->prefetch_target=node->prefetch_maximum;
195195
elseif (node->prefetch_target>0)
196196
node->prefetch_target *=2;
197197
else
@@ -211,7 +211,7 @@ BitmapHeapNext(BitmapHeapScanState *node)
211211
* Try to prefetch at least a few pages even before we get to the
212212
* second page if we don't stop reading after the first tuple.
213213
*/
214-
if (node->prefetch_target<target_prefetch_pages)
214+
if (node->prefetch_target<node->prefetch_maximum)
215215
node->prefetch_target++;
216216
#endif/* USE_PREFETCH */
217217
}
@@ -539,6 +539,7 @@ ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
539539
{
540540
BitmapHeapScanState*scanstate;
541541
RelationcurrentRelation;
542+
intio_concurrency;
542543

543544
/* check for unsupported flags */
544545
Assert(!(eflags& (EXEC_FLAG_BACKWARD |EXEC_FLAG_MARK)));
@@ -564,6 +565,8 @@ ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
564565
scanstate->prefetch_iterator=NULL;
565566
scanstate->prefetch_pages=0;
566567
scanstate->prefetch_target=0;
568+
/* may be updated below */
569+
scanstate->prefetch_maximum=target_prefetch_pages;
567570

568571
/*
569572
* Miscellaneous initialization
@@ -598,6 +601,22 @@ ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
598601
*/
599602
currentRelation=ExecOpenScanRelation(estate,node->scan.scanrelid,eflags);
600603

604+
/*
605+
* Determine the maximum for prefetch_target. If the tablespace has a
606+
* specific IO concurrency set, use that to compute the corresponding
607+
* maximum value; otherwise, we already initialized to the value computed
608+
* by the GUC machinery.
609+
*/
610+
io_concurrency=
611+
get_tablespace_io_concurrency(currentRelation->rd_rel->reltablespace);
612+
if (io_concurrency!=effective_io_concurrency)
613+
{
614+
doublemaximum;
615+
616+
if (ComputeIoConcurrency(io_concurrency,&maximum))
617+
scanstate->prefetch_maximum=rint(maximum);
618+
}
619+
601620
scanstate->ss.ss_currentRelation=currentRelation;
602621

603622
/*

‎src/backend/storage/buffer/bufmgr.c

Lines changed: 62 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,11 +80,14 @@ boolzero_damaged_pages = false;
8080
intbgwriter_lru_maxpages=100;
8181
doublebgwriter_lru_multiplier=2.0;
8282
booltrack_io_timing= false;
83+
inteffective_io_concurrency=0;
8384

8485
/*
8586
* How many buffers PrefetchBuffer callers should try to stay ahead of their
8687
* ReadBuffer calls by. This is maintained by the assign hook for
87-
* effective_io_concurrency. Zero means "never prefetch".
88+
* effective_io_concurrency. Zero means "never prefetch". This value is
89+
* only used for buffers not belonging to tablespaces that have their
90+
* effective_io_concurrency parameter set.
8891
*/
8992
inttarget_prefetch_pages=0;
9093

@@ -415,6 +418,64 @@ static void CheckForBufferLeaks(void);
415418
staticintrnode_comparator(constvoid*p1,constvoid*p2);
416419

417420

421+
/*
422+
* ComputeIoConcurrency -- get the number of pages to prefetch for a given
423+
*number of spindles.
424+
*/
425+
bool
426+
ComputeIoConcurrency(intio_concurrency,double*target)
427+
{
428+
doublenew_prefetch_pages=0.0;
429+
inti;
430+
431+
/*
432+
* Make sure the io_concurrency value is within valid range; it may have
433+
* been forced with a manual pg_tablespace update.
434+
*/
435+
io_concurrency=Min(Max(io_concurrency,0),MAX_IO_CONCURRENCY);
436+
437+
/*----------
438+
* The user-visible GUC parameter is the number of drives (spindles),
439+
* which we need to translate to a number-of-pages-to-prefetch target.
440+
* The target value is stashed in *extra and then assigned to the actual
441+
* variable by assign_effective_io_concurrency.
442+
*
443+
* The expected number of prefetch pages needed to keep N drives busy is:
444+
*
445+
* drives | I/O requests
446+
* -------+----------------
447+
*1 | 1
448+
*2 | 2/1 + 2/2 = 3
449+
*3 | 3/1 + 3/2 + 3/3 = 5 1/2
450+
*4 | 4/1 + 4/2 + 4/3 + 4/4 = 8 1/3
451+
*n | n * H(n)
452+
*
453+
* This is called the "coupon collector problem" and H(n) is called the
454+
* harmonic series. This could be approximated by n * ln(n), but for
455+
* reasonable numbers of drives we might as well just compute the series.
456+
*
457+
* Alternatively we could set the target to the number of pages necessary
458+
* so that the expected number of active spindles is some arbitrary
459+
* percentage of the total. This sounds the same but is actually slightly
460+
* different. The result ends up being ln(1-P)/ln((n-1)/n) where P is
461+
* that desired fraction.
462+
*
463+
* Experimental results show that both of these formulas aren't aggressive
464+
* enough, but we don't really have any better proposals.
465+
*
466+
* Note that if io_concurrency = 0 (disabled), we must set target = 0.
467+
*----------
468+
*/
469+
470+
for (i=1;i <=io_concurrency;i++)
471+
new_prefetch_pages+= (double)io_concurrency / (double)i;
472+
473+
*target=new_prefetch_pages;
474+
475+
/* This range check shouldn't fail, but let's be paranoid */
476+
return (new_prefetch_pages>0.0&&new_prefetch_pages< (double)INT_MAX);
477+
}
478+
418479
/*
419480
* PrefetchBuffer -- initiate asynchronous read of a block of a relation
420481
*

‎src/backend/utils/cache/spccache.c

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
#include"commands/tablespace.h"
2424
#include"miscadmin.h"
2525
#include"optimizer/cost.h"
26+
#include"storage/bufmgr.h"
2627
#include"utils/catcache.h"
2728
#include"utils/hsearch.h"
2829
#include"utils/inval.h"
@@ -198,3 +199,14 @@ get_tablespace_page_costs(Oid spcid,
198199
*spc_seq_page_cost=spc->opts->seq_page_cost;
199200
}
200201
}
202+
203+
int
204+
get_tablespace_io_concurrency(Oidspcid)
205+
{
206+
TableSpaceCacheEntry*spc=get_tablespace(spcid);
207+
208+
if (!spc->opts||spc->opts->effective_io_concurrency<0)
209+
returneffective_io_concurrency;
210+
else
211+
returnspc->opts->effective_io_concurrency;
212+
}

‎src/backend/utils/misc/guc.c

Lines changed: 3 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -490,7 +490,6 @@ static intwal_block_size;
490490
staticbooldata_checksums;
491491
staticintwal_segment_size;
492492
staticboolinteger_datetimes;
493-
staticinteffective_io_concurrency;
494493
staticboolassert_enabled;
495494

496495
/* should be static, but commands/variable.c needs to get at this */
@@ -2352,7 +2351,7 @@ static struct config_int ConfigureNamesInt[] =
23522351
},
23532352
&effective_io_concurrency,
23542353
#ifdefUSE_PREFETCH
2355-
1,0,1000,
2354+
1,0,MAX_IO_CONCURRENCY,
23562355
#else
23572356
0,0,0,
23582357
#endif
@@ -9986,47 +9985,9 @@ static bool
99869985
check_effective_io_concurrency(int*newval,void**extra,GucSourcesource)
99879986
{
99889987
#ifdefUSE_PREFETCH
9989-
doublenew_prefetch_pages=0.0;
9990-
inti;
9991-
9992-
/*----------
9993-
* The user-visible GUC parameter is the number of drives (spindles),
9994-
* which we need to translate to a number-of-pages-to-prefetch target.
9995-
* The target value is stashed in *extra and then assigned to the actual
9996-
* variable by assign_effective_io_concurrency.
9997-
*
9998-
* The expected number of prefetch pages needed to keep N drives busy is:
9999-
*
10000-
* drives | I/O requests
10001-
* -------+----------------
10002-
*1 | 1
10003-
*2 | 2/1 + 2/2 = 3
10004-
*3 | 3/1 + 3/2 + 3/3 = 5 1/2
10005-
*4 | 4/1 + 4/2 + 4/3 + 4/4 = 8 1/3
10006-
*n | n * H(n)
10007-
*
10008-
* This is called the "coupon collector problem" and H(n) is called the
10009-
* harmonic series. This could be approximated by n * ln(n), but for
10010-
* reasonable numbers of drives we might as well just compute the series.
10011-
*
10012-
* Alternatively we could set the target to the number of pages necessary
10013-
* so that the expected number of active spindles is some arbitrary
10014-
* percentage of the total. This sounds the same but is actually slightly
10015-
* different. The result ends up being ln(1-P)/ln((n-1)/n) where P is
10016-
* that desired fraction.
10017-
*
10018-
* Experimental results show that both of these formulas aren't aggressive
10019-
* enough, but we don't really have any better proposals.
10020-
*
10021-
* Note that if *newval = 0 (disabled), we must set target = 0.
10022-
*----------
10023-
*/
10024-
10025-
for (i=1;i <=*newval;i++)
10026-
new_prefetch_pages+= (double)*newval / (double)i;
9988+
doublenew_prefetch_pages;
100279989

10028-
/* This range check shouldn't fail, but let's be paranoid */
10029-
if (new_prefetch_pages >=0.0&&new_prefetch_pages< (double)INT_MAX)
9990+
if (ComputeIoConcurrency(*newval,&new_prefetch_pages))
100309991
{
100319992
int*myextra= (int*)guc_malloc(ERROR,sizeof(int));
100329993

‎src/bin/psql/tab-complete.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1885,7 +1885,7 @@ psql_completion(const char *text, int start, int end)
18851885
pg_strcasecmp(prev_wd,"(")==0)
18861886
{
18871887
staticconstchar*constlist_TABLESPACEOPTIONS[]=
1888-
{"seq_page_cost","random_page_cost",NULL};
1888+
{"seq_page_cost","random_page_cost","effective_io_concurrency",NULL};
18891889

18901890
COMPLETE_WITH_LIST(list_TABLESPACEOPTIONS);
18911891
}

‎src/include/commands/tablespace.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ typedef struct TableSpaceOpts
3939
int32vl_len_;/* varlena header (do not touch directly!) */
4040
float8random_page_cost;
4141
float8seq_page_cost;
42+
inteffective_io_concurrency;
4243
}TableSpaceOpts;
4344

4445
externOidCreateTableSpace(CreateTableSpaceStmt*stmt);

‎src/include/nodes/execnodes.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1424,7 +1424,8 @@ typedef struct BitmapIndexScanState
14241424
*lossy_pages total number of lossy pages retrieved
14251425
*prefetch_iterator iterator for prefetching ahead of current page
14261426
*prefetch_pages # pages prefetch iterator is ahead of current
1427-
*prefetch_target target prefetch distance
1427+
*prefetch_target current target prefetch distance
1428+
*prefetch_maximum maximum value for prefetch_target
14281429
* ----------------
14291430
*/
14301431
typedefstructBitmapHeapScanState
@@ -1439,6 +1440,7 @@ typedef struct BitmapHeapScanState
14391440
TBMIterator*prefetch_iterator;
14401441
intprefetch_pages;
14411442
intprefetch_target;
1443+
intprefetch_maximum;
14421444
}BitmapHeapScanState;
14431445

14441446
/* ----------------

‎src/include/storage/bufmgr.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,11 +58,17 @@ extern inttarget_prefetch_pages;
5858
/* in buf_init.c */
5959
externPGDLLIMPORTchar*BufferBlocks;
6060

61+
/* defined in bufmgr.c; set through the GUC machinery in guc.c */
62+
externinteffective_io_concurrency;
63+
6164
/* in localbuf.c */
6265
externPGDLLIMPORTintNLocBuffer;
6366
externPGDLLIMPORTBlock*LocalBufferBlockPointers;
6467
externPGDLLIMPORTint32*LocalRefCount;
6568

69+
/* upper limit for effective_io_concurrency */
70+
#defineMAX_IO_CONCURRENCY 1000
71+
6672
/* special block number for ReadBuffer() */
6773
#defineP_NEWInvalidBlockNumber/* grow the file to get a new page */
6874

@@ -144,6 +150,7 @@ extern PGDLLIMPORT int32 *LocalRefCount;
144150
/*
145151
* prototypes for functions in bufmgr.c
146152
*/
153+
externboolComputeIoConcurrency(intio_concurrency,double*target);
147154
externvoidPrefetchBuffer(Relationreln,ForkNumberforkNum,
148155
BlockNumberblockNum);
149156
externBufferReadBuffer(Relationreln,BlockNumberblockNum);

‎src/include/utils/spccache.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,5 +15,6 @@
1515

1616
voidget_tablespace_page_costs(Oidspcid,float8*spc_random_page_cost,
1717
float8*spc_seq_page_cost);
18+
intget_tablespace_io_concurrency(Oidspcid);
1819

1920
#endif/* SPCCACHE_H */

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp