Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commitc89bdf7

Browse files
committed
Eliminate some more O(N^2) behaviors in pg_dump/pg_restore.
This patch fixes three places (which AFAICT is all of them) where runtimewas O(N^2) in the number of TOC entries, by using an index array to replacelinear searches of the TOC list. This performance issue is a bit less badthan those recently fixed, because it depends on the number of items dumpednot the number in the source database, so the problem can be dodged bydoing partial dumps.The previous coding already had an instance of one of the two index arraysneeded, but it was only calculated in parallel-restore cases; now we needit all the time. I also chose to move the arrays into the ArchiveHandledata structure, to make this code a bit more ready for the day that wetry to sling multiple ArchiveHandles around in pg_dump or pg_restore.Since we still need some server-side work before pg_dump can really copenicely with tens of thousands of tables, there's probably little point inback-patching.
1 parent2d612ab commitc89bdf7

File tree

2 files changed

+101
-94
lines changed

2 files changed

+101
-94
lines changed

‎src/bin/pg_dump/pg_backup_archiver.c

Lines changed: 96 additions & 93 deletions
Original file line numberDiff line numberDiff line change
@@ -114,19 +114,13 @@ typedef struct _outputContext
114114

115115
staticconstchar*modulename=gettext_noop("archiver");
116116

117-
/* index array created by fix_dependencies -- only used in parallel restore */
118-
staticTocEntry**tocsByDumpId;/* index by dumpId - 1 */
119-
staticDumpIdmaxDumpId;/* length of above array */
120-
121117

122118
staticArchiveHandle*_allocAH(constchar*FileSpec,constArchiveFormatfmt,
123119
constintcompression,ArchiveModemode);
124120
staticvoid_getObjectDescription(PQExpBufferbuf,TocEntry*te,
125121
ArchiveHandle*AH);
126122
staticvoid_printTocEntry(ArchiveHandle*AH,TocEntry*te,RestoreOptions*ropt,boolisData,boolacl_pass);
127123
staticchar*replace_line_endings(constchar*str);
128-
129-
130124
staticvoid_doSetFixedOutputState(ArchiveHandle*AH);
131125
staticvoid_doSetSessionAuth(ArchiveHandle*AH,constchar*user);
132126
staticvoid_doSetWithOids(ArchiveHandle*AH,constboolwithOids);
@@ -141,6 +135,7 @@ static teReqs _tocEntryRequired(TocEntry *te, RestoreOptions *ropt, bool include
141135
staticbool_tocEntryIsACL(TocEntry*te);
142136
staticvoid_disableTriggersIfNecessary(ArchiveHandle*AH,TocEntry*te,RestoreOptions*ropt);
143137
staticvoid_enableTriggersIfNecessary(ArchiveHandle*AH,TocEntry*te,RestoreOptions*ropt);
138+
staticvoidbuildTocEntryArrays(ArchiveHandle*AH);
144139
staticTocEntry*getTocEntryByDumpId(ArchiveHandle*AH,DumpIdid);
145140
staticvoid_moveBefore(ArchiveHandle*AH,TocEntry*pos,TocEntry*te);
146141
staticint_discoverArchiveFormat(ArchiveHandle*AH);
@@ -171,9 +166,8 @@ static void mark_work_done(ArchiveHandle *AH, TocEntry *ready_list,
171166
ParallelSlot*slots,intn_slots);
172167
staticvoidfix_dependencies(ArchiveHandle*AH);
173168
staticboolhas_lock_conflicts(TocEntry*te1,TocEntry*te2);
174-
staticvoidrepoint_table_dependencies(ArchiveHandle*AH,
175-
DumpIdtableId,DumpIdtableDataId);
176-
staticvoididentify_locking_dependencies(TocEntry*te);
169+
staticvoidrepoint_table_dependencies(ArchiveHandle*AH);
170+
staticvoididentify_locking_dependencies(ArchiveHandle*AH,TocEntry*te);
177171
staticvoidreduce_dependencies(ArchiveHandle*AH,TocEntry*te,
178172
TocEntry*ready_list);
179173
staticvoidmark_create_done(ArchiveHandle*AH,TocEntry*te);
@@ -305,6 +299,13 @@ RestoreArchive(Archive *AHX, RestoreOptions *ropt)
305299
}
306300
#endif
307301

302+
/*
303+
* Prepare index arrays, so we can assume we have them throughout restore.
304+
* It's possible we already did this, though.
305+
*/
306+
if (AH->tocsByDumpId==NULL)
307+
buildTocEntryArrays(AH);
308+
308309
/*
309310
* If we're using a DB connection, then connect it.
310311
*/
@@ -1524,16 +1525,68 @@ _moveBefore(ArchiveHandle *AH, TocEntry *pos, TocEntry *te)
15241525
pos->prev=te;
15251526
}
15261527

1527-
staticTocEntry*
1528-
getTocEntryByDumpId(ArchiveHandle*AH,DumpIdid)
1528+
/*
1529+
* Build index arrays for the TOC list
1530+
*
1531+
* This should be invoked only after we have created or read in all the TOC
1532+
* items.
1533+
*
1534+
* The arrays are indexed by dump ID (so entry zero is unused). Note that the
1535+
* array entries run only up to maxDumpId. We might see dependency dump IDs
1536+
* beyond that (if the dump was partial); so always check the array bound
1537+
* before trying to touch an array entry.
1538+
*/
1539+
staticvoid
1540+
buildTocEntryArrays(ArchiveHandle*AH)
15291541
{
1542+
DumpIdmaxDumpId=AH->maxDumpId;
15301543
TocEntry*te;
15311544

1545+
AH->tocsByDumpId= (TocEntry**)pg_calloc(maxDumpId+1,sizeof(TocEntry*));
1546+
AH->tableDataId= (DumpId*)pg_calloc(maxDumpId+1,sizeof(DumpId));
1547+
15321548
for (te=AH->toc->next;te!=AH->toc;te=te->next)
15331549
{
1534-
if (te->dumpId==id)
1535-
returnte;
1550+
/* this check is purely paranoia, maxDumpId should be correct */
1551+
if (te->dumpId <=0||te->dumpId>maxDumpId)
1552+
exit_horribly(modulename,"bad dumpId");
1553+
1554+
/* tocsByDumpId indexes all TOCs by their dump ID */
1555+
AH->tocsByDumpId[te->dumpId]=te;
1556+
1557+
/*
1558+
* tableDataId provides the TABLE DATA item's dump ID for each TABLE
1559+
* TOC entry that has a DATA item. We compute this by reversing the
1560+
* TABLE DATA item's dependency, knowing that a TABLE DATA item has
1561+
* just one dependency and it is the TABLE item.
1562+
*/
1563+
if (strcmp(te->desc,"TABLE DATA")==0&&te->nDeps>0)
1564+
{
1565+
DumpIdtableId=te->dependencies[0];
1566+
1567+
/*
1568+
* The TABLE item might not have been in the archive, if this was
1569+
* a data-only dump; but its dump ID should be less than its data
1570+
* item's dump ID, so there should be a place for it in the array.
1571+
*/
1572+
if (tableId <=0||tableId>maxDumpId)
1573+
exit_horribly(modulename,"bad table dumpId for TABLE DATA item");
1574+
1575+
AH->tableDataId[tableId]=te->dumpId;
1576+
}
15361577
}
1578+
}
1579+
1580+
staticTocEntry*
1581+
getTocEntryByDumpId(ArchiveHandle*AH,DumpIdid)
1582+
{
1583+
/* build index arrays if we didn't already */
1584+
if (AH->tocsByDumpId==NULL)
1585+
buildTocEntryArrays(AH);
1586+
1587+
if (id>0&&id <=AH->maxDumpId)
1588+
returnAH->tocsByDumpId[id];
1589+
15371590
returnNULL;
15381591
}
15391592

@@ -3978,9 +4031,8 @@ mark_work_done(ArchiveHandle *AH, TocEntry *ready_list,
39784031
* This function takes care of fixing up some missing or badly designed
39794032
* dependencies, and then prepares subsidiary data structures that will be
39804033
* used in the main parallel-restore logic, including:
3981-
* 1. We build the tocsByDumpId[] index array.
3982-
* 2. We build the revDeps[] arrays of incoming dependency dumpIds.
3983-
* 3. We set up depCount fields that are the number of as-yet-unprocessed
4034+
* 1. We build the revDeps[] arrays of incoming dependency dumpIds.
4035+
* 2. We set up depCount fields that are the number of as-yet-unprocessed
39844036
* dependencies for each TOC entry.
39854037
*
39864038
* We also identify locking dependencies so that we can avoid trying to
@@ -3993,22 +4045,11 @@ fix_dependencies(ArchiveHandle *AH)
39934045
inti;
39944046

39954047
/*
3996-
* It is convenient to have an array that indexes the TOC entries by dump
3997-
* ID, rather than searching the TOC list repeatedly. Entries for dump
3998-
* IDs not present in the TOC will be NULL.
3999-
*
4000-
* NOTE: because maxDumpId is just the highest dump ID defined in the
4001-
* archive, there might be dependencies for IDs > maxDumpId. All uses of
4002-
* this array must guard against out-of-range dependency numbers.
4003-
*
4004-
* Also, initialize the depCount/revDeps/nRevDeps fields, and make sure
4005-
* the TOC items are marked as not being in any parallel-processing list.
4048+
* Initialize the depCount/revDeps/nRevDeps fields, and make sure the TOC
4049+
* items are marked as not being in any parallel-processing list.
40064050
*/
4007-
maxDumpId=AH->maxDumpId;
4008-
tocsByDumpId= (TocEntry**)pg_calloc(maxDumpId,sizeof(TocEntry*));
40094051
for (te=AH->toc->next;te!=AH->toc;te=te->next)
40104052
{
4011-
tocsByDumpId[te->dumpId-1]=te;
40124053
te->depCount=te->nDeps;
40134054
te->revDeps=NULL;
40144055
te->nRevDeps=0;
@@ -4019,34 +4060,9 @@ fix_dependencies(ArchiveHandle *AH)
40194060
/*
40204061
* POST_DATA items that are shown as depending on a table need to be
40214062
* re-pointed to depend on that table's data, instead. This ensures they
4022-
* won't get scheduled until the data has been loaded. We handle this by
4023-
* first finding TABLE/TABLE DATA pairs and then scanning all the
4024-
* dependencies.
4025-
*
4026-
* Note: currently, a TABLE DATA should always have exactly one
4027-
* dependency, on its TABLE item. So we don't bother to search, but look
4028-
* just at the first dependency. We do trouble to make sure that it's a
4029-
* TABLE, if possible.However, if the dependency isn't in the archive
4030-
* then just assume it was a TABLE; this is to cover cases where the table
4031-
* was suppressed but we have the data and some dependent post-data items.
4032-
*
4033-
* XXX this is O(N^2) if there are a lot of tables. We ought to fix
4034-
* pg_dump to produce correctly-linked dependencies in the first place.
4063+
* won't get scheduled until the data has been loaded.
40354064
*/
4036-
for (te=AH->toc->next;te!=AH->toc;te=te->next)
4037-
{
4038-
if (strcmp(te->desc,"TABLE DATA")==0&&te->nDeps>0)
4039-
{
4040-
DumpIdtableId=te->dependencies[0];
4041-
4042-
if (tableId>maxDumpId||
4043-
tocsByDumpId[tableId-1]==NULL||
4044-
strcmp(tocsByDumpId[tableId-1]->desc,"TABLE")==0)
4045-
{
4046-
repoint_table_dependencies(AH,tableId,te->dumpId);
4047-
}
4048-
}
4049-
}
4065+
repoint_table_dependencies(AH);
40504066

40514067
/*
40524068
* Pre-8.4 versions of pg_dump neglected to set up a dependency from BLOB
@@ -4093,8 +4109,8 @@ fix_dependencies(ArchiveHandle *AH)
40934109
{
40944110
DumpIddepid=te->dependencies[i];
40954111

4096-
if (depid <=maxDumpId&&tocsByDumpId[depid-1]!=NULL)
4097-
tocsByDumpId[depid-1]->nRevDeps++;
4112+
if (depid <=AH->maxDumpId&&AH->tocsByDumpId[depid]!=NULL)
4113+
AH->tocsByDumpId[depid]->nRevDeps++;
40984114
else
40994115
te->depCount--;
41004116
}
@@ -4121,9 +4137,9 @@ fix_dependencies(ArchiveHandle *AH)
41214137
{
41224138
DumpIddepid=te->dependencies[i];
41234139

4124-
if (depid <=maxDumpId&&tocsByDumpId[depid-1]!=NULL)
4140+
if (depid <=AH->maxDumpId&&AH->tocsByDumpId[depid]!=NULL)
41254141
{
4126-
TocEntry*otherte=tocsByDumpId[depid-1];
4142+
TocEntry*otherte=AH->tocsByDumpId[depid];
41274143

41284144
otherte->revDeps[otherte->nRevDeps++]=te->dumpId;
41294145
}
@@ -4137,32 +4153,34 @@ fix_dependencies(ArchiveHandle *AH)
41374153
{
41384154
te->lockDeps=NULL;
41394155
te->nLockDeps=0;
4140-
identify_locking_dependencies(te);
4156+
identify_locking_dependencies(AH,te);
41414157
}
41424158
}
41434159

41444160
/*
4145-
* Change dependencies ontableIdto depend ontableDataId instead,
4161+
* Change dependencies ontable itemsto depend ontable data items instead,
41464162
* but only in POST_DATA items.
41474163
*/
41484164
staticvoid
4149-
repoint_table_dependencies(ArchiveHandle*AH,
4150-
DumpIdtableId,DumpIdtableDataId)
4165+
repoint_table_dependencies(ArchiveHandle*AH)
41514166
{
41524167
TocEntry*te;
41534168
inti;
4169+
DumpIdolddep;
41544170

41554171
for (te=AH->toc->next;te!=AH->toc;te=te->next)
41564172
{
41574173
if (te->section!=SECTION_POST_DATA)
41584174
continue;
41594175
for (i=0;i<te->nDeps;i++)
41604176
{
4161-
if (te->dependencies[i]==tableId)
4177+
olddep=te->dependencies[i];
4178+
if (olddep <=AH->maxDumpId&&
4179+
AH->tableDataId[olddep]!=0)
41624180
{
4163-
te->dependencies[i]=tableDataId;
4181+
te->dependencies[i]=AH->tableDataId[olddep];
41644182
ahlog(AH,2,"transferring dependency %d -> %d to %d\n",
4165-
te->dumpId,tableId,tableDataId);
4183+
te->dumpId,olddep,AH->tableDataId[olddep]);
41664184
}
41674185
}
41684186
}
@@ -4174,7 +4192,7 @@ repoint_table_dependencies(ArchiveHandle *AH,
41744192
* itself). Record their dump IDs in the entry's lockDeps[] array.
41754193
*/
41764194
staticvoid
4177-
identify_locking_dependencies(TocEntry*te)
4195+
identify_locking_dependencies(ArchiveHandle*AH,TocEntry*te)
41784196
{
41794197
DumpId*lockids;
41804198
intnlockids;
@@ -4205,8 +4223,8 @@ identify_locking_dependencies(TocEntry *te)
42054223
{
42064224
DumpIddepid=te->dependencies[i];
42074225

4208-
if (depid <=maxDumpId&&tocsByDumpId[depid-1]&&
4209-
strcmp(tocsByDumpId[depid-1]->desc,"TABLE DATA")==0)
4226+
if (depid <=AH->maxDumpId&&AH->tocsByDumpId[depid]!=NULL&&
4227+
strcmp(AH->tocsByDumpId[depid]->desc,"TABLE DATA")==0)
42104228
lockids[nlockids++]=depid;
42114229
}
42124230

@@ -4234,7 +4252,7 @@ reduce_dependencies(ArchiveHandle *AH, TocEntry *te, TocEntry *ready_list)
42344252

42354253
for (i=0;i<te->nRevDeps;i++)
42364254
{
4237-
TocEntry*otherte=tocsByDumpId[te->revDeps[i]-1];
4255+
TocEntry*otherte=AH->tocsByDumpId[te->revDeps[i]];
42384256

42394257
otherte->depCount--;
42404258
if (otherte->depCount==0&&otherte->par_prev!=NULL)
@@ -4254,18 +4272,11 @@ reduce_dependencies(ArchiveHandle *AH, TocEntry *te, TocEntry *ready_list)
42544272
staticvoid
42554273
mark_create_done(ArchiveHandle*AH,TocEntry*te)
42564274
{
4257-
TocEntry*tes;
4258-
4259-
for (tes=AH->toc->next;tes!=AH->toc;tes=tes->next)
4275+
if (AH->tableDataId[te->dumpId]!=0)
42604276
{
4261-
if (strcmp(tes->desc,"TABLE DATA")==0&&
4262-
strcmp(tes->tag,te->tag)==0&&
4263-
strcmp(tes->namespace ?tes->namespace :"",
4264-
te->namespace ?te->namespace :"")==0)
4265-
{
4266-
tes->created= true;
4267-
break;
4268-
}
4277+
TocEntry*ted=AH->tocsByDumpId[AH->tableDataId[te->dumpId]];
4278+
4279+
ted->created= true;
42694280
}
42704281
}
42714282

@@ -4277,22 +4288,14 @@ static void
42774288
inhibit_data_for_failed_table(ArchiveHandle*AH,TocEntry*te)
42784289
{
42794290
RestoreOptions*ropt=AH->ropt;
4280-
TocEntry*tes;
42814291

42824292
ahlog(AH,1,"table \"%s\" could not be created, will not restore its data\n",
42834293
te->tag);
42844294

4285-
for (tes=AH->toc->next;tes!=AH->toc;tes=tes->next)
4295+
if (AH->tableDataId[te->dumpId]!=0)
42864296
{
4287-
if (strcmp(tes->desc,"TABLE DATA")==0&&
4288-
strcmp(tes->tag,te->tag)==0&&
4289-
strcmp(tes->namespace ?tes->namespace :"",
4290-
te->namespace ?te->namespace :"")==0)
4291-
{
4292-
/* mark it unwanted; we assume idWanted array already exists */
4293-
ropt->idWanted[tes->dumpId-1]= false;
4294-
break;
4295-
}
4297+
/* mark it unwanted; we assume idWanted array already exists */
4298+
ropt->idWanted[AH->tableDataId[te->dumpId]-1]= false;
42964299
}
42974300
}
42984301

‎src/bin/pg_dump/pg_backup_archiver.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -251,10 +251,14 @@ typedef struct _archiveHandle
251251
void*OF;
252252
intgzOut;/* Output file */
253253

254-
struct_tocEntry*toc;/*List of TOC entries */
254+
struct_tocEntry*toc;/*Header of circular list of TOC entries */
255255
inttocCount;/* Number of TOC entries */
256256
DumpIdmaxDumpId;/* largest DumpId among all TOC entries */
257257

258+
/* arrays created after the TOC list is complete: */
259+
struct_tocEntry**tocsByDumpId;/* TOCs indexed by dumpId */
260+
DumpId*tableDataId;/* TABLE DATA ids, indexed by table dumpId */
261+
258262
struct_tocEntry*currToc;/* Used when dumping data */
259263
intcompression;/* Compression requested on open Possible
260264
* values for compression: -1

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp