Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commite1a6679

Browse files
committed
Fix O(N^2) behavior in pg_dump when many objects are in dependency loops.
Combining the loop workspace with the record of already-processed objectsmight have been a cute trick, but it behaves horridly if there are manydependency loops to repair: the time spent in the first step of findLoop()grows as O(N^2). Instead use a separate flag array indexed by dump ID,which we can check in constant time. The length of the workspace arrayis now never more than the actual length of a dependency chain, whichshould be reasonably short in all cases of practical interest. The codeis noticeably easier to understand this way, too.Per gripe from Mike Roest. Since this is a longstanding performance bug,backpatch to all supported versions.
1 parentb77da19 commite1a6679

File tree

1 file changed

+59
-60
lines changed

1 file changed

+59
-60
lines changed

‎src/bin/pg_dump/pg_dump_sort.c

Lines changed: 59 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -106,11 +106,11 @@ static bool TopoSort(DumpableObject **objs,
106106
staticvoidaddHeapElement(intval,int*heap,intheapLength);
107107
staticintremoveHeapElement(int*heap,intheapLength);
108108
staticvoidfindDependencyLoops(DumpableObject**objs,intnObjs,inttotObjs);
109-
staticboolfindLoop(DumpableObject*obj,
109+
staticintfindLoop(DumpableObject*obj,
110110
DumpIdstartPoint,
111+
bool*processed,
111112
DumpableObject**workspace,
112-
intdepth,
113-
int*newDepth);
113+
intdepth);
114114
staticvoidrepairDependencyLoop(DumpableObject**loop,
115115
intnLoop);
116116
staticvoiddescribeDumpableObject(DumpableObject*obj,
@@ -500,58 +500,52 @@ static void
500500
findDependencyLoops(DumpableObject**objs,intnObjs,inttotObjs)
501501
{
502502
/*
503-
* We use a workspace array, the initial part of which stores objects
504-
* already processed, and the rest of which is used as temporary space to
505-
* try to build a loop in.This is convenient because we do not care
506-
* about loops involving already-processed objects (see notes above); we
507-
* can easily reject such loops in findLoop() because of this
508-
* representation.After we identify and process a loop, we can add it to
509-
* the initial part of the workspace just by moving the boundary pointer.
510-
*
511-
* When we determine that an object is not part of any interesting loop,
512-
* we also add it to the initial part of the workspace. This is not
513-
* necessary for correctness, but saves later invocations of findLoop()
514-
* from uselessly chasing references to such an object.
515-
*
516-
* We make the workspace large enough to hold all objects in the original
517-
* universe. This is probably overkill, but it's provably enough space...
503+
* We use two data structures here. One is a bool array processed[],
504+
* which is indexed by dump ID and marks the objects already processed
505+
* during this invocation of findDependencyLoops(). The other is a
506+
* workspace[] array of DumpableObject pointers, in which we try to build
507+
* lists of objects constituting loops. We make workspace[] large enough
508+
* to hold all the objects, which is huge overkill in most cases but could
509+
* theoretically be necessary if there is a single dependency chain
510+
* linking all the objects.
518511
*/
512+
bool*processed;
519513
DumpableObject**workspace;
520-
intinitiallen;
521514
boolfixedloop;
522515
inti;
523516

517+
processed= (bool*)calloc(getMaxDumpId()+1,sizeof(bool));
524518
workspace= (DumpableObject**)malloc(totObjs*sizeof(DumpableObject*));
525-
if (workspace==NULL)
519+
if (processed==NULL||workspace==NULL)
526520
exit_horribly(NULL,modulename,"out of memory\n");
527-
initiallen=0;
528521
fixedloop= false;
529522

530523
for (i=0;i<nObjs;i++)
531524
{
532525
DumpableObject*obj=objs[i];
533-
intnewlen;
526+
intlooplen;
527+
intj;
534528

535-
workspace[initiallen]=NULL;/* see test below */
529+
looplen=findLoop(obj,obj->dumpId,processed,workspace,0);
536530

537-
if (findLoop(obj,obj->dumpId,workspace,initiallen,&newlen))
531+
if (looplen>0)
538532
{
539-
/* Found a loop of length newlen - initiallen */
540-
repairDependencyLoop(&workspace[initiallen],newlen-initiallen);
541-
/* Add loop members to workspace */
542-
initiallen=newlen;
533+
/* Found a loop, repair it */
534+
repairDependencyLoop(workspace,looplen);
543535
fixedloop= true;
536+
/* Mark loop members as processed */
537+
for (j=0;j<looplen;j++)
538+
processed[workspace[j]->dumpId]= true;
544539
}
545540
else
546541
{
547542
/*
548-
*Didn't find aloop, but add this object to workspace anyway,
549-
*unless it's already present. We piggyback on the test that
550-
* findLoop()already did: it won't have tentatively added obj to
551-
*workspace if it's already present.
543+
*There's noloop starting at this object, but mark it processed
544+
*anyway. This is not necessary for correctness, but saves later
545+
*invocations offindLoop()from uselessly chasing references to
546+
*such an object.
552547
*/
553-
if (workspace[initiallen]==obj)
554-
initiallen++;
548+
processed[obj->dumpId]= true;
555549
}
556550
}
557551

@@ -560,45 +554,51 @@ findDependencyLoops(DumpableObject **objs, int nObjs, int totObjs)
560554
exit_horribly(NULL,modulename,"could not identify dependency loop\n");
561555

562556
free(workspace);
557+
free(processed);
563558
}
564559

565560
/*
566561
* Recursively search for a circular dependency loop that doesn't include
567-
* anyexisting workspace members.
562+
* anyalready-processed objects.
568563
*
569564
*obj: object we are examining now
570565
*startPoint: dumpId of starting object for the hoped-for circular loop
571-
*workspace[]: work array for previously processed and current objects
566+
*processed[]: flag array marking already-processed objects
567+
*workspace[]: work array in which we are building list of loop members
572568
*depth: number of valid entries in workspace[] at call
573-
*newDepth: if successful, set to new number of workspace[] entries
574569
*
575-
* On success,*newDepthisset and workspace[]entries depth..*newDepth-1
576-
*are filledwith pointers to the members of the loop.
570+
* On success,the length of the loopisreturned, and workspace[]is filled
571+
* with pointers to the members of the loop. On failure, we return 0.
577572
*
578573
* Note: it is possible that the given starting object is a member of more
579574
* than one cycle; if so, we will find an arbitrary one of the cycles.
580575
*/
581-
staticbool
576+
staticint
582577
findLoop(DumpableObject*obj,
583578
DumpIdstartPoint,
579+
bool*processed,
584580
DumpableObject**workspace,
585-
intdepth,
586-
int*newDepth)
581+
intdepth)
587582
{
588583
inti;
589584

590585
/*
591-
* Reject if obj is already present in workspace. This test serves three
592-
* purposes: it prevents us from finding loops that overlap
593-
* previously-processed loops, it prevents us from going into infinite
594-
* recursion if we are given a startPoint object that links to a cycle
595-
* it's not a member of, and it guarantees that we can't overflow the
596-
* allocated size of workspace[].
586+
* Reject if obj is already processed. This test prevents us from finding
587+
* loops that overlap previously-processed loops.
588+
*/
589+
if (processed[obj->dumpId])
590+
return0;
591+
592+
/*
593+
* Reject if obj is already present in workspace. This test prevents us
594+
* from going into infinite recursion if we are given a startPoint object
595+
* that links to a cycle it's not a member of, and it guarantees that we
596+
* can't overflow the allocated size of workspace[].
597597
*/
598598
for (i=0;i<depth;i++)
599599
{
600600
if (workspace[i]==obj)
601-
returnfalse;
601+
return0;
602602
}
603603

604604
/*
@@ -612,10 +612,7 @@ findLoop(DumpableObject *obj,
612612
for (i=0;i<obj->nDeps;i++)
613613
{
614614
if (obj->dependencies[i]==startPoint)
615-
{
616-
*newDepth=depth;
617-
return true;
618-
}
615+
returndepth;
619616
}
620617

621618
/*
@@ -624,18 +621,20 @@ findLoop(DumpableObject *obj,
624621
for (i=0;i<obj->nDeps;i++)
625622
{
626623
DumpableObject*nextobj=findObjectByDumpId(obj->dependencies[i]);
624+
intnewDepth;
627625

628626
if (!nextobj)
629627
continue;/* ignore dependencies on undumped objects */
630-
if (findLoop(nextobj,
631-
startPoint,
632-
workspace,
633-
depth,
634-
newDepth))
635-
return true;
628+
newDepth=findLoop(nextobj,
629+
startPoint,
630+
processed,
631+
workspace,
632+
depth);
633+
if (newDepth>0)
634+
returnnewDepth;
636635
}
637636

638-
returnfalse;
637+
return0;
639638
}
640639

641640
/*

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp