Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commitd5881c0

Browse files
committed
Fix O(N^2) behavior in pg_dump when many objects are in dependency loops.
Combining the loop workspace with the record of already-processed objectsmight have been a cute trick, but it behaves horridly if there are manydependency loops to repair: the time spent in the first step of findLoop()grows as O(N^2). Instead use a separate flag array indexed by dump ID,which we can check in constant time. The length of the workspace arrayis now never more than the actual length of a dependency chain, whichshould be reasonably short in all cases of practical interest. The codeis noticeably easier to understand this way, too.Per gripe from Mike Roest. Since this is a longstanding performance bug,backpatch to all supported versions.
1 parent0d8117a commitd5881c0

File tree

1 file changed

+58
-59
lines changed

1 file changed

+58
-59
lines changed

‎src/bin/pg_dump/pg_dump_sort.c

Lines changed: 58 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -111,11 +111,11 @@ static bool TopoSort(DumpableObject **objs,
111111
staticvoidaddHeapElement(intval,int*heap,intheapLength);
112112
staticintremoveHeapElement(int*heap,intheapLength);
113113
staticvoidfindDependencyLoops(DumpableObject**objs,intnObjs,inttotObjs);
114-
staticboolfindLoop(DumpableObject*obj,
114+
staticintfindLoop(DumpableObject*obj,
115115
DumpIdstartPoint,
116+
bool*processed,
116117
DumpableObject**workspace,
117-
intdepth,
118-
int*newDepth);
118+
intdepth);
119119
staticvoidrepairDependencyLoop(DumpableObject**loop,
120120
intnLoop);
121121
staticvoiddescribeDumpableObject(DumpableObject*obj,
@@ -506,56 +506,50 @@ static void
506506
findDependencyLoops(DumpableObject**objs,intnObjs,inttotObjs)
507507
{
508508
/*
509-
* We use a workspace array, the initial part of which stores objects
510-
* already processed, and the rest of which is used as temporary space to
511-
* try to build a loop in.This is convenient because we do not care
512-
* about loops involving already-processed objects (see notes above); we
513-
* can easily reject such loops in findLoop() because of this
514-
* representation.After we identify and process a loop, we can add it to
515-
* the initial part of the workspace just by moving the boundary pointer.
516-
*
517-
* When we determine that an object is not part of any interesting loop,
518-
* we also add it to the initial part of the workspace. This is not
519-
* necessary for correctness, but saves later invocations of findLoop()
520-
* from uselessly chasing references to such an object.
521-
*
522-
* We make the workspace large enough to hold all objects in the original
523-
* universe. This is probably overkill, but it's provably enough space...
509+
* We use two data structures here. One is a bool array processed[],
510+
* which is indexed by dump ID and marks the objects already processed
511+
* during this invocation of findDependencyLoops(). The other is a
512+
* workspace[] array of DumpableObject pointers, in which we try to build
513+
* lists of objects constituting loops. We make workspace[] large enough
514+
* to hold all the objects, which is huge overkill in most cases but could
515+
* theoretically be necessary if there is a single dependency chain
516+
* linking all the objects.
524517
*/
518+
bool*processed;
525519
DumpableObject**workspace;
526-
intinitiallen;
527520
boolfixedloop;
528521
inti;
529522

523+
processed= (bool*)pg_calloc(getMaxDumpId()+1,sizeof(bool));
530524
workspace= (DumpableObject**)pg_malloc(totObjs*sizeof(DumpableObject*));
531-
initiallen=0;
532525
fixedloop= false;
533526

534527
for (i=0;i<nObjs;i++)
535528
{
536529
DumpableObject*obj=objs[i];
537-
intnewlen;
530+
intlooplen;
531+
intj;
538532

539-
workspace[initiallen]=NULL;/* see test below */
533+
looplen=findLoop(obj,obj->dumpId,processed,workspace,0);
540534

541-
if (findLoop(obj,obj->dumpId,workspace,initiallen,&newlen))
535+
if (looplen>0)
542536
{
543-
/* Found a loop of length newlen - initiallen */
544-
repairDependencyLoop(&workspace[initiallen],newlen-initiallen);
545-
/* Add loop members to workspace */
546-
initiallen=newlen;
537+
/* Found a loop, repair it */
538+
repairDependencyLoop(workspace,looplen);
547539
fixedloop= true;
540+
/* Mark loop members as processed */
541+
for (j=0;j<looplen;j++)
542+
processed[workspace[j]->dumpId]= true;
548543
}
549544
else
550545
{
551546
/*
552-
*Didn't find aloop, but add this object to workspace anyway,
553-
*unless it's already present. We piggyback on the test that
554-
* findLoop()already did: it won't have tentatively added obj to
555-
*workspace if it's already present.
547+
*There's noloop starting at this object, but mark it processed
548+
*anyway. This is not necessary for correctness, but saves later
549+
*invocations offindLoop()from uselessly chasing references to
550+
*such an object.
556551
*/
557-
if (workspace[initiallen]==obj)
558-
initiallen++;
552+
processed[obj->dumpId]= true;
559553
}
560554
}
561555

@@ -564,45 +558,51 @@ findDependencyLoops(DumpableObject **objs, int nObjs, int totObjs)
564558
exit_horribly(modulename,"could not identify dependency loop\n");
565559

566560
free(workspace);
561+
free(processed);
567562
}
568563

569564
/*
570565
* Recursively search for a circular dependency loop that doesn't include
571-
* anyexisting workspace members.
566+
* anyalready-processed objects.
572567
*
573568
*obj: object we are examining now
574569
*startPoint: dumpId of starting object for the hoped-for circular loop
575-
*workspace[]: work array for previously processed and current objects
570+
*processed[]: flag array marking already-processed objects
571+
*workspace[]: work array in which we are building list of loop members
576572
*depth: number of valid entries in workspace[] at call
577-
*newDepth: if successful, set to new number of workspace[] entries
578573
*
579-
* On success,*newDepthisset and workspace[]entries depth..*newDepth-1
580-
*are filledwith pointers to the members of the loop.
574+
* On success,the length of the loopisreturned, and workspace[]is filled
575+
* with pointers to the members of the loop. On failure, we return 0.
581576
*
582577
* Note: it is possible that the given starting object is a member of more
583578
* than one cycle; if so, we will find an arbitrary one of the cycles.
584579
*/
585-
staticbool
580+
staticint
586581
findLoop(DumpableObject*obj,
587582
DumpIdstartPoint,
583+
bool*processed,
588584
DumpableObject**workspace,
589-
intdepth,
590-
int*newDepth)
585+
intdepth)
591586
{
592587
inti;
593588

594589
/*
595-
* Reject if obj is already present in workspace. This test serves three
596-
* purposes: it prevents us from finding loops that overlap
597-
* previously-processed loops, it prevents us from going into infinite
598-
* recursion if we are given a startPoint object that links to a cycle
599-
* it's not a member of, and it guarantees that we can't overflow the
600-
* allocated size of workspace[].
590+
* Reject if obj is already processed. This test prevents us from finding
591+
* loops that overlap previously-processed loops.
592+
*/
593+
if (processed[obj->dumpId])
594+
return0;
595+
596+
/*
597+
* Reject if obj is already present in workspace. This test prevents us
598+
* from going into infinite recursion if we are given a startPoint object
599+
* that links to a cycle it's not a member of, and it guarantees that we
600+
* can't overflow the allocated size of workspace[].
601601
*/
602602
for (i=0;i<depth;i++)
603603
{
604604
if (workspace[i]==obj)
605-
returnfalse;
605+
return0;
606606
}
607607

608608
/*
@@ -616,10 +616,7 @@ findLoop(DumpableObject *obj,
616616
for (i=0;i<obj->nDeps;i++)
617617
{
618618
if (obj->dependencies[i]==startPoint)
619-
{
620-
*newDepth=depth;
621-
return true;
622-
}
619+
returndepth;
623620
}
624621

625622
/*
@@ -628,18 +625,20 @@ findLoop(DumpableObject *obj,
628625
for (i=0;i<obj->nDeps;i++)
629626
{
630627
DumpableObject*nextobj=findObjectByDumpId(obj->dependencies[i]);
628+
intnewDepth;
631629

632630
if (!nextobj)
633631
continue;/* ignore dependencies on undumped objects */
634-
if (findLoop(nextobj,
635-
startPoint,
636-
workspace,
637-
depth,
638-
newDepth))
639-
return true;
632+
newDepth=findLoop(nextobj,
633+
startPoint,
634+
processed,
635+
workspace,
636+
depth);
637+
if (newDepth>0)
638+
returnnewDepth;
640639
}
641640

642-
returnfalse;
641+
return0;
643642
}
644643

645644
/*

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp