Movatterモバイル変換


[0]ホーム

URL:


LLVM 20.0.0git
SampleProfileInference.cpp
Go to the documentation of this file.
1//===- SampleProfileInference.cpp - Adjust sample profiles in the IR ------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements a profile inference algorithm. Given an incomplete and
10// possibly imprecise block counts, the algorithm reconstructs realistic block
11// and edge counts that satisfy flow conservation rules, while minimally modify
12// input block counts.
13//
14//===----------------------------------------------------------------------===//
15
16#include "llvm/Transforms/Utils/SampleProfileInference.h"
17#include "llvm/ADT/BitVector.h"
18#include "llvm/Support/CommandLine.h"
19#include "llvm/Support/Debug.h"
20#include <queue>
21#include <set>
22#include <stack>
23#include <unordered_set>
24
25using namespacellvm;
26#define DEBUG_TYPE "sample-profile-inference"
27
28namespace{
29
30staticcl::opt<bool> SampleProfileEvenFlowDistribution(
31"sample-profile-even-flow-distribution",cl::init(true),cl::Hidden,
32cl::desc("Try to evenly distribute flow when there are multiple equally "
33"likely options."));
34
35staticcl::opt<bool> SampleProfileRebalanceUnknown(
36"sample-profile-rebalance-unknown",cl::init(true),cl::Hidden,
37cl::desc("Evenly re-distribute flow among unknown subgraphs."));
38
39staticcl::opt<bool> SampleProfileJoinIslands(
40"sample-profile-join-islands",cl::init(true),cl::Hidden,
41cl::desc("Join isolated components having positive flow."));
42
43staticcl::opt<unsigned> SampleProfileProfiCostBlockInc(
44"sample-profile-profi-cost-block-inc",cl::init(10),cl::Hidden,
45cl::desc("The cost of increasing a block's count by one."));
46
47staticcl::opt<unsigned> SampleProfileProfiCostBlockDec(
48"sample-profile-profi-cost-block-dec",cl::init(20),cl::Hidden,
49cl::desc("The cost of decreasing a block's count by one."));
50
51staticcl::opt<unsigned> SampleProfileProfiCostBlockEntryInc(
52"sample-profile-profi-cost-block-entry-inc",cl::init(40),cl::Hidden,
53cl::desc("The cost of increasing the entry block's count by one."));
54
55staticcl::opt<unsigned> SampleProfileProfiCostBlockEntryDec(
56"sample-profile-profi-cost-block-entry-dec",cl::init(10),cl::Hidden,
57cl::desc("The cost of decreasing the entry block's count by one."));
58
59staticcl::opt<unsigned> SampleProfileProfiCostBlockZeroInc(
60"sample-profile-profi-cost-block-zero-inc",cl::init(11),cl::Hidden,
61cl::desc("The cost of increasing a count of zero-weight block by one."));
62
63staticcl::opt<unsigned> SampleProfileProfiCostBlockUnknownInc(
64"sample-profile-profi-cost-block-unknown-inc",cl::init(0),cl::Hidden,
65cl::desc("The cost of increasing an unknown block's count by one."));
66
67/// A value indicating an infinite flow/capacity/weight of a block/edge.
68/// Not using numeric_limits<int64_t>::max(), as the values can be summed up
69/// during the execution.
70staticconstexpr int64_tINF = ((int64_t)1) << 50;
71
72/// The minimum-cost maximum flow algorithm.
73///
74/// The algorithm finds the maximum flow of minimum cost on a given (directed)
75/// network using a modified version of the classical Moore-Bellman-Ford
76/// approach. The algorithm applies a number of augmentation iterations in which
77/// flow is sent along paths of positive capacity from the source to the sink.
78/// The worst-case time complexity of the implementation is O(v(f)*m*n), where
79/// where m is the number of edges, n is the number of vertices, and v(f) is the
80/// value of the maximum flow. However, the observed running time on typical
81/// instances is sub-quadratic, that is, o(n^2).
82///
83/// The input is a set of edges with specified costs and capacities, and a pair
84/// of nodes (source and sink). The output is the flow along each edge of the
85/// minimum total cost respecting the given edge capacities.
86classMinCostMaxFlow {
87public:
88 MinCostMaxFlow(constProfiParams &Params) : Params(Params) {}
89
90// Initialize algorithm's data structures for a network of a given size.
91voidinitialize(uint64_t NodeCount,uint64_t SourceNode,uint64_t SinkNode) {
92 Source = SourceNode;
93Target = SinkNode;
94
95 Nodes = std::vector<Node>(NodeCount);
96 Edges = std::vector<std::vector<Edge>>(NodeCount, std::vector<Edge>());
97if (Params.EvenFlowDistribution)
98 AugmentingEdges =
99 std::vector<std::vector<Edge *>>(NodeCount, std::vector<Edge *>());
100 }
101
102// Run the algorithm.
103 int64_t run() {
104LLVM_DEBUG(dbgs() <<"Starting profi for " << Nodes.size() <<" nodes\n");
105
106// Iteratively find an augmentation path/dag in the network and send the
107// flow along its edges
108size_t AugmentationIters = applyFlowAugmentation();
109
110// Compute the total flow and its cost
111 int64_t TotalCost = 0;
112 int64_t TotalFlow = 0;
113for (uint64_t Src = 0; Src < Nodes.size(); Src++) {
114for (auto &Edge : Edges[Src]) {
115if (Edge.Flow > 0) {
116 TotalCost += Edge.Cost * Edge.Flow;
117if (Src == Source)
118 TotalFlow += Edge.Flow;
119 }
120 }
121 }
122LLVM_DEBUG(dbgs() <<"Completed profi after " << AugmentationIters
123 <<" iterations with " << TotalFlow <<" total flow"
124 <<" of " << TotalCost <<" cost\n");
125 (void)TotalFlow;
126 (void)AugmentationIters;
127return TotalCost;
128 }
129
130 /// Adding an edge to the network with a specified capacity and a cost.
131 /// Multiple edges between a pair of nodes are allowed but self-edges
132 /// are not supported.
133voidaddEdge(uint64_t Src,uint64_t Dst, int64_t Capacity, int64_tCost) {
134assert(Capacity > 0 &&"adding an edge of zero capacity");
135assert(Src != Dst &&"loop edge are not supported");
136
137 Edge SrcEdge;
138 SrcEdge.Dst = Dst;
139 SrcEdge.Cost =Cost;
140 SrcEdge.Capacity = Capacity;
141 SrcEdge.Flow = 0;
142 SrcEdge.RevEdgeIndex = Edges[Dst].size();
143
144 Edge DstEdge;
145 DstEdge.Dst = Src;
146 DstEdge.Cost = -Cost;
147 DstEdge.Capacity = 0;
148 DstEdge.Flow = 0;
149 DstEdge.RevEdgeIndex = Edges[Src].size();
150
151 Edges[Src].push_back(SrcEdge);
152 Edges[Dst].push_back(DstEdge);
153 }
154
155 /// Adding an edge to the network of infinite capacity and a given cost.
156voidaddEdge(uint64_t Src,uint64_t Dst, int64_tCost) {
157addEdge(Src, Dst,INF,Cost);
158 }
159
160 /// Get the total flow from a given source node.
161 /// Returns a list of pairs (target node, amount of flow to the target).
162 std::vector<std::pair<uint64_t, int64_t>> getFlow(uint64_t Src) const{
163 std::vector<std::pair<uint64_t, int64_t>>Flow;
164for (constauto &Edge : Edges[Src]) {
165if (Edge.Flow > 0)
166Flow.push_back(std::make_pair(Edge.Dst, Edge.Flow));
167 }
168returnFlow;
169 }
170
171 /// Get the total flow between a pair of nodes.
172 int64_t getFlow(uint64_t Src,uint64_t Dst) const{
173 int64_tFlow = 0;
174for (constauto &Edge : Edges[Src]) {
175if (Edge.Dst == Dst) {
176Flow += Edge.Flow;
177 }
178 }
179returnFlow;
180 }
181
182private:
183 /// Iteratively find an augmentation path/dag in the network and send the
184 /// flow along its edges. The method returns the number of applied iterations.
185size_t applyFlowAugmentation() {
186size_t AugmentationIters = 0;
187while (findAugmentingPath()) {
188uint64_t PathCapacity = computeAugmentingPathCapacity();
189while (PathCapacity > 0) {
190bool Progress =false;
191if (Params.EvenFlowDistribution) {
192// Identify node/edge candidates for augmentation
193 identifyShortestEdges(PathCapacity);
194
195// Find an augmenting DAG
196auto AugmentingOrder = findAugmentingDAG();
197
198// Apply the DAG augmentation
199 Progress = augmentFlowAlongDAG(AugmentingOrder);
200 PathCapacity = computeAugmentingPathCapacity();
201 }
202
203if (!Progress) {
204 augmentFlowAlongPath(PathCapacity);
205 PathCapacity = 0;
206 }
207
208 AugmentationIters++;
209 }
210 }
211return AugmentationIters;
212 }
213
214 /// Compute the capacity of the cannonical augmenting path. If the path is
215 /// saturated (that is, no flow can be sent along the path), then return 0.
216uint64_t computeAugmentingPathCapacity() {
217uint64_t PathCapacity =INF;
218uint64_t Now =Target;
219while (Now != Source) {
220uint64_t Pred = Nodes[Now].ParentNode;
221auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex];
222
223assert(Edge.Capacity >= Edge.Flow &&"incorrect edge flow");
224uint64_t EdgeCapacity =uint64_t(Edge.Capacity - Edge.Flow);
225 PathCapacity = std::min(PathCapacity, EdgeCapacity);
226
227 Now = Pred;
228 }
229return PathCapacity;
230 }
231
232 /// Check for existence of an augmenting path with a positive capacity.
233bool findAugmentingPath() {
234// Initialize data structures
235for (auto &Node : Nodes) {
236Node.Distance =INF;
237Node.ParentNode =uint64_t(-1);
238Node.ParentEdgeIndex =uint64_t(-1);
239Node.Taken =false;
240 }
241
242 std::queue<uint64_t> Queue;
243 Queue.push(Source);
244 Nodes[Source].Distance = 0;
245 Nodes[Source].Taken =true;
246while (!Queue.empty()) {
247uint64_t Src = Queue.front();
248 Queue.pop();
249 Nodes[Src].Taken =false;
250// Although the residual network contains edges with negative costs
251// (in particular, backward edges), it can be shown that there are no
252// negative-weight cycles and the following two invariants are maintained:
253// (i) Dist[Source, V] >= 0 and (ii) Dist[V, Target] >= 0 for all nodes V,
254// where Dist is the length of the shortest path between two nodes. This
255// allows to prune the search-space of the path-finding algorithm using
256// the following early-stop criteria:
257// -- If we find a path with zero-distance from Source to Target, stop the
258// search, as the path is the shortest since Dist[Source, Target] >= 0;
259// -- If we have Dist[Source, V] > Dist[Source, Target], then do not
260// process node V, as it is guaranteed _not_ to be on a shortest path
261// from Source to Target; it follows from inequalities
262// Dist[Source, Target] >= Dist[Source, V] + Dist[V, Target]
263// >= Dist[Source, V]
264if (!Params.EvenFlowDistribution && Nodes[Target].Distance == 0)
265break;
266if (Nodes[Src].Distance > Nodes[Target].Distance)
267continue;
268
269// Process adjacent edges
270for (uint64_t EdgeIdx = 0; EdgeIdx < Edges[Src].size(); EdgeIdx++) {
271auto &Edge = Edges[Src][EdgeIdx];
272if (Edge.Flow < Edge.Capacity) {
273uint64_t Dst = Edge.Dst;
274 int64_t NewDistance = Nodes[Src].Distance + Edge.Cost;
275if (Nodes[Dst].Distance > NewDistance) {
276// Update the distance and the parent node/edge
277 Nodes[Dst].Distance = NewDistance;
278 Nodes[Dst].ParentNode = Src;
279 Nodes[Dst].ParentEdgeIndex = EdgeIdx;
280// Add the node to the queue, if it is not there yet
281if (!Nodes[Dst].Taken) {
282 Queue.push(Dst);
283 Nodes[Dst].Taken =true;
284 }
285 }
286 }
287 }
288 }
289
290return Nodes[Target].Distance !=INF;
291 }
292
293 /// Update the current flow along the augmenting path.
294void augmentFlowAlongPath(uint64_t PathCapacity) {
295assert(PathCapacity > 0 &&"found an incorrect augmenting path");
296uint64_t Now =Target;
297while (Now != Source) {
298uint64_t Pred = Nodes[Now].ParentNode;
299auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex];
300auto &RevEdge = Edges[Now][Edge.RevEdgeIndex];
301
302 Edge.Flow += PathCapacity;
303 RevEdge.Flow -= PathCapacity;
304
305 Now = Pred;
306 }
307 }
308
309 /// Find an Augmenting DAG order using a modified version of DFS in which we
310 /// can visit a node multiple times. In the DFS search, when scanning each
311 /// edge out of a node, continue search at Edge.Dst endpoint if it has not
312 /// been discovered yet and its NumCalls < MaxDfsCalls. The algorithm
313 /// runs in O(MaxDfsCalls * |Edges| + |Nodes|) time.
314 /// It returns an Augmenting Order (Taken nodes in decreasing Finish time)
315 /// that starts with Source and ends with Target.
316 std::vector<uint64_t> findAugmentingDAG() {
317// We use a stack based implemenation of DFS to avoid recursion.
318// Defining DFS data structures:
319// A pair (NodeIdx, EdgeIdx) at the top of the Stack denotes that
320// - we are currently visiting Nodes[NodeIdx] and
321// - the next edge to scan is Edges[NodeIdx][EdgeIdx]
322typedef std::pair<uint64_t, uint64_t> StackItemType;
323 std::stack<StackItemType> Stack;
324 std::vector<uint64_t> AugmentingOrder;
325
326// Phase 0: Initialize Node attributes and Time for DFS run
327for (auto &Node : Nodes) {
328Node.Discovery = 0;
329Node.Finish = 0;
330Node.NumCalls = 0;
331Node.Taken =false;
332 }
333uint64_t Time = 0;
334// Mark Target as Taken
335// Taken attribute will be propagated backwards from Target towards Source
336 Nodes[Target].Taken =true;
337
338// Phase 1: Start DFS traversal from Source
339 Stack.emplace(Source, 0);
340 Nodes[Source].Discovery = ++Time;
341while (!Stack.empty()) {
342auto NodeIdx = Stack.top().first;
343auto EdgeIdx = Stack.top().second;
344
345// If we haven't scanned all edges out of NodeIdx, continue scanning
346if (EdgeIdx < Edges[NodeIdx].size()) {
347auto &Edge = Edges[NodeIdx][EdgeIdx];
348auto &Dst = Nodes[Edge.Dst];
349 Stack.top().second++;
350
351if (Edge.OnShortestPath) {
352// If we haven't seen Edge.Dst so far, continue DFS search there
353if (Dst.Discovery == 0 && Dst.NumCalls < MaxDfsCalls) {
354 Dst.Discovery = ++Time;
355 Stack.emplace(Edge.Dst, 0);
356 Dst.NumCalls++;
357 }elseif (Dst.Taken && Dst.Finish != 0) {
358// Else, if Edge.Dst already have a path to Target, so that NodeIdx
359 Nodes[NodeIdx].Taken =true;
360 }
361 }
362 }else {
363// If we are done scanning all edge out of NodeIdx
364 Stack.pop();
365// If we haven't found a path from NodeIdx to Target, forget about it
366if (!Nodes[NodeIdx].Taken) {
367 Nodes[NodeIdx].Discovery = 0;
368 }else {
369// If we have found a path from NodeIdx to Target, then finish NodeIdx
370// and propagate Taken flag to DFS parent unless at the Source
371 Nodes[NodeIdx].Finish = ++Time;
372// NodeIdx == Source if and only if the stack is empty
373if (NodeIdx != Source) {
374assert(!Stack.empty() &&"empty stack while running dfs");
375 Nodes[Stack.top().first].Taken =true;
376 }
377 AugmentingOrder.push_back(NodeIdx);
378 }
379 }
380 }
381// Nodes are collected decreasing Finish time, so the order is reversed
382 std::reverse(AugmentingOrder.begin(), AugmentingOrder.end());
383
384// Phase 2: Extract all forward (DAG) edges and fill in AugmentingEdges
385for (size_t Src : AugmentingOrder) {
386 AugmentingEdges[Src].clear();
387for (auto &Edge : Edges[Src]) {
388uint64_t Dst = Edge.Dst;
389if (Edge.OnShortestPath && Nodes[Src].Taken && Nodes[Dst].Taken &&
390 Nodes[Dst].Finish < Nodes[Src].Finish) {
391 AugmentingEdges[Src].push_back(&Edge);
392 }
393 }
394assert((Src ==Target || !AugmentingEdges[Src].empty()) &&
395"incorrectly constructed augmenting edges");
396 }
397
398return AugmentingOrder;
399 }
400
401 /// Update the current flow along the given (acyclic) subgraph specified by
402 /// the vertex order, AugmentingOrder. The objective is to send as much flow
403 /// as possible while evenly distributing flow among successors of each node.
404 /// After the update at least one edge is saturated.
405bool augmentFlowAlongDAG(const std::vector<uint64_t> &AugmentingOrder) {
406// Phase 0: Initialization
407for (uint64_t Src : AugmentingOrder) {
408 Nodes[Src].FracFlow = 0;
409 Nodes[Src].IntFlow = 0;
410for (auto &Edge : AugmentingEdges[Src]) {
411 Edge->AugmentedFlow = 0;
412 }
413 }
414
415// Phase 1: Send a unit of fractional flow along the DAG
416uint64_t MaxFlowAmount =INF;
417 Nodes[Source].FracFlow = 1.0;
418for (uint64_t Src : AugmentingOrder) {
419assert((Src ==Target || Nodes[Src].FracFlow > 0.0) &&
420"incorrectly computed fractional flow");
421// Distribute flow evenly among successors of Src
422uint64_t Degree = AugmentingEdges[Src].size();
423for (auto &Edge : AugmentingEdges[Src]) {
424double EdgeFlow = Nodes[Src].FracFlow / Degree;
425 Nodes[Edge->Dst].FracFlow += EdgeFlow;
426if (Edge->Capacity ==INF)
427continue;
428uint64_t MaxIntFlow = double(Edge->Capacity - Edge->Flow) / EdgeFlow;
429 MaxFlowAmount = std::min(MaxFlowAmount, MaxIntFlow);
430 }
431 }
432// Stop early if we cannot send any (integral) flow from Source to Target
433if (MaxFlowAmount == 0)
434returnfalse;
435
436// Phase 2: Send an integral flow of MaxFlowAmount
437 Nodes[Source].IntFlow = MaxFlowAmount;
438for (uint64_t Src : AugmentingOrder) {
439if (Src ==Target)
440break;
441// Distribute flow evenly among successors of Src, rounding up to make
442// sure all flow is sent
443uint64_t Degree = AugmentingEdges[Src].size();
444// We are guaranteeed that Node[Src].IntFlow <= SuccFlow * Degree
445uint64_t SuccFlow = (Nodes[Src].IntFlow + Degree - 1) / Degree;
446for (auto &Edge : AugmentingEdges[Src]) {
447uint64_t Dst = Edge->Dst;
448uint64_t EdgeFlow = std::min(Nodes[Src].IntFlow, SuccFlow);
449 EdgeFlow = std::min(EdgeFlow,uint64_t(Edge->Capacity - Edge->Flow));
450 Nodes[Dst].IntFlow += EdgeFlow;
451 Nodes[Src].IntFlow -= EdgeFlow;
452 Edge->AugmentedFlow += EdgeFlow;
453 }
454 }
455assert(Nodes[Target].IntFlow <= MaxFlowAmount);
456 Nodes[Target].IntFlow = 0;
457
458// Phase 3: Send excess flow back traversing the nodes backwards.
459// Because of rounding, not all flow can be sent along the edges of Src.
460// Hence, sending the remaining flow back to maintain flow conservation
461for (size_tIdx = AugmentingOrder.size() - 1;Idx > 0;Idx--) {
462uint64_t Src = AugmentingOrder[Idx - 1];
463// Try to send excess flow back along each edge.
464// Make sure we only send back flow we just augmented (AugmentedFlow).
465for (auto &Edge : AugmentingEdges[Src]) {
466uint64_t Dst = Edge->Dst;
467if (Nodes[Dst].IntFlow == 0)
468continue;
469uint64_t EdgeFlow = std::min(Nodes[Dst].IntFlow, Edge->AugmentedFlow);
470 Nodes[Dst].IntFlow -= EdgeFlow;
471 Nodes[Src].IntFlow += EdgeFlow;
472 Edge->AugmentedFlow -= EdgeFlow;
473 }
474 }
475
476// Phase 4: Update flow values along all edges
477bool HasSaturatedEdges =false;
478for (uint64_t Src : AugmentingOrder) {
479// Verify that we have sent all the excess flow from the node
480assert(Src == Source || Nodes[Src].IntFlow == 0);
481for (auto &Edge : AugmentingEdges[Src]) {
482assert(uint64_t(Edge->Capacity - Edge->Flow) >= Edge->AugmentedFlow);
483// Update flow values along the edge and its reverse copy
484auto &RevEdge = Edges[Edge->Dst][Edge->RevEdgeIndex];
485 Edge->Flow += Edge->AugmentedFlow;
486 RevEdge.Flow -= Edge->AugmentedFlow;
487if (Edge->Capacity == Edge->Flow && Edge->AugmentedFlow > 0)
488 HasSaturatedEdges =true;
489 }
490 }
491
492// The augmentation is successful iff at least one edge becomes saturated
493return HasSaturatedEdges;
494 }
495
496 /// Identify candidate (shortest) edges for augmentation.
497void identifyShortestEdges(uint64_t PathCapacity) {
498assert(PathCapacity > 0 &&"found an incorrect augmenting DAG");
499// To make sure the augmentation DAG contains only edges with large residual
500// capacity, we prune all edges whose capacity is below a fraction of
501// the capacity of the augmented path.
502// (All edges of the path itself are always in the DAG)
503uint64_t MinCapacity = std::max(PathCapacity / 2,uint64_t(1));
504
505// Decide which edges are on a shortest path from Source to Target
506for (size_t Src = 0; Src < Nodes.size(); Src++) {
507// An edge cannot be augmenting if the endpoint has large distance
508if (Nodes[Src].Distance > Nodes[Target].Distance)
509continue;
510
511for (auto &Edge : Edges[Src]) {
512uint64_t Dst = Edge.Dst;
513 Edge.OnShortestPath =
514 Src !=Target && Dst != Source &&
515 Nodes[Dst].Distance <= Nodes[Target].Distance &&
516 Nodes[Dst].Distance == Nodes[Src].Distance + Edge.Cost &&
517 Edge.Capacity > Edge.Flow &&
518uint64_t(Edge.Capacity - Edge.Flow) >= MinCapacity;
519 }
520 }
521 }
522
523 /// Maximum number of DFS iterations for DAG finding.
524staticconstexpruint64_t MaxDfsCalls = 10;
525
526 /// A node in a flow network.
527structNode {
528 /// The cost of the cheapest path from the source to the current node.
529 int64_t Distance;
530 /// The node preceding the current one in the path.
531uint64_t ParentNode;
532 /// The index of the edge between ParentNode and the current node.
533uint64_t ParentEdgeIndex;
534 /// An indicator of whether the current node is in a queue.
535bool Taken;
536
537 /// Data fields utilized in DAG-augmentation:
538 /// Fractional flow.
539double FracFlow;
540 /// Integral flow.
541uint64_t IntFlow;
542 /// Discovery time.
543uint64_t Discovery;
544 /// Finish time.
545uint64_t Finish;
546 /// NumCalls.
547uint64_t NumCalls;
548 };
549
550 /// An edge in a flow network.
551structEdge {
552 /// The cost of the edge.
553 int64_tCost;
554 /// The capacity of the edge.
555 int64_t Capacity;
556 /// The current flow on the edge.
557 int64_tFlow;
558 /// The destination node of the edge.
559uint64_t Dst;
560 /// The index of the reverse edge between Dst and the current node.
561uint64_t RevEdgeIndex;
562
563 /// Data fields utilized in DAG-augmentation:
564 /// Whether the edge is currently on a shortest path from Source to Target.
565bool OnShortestPath;
566 /// Extra flow along the edge.
567uint64_t AugmentedFlow;
568 };
569
570 /// The set of network nodes.
571 std::vector<Node> Nodes;
572 /// The set of network edges.
573 std::vector<std::vector<Edge>> Edges;
574 /// Source node of the flow.
575uint64_t Source;
576 /// Target (sink) node of the flow.
577uint64_tTarget;
578 /// Augmenting edges.
579 std::vector<std::vector<Edge *>> AugmentingEdges;
580 /// Params for flow computation.
581constProfiParams &Params;
582};
583
584/// A post-processing adjustment of the control flow. It applies two steps by
585/// rerouting some flow and making it more realistic:
586///
587/// - First, it removes all isolated components ("islands") with a positive flow
588/// that are unreachable from the entry block. For every such component, we
589/// find the shortest from the entry to an exit passing through the component,
590/// and increase the flow by one unit along the path.
591///
592/// - Second, it identifies all "unknown subgraphs" consisting of basic blocks
593/// with no sampled counts. Then it rebalnces the flow that goes through such
594/// a subgraph so that each branch is taken with probability 50%.
595/// An unknown subgraph is such that for every two nodes u and v:
596/// - u dominates v and u is not unknown;
597/// - v post-dominates u; and
598/// - all inner-nodes of all (u,v)-paths are unknown.
599///
600classFlowAdjuster {
601public:
602 FlowAdjuster(constProfiParams &Params,FlowFunction &Func)
603 : Params(Params), Func(Func) {}
604
605 /// Apply the post-processing.
606void run() {
607if (Params.JoinIslands) {
608// Adjust the flow to get rid of isolated components
609 joinIsolatedComponents();
610 }
611
612if (Params.RebalanceUnknown) {
613// Rebalance the flow inside unknown subgraphs
614 rebalanceUnknownSubgraphs();
615 }
616 }
617
618private:
619void joinIsolatedComponents() {
620// Find blocks that are reachable from the source
621auto Visited =BitVector(NumBlocks(),false);
622 findReachable(Func.Entry, Visited);
623
624// Iterate over all non-reachable blocks and adjust their weights
625for (uint64_tI = 0;I < NumBlocks();I++) {
626auto &Block = Func.Blocks[I];
627if (Block.Flow > 0 && !Visited[I]) {
628// Find a path from the entry to an exit passing through the block I
629auto Path = findShortestPath(I);
630// Increase the flow along the path
631assert(Path.size() > 0 && Path[0]->Source == Func.Entry &&
632"incorrectly computed path adjusting control flow");
633 Func.Blocks[Func.Entry].Flow += 1;
634for (auto &Jump : Path) {
635 Jump->Flow += 1;
636 Func.Blocks[Jump->Target].Flow += 1;
637// Update reachability
638 findReachable(Jump->Target, Visited);
639 }
640 }
641 }
642 }
643
644 /// Run BFS from a given block along the jumps with a positive flow and mark
645 /// all reachable blocks.
646void findReachable(uint64_t Src,BitVector &Visited) {
647if (Visited[Src])
648return;
649 std::queue<uint64_t> Queue;
650 Queue.push(Src);
651 Visited[Src] =true;
652while (!Queue.empty()) {
653 Src = Queue.front();
654 Queue.pop();
655for (auto *Jump : Func.Blocks[Src].SuccJumps) {
656uint64_t Dst = Jump->Target;
657if (Jump->Flow > 0 && !Visited[Dst]) {
658 Queue.push(Dst);
659 Visited[Dst] =true;
660 }
661 }
662 }
663 }
664
665 /// Find the shortest path from the entry block to an exit block passing
666 /// through a given block.
667 std::vector<FlowJump *> findShortestPath(uint64_t BlockIdx) {
668// A path from the entry block to BlockIdx
669auto ForwardPath = findShortestPath(Func.Entry, BlockIdx);
670// A path from BlockIdx to an exit block
671auto BackwardPath = findShortestPath(BlockIdx, AnyExitBlock);
672
673// Concatenate the two paths
674 std::vector<FlowJump *> Result;
675 Result.insert(Result.end(), ForwardPath.begin(), ForwardPath.end());
676 Result.insert(Result.end(), BackwardPath.begin(), BackwardPath.end());
677return Result;
678 }
679
680 /// Apply the Dijkstra algorithm to find the shortest path from a given
681 /// Source to a given Target block.
682 /// If Target == -1, then the path ends at an exit block.
683 std::vector<FlowJump *> findShortestPath(uint64_t Source,uint64_tTarget) {
684// Quit early, if possible
685if (Source ==Target)
686return std::vector<FlowJump *>();
687if (Func.Blocks[Source].isExit() &&Target == AnyExitBlock)
688return std::vector<FlowJump *>();
689
690// Initialize data structures
691auto Distance = std::vector<int64_t>(NumBlocks(),INF);
692auto Parent = std::vector<FlowJump *>(NumBlocks(),nullptr);
693 Distance[Source] = 0;
694 std::set<std::pair<uint64_t, uint64_t>> Queue;
695 Queue.insert(std::make_pair(Distance[Source], Source));
696
697// Run the Dijkstra algorithm
698while (!Queue.empty()) {
699uint64_t Src = Queue.begin()->second;
700 Queue.erase(Queue.begin());
701// If we found a solution, quit early
702if (Src ==Target ||
703 (Func.Blocks[Src].isExit() &&Target == AnyExitBlock))
704break;
705
706for (auto *Jump : Func.Blocks[Src].SuccJumps) {
707uint64_t Dst = Jump->Target;
708 int64_t JumpDist = jumpDistance(Jump);
709if (Distance[Dst] > Distance[Src] + JumpDist) {
710 Queue.erase(std::make_pair(Distance[Dst], Dst));
711
712 Distance[Dst] = Distance[Src] + JumpDist;
713 Parent[Dst] = Jump;
714
715 Queue.insert(std::make_pair(Distance[Dst], Dst));
716 }
717 }
718 }
719// If Target is not provided, find the closest exit block
720if (Target == AnyExitBlock) {
721for (uint64_tI = 0;I < NumBlocks();I++) {
722if (Func.Blocks[I].isExit() && Parent[I] !=nullptr) {
723if (Target == AnyExitBlock || Distance[Target] > Distance[I]) {
724Target =I;
725 }
726 }
727 }
728 }
729assert(Parent[Target] !=nullptr &&"a path does not exist");
730
731// Extract the constructed path
732 std::vector<FlowJump *> Result;
733uint64_t Now =Target;
734while (Now != Source) {
735assert(Now == Parent[Now]->Target &&"incorrect parent jump");
736 Result.push_back(Parent[Now]);
737 Now = Parent[Now]->Source;
738 }
739// Reverse the path, since it is extracted from Target to Source
740 std::reverse(Result.begin(), Result.end());
741return Result;
742 }
743
744 /// A distance of a path for a given jump.
745 /// In order to incite the path to use blocks/jumps with large positive flow,
746 /// and avoid changing branch probability of outgoing edges drastically,
747 /// set the jump distance so as:
748 /// - to minimize the number of unlikely jumps used and subject to that,
749 /// - to minimize the number of Flow == 0 jumps used and subject to that,
750 /// - minimizes total multiplicative Flow increase for the remaining edges.
751 /// To capture this objective with integer distances, we round off fractional
752 /// parts to a multiple of 1 / BaseDistance.
753 int64_t jumpDistance(FlowJump *Jump) const{
754if (Jump->IsUnlikely)
755return Params.CostUnlikely;
756uint64_t BaseDistance =
757 std::max(FlowAdjuster::MinBaseDistance,
758 std::min(Func.Blocks[Func.Entry].Flow,
759 Params.CostUnlikely / (2 * (NumBlocks() + 1))));
760if (Jump->Flow > 0)
761return BaseDistance + BaseDistance / Jump->Flow;
762return 2 * BaseDistance * (NumBlocks() + 1);
763 };
764
765uint64_t NumBlocks() const{return Func.Blocks.size(); }
766
767 /// Rebalance unknown subgraphs so that the flow is split evenly across the
768 /// outgoing branches of every block of the subgraph. The method iterates over
769 /// blocks with known weight and identifies unknown subgraphs rooted at the
770 /// blocks. Then it verifies if flow rebalancing is feasible and applies it.
771void rebalanceUnknownSubgraphs() {
772// Try to find unknown subgraphs from each block
773for (constFlowBlock &SrcBlock : Func.Blocks) {
774// Verify if rebalancing rooted at SrcBlock is feasible
775if (!canRebalanceAtRoot(&SrcBlock))
776continue;
777
778// Find an unknown subgraphs starting at SrcBlock. Along the way,
779// fill in known destinations and intermediate unknown blocks.
780 std::vector<FlowBlock *> UnknownBlocks;
781 std::vector<FlowBlock *> KnownDstBlocks;
782 findUnknownSubgraph(&SrcBlock, KnownDstBlocks, UnknownBlocks);
783
784// Verify if rebalancing of the subgraph is feasible. If the search is
785// successful, find the unique destination block (which can be null)
786FlowBlock *DstBlock =nullptr;
787if (!canRebalanceSubgraph(&SrcBlock, KnownDstBlocks, UnknownBlocks,
788 DstBlock))
789continue;
790
791// We cannot rebalance subgraphs containing cycles among unknown blocks
792if (!isAcyclicSubgraph(&SrcBlock, DstBlock, UnknownBlocks))
793continue;
794
795// Rebalance the flow
796 rebalanceUnknownSubgraph(&SrcBlock, DstBlock, UnknownBlocks);
797 }
798 }
799
800 /// Verify if rebalancing rooted at a given block is possible.
801bool canRebalanceAtRoot(constFlowBlock *SrcBlock) {
802// Do not attempt to find unknown subgraphs from an unknown or a
803// zero-flow block
804if (SrcBlock->HasUnknownWeight || SrcBlock->Flow == 0)
805returnfalse;
806
807// Do not attempt to process subgraphs from a block w/o unknown sucessors
808bool HasUnknownSuccs =false;
809for (auto *Jump : SrcBlock->SuccJumps) {
810if (Func.Blocks[Jump->Target].HasUnknownWeight) {
811 HasUnknownSuccs =true;
812break;
813 }
814 }
815if (!HasUnknownSuccs)
816returnfalse;
817
818returntrue;
819 }
820
821 /// Find an unknown subgraph starting at block SrcBlock. The method sets
822 /// identified destinations, KnownDstBlocks, and intermediate UnknownBlocks.
823void findUnknownSubgraph(constFlowBlock *SrcBlock,
824 std::vector<FlowBlock *> &KnownDstBlocks,
825 std::vector<FlowBlock *> &UnknownBlocks) {
826// Run BFS from SrcBlock and make sure all paths are going through unknown
827// blocks and end at a known DstBlock
828auto Visited =BitVector(NumBlocks(),false);
829 std::queue<uint64_t> Queue;
830
831 Queue.push(SrcBlock->Index);
832 Visited[SrcBlock->Index] =true;
833while (!Queue.empty()) {
834auto &Block = Func.Blocks[Queue.front()];
835 Queue.pop();
836// Process blocks reachable from Block
837for (auto *Jump :Block.SuccJumps) {
838// If Jump can be ignored, skip it
839if (ignoreJump(SrcBlock,nullptr, Jump))
840continue;
841
842uint64_t Dst = Jump->Target;
843// If Dst has been visited, skip Jump
844if (Visited[Dst])
845continue;
846// Process block Dst
847 Visited[Dst] =true;
848if (!Func.Blocks[Dst].HasUnknownWeight) {
849 KnownDstBlocks.push_back(&Func.Blocks[Dst]);
850 }else {
851 Queue.push(Dst);
852 UnknownBlocks.push_back(&Func.Blocks[Dst]);
853 }
854 }
855 }
856 }
857
858 /// Verify if rebalancing of the subgraph is feasible. If the checks are
859 /// successful, set the unique destination block, DstBlock (can be null).
860bool canRebalanceSubgraph(constFlowBlock *SrcBlock,
861const std::vector<FlowBlock *> &KnownDstBlocks,
862const std::vector<FlowBlock *> &UnknownBlocks,
863FlowBlock *&DstBlock) {
864// If the list of unknown blocks is empty, we don't need rebalancing
865if (UnknownBlocks.empty())
866returnfalse;
867
868// If there are multiple known sinks, we can't rebalance
869if (KnownDstBlocks.size() > 1)
870returnfalse;
871 DstBlock = KnownDstBlocks.empty() ? nullptr : KnownDstBlocks.front();
872
873// Verify sinks of the subgraph
874for (auto *Block : UnknownBlocks) {
875if (Block->SuccJumps.empty()) {
876// If there are multiple (known and unknown) sinks, we can't rebalance
877if (DstBlock !=nullptr)
878returnfalse;
879continue;
880 }
881size_t NumIgnoredJumps = 0;
882for (auto *Jump :Block->SuccJumps) {
883if (ignoreJump(SrcBlock, DstBlock, Jump))
884 NumIgnoredJumps++;
885 }
886// If there is a non-sink block in UnknownBlocks with all jumps ignored,
887// then we can't rebalance
888if (NumIgnoredJumps ==Block->SuccJumps.size())
889returnfalse;
890 }
891
892returntrue;
893 }
894
895 /// Decide whether the Jump is ignored while processing an unknown subgraphs
896 /// rooted at basic block SrcBlock with the destination block, DstBlock.
897bool ignoreJump(constFlowBlock *SrcBlock,constFlowBlock *DstBlock,
898constFlowJump *Jump) {
899// Ignore unlikely jumps with zero flow
900if (Jump->IsUnlikely && Jump->Flow == 0)
901returntrue;
902
903auto JumpSource = &Func.Blocks[Jump->Source];
904auto JumpTarget = &Func.Blocks[Jump->Target];
905
906// Do not ignore jumps coming into DstBlock
907if (DstBlock !=nullptr && JumpTarget == DstBlock)
908returnfalse;
909
910// Ignore jumps out of SrcBlock to known blocks
911if (!JumpTarget->HasUnknownWeight && JumpSource == SrcBlock)
912returntrue;
913
914// Ignore jumps to known blocks with zero flow
915if (!JumpTarget->HasUnknownWeight && JumpTarget->Flow == 0)
916returntrue;
917
918returnfalse;
919 }
920
921 /// Verify if the given unknown subgraph is acyclic, and if yes, reorder
922 /// UnknownBlocks in the topological order (so that all jumps are "forward").
923bool isAcyclicSubgraph(constFlowBlock *SrcBlock,constFlowBlock *DstBlock,
924 std::vector<FlowBlock *> &UnknownBlocks) {
925// Extract local in-degrees in the considered subgraph
926auto LocalInDegree = std::vector<uint64_t>(NumBlocks(), 0);
927auto fillInDegree = [&](constFlowBlock *Block) {
928for (auto *Jump :Block->SuccJumps) {
929if (ignoreJump(SrcBlock, DstBlock, Jump))
930continue;
931 LocalInDegree[Jump->Target]++;
932 }
933 };
934 fillInDegree(SrcBlock);
935for (auto *Block : UnknownBlocks) {
936 fillInDegree(Block);
937 }
938// A loop containing SrcBlock
939if (LocalInDegree[SrcBlock->Index] > 0)
940returnfalse;
941
942 std::vector<FlowBlock *> AcyclicOrder;
943 std::queue<uint64_t> Queue;
944 Queue.push(SrcBlock->Index);
945while (!Queue.empty()) {
946FlowBlock *Block = &Func.Blocks[Queue.front()];
947 Queue.pop();
948// Stop propagation once we reach DstBlock, if any
949if (DstBlock !=nullptr &&Block == DstBlock)
950break;
951
952// Keep an acyclic order of unknown blocks
953if (Block->HasUnknownWeight &&Block != SrcBlock)
954 AcyclicOrder.push_back(Block);
955
956// Add to the queue all successors with zero local in-degree
957for (auto *Jump :Block->SuccJumps) {
958if (ignoreJump(SrcBlock, DstBlock, Jump))
959continue;
960uint64_t Dst = Jump->Target;
961 LocalInDegree[Dst]--;
962if (LocalInDegree[Dst] == 0) {
963 Queue.push(Dst);
964 }
965 }
966 }
967
968// If there is a cycle in the subgraph, AcyclicOrder contains only a subset
969// of all blocks
970if (UnknownBlocks.size() != AcyclicOrder.size())
971returnfalse;
972 UnknownBlocks = AcyclicOrder;
973returntrue;
974 }
975
976 /// Rebalance a given subgraph rooted at SrcBlock, ending at DstBlock and
977 /// having UnknownBlocks intermediate blocks.
978void rebalanceUnknownSubgraph(constFlowBlock *SrcBlock,
979constFlowBlock *DstBlock,
980const std::vector<FlowBlock *> &UnknownBlocks) {
981assert(SrcBlock->Flow > 0 &&"zero-flow block in unknown subgraph");
982
983// Ditribute flow from the source block
984uint64_t BlockFlow = 0;
985// SrcBlock's flow is the sum of outgoing flows along non-ignored jumps
986for (auto *Jump : SrcBlock->SuccJumps) {
987if (ignoreJump(SrcBlock, DstBlock, Jump))
988continue;
989 BlockFlow += Jump->Flow;
990 }
991 rebalanceBlock(SrcBlock, DstBlock, SrcBlock, BlockFlow);
992
993// Ditribute flow from the remaining blocks
994for (auto *Block : UnknownBlocks) {
995assert(Block->HasUnknownWeight &&"incorrect unknown subgraph");
996uint64_t BlockFlow = 0;
997// Block's flow is the sum of incoming flows
998for (auto *Jump :Block->PredJumps) {
999 BlockFlow += Jump->Flow;
1000 }
1001Block->Flow = BlockFlow;
1002 rebalanceBlock(SrcBlock, DstBlock,Block, BlockFlow);
1003 }
1004 }
1005
1006 /// Redistribute flow for a block in a subgraph rooted at SrcBlock,
1007 /// and ending at DstBlock.
1008void rebalanceBlock(constFlowBlock *SrcBlock,constFlowBlock *DstBlock,
1009constFlowBlock *Block,uint64_t BlockFlow) {
1010// Process all successor jumps and update corresponding flow values
1011size_t BlockDegree = 0;
1012for (auto *Jump :Block->SuccJumps) {
1013if (ignoreJump(SrcBlock, DstBlock, Jump))
1014continue;
1015 BlockDegree++;
1016 }
1017// If all successor jumps of the block are ignored, skip it
1018if (DstBlock ==nullptr && BlockDegree == 0)
1019return;
1020assert(BlockDegree > 0 &&"all outgoing jumps are ignored");
1021
1022// Each of the Block's successors gets the following amount of flow.
1023// Rounding the value up so that all flow is propagated
1024uint64_t SuccFlow = (BlockFlow + BlockDegree - 1) / BlockDegree;
1025for (auto *Jump :Block->SuccJumps) {
1026if (ignoreJump(SrcBlock, DstBlock, Jump))
1027continue;
1028uint64_tFlow = std::min(SuccFlow, BlockFlow);
1029 Jump->Flow =Flow;
1030 BlockFlow -=Flow;
1031 }
1032assert(BlockFlow == 0 &&"not all flow is propagated");
1033 }
1034
1035 /// A constant indicating an arbitrary exit block of a function.
1036staticconstexpruint64_t AnyExitBlock =uint64_t(-1);
1037 /// Minimum BaseDistance for the jump distance values in island joining.
1038staticconstexpruint64_t MinBaseDistance = 10000;
1039
1040 /// Params for flow computation.
1041constProfiParams &Params;
1042 /// The function.
1043FlowFunction &Func;
1044};
1045
1046std::pair<int64_t, int64_t> assignBlockCosts(constProfiParams &Params,
1047constFlowBlock &Block);
1048std::pair<int64_t, int64_t> assignJumpCosts(constProfiParams &Params,
1049constFlowJump &Jump);
1050
1051/// Initializing flow network for a given function.
1052///
1053/// Every block is split into two nodes that are responsible for (i) an
1054/// incoming flow, (ii) an outgoing flow; they penalize an increase or a
1055/// reduction of the block weight.
1056void initializeNetwork(constProfiParams &Params, MinCostMaxFlow &Network,
1057FlowFunction &Func) {
1058uint64_t NumBlocks = Func.Blocks.size();
1059assert(NumBlocks > 1 &&"Too few blocks in a function");
1060uint64_t NumJumps = Func.Jumps.size();
1061assert(NumJumps > 0 &&"Too few jumps in a function");
1062
1063// Introducing dummy source/sink pairs to allow flow circulation.
1064// The nodes corresponding to blocks of the function have indices in
1065// the range [0 .. 2 * NumBlocks); the dummy sources/sinks are indexed by the
1066// next four values.
1067uint64_t S = 2 * NumBlocks;
1068uint64_tT = S + 1;
1069uint64_tS1 = S + 2;
1070uint64_tT1 = S + 3;
1071
1072 Network.initialize(2 * NumBlocks + 4,S1,T1);
1073
1074// Initialize nodes of the flow network
1075for (uint64_tB = 0;B < NumBlocks;B++) {
1076auto &Block = Func.Blocks[B];
1077
1078// Split every block into two auxiliary nodes to allow
1079// increase/reduction of the block count.
1080uint64_tBin = 2 *B;
1081uint64_t Bout = 2 *B + 1;
1082
1083// Edges from S and to T
1084if (Block.isEntry()) {
1085 Network.addEdge(S,Bin, 0);
1086 }elseif (Block.isExit()) {
1087 Network.addEdge(Bout,T, 0);
1088 }
1089
1090// Assign costs for increasing/decreasing the block counts
1091auto [AuxCostInc, AuxCostDec] = assignBlockCosts(Params,Block);
1092
1093// Add the corresponding edges to the network
1094 Network.addEdge(Bin, Bout, AuxCostInc);
1095if (Block.Weight > 0) {
1096 Network.addEdge(Bout,Bin,Block.Weight, AuxCostDec);
1097 Network.addEdge(S1, Bout,Block.Weight, 0);
1098 Network.addEdge(Bin,T1,Block.Weight, 0);
1099 }
1100 }
1101
1102// Initialize edges of the flow network
1103for (uint64_t J = 0; J < NumJumps; J++) {
1104auto &Jump = Func.Jumps[J];
1105
1106// Get the endpoints corresponding to the jump
1107uint64_t Jin = 2 * Jump.Source + 1;
1108uint64_t Jout = 2 * Jump.Target;
1109
1110// Assign costs for increasing/decreasing the jump counts
1111auto [AuxCostInc, AuxCostDec] = assignJumpCosts(Params, Jump);
1112
1113// Add the corresponding edges to the network
1114 Network.addEdge(Jin, Jout, AuxCostInc);
1115if (Jump.Weight > 0) {
1116 Network.addEdge(Jout, Jin, Jump.Weight, AuxCostDec);
1117 Network.addEdge(S1, Jout, Jump.Weight, 0);
1118 Network.addEdge(Jin,T1, Jump.Weight, 0);
1119 }
1120 }
1121
1122// Make sure we have a valid flow circulation
1123 Network.addEdge(T, S, 0);
1124}
1125
1126/// Assign costs for increasing/decreasing the block counts.
1127std::pair<int64_t, int64_t> assignBlockCosts(constProfiParams &Params,
1128constFlowBlock &Block) {
1129// Modifying the weight of an unlikely block is expensive
1130if (Block.IsUnlikely)
1131return std::make_pair(Params.CostUnlikely, Params.CostUnlikely);
1132
1133// Assign default values for the costs
1134 int64_t CostInc = Params.CostBlockInc;
1135 int64_t CostDec = Params.CostBlockDec;
1136// Update the costs depending on the block metadata
1137if (Block.HasUnknownWeight) {
1138 CostInc = Params.CostBlockUnknownInc;
1139 CostDec = 0;
1140 }else {
1141// Increasing the count for "cold" blocks with zero initial count is more
1142// expensive than for "hot" ones
1143if (Block.Weight == 0)
1144 CostInc = Params.CostBlockZeroInc;
1145// Modifying the count of the entry block is expensive
1146if (Block.isEntry()) {
1147 CostInc = Params.CostBlockEntryInc;
1148 CostDec = Params.CostBlockEntryDec;
1149 }
1150 }
1151return std::make_pair(CostInc, CostDec);
1152}
1153
1154/// Assign costs for increasing/decreasing the jump counts.
1155std::pair<int64_t, int64_t> assignJumpCosts(constProfiParams &Params,
1156constFlowJump &Jump) {
1157// Modifying the weight of an unlikely jump is expensive
1158if (Jump.IsUnlikely)
1159return std::make_pair(Params.CostUnlikely, Params.CostUnlikely);
1160
1161// Assign default values for the costs
1162 int64_t CostInc = Params.CostJumpInc;
1163 int64_t CostDec = Params.CostJumpDec;
1164// Update the costs depending on the block metadata
1165if (Jump.Source + 1 == Jump.Target) {
1166// Adjusting the fall-through branch
1167 CostInc = Params.CostJumpFTInc;
1168 CostDec = Params.CostJumpFTDec;
1169 }
1170if (Jump.HasUnknownWeight) {
1171// The cost is different for fall-through and non-fall-through branches
1172if (Jump.Source + 1 == Jump.Target)
1173 CostInc = Params.CostJumpUnknownFTInc;
1174else
1175 CostInc = Params.CostJumpUnknownInc;
1176 CostDec = 0;
1177 }else {
1178assert(Jump.Weight > 0 &&"found zero-weight jump with a positive weight");
1179 }
1180return std::make_pair(CostInc, CostDec);
1181}
1182
1183/// Extract resulting block and edge counts from the flow network.
1184void extractWeights(constProfiParams &Params, MinCostMaxFlow &Network,
1185FlowFunction &Func) {
1186uint64_t NumBlocks = Func.Blocks.size();
1187uint64_t NumJumps = Func.Jumps.size();
1188
1189// Extract resulting jump counts
1190for (uint64_t J = 0; J < NumJumps; J++) {
1191auto &Jump = Func.Jumps[J];
1192uint64_t SrcOut = 2 * Jump.Source + 1;
1193uint64_t DstIn = 2 * Jump.Target;
1194
1195 int64_tFlow = 0;
1196 int64_t AuxFlow = Network.getFlow(SrcOut, DstIn);
1197if (Jump.Source != Jump.Target)
1198Flow = int64_t(Jump.Weight) + AuxFlow;
1199else
1200Flow = int64_t(Jump.Weight) + (AuxFlow > 0 ? AuxFlow : 0);
1201
1202 Jump.Flow =Flow;
1203assert(Flow >= 0 &&"negative jump flow");
1204 }
1205
1206// Extract resulting block counts
1207auto InFlow = std::vector<uint64_t>(NumBlocks, 0);
1208auto OutFlow = std::vector<uint64_t>(NumBlocks, 0);
1209for (auto &Jump : Func.Jumps) {
1210 InFlow[Jump.Target] += Jump.Flow;
1211 OutFlow[Jump.Source] += Jump.Flow;
1212 }
1213for (uint64_tB = 0;B < NumBlocks;B++) {
1214auto &Block = Func.Blocks[B];
1215Block.Flow = std::max(OutFlow[B], InFlow[B]);
1216 }
1217}
1218
1219#ifndef NDEBUG
1220/// Verify that the provided block/jump weights are as expected.
1221void verifyInput(constFlowFunction &Func) {
1222// Verify entry and exit blocks
1223assert(Func.Entry == 0 && Func.Blocks[0].isEntry());
1224size_t NumExitBlocks = 0;
1225for (size_tI = 1;I < Func.Blocks.size();I++) {
1226assert(!Func.Blocks[I].isEntry() &&"multiple entry blocks");
1227if (Func.Blocks[I].isExit())
1228 NumExitBlocks++;
1229 }
1230assert(NumExitBlocks > 0 &&"cannot find exit blocks");
1231
1232// Verify that there are no parallel edges
1233for (auto &Block : Func.Blocks) {
1234 std::unordered_set<uint64_t> UniqueSuccs;
1235for (auto &Jump :Block.SuccJumps) {
1236auto It = UniqueSuccs.insert(Jump->Target);
1237assert(It.second &&"input CFG contains parallel edges");
1238 }
1239 }
1240// Verify CFG jumps
1241for (auto &Block : Func.Blocks) {
1242assert((!Block.isEntry() || !Block.isExit()) &&
1243"a block cannot be an entry and an exit");
1244 }
1245// Verify input block weights
1246for (auto &Block : Func.Blocks) {
1247assert((!Block.HasUnknownWeight ||Block.Weight == 0 ||Block.isEntry()) &&
1248"non-zero weight of a block w/o weight except for an entry");
1249 }
1250// Verify input jump weights
1251for (auto &Jump : Func.Jumps) {
1252assert((!Jump.HasUnknownWeight || Jump.Weight == 0) &&
1253"non-zero weight of a jump w/o weight");
1254 }
1255}
1256
1257/// Verify that the computed flow values satisfy flow conservation rules.
1258void verifyOutput(constFlowFunction &Func) {
1259constuint64_t NumBlocks = Func.Blocks.size();
1260auto InFlow = std::vector<uint64_t>(NumBlocks, 0);
1261auto OutFlow = std::vector<uint64_t>(NumBlocks, 0);
1262for (constauto &Jump : Func.Jumps) {
1263 InFlow[Jump.Target] += Jump.Flow;
1264 OutFlow[Jump.Source] += Jump.Flow;
1265 }
1266
1267uint64_t TotalInFlow = 0;
1268uint64_t TotalOutFlow = 0;
1269for (uint64_tI = 0;I < NumBlocks;I++) {
1270auto &Block = Func.Blocks[I];
1271if (Block.isEntry()) {
1272 TotalInFlow +=Block.Flow;
1273assert(Block.Flow == OutFlow[I] &&"incorrectly computed control flow");
1274 }elseif (Block.isExit()) {
1275 TotalOutFlow +=Block.Flow;
1276assert(Block.Flow == InFlow[I] &&"incorrectly computed control flow");
1277 }else {
1278assert(Block.Flow == OutFlow[I] &&"incorrectly computed control flow");
1279assert(Block.Flow == InFlow[I] &&"incorrectly computed control flow");
1280 }
1281 }
1282assert(TotalInFlow == TotalOutFlow &&"incorrectly computed control flow");
1283
1284// Verify that there are no isolated flow components
1285// One could modify FlowFunction to hold edges indexed by the sources, which
1286// will avoid a creation of the object
1287auto PositiveFlowEdges = std::vector<std::vector<uint64_t>>(NumBlocks);
1288for (constauto &Jump : Func.Jumps) {
1289if (Jump.Flow > 0) {
1290 PositiveFlowEdges[Jump.Source].push_back(Jump.Target);
1291 }
1292 }
1293
1294// Run BFS from the source along edges with positive flow
1295 std::queue<uint64_t> Queue;
1296auto Visited =BitVector(NumBlocks,false);
1297 Queue.push(Func.Entry);
1298 Visited[Func.Entry] =true;
1299while (!Queue.empty()) {
1300uint64_t Src = Queue.front();
1301 Queue.pop();
1302for (uint64_t Dst : PositiveFlowEdges[Src]) {
1303if (!Visited[Dst]) {
1304 Queue.push(Dst);
1305 Visited[Dst] =true;
1306 }
1307 }
1308 }
1309
1310// Verify that every block that has a positive flow is reached from the source
1311// along edges with a positive flow
1312for (uint64_tI = 0;I < NumBlocks;I++) {
1313auto &Block = Func.Blocks[I];
1314assert((Visited[I] ||Block.Flow == 0) &&"an isolated flow component");
1315 }
1316}
1317#endif
1318
1319}// end of anonymous namespace
1320
1321/// Apply the profile inference algorithm for a given function and provided
1322/// profi options
1323voidllvm::applyFlowInference(constProfiParams &Params,FlowFunction &Func) {
1324// Check if the function has samples and assign initial flow values
1325bool HasSamples =false;
1326for (FlowBlock &Block : Func.Blocks) {
1327if (Block.Weight > 0)
1328 HasSamples =true;
1329Block.Flow =Block.Weight;
1330 }
1331for (FlowJump &Jump : Func.Jumps) {
1332if (Jump.Weight > 0)
1333 HasSamples =true;
1334 Jump.Flow = Jump.Weight;
1335 }
1336
1337// Quit early for functions with a single block or ones w/o samples
1338if (Func.Blocks.size() <= 1 || !HasSamples)
1339return;
1340
1341#ifndef NDEBUG
1342// Verify the input data
1343 verifyInput(Func);
1344#endif
1345
1346// Create and apply an inference network model
1347auto InferenceNetwork = MinCostMaxFlow(Params);
1348 initializeNetwork(Params, InferenceNetwork, Func);
1349 InferenceNetwork.run();
1350
1351// Extract flow values for every block and every edge
1352 extractWeights(Params, InferenceNetwork, Func);
1353
1354// Post-processing adjustments to the flow
1355auto Adjuster = FlowAdjuster(Params, Func);
1356 Adjuster.run();
1357
1358#ifndef NDEBUG
1359// Verify the result
1360 verifyOutput(Func);
1361#endif
1362}
1363
1364/// Apply the profile inference algorithm for a given flow function
1365voidllvm::applyFlowInference(FlowFunction &Func) {
1366ProfiParams Params;
1367// Set the params from the command-line flags.
1368 Params.EvenFlowDistribution = SampleProfileEvenFlowDistribution;
1369 Params.RebalanceUnknown = SampleProfileRebalanceUnknown;
1370 Params.JoinIslands = SampleProfileJoinIslands;
1371 Params.CostBlockInc = SampleProfileProfiCostBlockInc;
1372 Params.CostBlockDec = SampleProfileProfiCostBlockDec;
1373 Params.CostBlockEntryInc = SampleProfileProfiCostBlockEntryInc;
1374 Params.CostBlockEntryDec = SampleProfileProfiCostBlockEntryDec;
1375 Params.CostBlockZeroInc = SampleProfileProfiCostBlockZeroInc;
1376 Params.CostBlockUnknownInc = SampleProfileProfiCostBlockUnknownInc;
1377
1378applyFlowInference(Params, Func);
1379}
S1
static const LLT S1
Definition:AMDGPULegalizerInfo.cpp:282
BitVector.h
This file implements the BitVector class.
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
CommandLine.h
Idx
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
Definition:DeadArgumentElimination.cpp:353
Debug.h
LLVM_DEBUG
#define LLVM_DEBUG(...)
Definition:Debug.h:106
addEdge
static void addEdge(SmallVectorImpl< LazyCallGraph::Edge > &Edges, DenseMap< LazyCallGraph::Node *, int > &EdgeIndexMap, LazyCallGraph::Node &N, LazyCallGraph::Edge::Kind EK)
Definition:LazyCallGraph.cpp:62
I
#define I(x, y, z)
Definition:MD5.cpp:58
T1
#define T1
Definition:Mips16ISelLowering.cpp:340
Flow
Annotate SI Control Flow
Definition:SIAnnotateControlFlow.cpp:446
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
SampleProfileInference.h
This file provides the interface for the profile inference algorithm, profi.
initialize
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, ArrayRef< StringLiteral > StandardNames)
Initialize the set of available library functions based on the specified target triple.
Definition:TargetLibraryInfo.cpp:917
Node
Definition:ItaniumDemangle.h:163
T
llvm::BitVector
Definition:BitVector.h:82
llvm::BitVector::push_back
void push_back(bool Val)
Definition:BitVector.h:466
llvm::InstructionCost
Definition:InstructionCost.h:29
llvm::Target
Target - Wrapper for Target specific information.
Definition:TargetRegistry.h:144
llvm::cl::opt
Definition:CommandLine.h:1423
uint64_t
llvm::cl::Hidden
@ Hidden
Definition:CommandLine.h:137
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition:CommandLine.h:443
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition:AddressRanges.h:18
llvm::PseudoProbeType::Block
@ Block
llvm::size
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition:STLExtras.h:1697
llvm::SubDirectoryType::Bin
@ Bin
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition:Debug.cpp:163
llvm::applyFlowInference
void applyFlowInference(const ProfiParams &Params, FlowFunction &Func)
Apply the profile inference algorithm for a given function and provided profi options.
Definition:SampleProfileInference.cpp:1323
INF
#define INF
llvm::FlowBlock
A wrapper of a binary basic block.
Definition:SampleProfileInference.h:26
llvm::FlowBlock::Index
uint64_t Index
Definition:SampleProfileInference.h:27
llvm::FlowBlock::Flow
uint64_t Flow
Definition:SampleProfileInference.h:31
llvm::FlowBlock::HasUnknownWeight
bool HasUnknownWeight
Definition:SampleProfileInference.h:29
llvm::FlowBlock::SuccJumps
std::vector< FlowJump * > SuccJumps
Definition:SampleProfileInference.h:32
llvm::FlowFunction
A wrapper of binary function with basic blocks and jumps.
Definition:SampleProfileInference.h:53
llvm::FlowJump
A wrapper of a jump between two basic blocks.
Definition:SampleProfileInference.h:43
llvm::FlowJump::IsUnlikely
bool IsUnlikely
Definition:SampleProfileInference.h:48
llvm::FlowJump::HasUnknownWeight
bool HasUnknownWeight
Definition:SampleProfileInference.h:47
llvm::FlowJump::Target
uint64_t Target
Definition:SampleProfileInference.h:45
llvm::FlowJump::Flow
uint64_t Flow
Definition:SampleProfileInference.h:49
llvm::FlowJump::Weight
uint64_t Weight
Definition:SampleProfileInference.h:46
llvm::FlowJump::Source
uint64_t Source
Definition:SampleProfileInference.h:44
llvm::ProfiParams
Various thresholds and options controlling the behavior of the profile inference algorithm.
Definition:SampleProfileInference.h:65
llvm::ProfiParams::CostJumpUnknownFTInc
unsigned CostJumpUnknownFTInc
The cost of increasing an unknown fall-through jump's count by one.
Definition:SampleProfileInference.h:109
llvm::ProfiParams::CostBlockInc
unsigned CostBlockInc
The cost of increasing a block's count by one.
Definition:SampleProfileInference.h:76
llvm::ProfiParams::CostJumpFTInc
unsigned CostJumpFTInc
The cost of increasing a fall-through jump's count by one.
Definition:SampleProfileInference.h:97
llvm::ProfiParams::RebalanceUnknown
bool RebalanceUnknown
Evenly re-distribute flow among unknown subgraphs.
Definition:SampleProfileInference.h:70
llvm::ProfiParams::CostUnlikely
const int64_t CostUnlikely
The cost of taking an unlikely block/jump.
Definition:SampleProfileInference.h:112
llvm::ProfiParams::CostJumpDec
unsigned CostJumpDec
The cost of decreasing a jump's count by one.
Definition:SampleProfileInference.h:100
llvm::ProfiParams::JoinIslands
bool JoinIslands
Join isolated components having positive flow.
Definition:SampleProfileInference.h:73
llvm::ProfiParams::CostBlockZeroInc
unsigned CostBlockZeroInc
The cost of increasing a count of zero-weight block by one.
Definition:SampleProfileInference.h:82
llvm::ProfiParams::CostBlockEntryDec
unsigned CostBlockEntryDec
The cost of decreasing the entry block's count by one.
Definition:SampleProfileInference.h:88
llvm::ProfiParams::CostJumpInc
unsigned CostJumpInc
The cost of increasing a jump's count by one.
Definition:SampleProfileInference.h:94
llvm::ProfiParams::CostJumpUnknownInc
unsigned CostJumpUnknownInc
The cost of increasing an unknown jump's count by one.
Definition:SampleProfileInference.h:106
llvm::ProfiParams::CostBlockUnknownInc
unsigned CostBlockUnknownInc
The cost of increasing an unknown block's count by one.
Definition:SampleProfileInference.h:91
llvm::ProfiParams::CostJumpFTDec
unsigned CostJumpFTDec
The cost of decreasing a fall-through jump's count by one.
Definition:SampleProfileInference.h:103
llvm::ProfiParams::CostBlockDec
unsigned CostBlockDec
The cost of decreasing a block's count by one.
Definition:SampleProfileInference.h:79
llvm::ProfiParams::CostBlockEntryInc
unsigned CostBlockEntryInc
The cost of increasing the entry block's count by one.
Definition:SampleProfileInference.h:85
llvm::ProfiParams::EvenFlowDistribution
bool EvenFlowDistribution
Evenly distribute flow when there are multiple equally likely options.
Definition:SampleProfileInference.h:67
llvm::cl::desc
Definition:CommandLine.h:409

Generated on Fri Jul 18 2025 16:40:18 for LLVM by doxygen 1.9.6
[8]ページ先頭

©2009-2025 Movatter.jp