Movatterモバイル変換

Go to the documentation of this file.

1//===- SampleProfileInference.cpp - Adjust sample profiles in the IR ------===//

2//

3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

4// See https://llvm.org/LICENSE.txt for license information.

5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

6//

7//===----------------------------------------------------------------------===//

8//

9// This file implements a profile inference algorithm. Given an incomplete and

10// possibly imprecise block counts, the algorithm reconstructs realistic block

11// and edge counts that satisfy flow conservation rules, while minimally modify

12// input block counts.

13//

14//===----------------------------------------------------------------------===//

16#include "llvm/Transforms/Utils/SampleProfileInference.h"

17#include "llvm/ADT/BitVector.h"

18#include "llvm/Support/CommandLine.h"

19#include "llvm/Support/Debug.h"

20#include <queue>

21#include <set>

22#include <stack>

23#include <unordered_set>

25using namespacellvm;

26#define DEBUG_TYPE "sample-profile-inference"

28namespace{

30staticcl::opt<bool> SampleProfileEvenFlowDistribution(

31"sample-profile-even-flow-distribution",cl::init(true),cl::Hidden,

32cl::desc("Try to evenly distribute flow when there are multiple equally "

33"likely options."));

35staticcl::opt<bool> SampleProfileRebalanceUnknown(

36"sample-profile-rebalance-unknown",cl::init(true),cl::Hidden,

37cl::desc("Evenly re-distribute flow among unknown subgraphs."));

39staticcl::opt<bool> SampleProfileJoinIslands(

40"sample-profile-join-islands",cl::init(true),cl::Hidden,

41cl::desc("Join isolated components having positive flow."));

43staticcl::opt<unsigned> SampleProfileProfiCostBlockInc(

44"sample-profile-profi-cost-block-inc",cl::init(10),cl::Hidden,

45cl::desc("The cost of increasing a block's count by one."));

47staticcl::opt<unsigned> SampleProfileProfiCostBlockDec(

48"sample-profile-profi-cost-block-dec",cl::init(20),cl::Hidden,

49cl::desc("The cost of decreasing a block's count by one."));

51staticcl::opt<unsigned> SampleProfileProfiCostBlockEntryInc(

52"sample-profile-profi-cost-block-entry-inc",cl::init(40),cl::Hidden,

53cl::desc("The cost of increasing the entry block's count by one."));

55staticcl::opt<unsigned> SampleProfileProfiCostBlockEntryDec(

56"sample-profile-profi-cost-block-entry-dec",cl::init(10),cl::Hidden,

57cl::desc("The cost of decreasing the entry block's count by one."));

59staticcl::opt<unsigned> SampleProfileProfiCostBlockZeroInc(

60"sample-profile-profi-cost-block-zero-inc",cl::init(11),cl::Hidden,

61cl::desc("The cost of increasing a count of zero-weight block by one."));

63staticcl::opt<unsigned> SampleProfileProfiCostBlockUnknownInc(

64"sample-profile-profi-cost-block-unknown-inc",cl::init(0),cl::Hidden,

65cl::desc("The cost of increasing an unknown block's count by one."));

67/// A value indicating an infinite flow/capacity/weight of a block/edge.

68/// Not using numeric_limits<int64_t>::max(), as the values can be summed up

69/// during the execution.

70staticconstexpr int64_tINF = ((int64_t)1) << 50;

72/// The minimum-cost maximum flow algorithm.

73///

74/// The algorithm finds the maximum flow of minimum cost on a given (directed)

75/// network using a modified version of the classical Moore-Bellman-Ford

76/// approach. The algorithm applies a number of augmentation iterations in which

77/// flow is sent along paths of positive capacity from the source to the sink.

78/// The worst-case time complexity of the implementation is O(v(f)*m*n), where

79/// where m is the number of edges, n is the number of vertices, and v(f) is the

80/// value of the maximum flow. However, the observed running time on typical

81/// instances is sub-quadratic, that is, o(n^2).

82///

83/// The input is a set of edges with specified costs and capacities, and a pair

84/// of nodes (source and sink). The output is the flow along each edge of the

85/// minimum total cost respecting the given edge capacities.

86classMinCostMaxFlow {

87public:

88 MinCostMaxFlow(constProfiParams &Params) : Params(Params) {}

90// Initialize algorithm's data structures for a network of a given size.

91voidinitialize(uint64_t NodeCount,uint64_t SourceNode,uint64_t SinkNode) {

92 Source = SourceNode;

93Target = SinkNode;

95 Nodes = std::vector<Node>(NodeCount);

96 Edges = std::vector<std::vector<Edge>>(NodeCount, std::vector<Edge>());

97if (Params.EvenFlowDistribution)

98 AugmentingEdges =

99 std::vector<std::vector<Edge *>>(NodeCount, std::vector<Edge *>());

100 }

101

102// Run the algorithm.

103 int64_t run() {

104LLVM_DEBUG(dbgs() <<"Starting profi for " << Nodes.size() <<" nodes\n");

105

106// Iteratively find an augmentation path/dag in the network and send the

107// flow along its edges

108size_t AugmentationIters = applyFlowAugmentation();

109

110// Compute the total flow and its cost

111 int64_t TotalCost = 0;

112 int64_t TotalFlow = 0;

113for (uint64_t Src = 0; Src < Nodes.size(); Src++) {

114for (auto &Edge : Edges[Src]) {

115if (Edge.Flow > 0) {

116 TotalCost += Edge.Cost * Edge.Flow;

117if (Src == Source)

118 TotalFlow += Edge.Flow;

119 }

120 }

121 }

122LLVM_DEBUG(dbgs() <<"Completed profi after " << AugmentationIters

123 <<" iterations with " << TotalFlow <<" total flow"

124 <<" of " << TotalCost <<" cost\n");

125 (void)TotalFlow;

126 (void)AugmentationIters;

127return TotalCost;

128 }

129

130 /// Adding an edge to the network with a specified capacity and a cost.

131 /// Multiple edges between a pair of nodes are allowed but self-edges

132 /// are not supported.

133voidaddEdge(uint64_t Src,uint64_t Dst, int64_t Capacity, int64_tCost) {

134assert(Capacity > 0 &&"adding an edge of zero capacity");

135assert(Src != Dst &&"loop edge are not supported");

136

137 Edge SrcEdge;

138 SrcEdge.Dst = Dst;

139 SrcEdge.Cost =Cost;

140 SrcEdge.Capacity = Capacity;

141 SrcEdge.Flow = 0;

142 SrcEdge.RevEdgeIndex = Edges[Dst].size();

143

144 Edge DstEdge;

145 DstEdge.Dst = Src;

146 DstEdge.Cost = -Cost;

147 DstEdge.Capacity = 0;

148 DstEdge.Flow = 0;

149 DstEdge.RevEdgeIndex = Edges[Src].size();

150

151 Edges[Src].push_back(SrcEdge);

152 Edges[Dst].push_back(DstEdge);

153 }

154

155 /// Adding an edge to the network of infinite capacity and a given cost.

156voidaddEdge(uint64_t Src,uint64_t Dst, int64_tCost) {

157addEdge(Src, Dst,INF,Cost);

158 }

159

160 /// Get the total flow from a given source node.

161 /// Returns a list of pairs (target node, amount of flow to the target).

162 std::vector<std::pair<uint64_t, int64_t>> getFlow(uint64_t Src) const{

163 std::vector<std::pair<uint64_t, int64_t>>Flow;

164for (constauto &Edge : Edges[Src]) {

165if (Edge.Flow > 0)

166Flow.push_back(std::make_pair(Edge.Dst, Edge.Flow));

167 }

168returnFlow;

169 }

170

171 /// Get the total flow between a pair of nodes.

172 int64_t getFlow(uint64_t Src,uint64_t Dst) const{

173 int64_tFlow = 0;

174for (constauto &Edge : Edges[Src]) {

175if (Edge.Dst == Dst) {

176Flow += Edge.Flow;

177 }

178 }

179returnFlow;

180 }

181

182private:

183 /// Iteratively find an augmentation path/dag in the network and send the

184 /// flow along its edges. The method returns the number of applied iterations.

185size_t applyFlowAugmentation() {

186size_t AugmentationIters = 0;

187while (findAugmentingPath()) {

188uint64_t PathCapacity = computeAugmentingPathCapacity();

189while (PathCapacity > 0) {

190bool Progress =false;

191if (Params.EvenFlowDistribution) {

192// Identify node/edge candidates for augmentation

193 identifyShortestEdges(PathCapacity);

194

195// Find an augmenting DAG

196auto AugmentingOrder = findAugmentingDAG();

197

198// Apply the DAG augmentation

199 Progress = augmentFlowAlongDAG(AugmentingOrder);

200 PathCapacity = computeAugmentingPathCapacity();

201 }

202

203if (!Progress) {

204 augmentFlowAlongPath(PathCapacity);

205 PathCapacity = 0;

206 }

207

208 AugmentationIters++;

209 }

210 }

211return AugmentationIters;

212 }

213

214 /// Compute the capacity of the cannonical augmenting path. If the path is

215 /// saturated (that is, no flow can be sent along the path), then return 0.

216uint64_t computeAugmentingPathCapacity() {

217uint64_t PathCapacity =INF;

218uint64_t Now =Target;

219while (Now != Source) {

220uint64_t Pred = Nodes[Now].ParentNode;

221auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex];

222

223assert(Edge.Capacity >= Edge.Flow &&"incorrect edge flow");

224uint64_t EdgeCapacity =uint64_t(Edge.Capacity - Edge.Flow);

225 PathCapacity = std::min(PathCapacity, EdgeCapacity);

226

227 Now = Pred;

228 }

229return PathCapacity;

230 }

231

232 /// Check for existence of an augmenting path with a positive capacity.

233bool findAugmentingPath() {

234// Initialize data structures

235for (auto &Node : Nodes) {

236Node.Distance =INF;

237Node.ParentNode =uint64_t(-1);

238Node.ParentEdgeIndex =uint64_t(-1);

239Node.Taken =false;

240 }

241

242 std::queue<uint64_t> Queue;

243 Queue.push(Source);

244 Nodes[Source].Distance = 0;

245 Nodes[Source].Taken =true;

246while (!Queue.empty()) {

247uint64_t Src = Queue.front();

248 Queue.pop();

249 Nodes[Src].Taken =false;

250// Although the residual network contains edges with negative costs

251// (in particular, backward edges), it can be shown that there are no

252// negative-weight cycles and the following two invariants are maintained:

253// (i) Dist[Source, V] >= 0 and (ii) Dist[V, Target] >= 0 for all nodes V,

254// where Dist is the length of the shortest path between two nodes. This

255// allows to prune the search-space of the path-finding algorithm using

256// the following early-stop criteria:

257// -- If we find a path with zero-distance from Source to Target, stop the

258// search, as the path is the shortest since Dist[Source, Target] >= 0;

259// -- If we have Dist[Source, V] > Dist[Source, Target], then do not

260// process node V, as it is guaranteed _not_ to be on a shortest path

261// from Source to Target; it follows from inequalities

262// Dist[Source, Target] >= Dist[Source, V] + Dist[V, Target]

263// >= Dist[Source, V]

264if (!Params.EvenFlowDistribution && Nodes[Target].Distance == 0)

265break;

266if (Nodes[Src].Distance > Nodes[Target].Distance)

267continue;

268

269// Process adjacent edges

270for (uint64_t EdgeIdx = 0; EdgeIdx < Edges[Src].size(); EdgeIdx++) {

271auto &Edge = Edges[Src][EdgeIdx];

272if (Edge.Flow < Edge.Capacity) {

273uint64_t Dst = Edge.Dst;

274 int64_t NewDistance = Nodes[Src].Distance + Edge.Cost;

275if (Nodes[Dst].Distance > NewDistance) {

276// Update the distance and the parent node/edge

277 Nodes[Dst].Distance = NewDistance;

278 Nodes[Dst].ParentNode = Src;

279 Nodes[Dst].ParentEdgeIndex = EdgeIdx;

280// Add the node to the queue, if it is not there yet

281if (!Nodes[Dst].Taken) {

282 Queue.push(Dst);

283 Nodes[Dst].Taken =true;

284 }

285 }

286 }

287 }

288 }

289

290return Nodes[Target].Distance !=INF;

291 }

292

293 /// Update the current flow along the augmenting path.

294void augmentFlowAlongPath(uint64_t PathCapacity) {

295assert(PathCapacity > 0 &&"found an incorrect augmenting path");

296uint64_t Now =Target;

297while (Now != Source) {

298uint64_t Pred = Nodes[Now].ParentNode;

299auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex];

300auto &RevEdge = Edges[Now][Edge.RevEdgeIndex];

301

302 Edge.Flow += PathCapacity;

303 RevEdge.Flow -= PathCapacity;

304

305 Now = Pred;

306 }

307 }

308

309 /// Find an Augmenting DAG order using a modified version of DFS in which we

310 /// can visit a node multiple times. In the DFS search, when scanning each

311 /// edge out of a node, continue search at Edge.Dst endpoint if it has not

312 /// been discovered yet and its NumCalls < MaxDfsCalls. The algorithm

313 /// runs in O(MaxDfsCalls * |Edges| + |Nodes|) time.

314 /// It returns an Augmenting Order (Taken nodes in decreasing Finish time)

315 /// that starts with Source and ends with Target.

316 std::vector<uint64_t> findAugmentingDAG() {

317// We use a stack based implemenation of DFS to avoid recursion.

318// Defining DFS data structures:

319// A pair (NodeIdx, EdgeIdx) at the top of the Stack denotes that

320// - we are currently visiting Nodes[NodeIdx] and

321// - the next edge to scan is Edges[NodeIdx][EdgeIdx]

322typedef std::pair<uint64_t, uint64_t> StackItemType;

323 std::stack<StackItemType> Stack;

324 std::vector<uint64_t> AugmentingOrder;

325

326// Phase 0: Initialize Node attributes and Time for DFS run

327for (auto &Node : Nodes) {

328Node.Discovery = 0;

329Node.Finish = 0;

330Node.NumCalls = 0;

331Node.Taken =false;

332 }

333uint64_t Time = 0;

334// Mark Target as Taken

335// Taken attribute will be propagated backwards from Target towards Source

336 Nodes[Target].Taken =true;

337

338// Phase 1: Start DFS traversal from Source

339 Stack.emplace(Source, 0);

340 Nodes[Source].Discovery = ++Time;

341while (!Stack.empty()) {

342auto NodeIdx = Stack.top().first;

343auto EdgeIdx = Stack.top().second;

344

345// If we haven't scanned all edges out of NodeIdx, continue scanning

346if (EdgeIdx < Edges[NodeIdx].size()) {

347auto &Edge = Edges[NodeIdx][EdgeIdx];

348auto &Dst = Nodes[Edge.Dst];

349 Stack.top().second++;

350

351if (Edge.OnShortestPath) {

352// If we haven't seen Edge.Dst so far, continue DFS search there

353if (Dst.Discovery == 0 && Dst.NumCalls < MaxDfsCalls) {

354 Dst.Discovery = ++Time;

355 Stack.emplace(Edge.Dst, 0);

356 Dst.NumCalls++;

357 }elseif (Dst.Taken && Dst.Finish != 0) {

358// Else, if Edge.Dst already have a path to Target, so that NodeIdx

359 Nodes[NodeIdx].Taken =true;

360 }

361 }

362 }else {

363// If we are done scanning all edge out of NodeIdx

364 Stack.pop();

365// If we haven't found a path from NodeIdx to Target, forget about it

366if (!Nodes[NodeIdx].Taken) {

367 Nodes[NodeIdx].Discovery = 0;

368 }else {

369// If we have found a path from NodeIdx to Target, then finish NodeIdx

370// and propagate Taken flag to DFS parent unless at the Source

371 Nodes[NodeIdx].Finish = ++Time;

372// NodeIdx == Source if and only if the stack is empty

373if (NodeIdx != Source) {

374assert(!Stack.empty() &&"empty stack while running dfs");

375 Nodes[Stack.top().first].Taken =true;

376 }

377 AugmentingOrder.push_back(NodeIdx);

378 }

379 }

380 }

381// Nodes are collected decreasing Finish time, so the order is reversed

382 std::reverse(AugmentingOrder.begin(), AugmentingOrder.end());

383

384// Phase 2: Extract all forward (DAG) edges and fill in AugmentingEdges

385for (size_t Src : AugmentingOrder) {

386 AugmentingEdges[Src].clear();

387for (auto &Edge : Edges[Src]) {

388uint64_t Dst = Edge.Dst;

389if (Edge.OnShortestPath && Nodes[Src].Taken && Nodes[Dst].Taken &&

390 Nodes[Dst].Finish < Nodes[Src].Finish) {

391 AugmentingEdges[Src].push_back(&Edge);

392 }

393 }

394assert((Src ==Target || !AugmentingEdges[Src].empty()) &&

395"incorrectly constructed augmenting edges");

396 }

397

398return AugmentingOrder;

399 }

400

401 /// Update the current flow along the given (acyclic) subgraph specified by

402 /// the vertex order, AugmentingOrder. The objective is to send as much flow

403 /// as possible while evenly distributing flow among successors of each node.

404 /// After the update at least one edge is saturated.

405bool augmentFlowAlongDAG(const std::vector<uint64_t> &AugmentingOrder) {

406// Phase 0: Initialization

407for (uint64_t Src : AugmentingOrder) {

408 Nodes[Src].FracFlow = 0;

409 Nodes[Src].IntFlow = 0;

410for (auto &Edge : AugmentingEdges[Src]) {

411 Edge->AugmentedFlow = 0;

412 }

413 }

414

415// Phase 1: Send a unit of fractional flow along the DAG

416uint64_t MaxFlowAmount =INF;

417 Nodes[Source].FracFlow = 1.0;

418for (uint64_t Src : AugmentingOrder) {

419assert((Src ==Target || Nodes[Src].FracFlow > 0.0) &&

420"incorrectly computed fractional flow");

421// Distribute flow evenly among successors of Src

422uint64_t Degree = AugmentingEdges[Src].size();

423for (auto &Edge : AugmentingEdges[Src]) {

424double EdgeFlow = Nodes[Src].FracFlow / Degree;

425 Nodes[Edge->Dst].FracFlow += EdgeFlow;

426if (Edge->Capacity ==INF)

427continue;

428uint64_t MaxIntFlow = double(Edge->Capacity - Edge->Flow) / EdgeFlow;

429 MaxFlowAmount = std::min(MaxFlowAmount, MaxIntFlow);

430 }

431 }

432// Stop early if we cannot send any (integral) flow from Source to Target

433if (MaxFlowAmount == 0)

434returnfalse;

435

436// Phase 2: Send an integral flow of MaxFlowAmount

437 Nodes[Source].IntFlow = MaxFlowAmount;

438for (uint64_t Src : AugmentingOrder) {

439if (Src ==Target)

440break;

441// Distribute flow evenly among successors of Src, rounding up to make

442// sure all flow is sent

443uint64_t Degree = AugmentingEdges[Src].size();

444// We are guaranteeed that Node[Src].IntFlow <= SuccFlow * Degree

445uint64_t SuccFlow = (Nodes[Src].IntFlow + Degree - 1) / Degree;

446for (auto &Edge : AugmentingEdges[Src]) {

447uint64_t Dst = Edge->Dst;

448uint64_t EdgeFlow = std::min(Nodes[Src].IntFlow, SuccFlow);

449 EdgeFlow = std::min(EdgeFlow,uint64_t(Edge->Capacity - Edge->Flow));

450 Nodes[Dst].IntFlow += EdgeFlow;

451 Nodes[Src].IntFlow -= EdgeFlow;

452 Edge->AugmentedFlow += EdgeFlow;

453 }

454 }

455assert(Nodes[Target].IntFlow <= MaxFlowAmount);

456 Nodes[Target].IntFlow = 0;

457

458// Phase 3: Send excess flow back traversing the nodes backwards.

459// Because of rounding, not all flow can be sent along the edges of Src.

460// Hence, sending the remaining flow back to maintain flow conservation

461for (size_tIdx = AugmentingOrder.size() - 1;Idx > 0;Idx--) {

462uint64_t Src = AugmentingOrder[Idx - 1];

463// Try to send excess flow back along each edge.

464// Make sure we only send back flow we just augmented (AugmentedFlow).

465for (auto &Edge : AugmentingEdges[Src]) {

466uint64_t Dst = Edge->Dst;

467if (Nodes[Dst].IntFlow == 0)

468continue;

469uint64_t EdgeFlow = std::min(Nodes[Dst].IntFlow, Edge->AugmentedFlow);

470 Nodes[Dst].IntFlow -= EdgeFlow;

471 Nodes[Src].IntFlow += EdgeFlow;

472 Edge->AugmentedFlow -= EdgeFlow;

473 }

474 }

475

476// Phase 4: Update flow values along all edges

477bool HasSaturatedEdges =false;

478for (uint64_t Src : AugmentingOrder) {

479// Verify that we have sent all the excess flow from the node

480assert(Src == Source || Nodes[Src].IntFlow == 0);

481for (auto &Edge : AugmentingEdges[Src]) {

482assert(uint64_t(Edge->Capacity - Edge->Flow) >= Edge->AugmentedFlow);

483// Update flow values along the edge and its reverse copy

484auto &RevEdge = Edges[Edge->Dst][Edge->RevEdgeIndex];

485 Edge->Flow += Edge->AugmentedFlow;

486 RevEdge.Flow -= Edge->AugmentedFlow;

487if (Edge->Capacity == Edge->Flow && Edge->AugmentedFlow > 0)

488 HasSaturatedEdges =true;

489 }

490 }

491

492// The augmentation is successful iff at least one edge becomes saturated

493return HasSaturatedEdges;

494 }

495

496 /// Identify candidate (shortest) edges for augmentation.

497void identifyShortestEdges(uint64_t PathCapacity) {

498assert(PathCapacity > 0 &&"found an incorrect augmenting DAG");

499// To make sure the augmentation DAG contains only edges with large residual

500// capacity, we prune all edges whose capacity is below a fraction of

501// the capacity of the augmented path.

502// (All edges of the path itself are always in the DAG)

503uint64_t MinCapacity = std::max(PathCapacity / 2,uint64_t(1));

504

505// Decide which edges are on a shortest path from Source to Target

506for (size_t Src = 0; Src < Nodes.size(); Src++) {

507// An edge cannot be augmenting if the endpoint has large distance

508if (Nodes[Src].Distance > Nodes[Target].Distance)

509continue;

510

511for (auto &Edge : Edges[Src]) {

512uint64_t Dst = Edge.Dst;

513 Edge.OnShortestPath =

514 Src !=Target && Dst != Source &&

515 Nodes[Dst].Distance <= Nodes[Target].Distance &&

516 Nodes[Dst].Distance == Nodes[Src].Distance + Edge.Cost &&

517 Edge.Capacity > Edge.Flow &&

518uint64_t(Edge.Capacity - Edge.Flow) >= MinCapacity;

519 }

520 }

521 }

522

523 /// Maximum number of DFS iterations for DAG finding.

524staticconstexpruint64_t MaxDfsCalls = 10;

525

526 /// A node in a flow network.

527structNode {

528 /// The cost of the cheapest path from the source to the current node.

529 int64_t Distance;

530 /// The node preceding the current one in the path.

531uint64_t ParentNode;

532 /// The index of the edge between ParentNode and the current node.

533uint64_t ParentEdgeIndex;

534 /// An indicator of whether the current node is in a queue.

535bool Taken;

536

537 /// Data fields utilized in DAG-augmentation:

538 /// Fractional flow.

539double FracFlow;

540 /// Integral flow.

541uint64_t IntFlow;

542 /// Discovery time.

543uint64_t Discovery;

544 /// Finish time.

545uint64_t Finish;

546 /// NumCalls.

547uint64_t NumCalls;

548 };

549

550 /// An edge in a flow network.

551structEdge {

552 /// The cost of the edge.

553 int64_tCost;

554 /// The capacity of the edge.

555 int64_t Capacity;

556 /// The current flow on the edge.

557 int64_tFlow;

558 /// The destination node of the edge.

559uint64_t Dst;

560 /// The index of the reverse edge between Dst and the current node.

561uint64_t RevEdgeIndex;

562

563 /// Data fields utilized in DAG-augmentation:

564 /// Whether the edge is currently on a shortest path from Source to Target.

565bool OnShortestPath;

566 /// Extra flow along the edge.

567uint64_t AugmentedFlow;

568 };

569

570 /// The set of network nodes.

571 std::vector<Node> Nodes;

572 /// The set of network edges.

573 std::vector<std::vector<Edge>> Edges;

574 /// Source node of the flow.

575uint64_t Source;

576 /// Target (sink) node of the flow.

577uint64_t Target;

578 /// Augmenting edges.

579 std::vector<std::vector<Edge *>> AugmentingEdges;

580 /// Params for flow computation.

581constProfiParams &Params;

582};

583

584/// A post-processing adjustment of the control flow. It applies two steps by

585/// rerouting some flow and making it more realistic:

586///

587/// - First, it removes all isolated components ("islands") with a positive flow

588/// that are unreachable from the entry block. For every such component, we

589/// find the shortest from the entry to an exit passing through the component,

590/// and increase the flow by one unit along the path.

591///

592/// - Second, it identifies all "unknown subgraphs" consisting of basic blocks

593/// with no sampled counts. Then it rebalnces the flow that goes through such

594/// a subgraph so that each branch is taken with probability 50%.

595/// An unknown subgraph is such that for every two nodes u and v:

596/// - u dominates v and u is not unknown;

597/// - v post-dominates u; and

598/// - all inner-nodes of all (u,v)-paths are unknown.

599///

600classFlowAdjuster {

601public:

602 FlowAdjuster(constProfiParams &Params,FlowFunction &Func)

603 : Params(Params), Func(Func) {}

604

605 /// Apply the post-processing.

606void run() {

607if (Params.JoinIslands) {

608// Adjust the flow to get rid of isolated components

609 joinIsolatedComponents();

610 }

611

612if (Params.RebalanceUnknown) {

613// Rebalance the flow inside unknown subgraphs

614 rebalanceUnknownSubgraphs();

615 }

616 }

617

618private:

619void joinIsolatedComponents() {

620// Find blocks that are reachable from the source

621auto Visited =BitVector(NumBlocks(),false);

622 findReachable(Func.Entry, Visited);

623

624// Iterate over all non-reachable blocks and adjust their weights

625for (uint64_t I = 0;I < NumBlocks();I++) {

626auto &Block = Func.Blocks[I];

627if (Block.Flow > 0 && !Visited[I]) {

628// Find a path from the entry to an exit passing through the block I

629auto Path = findShortestPath(I);

630// Increase the flow along the path

631assert(Path.size() > 0 && Path[0]->Source == Func.Entry &&

632"incorrectly computed path adjusting control flow");

633 Func.Blocks[Func.Entry].Flow += 1;

634for (auto &Jump : Path) {

635 Jump->Flow += 1;

636 Func.Blocks[Jump->Target].Flow += 1;

637// Update reachability

638 findReachable(Jump->Target, Visited);

639 }

640 }

641 }

642 }

643

644 /// Run BFS from a given block along the jumps with a positive flow and mark

645 /// all reachable blocks.

646void findReachable(uint64_t Src,BitVector &Visited) {

647if (Visited[Src])

648return;

649 std::queue<uint64_t> Queue;

650 Queue.push(Src);

651 Visited[Src] =true;

652while (!Queue.empty()) {

653 Src = Queue.front();

654 Queue.pop();

655for (auto *Jump : Func.Blocks[Src].SuccJumps) {

656uint64_t Dst = Jump->Target;

657if (Jump->Flow > 0 && !Visited[Dst]) {

658 Queue.push(Dst);

659 Visited[Dst] =true;

660 }

661 }

662 }

663 }

664

665 /// Find the shortest path from the entry block to an exit block passing

666 /// through a given block.

667 std::vector<FlowJump *> findShortestPath(uint64_t BlockIdx) {

668// A path from the entry block to BlockIdx

669auto ForwardPath = findShortestPath(Func.Entry, BlockIdx);

670// A path from BlockIdx to an exit block

671auto BackwardPath = findShortestPath(BlockIdx, AnyExitBlock);

672

673// Concatenate the two paths

674 std::vector<FlowJump *> Result;

675 Result.insert(Result.end(), ForwardPath.begin(), ForwardPath.end());

676 Result.insert(Result.end(), BackwardPath.begin(), BackwardPath.end());

677return Result;

678 }

679

680 /// Apply the Dijkstra algorithm to find the shortest path from a given

681 /// Source to a given Target block.

682 /// If Target == -1, then the path ends at an exit block.

683 std::vector<FlowJump *> findShortestPath(uint64_t Source,uint64_t Target) {

684// Quit early, if possible

685if (Source ==Target)

686return std::vector<FlowJump *>();

687if (Func.Blocks[Source].isExit() &&Target == AnyExitBlock)

688return std::vector<FlowJump *>();

689

690// Initialize data structures

691auto Distance = std::vector<int64_t>(NumBlocks(),INF);

692auto Parent = std::vector<FlowJump *>(NumBlocks(),nullptr);

693 Distance[Source] = 0;

694 std::set<std::pair<uint64_t, uint64_t>> Queue;

695 Queue.insert(std::make_pair(Distance[Source], Source));

696

697// Run the Dijkstra algorithm

698while (!Queue.empty()) {

699uint64_t Src = Queue.begin()->second;

700 Queue.erase(Queue.begin());

701// If we found a solution, quit early

702if (Src ==Target ||

703 (Func.Blocks[Src].isExit() &&Target == AnyExitBlock))

704break;

705

706for (auto *Jump : Func.Blocks[Src].SuccJumps) {

707uint64_t Dst = Jump->Target;

708 int64_t JumpDist = jumpDistance(Jump);

709if (Distance[Dst] > Distance[Src] + JumpDist) {

710 Queue.erase(std::make_pair(Distance[Dst], Dst));

711

712 Distance[Dst] = Distance[Src] + JumpDist;

713 Parent[Dst] = Jump;

714

715 Queue.insert(std::make_pair(Distance[Dst], Dst));

716 }

717 }

718 }

719// If Target is not provided, find the closest exit block

720if (Target == AnyExitBlock) {

721for (uint64_t I = 0;I < NumBlocks();I++) {

722if (Func.Blocks[I].isExit() && Parent[I] !=nullptr) {

723if (Target == AnyExitBlock || Distance[Target] > Distance[I]) {

724Target =I;

725 }

726 }

727 }

728 }

729assert(Parent[Target] !=nullptr &&"a path does not exist");

730

731// Extract the constructed path

732 std::vector<FlowJump *> Result;

733uint64_t Now =Target;

734while (Now != Source) {

735assert(Now == Parent[Now]->Target &&"incorrect parent jump");

736 Result.push_back(Parent[Now]);

737 Now = Parent[Now]->Source;

738 }

739// Reverse the path, since it is extracted from Target to Source

740 std::reverse(Result.begin(), Result.end());

741return Result;

742 }

743

744 /// A distance of a path for a given jump.

745 /// In order to incite the path to use blocks/jumps with large positive flow,

746 /// and avoid changing branch probability of outgoing edges drastically,

747 /// set the jump distance so as:

748 /// - to minimize the number of unlikely jumps used and subject to that,

749 /// - to minimize the number of Flow == 0 jumps used and subject to that,

750 /// - minimizes total multiplicative Flow increase for the remaining edges.

751 /// To capture this objective with integer distances, we round off fractional

752 /// parts to a multiple of 1 / BaseDistance.

753 int64_t jumpDistance(FlowJump *Jump) const{

754if (Jump->IsUnlikely)

755return Params.CostUnlikely;

756uint64_t BaseDistance =

757 std::max(FlowAdjuster::MinBaseDistance,

758 std::min(Func.Blocks[Func.Entry].Flow,

759 Params.CostUnlikely / (2 * (NumBlocks() + 1))));

760if (Jump->Flow > 0)

761return BaseDistance + BaseDistance / Jump->Flow;

762return 2 * BaseDistance * (NumBlocks() + 1);

763 };

764

765uint64_t NumBlocks() const{return Func.Blocks.size(); }

766

767 /// Rebalance unknown subgraphs so that the flow is split evenly across the

768 /// outgoing branches of every block of the subgraph. The method iterates over

769 /// blocks with known weight and identifies unknown subgraphs rooted at the

770 /// blocks. Then it verifies if flow rebalancing is feasible and applies it.

771void rebalanceUnknownSubgraphs() {

772// Try to find unknown subgraphs from each block

773for (constFlowBlock &SrcBlock : Func.Blocks) {

774// Verify if rebalancing rooted at SrcBlock is feasible

775if (!canRebalanceAtRoot(&SrcBlock))

776continue;

777

778// Find an unknown subgraphs starting at SrcBlock. Along the way,

779// fill in known destinations and intermediate unknown blocks.

780 std::vector<FlowBlock *> UnknownBlocks;

781 std::vector<FlowBlock *> KnownDstBlocks;

782 findUnknownSubgraph(&SrcBlock, KnownDstBlocks, UnknownBlocks);

783

784// Verify if rebalancing of the subgraph is feasible. If the search is

785// successful, find the unique destination block (which can be null)

786FlowBlock *DstBlock =nullptr;

787if (!canRebalanceSubgraph(&SrcBlock, KnownDstBlocks, UnknownBlocks,

788 DstBlock))

789continue;

790

791// We cannot rebalance subgraphs containing cycles among unknown blocks

792if (!isAcyclicSubgraph(&SrcBlock, DstBlock, UnknownBlocks))

793continue;

794

795// Rebalance the flow

796 rebalanceUnknownSubgraph(&SrcBlock, DstBlock, UnknownBlocks);

797 }

798 }

799

800 /// Verify if rebalancing rooted at a given block is possible.

801bool canRebalanceAtRoot(constFlowBlock *SrcBlock) {

802// Do not attempt to find unknown subgraphs from an unknown or a

803// zero-flow block

804if (SrcBlock->HasUnknownWeight || SrcBlock->Flow == 0)

805returnfalse;

806

807// Do not attempt to process subgraphs from a block w/o unknown sucessors

808bool HasUnknownSuccs =false;

809for (auto *Jump : SrcBlock->SuccJumps) {

810if (Func.Blocks[Jump->Target].HasUnknownWeight) {

811 HasUnknownSuccs =true;

812break;

813 }

814 }

815if (!HasUnknownSuccs)

816returnfalse;

817

818returntrue;

819 }

820

821 /// Find an unknown subgraph starting at block SrcBlock. The method sets

822 /// identified destinations, KnownDstBlocks, and intermediate UnknownBlocks.

823void findUnknownSubgraph(constFlowBlock *SrcBlock,

824 std::vector<FlowBlock *> &KnownDstBlocks,

825 std::vector<FlowBlock *> &UnknownBlocks) {

826// Run BFS from SrcBlock and make sure all paths are going through unknown

827// blocks and end at a known DstBlock

828auto Visited =BitVector(NumBlocks(),false);

829 std::queue<uint64_t> Queue;

830

831 Queue.push(SrcBlock->Index);

832 Visited[SrcBlock->Index] =true;

833while (!Queue.empty()) {

834auto &Block = Func.Blocks[Queue.front()];

835 Queue.pop();

836// Process blocks reachable from Block

837for (auto *Jump :Block.SuccJumps) {

838// If Jump can be ignored, skip it

839if (ignoreJump(SrcBlock,nullptr, Jump))

840continue;

841

842uint64_t Dst = Jump->Target;

843// If Dst has been visited, skip Jump

844if (Visited[Dst])

845continue;

846// Process block Dst

847 Visited[Dst] =true;

848if (!Func.Blocks[Dst].HasUnknownWeight) {

849 KnownDstBlocks.push_back(&Func.Blocks[Dst]);

850 }else {

851 Queue.push(Dst);

852 UnknownBlocks.push_back(&Func.Blocks[Dst]);

853 }

854 }

855 }

856 }

857

858 /// Verify if rebalancing of the subgraph is feasible. If the checks are

859 /// successful, set the unique destination block, DstBlock (can be null).

860bool canRebalanceSubgraph(constFlowBlock *SrcBlock,

861const std::vector<FlowBlock *> &KnownDstBlocks,

862const std::vector<FlowBlock *> &UnknownBlocks,

863FlowBlock *&DstBlock) {

864// If the list of unknown blocks is empty, we don't need rebalancing

865if (UnknownBlocks.empty())

866returnfalse;

867

868// If there are multiple known sinks, we can't rebalance

869if (KnownDstBlocks.size() > 1)

870returnfalse;

871 DstBlock = KnownDstBlocks.empty() ? nullptr : KnownDstBlocks.front();

872

873// Verify sinks of the subgraph

874for (auto *Block : UnknownBlocks) {

875if (Block->SuccJumps.empty()) {

876// If there are multiple (known and unknown) sinks, we can't rebalance

877if (DstBlock !=nullptr)

878returnfalse;

879continue;

880 }

881size_t NumIgnoredJumps = 0;

882for (auto *Jump :Block->SuccJumps) {

883if (ignoreJump(SrcBlock, DstBlock, Jump))

884 NumIgnoredJumps++;

885 }

886// If there is a non-sink block in UnknownBlocks with all jumps ignored,

887// then we can't rebalance

888if (NumIgnoredJumps ==Block->SuccJumps.size())

889returnfalse;

890 }

891

892returntrue;

893 }

894

895 /// Decide whether the Jump is ignored while processing an unknown subgraphs

896 /// rooted at basic block SrcBlock with the destination block, DstBlock.

897bool ignoreJump(constFlowBlock *SrcBlock,constFlowBlock *DstBlock,

898constFlowJump *Jump) {

899// Ignore unlikely jumps with zero flow

900if (Jump->IsUnlikely && Jump->Flow == 0)

901returntrue;

902

903auto JumpSource = &Func.Blocks[Jump->Source];

904auto JumpTarget = &Func.Blocks[Jump->Target];

905

906// Do not ignore jumps coming into DstBlock

907if (DstBlock !=nullptr && JumpTarget == DstBlock)

908returnfalse;

909

910// Ignore jumps out of SrcBlock to known blocks

911if (!JumpTarget->HasUnknownWeight && JumpSource == SrcBlock)

912returntrue;

913

914// Ignore jumps to known blocks with zero flow

915if (!JumpTarget->HasUnknownWeight && JumpTarget->Flow == 0)

916returntrue;

917

918returnfalse;

919 }

920

921 /// Verify if the given unknown subgraph is acyclic, and if yes, reorder

922 /// UnknownBlocks in the topological order (so that all jumps are "forward").

923bool isAcyclicSubgraph(constFlowBlock *SrcBlock,constFlowBlock *DstBlock,

924 std::vector<FlowBlock *> &UnknownBlocks) {

925// Extract local in-degrees in the considered subgraph

926auto LocalInDegree = std::vector<uint64_t>(NumBlocks(), 0);

927auto fillInDegree = [&](constFlowBlock *Block) {

928for (auto *Jump :Block->SuccJumps) {

929if (ignoreJump(SrcBlock, DstBlock, Jump))

930continue;

931 LocalInDegree[Jump->Target]++;

932 }

933 };

934 fillInDegree(SrcBlock);

935for (auto *Block : UnknownBlocks) {

936 fillInDegree(Block);

937 }

938// A loop containing SrcBlock

939if (LocalInDegree[SrcBlock->Index] > 0)

940returnfalse;

941

942 std::vector<FlowBlock *> AcyclicOrder;

943 std::queue<uint64_t> Queue;

944 Queue.push(SrcBlock->Index);

945while (!Queue.empty()) {

946FlowBlock *Block = &Func.Blocks[Queue.front()];

947 Queue.pop();

948// Stop propagation once we reach DstBlock, if any

949if (DstBlock !=nullptr &&Block == DstBlock)

950break;

951

952// Keep an acyclic order of unknown blocks

953if (Block->HasUnknownWeight &&Block != SrcBlock)

954 AcyclicOrder.push_back(Block);

955

956// Add to the queue all successors with zero local in-degree

957for (auto *Jump :Block->SuccJumps) {

958if (ignoreJump(SrcBlock, DstBlock, Jump))

959continue;

960uint64_t Dst = Jump->Target;

961 LocalInDegree[Dst]--;

962if (LocalInDegree[Dst] == 0) {

963 Queue.push(Dst);

964 }

965 }

966 }

967

968// If there is a cycle in the subgraph, AcyclicOrder contains only a subset

969// of all blocks

970if (UnknownBlocks.size() != AcyclicOrder.size())

971returnfalse;

972 UnknownBlocks = AcyclicOrder;

973returntrue;

974 }

975

976 /// Rebalance a given subgraph rooted at SrcBlock, ending at DstBlock and

977 /// having UnknownBlocks intermediate blocks.

978void rebalanceUnknownSubgraph(constFlowBlock *SrcBlock,

979constFlowBlock *DstBlock,

980const std::vector<FlowBlock *> &UnknownBlocks) {

981assert(SrcBlock->Flow > 0 &&"zero-flow block in unknown subgraph");

982

983// Ditribute flow from the source block

984uint64_t BlockFlow = 0;

985// SrcBlock's flow is the sum of outgoing flows along non-ignored jumps

986for (auto *Jump : SrcBlock->SuccJumps) {

987if (ignoreJump(SrcBlock, DstBlock, Jump))

988continue;

989 BlockFlow += Jump->Flow;

990 }

991 rebalanceBlock(SrcBlock, DstBlock, SrcBlock, BlockFlow);

992

993// Ditribute flow from the remaining blocks

994for (auto *Block : UnknownBlocks) {

995assert(Block->HasUnknownWeight &&"incorrect unknown subgraph");

996uint64_t BlockFlow = 0;

997// Block's flow is the sum of incoming flows

998for (auto *Jump :Block->PredJumps) {

999 BlockFlow += Jump->Flow;

1000 }

1001Block->Flow = BlockFlow;

1002 rebalanceBlock(SrcBlock, DstBlock,Block, BlockFlow);

1003 }

1004 }

1005

1006 /// Redistribute flow for a block in a subgraph rooted at SrcBlock,

1007 /// and ending at DstBlock.

1008void rebalanceBlock(constFlowBlock *SrcBlock,constFlowBlock *DstBlock,

1009constFlowBlock *Block,uint64_t BlockFlow) {

1010// Process all successor jumps and update corresponding flow values

1011size_t BlockDegree = 0;

1012for (auto *Jump :Block->SuccJumps) {

1013if (ignoreJump(SrcBlock, DstBlock, Jump))

1014continue;

1015 BlockDegree++;

1016 }

1017// If all successor jumps of the block are ignored, skip it

1018if (DstBlock ==nullptr && BlockDegree == 0)

1019return;

1020assert(BlockDegree > 0 &&"all outgoing jumps are ignored");

1021

1022// Each of the Block's successors gets the following amount of flow.

1023// Rounding the value up so that all flow is propagated

1024uint64_t SuccFlow = (BlockFlow + BlockDegree - 1) / BlockDegree;

1025for (auto *Jump :Block->SuccJumps) {

1026if (ignoreJump(SrcBlock, DstBlock, Jump))

1027continue;

1028uint64_t Flow = std::min(SuccFlow, BlockFlow);

1029 Jump->Flow =Flow;

1030 BlockFlow -=Flow;

1031 }

1032assert(BlockFlow == 0 &&"not all flow is propagated");

1033 }

1034

1035 /// A constant indicating an arbitrary exit block of a function.

1036staticconstexpruint64_t AnyExitBlock =uint64_t(-1);

1037 /// Minimum BaseDistance for the jump distance values in island joining.

1038staticconstexpruint64_t MinBaseDistance = 10000;

1039

1040 /// Params for flow computation.

1041constProfiParams &Params;

1042 /// The function.

1043FlowFunction &Func;

1044};

1045

1046std::pair<int64_t, int64_t> assignBlockCosts(constProfiParams &Params,

1047constFlowBlock &Block);

1048std::pair<int64_t, int64_t> assignJumpCosts(constProfiParams &Params,

1049constFlowJump &Jump);

1050

1051/// Initializing flow network for a given function.

1052///

1053/// Every block is split into two nodes that are responsible for (i) an

1054/// incoming flow, (ii) an outgoing flow; they penalize an increase or a

1055/// reduction of the block weight.

1056void initializeNetwork(constProfiParams &Params, MinCostMaxFlow &Network,

1057FlowFunction &Func) {

1058uint64_t NumBlocks = Func.Blocks.size();

1059assert(NumBlocks > 1 &&"Too few blocks in a function");

1060uint64_t NumJumps = Func.Jumps.size();

1061assert(NumJumps > 0 &&"Too few jumps in a function");

1062

1063// Introducing dummy source/sink pairs to allow flow circulation.

1064// The nodes corresponding to blocks of the function have indices in

1065// the range [0 .. 2 * NumBlocks); the dummy sources/sinks are indexed by the

1066// next four values.

1067uint64_t S = 2 * NumBlocks;

1068uint64_t T = S + 1;

1069uint64_t S1 = S + 2;

1070uint64_t T1 = S + 3;

1071

1072 Network.initialize(2 * NumBlocks + 4,S1,T1);

1073

1074// Initialize nodes of the flow network

1075for (uint64_t B = 0;B < NumBlocks;B++) {

1076auto &Block = Func.Blocks[B];

1077

1078// Split every block into two auxiliary nodes to allow

1079// increase/reduction of the block count.

1080uint64_t Bin = 2 *B;

1081uint64_t Bout = 2 *B + 1;

1082

1083// Edges from S and to T

1084if (Block.isEntry()) {

1085 Network.addEdge(S,Bin, 0);

1086 }elseif (Block.isExit()) {

1087 Network.addEdge(Bout,T, 0);

1088 }

1089

1090// Assign costs for increasing/decreasing the block counts

1091auto [AuxCostInc, AuxCostDec] = assignBlockCosts(Params,Block);

1092

1093// Add the corresponding edges to the network

1094 Network.addEdge(Bin, Bout, AuxCostInc);

1095if (Block.Weight > 0) {

1096 Network.addEdge(Bout,Bin,Block.Weight, AuxCostDec);

1097 Network.addEdge(S1, Bout,Block.Weight, 0);

1098 Network.addEdge(Bin,T1,Block.Weight, 0);

1099 }

1100 }

1101

1102// Initialize edges of the flow network

1103for (uint64_t J = 0; J < NumJumps; J++) {

1104auto &Jump = Func.Jumps[J];

1105

1106// Get the endpoints corresponding to the jump

1107uint64_t Jin = 2 * Jump.Source + 1;

1108uint64_t Jout = 2 * Jump.Target;

1109

1110// Assign costs for increasing/decreasing the jump counts

1111auto [AuxCostInc, AuxCostDec] = assignJumpCosts(Params, Jump);

1112

1113// Add the corresponding edges to the network

1114 Network.addEdge(Jin, Jout, AuxCostInc);

1115if (Jump.Weight > 0) {

1116 Network.addEdge(Jout, Jin, Jump.Weight, AuxCostDec);

1117 Network.addEdge(S1, Jout, Jump.Weight, 0);

1118 Network.addEdge(Jin,T1, Jump.Weight, 0);

1119 }

1120 }

1121

1122// Make sure we have a valid flow circulation

1123 Network.addEdge(T, S, 0);

1124}

1125

1126/// Assign costs for increasing/decreasing the block counts.

1127std::pair<int64_t, int64_t> assignBlockCosts(constProfiParams &Params,

1128constFlowBlock &Block) {

1129// Modifying the weight of an unlikely block is expensive

1130if (Block.IsUnlikely)

1131return std::make_pair(Params.CostUnlikely, Params.CostUnlikely);

1132

1133// Assign default values for the costs

1134 int64_t CostInc = Params.CostBlockInc;

1135 int64_t CostDec = Params.CostBlockDec;

1136// Update the costs depending on the block metadata

1137if (Block.HasUnknownWeight) {

1138 CostInc = Params.CostBlockUnknownInc;

1139 CostDec = 0;

1140 }else {

1141// Increasing the count for "cold" blocks with zero initial count is more

1142// expensive than for "hot" ones

1143if (Block.Weight == 0)

1144 CostInc = Params.CostBlockZeroInc;

1145// Modifying the count of the entry block is expensive

1146if (Block.isEntry()) {

1147 CostInc = Params.CostBlockEntryInc;

1148 CostDec = Params.CostBlockEntryDec;

1149 }

1150 }

1151return std::make_pair(CostInc, CostDec);

1152}

1153

1154/// Assign costs for increasing/decreasing the jump counts.

1155std::pair<int64_t, int64_t> assignJumpCosts(constProfiParams &Params,

1156constFlowJump &Jump) {

1157// Modifying the weight of an unlikely jump is expensive

1158if (Jump.IsUnlikely)

1159return std::make_pair(Params.CostUnlikely, Params.CostUnlikely);

1160

1161// Assign default values for the costs

1162 int64_t CostInc = Params.CostJumpInc;

1163 int64_t CostDec = Params.CostJumpDec;

1164// Update the costs depending on the block metadata

1165if (Jump.Source + 1 == Jump.Target) {

1166// Adjusting the fall-through branch

1167 CostInc = Params.CostJumpFTInc;

1168 CostDec = Params.CostJumpFTDec;

1169 }

1170if (Jump.HasUnknownWeight) {

1171// The cost is different for fall-through and non-fall-through branches

1172if (Jump.Source + 1 == Jump.Target)

1173 CostInc = Params.CostJumpUnknownFTInc;

1174else

1175 CostInc = Params.CostJumpUnknownInc;

1176 CostDec = 0;

1177 }else {

1178assert(Jump.Weight > 0 &&"found zero-weight jump with a positive weight");

1179 }

1180return std::make_pair(CostInc, CostDec);

1181}

1182

1183/// Extract resulting block and edge counts from the flow network.

1184void extractWeights(constProfiParams &Params, MinCostMaxFlow &Network,

1185FlowFunction &Func) {

1186uint64_t NumBlocks = Func.Blocks.size();

1187uint64_t NumJumps = Func.Jumps.size();

1188

1189// Extract resulting jump counts

1190for (uint64_t J = 0; J < NumJumps; J++) {

1191auto &Jump = Func.Jumps[J];

1192uint64_t SrcOut = 2 * Jump.Source + 1;

1193uint64_t DstIn = 2 * Jump.Target;

1194

1195 int64_tFlow = 0;

1196 int64_t AuxFlow = Network.getFlow(SrcOut, DstIn);

1197if (Jump.Source != Jump.Target)

1198Flow = int64_t(Jump.Weight) + AuxFlow;

1199else

1200Flow = int64_t(Jump.Weight) + (AuxFlow > 0 ? AuxFlow : 0);

1201

1202 Jump.Flow =Flow;

1203assert(Flow >= 0 &&"negative jump flow");

1204 }

1205

1206// Extract resulting block counts

1207auto InFlow = std::vector<uint64_t>(NumBlocks, 0);

1208auto OutFlow = std::vector<uint64_t>(NumBlocks, 0);

1209for (auto &Jump : Func.Jumps) {

1210 InFlow[Jump.Target] += Jump.Flow;

1211 OutFlow[Jump.Source] += Jump.Flow;

1212 }

1213for (uint64_t B = 0;B < NumBlocks;B++) {

1214auto &Block = Func.Blocks[B];

1215Block.Flow = std::max(OutFlow[B], InFlow[B]);

1216 }

1217}

1218

1219#ifndef NDEBUG

1220/// Verify that the provided block/jump weights are as expected.

1221void verifyInput(constFlowFunction &Func) {

1222// Verify entry and exit blocks

1223assert(Func.Entry == 0 && Func.Blocks[0].isEntry());

1224size_t NumExitBlocks = 0;

1225for (size_tI = 1;I < Func.Blocks.size();I++) {

1226assert(!Func.Blocks[I].isEntry() &&"multiple entry blocks");

1227if (Func.Blocks[I].isExit())

1228 NumExitBlocks++;

1229 }

1230assert(NumExitBlocks > 0 &&"cannot find exit blocks");

1231

1232// Verify that there are no parallel edges

1233for (auto &Block : Func.Blocks) {

1234 std::unordered_set<uint64_t> UniqueSuccs;

1235for (auto &Jump :Block.SuccJumps) {

1236auto It = UniqueSuccs.insert(Jump->Target);

1237assert(It.second &&"input CFG contains parallel edges");

1238 }

1239 }

1240// Verify CFG jumps

1241for (auto &Block : Func.Blocks) {

1242assert((!Block.isEntry() || !Block.isExit()) &&

1243"a block cannot be an entry and an exit");

1244 }

1245// Verify input block weights

1246for (auto &Block : Func.Blocks) {

1247assert((!Block.HasUnknownWeight ||Block.Weight == 0 ||Block.isEntry()) &&

1248"non-zero weight of a block w/o weight except for an entry");

1249 }

1250// Verify input jump weights

1251for (auto &Jump : Func.Jumps) {

1252assert((!Jump.HasUnknownWeight || Jump.Weight == 0) &&

1253"non-zero weight of a jump w/o weight");

1254 }

1255}

1256

1257/// Verify that the computed flow values satisfy flow conservation rules.

1258void verifyOutput(constFlowFunction &Func) {

1259constuint64_t NumBlocks = Func.Blocks.size();

1260auto InFlow = std::vector<uint64_t>(NumBlocks, 0);

1261auto OutFlow = std::vector<uint64_t>(NumBlocks, 0);

1262for (constauto &Jump : Func.Jumps) {

1263 InFlow[Jump.Target] += Jump.Flow;

1264 OutFlow[Jump.Source] += Jump.Flow;

1265 }

1266

1267uint64_t TotalInFlow = 0;

1268uint64_t TotalOutFlow = 0;

1269for (uint64_t I = 0;I < NumBlocks;I++) {

1270auto &Block = Func.Blocks[I];

1271if (Block.isEntry()) {

1272 TotalInFlow +=Block.Flow;

1273assert(Block.Flow == OutFlow[I] &&"incorrectly computed control flow");

1274 }elseif (Block.isExit()) {

1275 TotalOutFlow +=Block.Flow;

1276assert(Block.Flow == InFlow[I] &&"incorrectly computed control flow");

1277 }else {

1278assert(Block.Flow == OutFlow[I] &&"incorrectly computed control flow");

1279assert(Block.Flow == InFlow[I] &&"incorrectly computed control flow");

1280 }

1281 }

1282assert(TotalInFlow == TotalOutFlow &&"incorrectly computed control flow");

1283

1284// Verify that there are no isolated flow components

1285// One could modify FlowFunction to hold edges indexed by the sources, which

1286// will avoid a creation of the object

1287auto PositiveFlowEdges = std::vector<std::vector<uint64_t>>(NumBlocks);

1288for (constauto &Jump : Func.Jumps) {

1289if (Jump.Flow > 0) {

1290 PositiveFlowEdges[Jump.Source].push_back(Jump.Target);

1291 }

1292 }

1293

1294// Run BFS from the source along edges with positive flow

1295 std::queue<uint64_t> Queue;

1296auto Visited =BitVector(NumBlocks,false);

1297 Queue.push(Func.Entry);

1298 Visited[Func.Entry] =true;

1299while (!Queue.empty()) {

1300uint64_t Src = Queue.front();

1301 Queue.pop();

1302for (uint64_t Dst : PositiveFlowEdges[Src]) {

1303if (!Visited[Dst]) {

1304 Queue.push(Dst);

1305 Visited[Dst] =true;

1306 }

1307 }

1308 }

1309

1310// Verify that every block that has a positive flow is reached from the source

1311// along edges with a positive flow

1312for (uint64_t I = 0;I < NumBlocks;I++) {

1313auto &Block = Func.Blocks[I];

1314assert((Visited[I] ||Block.Flow == 0) &&"an isolated flow component");

1315 }

1316}

1317#endif

1318

1319}// end of anonymous namespace

1320

1321/// Apply the profile inference algorithm for a given function and provided

1322/// profi options

1323voidllvm::applyFlowInference(constProfiParams &Params,FlowFunction &Func) {

1324// Check if the function has samples and assign initial flow values

1325bool HasSamples =false;

1326for (FlowBlock &Block : Func.Blocks) {

1327if (Block.Weight > 0)

1328 HasSamples =true;

1329Block.Flow =Block.Weight;

1330 }

1331for (FlowJump &Jump : Func.Jumps) {

1332if (Jump.Weight > 0)

1333 HasSamples =true;

1334 Jump.Flow = Jump.Weight;

1335 }

1336

1337// Quit early for functions with a single block or ones w/o samples

1338if (Func.Blocks.size() <= 1 || !HasSamples)

1339return;

1340

1341#ifndef NDEBUG

1342// Verify the input data

1343 verifyInput(Func);

1344#endif

1345

1346// Create and apply an inference network model

1347auto InferenceNetwork = MinCostMaxFlow(Params);

1348 initializeNetwork(Params, InferenceNetwork, Func);

1349 InferenceNetwork.run();

1350

1351// Extract flow values for every block and every edge

1352 extractWeights(Params, InferenceNetwork, Func);

1353

1354// Post-processing adjustments to the flow

1355auto Adjuster = FlowAdjuster(Params, Func);

1356 Adjuster.run();

1357

1358#ifndef NDEBUG

1359// Verify the result

1360 verifyOutput(Func);

1361#endif

1362}

1363

1364/// Apply the profile inference algorithm for a given flow function

1365voidllvm::applyFlowInference(FlowFunction &Func) {

1366ProfiParams Params;

1367// Set the params from the command-line flags.

1368 Params.EvenFlowDistribution = SampleProfileEvenFlowDistribution;

1369 Params.RebalanceUnknown = SampleProfileRebalanceUnknown;

1370 Params.JoinIslands = SampleProfileJoinIslands;

1371 Params.CostBlockInc = SampleProfileProfiCostBlockInc;

1372 Params.CostBlockDec = SampleProfileProfiCostBlockDec;

1373 Params.CostBlockEntryInc = SampleProfileProfiCostBlockEntryInc;

1374 Params.CostBlockEntryDec = SampleProfileProfiCostBlockEntryDec;

1375 Params.CostBlockZeroInc = SampleProfileProfiCostBlockZeroInc;

1376 Params.CostBlockUnknownInc = SampleProfileProfiCostBlockUnknownInc;

1377

1378applyFlowInference(Params, Func);

1379}

static const LLT S1

Definition:AMDGPULegalizerInfo.cpp:282

BitVector.h

This file implements the BitVector class.

static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")

CommandLine.h

Idx

Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx

Definition:DeadArgumentElimination.cpp:353

Debug.h

LLVM_DEBUG

#define LLVM_DEBUG(...)

Definition:Debug.h:106

addEdge

static void addEdge(SmallVectorImpl< LazyCallGraph::Edge > &Edges, DenseMap< LazyCallGraph::Node *, int > &EdgeIndexMap, LazyCallGraph::Node &N, LazyCallGraph::Edge::Kind EK)

Definition:LazyCallGraph.cpp:62

#define I(x, y, z)

Definition:MD5.cpp:58

#define T1

Definition:Mips16ISelLowering.cpp:340

Flow

Annotate SI Control Flow

Definition:SIAnnotateControlFlow.cpp:446

assert

assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())

SampleProfileInference.h

This file provides the interface for the profile inference algorithm, profi.

initialize

static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, ArrayRef< StringLiteral > StandardNames)

Initialize the set of available library functions based on the specified target triple.

Definition:TargetLibraryInfo.cpp:917

Node

Definition:ItaniumDemangle.h:163

llvm::BitVector

Definition:BitVector.h:82

llvm::BitVector::push_back

void push_back(bool Val)

Definition:BitVector.h:466

llvm::InstructionCost

Definition:InstructionCost.h:29

llvm::Target

Target - Wrapper for Target specific information.

Definition:TargetRegistry.h:144

llvm::cl::opt

Definition:CommandLine.h:1423

uint64_t

llvm::cl::Hidden

@ Hidden

Definition:CommandLine.h:137

llvm::cl::init

initializer< Ty > init(const Ty &Val)

Definition:CommandLine.h:443

llvm

This is an optimization pass for GlobalISel generic memory operations.

Definition:AddressRanges.h:18

llvm::PseudoProbeType::Block

@ Block

llvm::size

auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)

Get the size of a range.

Definition:STLExtras.h:1697

llvm::SubDirectoryType::Bin

@ Bin

llvm::dbgs

raw_ostream & dbgs()

dbgs() - This returns a reference to a raw_ostream for debugging messages.

Definition:Debug.cpp:163

llvm::applyFlowInference

void applyFlowInference(const ProfiParams &Params, FlowFunction &Func)

Apply the profile inference algorithm for a given function and provided profi options.

Definition:SampleProfileInference.cpp:1323