1//===- SampleProf.h - Sampling profiling format support ---------*- C++ -*-===// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 7//===----------------------------------------------------------------------===// 9// This file contains common definitions used in the reading and writing of 10// sample profile data. 12//===----------------------------------------------------------------------===// 14#ifndef LLVM_PROFILEDATA_SAMPLEPROF_H 15#define LLVM_PROFILEDATA_SAMPLEPROF_H 36#include <system_error> 37#include <unordered_map> 71// Prefer first error encountered as later errors may be secondary effects of 72// the initial problem. 84structis_error_code_enum<
llvm::sampleprof_error> : std::true_type {};
115// Section Type used by SampleProfileExtBinaryBaseReader and 116// SampleProfileExtBinaryBaseWriter. Never change the existing 117// value of enum. Only append new ones. 126// marker for the first type of profile. 132switch (
static_cast<int>(
Type)) {
// Avoid -Wcovered-switch-default 134return"InvalidSection";
136return"ProfileSummarySection";
138return"NameTableSection";
140return"ProfileSymbolListSection";
142return"FuncOffsetTableSection";
144return"FunctionMetadata";
146return"CSNameTableSection";
148return"LBRProfileSection";
150return"UnknownSection";
154// Entry type of section header table used by SampleProfileExtBinaryBaseReader 155// and SampleProfileExtBinaryBaseWriter. 161// The index indicating the location of the current entry in 162// SectionHdrLayout table. 166// Flags common for all sections are defined here. In SecHdrTableEntry::Flags, 167// common flags will be saved in the lower 32 bits and section specific flags 168// will be saved in the higher 32 bits. 172// Indicate the section contains only profile without context. 176// Section specific flags are defined here. 177// !!!Note: Every time a new enum class is created here, please add 178// a new check in verifySecFlag. 182// Store MD5 in fixed length instead of ULEB128 so NameTable can be 183// accessed like an array. 185// Profile contains ".__uniq." suffix name. Compiler shouldn't strip 186// the suffix when doing profile matching when seeing the flag. 191 /// SecFlagPartial means the profile is for common/shared code. 192 /// The common profile is usually merged from profiles collected 193 /// from running other targets. 195 /// SecFlagContext means this is context-sensitive flat profile for 198 /// SecFlagFSDiscriminator means this profile uses flow-sensitive 201 /// SecFlagIsPreInlined means this profile contains ShouldBeInlined 202 /// contexts thus this is CS preinliner computed. 214// Store function offsets in an order of contexts. The order ensures that 215// callee contexts of a given context are laid out next to it. 219// Verify section specific flag is used for the correct section. 220template <
class SecFlagType>
222// No verification is needed for common flags. 223if (std::is_same<SecCommonFlags, SecFlagType>())
226// Verification starts here for section specific flag. 227bool IsFlagLegal =
false;
230 IsFlagLegal = std::is_same<SecNameTableFlags, SecFlagType>();
233 IsFlagLegal = std::is_same<SecProfSummaryFlags, SecFlagType>();
236 IsFlagLegal = std::is_same<SecFuncMetadataFlags, SecFlagType>();
240 IsFlagLegal = std::is_same<SecFuncOffsetFlags, SecFlagType>();
247template <
class SecFlagType>
250auto FVal =
static_cast<uint64_t>(Flag);
251bool IsCommon = std::is_same<SecCommonFlags, SecFlagType>();
252 Entry.Flags |= IsCommon ? FVal : (FVal << 32);
255template <
class SecFlagType>
258auto FVal =
static_cast<uint64_t>(Flag);
259bool IsCommon = std::is_same<SecCommonFlags, SecFlagType>();
260 Entry.Flags &= ~(IsCommon ? FVal : (FVal << 32));
263template <
class SecFlagType>
266auto FVal =
static_cast<uint64_t>(Flag);
267bool IsCommon = std::is_same<SecCommonFlags, SecFlagType>();
268return Entry.Flags & (IsCommon ? FVal : (FVal << 32));
271/// Represents the relative location of an instruction. 273/// Instruction locations are specified by the line offset from the 274/// beginning of the function (marked by the line where the function 275/// header is) and the discriminator value within that line. 277/// The discriminator value is useful to distinguish instructions 278/// that are on the same line but belong to different basic blocks 279/// (e.g., the two post-increment instructions in "if (p) x++; else y++;"). 315/// Representation of a single sample record. 317/// A sample record is represented by a positive integer value, which 318/// indicates how frequently was the associated line location executed. 320/// Additionally, if the associated location contains a function call, 321/// the record will hold a list of all the possible called targets. For 322/// direct calls, this will be the exact function being invoked. For 323/// indirect calls (function pointers, virtual table dispatch), this 324/// will be a list of one or more functions. 330if (
LHS.second !=
RHS.second)
331returnLHS.second >
RHS.second;
333returnLHS.first <
RHS.first;
341 /// Increment the number of samples for this record by \p S. 342 /// Optionally scale sample count \p S by \p Weight. 344 /// Sample counts accumulate using saturating arithmetic, to avoid wrapping 345 /// around unsigned integers. 353 /// Decrease the number of samples for this record by \p S. Return the amount 354 /// of samples actually decreased. 362 /// Add called function \p F with samples \p S. 363 /// Optionally scale sample count \p S by \p Weight. 365 /// Sample counts accumulate using saturating arithmetic, to avoid wrapping 366 /// around unsigned integers. 377 /// Remove called function from the call target map. Return the target sample 378 /// count of the called function. 381autoI = CallTargets.find(
F);
382if (
I != CallTargets.end()) {
384 CallTargets.erase(
I);
389 /// Return true if this sample record contains function calls. 400for (
constauto &
I : CallTargets)
405 /// Sort call targets in descending order of call frequency. 409for (
constauto &[
Target, Frequency] : Targets) {
410 SortedTargets.emplace(
Target, Frequency);
415 /// Prorate call targets by a distribution factor. 417float DistributionFactor) {
419for (
constauto &[
Target, Frequency] : Targets) {
420 AdjustedTargets[
Target] = Frequency * DistributionFactor;
422return AdjustedTargets;
425 /// Merge the samples in \p Other into this record. 426 /// Optionally scale sample counts by \p Weight. 432return NumSamples ==
Other.NumSamples && CallTargets ==
Other.CallTargets;
436return !(*
this ==
Other);
446// State of context associated with FunctionSamples 452MergedContext = 0x8
// Profile for context merged into base profile 455// Attribute of context associated with FunctionSamples 461 0x4,
// Leaf of context is duplicated into the base profile 464// Represents a context frame with profile function and line location 479return !(*
this == That);
482 std::string
toString(
bool OutputLineLocation)
const{
483 std::ostringstream OContextStr;
485if (OutputLineLocation) {
490return OContextStr.str();
496return NameHash + (LocId << 5) + LocId;
513// Sample context for FunctionSamples. It consists of the calling context, 514// the function name and context state. Internally sample context is represented 515// using ArrayRef, which is also the input for constructing a `SampleContext`. 516// It can accept and represent both full context string as well as context-less 518// For a CS profile, a full context vector can look like: 519// `main:3 _Z5funcAi:1 _Z8funcLeafi` 520// For a base CS profile without calling context, the context vector should only 521// contain the leaf frame name. 522// For a non-CS profile, the context vector should be empty. 542// Give a context string, decode and populate internal states like 543// Function name, Calling context and context state. Example of input 544// `ContextStr`: `[main:3 @ _Z5funcAi:1 @ _Z8funcLeafi]` 546 std::list<SampleContextFrameVector> &CSNameTable,
550// Note that `[]` wrapped input indicates a full context string, otherwise 551// it's treated as context-less function name only. 557 CSNameTable.emplace_back();
564 /// Create a context vector from a given context string and save it in 568// Remove encapsulating '[' and ']' if any 569 ContextStr = ContextStr.
substr(1, ContextStr.
size() - 2);
573while (!ContextRemain.
empty()) {
574auto ContextSplit = ContextRemain.
split(
" @ ");
575 ChildContext = ContextSplit.first;
576 ContextRemain = ContextSplit.second;
583// Decode context string for a frame to get function name and location. 584// `ContextStr` is in the form of `FuncName:StartLine.Discriminator`. 589auto EntrySplit = ContextStr.
split(
':');
593if (!EntrySplit.second.empty()) {
594// Get line offset, use signed int for getAsInteger so string will 595// be parsed as signed. 597auto LocSplit = EntrySplit.second.split(
'.');
598 LocSplit.first.getAsInteger(10, LineOffset);
602if (!LocSplit.second.empty())
621bool IncludeLeafLineLocation =
false) {
622 std::ostringstream OContextStr;
624if (OContextStr.str().size()) {
627 OContextStr << Context[
I].toString(
I != Context.
size() - 1 ||
628 IncludeLeafLineLocation);
630return OContextStr.str();
645 /// Set the name of the function and clear the current context. 647 Func = NewFunctionID;
655 FullContext = Context;
661return State == That.State && Func == That.Func &&
662 FullContext == That.FullContext;
668if (State != That.State)
669return State < That.State;
672return Func < That.Func;
676while (
I < std::min(FullContext.
size(), That.FullContext.
size())) {
677auto &Context1 = FullContext[
I];
678auto &Context2 = That.FullContext[
I];
679auto V = Context1.Func.compare(Context2.Func);
682if (Context1.Location != Context2.Location)
683return Context1.Location < Context2.Location;
687return FullContext.
size() < That.FullContext.
size();
697auto ThisContext = FullContext;
698auto ThatContext = That.FullContext;
699if (ThatContext.size() < ThisContext.size())
701 ThatContext = ThatContext.
take_front(ThisContext.size());
702// Compare Leaf frame first 703if (ThisContext.back().Func != ThatContext.back().Func)
705// Compare leading context 710// The function associated with this context. If CS profile, this is the leaf 713// Full context including calling context and leaf function name 715// State of the associated sample profile 717// Attribute of the associated sample profile 733// NOTE: Using a StringMap here makes parsed profiles consume around 17% more 734// memory, which is *very* significant for large profiles. 738 std::unordered_map<LineLocation, LineLocation, LineLocationHash>;
740/// Representation of the samples collected for a function. 742/// This data structure contains all the collected samples for the body 743/// of a function. Each sample corresponds to a LineLocation instance 744/// within the body of the function. 761if (TotalSamples < Num)
781return BodySamples[
LineLocation(LineOffset, Discriminator)].addSamples(
790return BodySamples[
LineLocation(LineOffset, Discriminator)].addCalledTarget(
797return BodySamples[Location].merge(
SampleRecord, Weight);
800// Remove a call target and decrease the body sample correspondingly. Return 801// the number of body samples actually decreased. 806autoI = BodySamples.find(
LineLocation(LineOffset, Discriminator));
807if (
I != BodySamples.end()) {
808 Count =
I->second.removeCalledTarget(Func);
809 Count =
I->second.removeSamples(Count);
810if (!
I->second.getSamples())
811 BodySamples.erase(
I);
816// Remove all call site samples for inlinees. This is needed when flattening 819 CallsiteSamples.clear();
822// Accumulate all call target samples to update the body samples. 824for (
auto &
I : BodySamples) {
825uint64_t TargetSamples =
I.second.getCallTargetSum();
826// It's possible that the body sample count can be greater than the call 827// target sum. E.g, if some call targets are external targets, they won't 828// be considered valid call targets, but the body sample count which is 829// from lbr ranges can actually include them. 830if (TargetSamples >
I.second.getSamples())
831I.second.addSamples(TargetSamples -
I.second.getSamples());
835// Accumulate all body samples to set total samples. 838for (
constauto &
I : BodySamples)
841for (
auto &
I : CallsiteSamples) {
842for (
auto &CS :
I.second) {
843 CS.second.updateTotalSamples();
849// Set current context and all callee contexts to be synthetic. 852for (
auto &
I : CallsiteSamples) {
853for (
auto &CS :
I.second) {
854 CS.second.setContextSynthetic();
859// Query the stale profile matching results and remap the location. 861// There is no remapping if the profile is not stale or the matching gives 863if (!IRToProfileLocationMap)
865constauto &ProfileLoc = IRToProfileLocationMap->find(IRLoc);
866if (ProfileLoc != IRToProfileLocationMap->end())
867return ProfileLoc->second;
871 /// Return the number of samples collected at the given location. 872 /// Each location is specified by \p LineOffset and \p Discriminator. 873 /// If the location is not found in profile, return error. 876constauto &Ret = BodySamples.find(
878if (Ret == BodySamples.end())
879return std::error_code();
880return Ret->second.getSamples();
883 /// Returns the call target map collected at a given location. 884 /// Each location is specified by \p LineOffset and \p Discriminator. 885 /// If the location is not found in profile, return error. 888constauto &Ret = BodySamples.find(
890if (Ret == BodySamples.end())
891return std::error_code();
892return Ret->second.getCallTargets();
895 /// Returns the call target map collected at a given location specified by \p 896 /// CallSite. If the location is not found in profile, return error. 900if (Ret == BodySamples.end())
901return std::error_code();
902return Ret->second.getCallTargets();
905 /// Return the function samples at the given callsite location. 910 /// Returns the FunctionSamplesMap at the given \p Loc. 914if (Iter == CallsiteSamples.end())
919 /// Returns a pointer to FunctionSamples at the given callsite location 920 /// \p Loc with callee \p CalleeName. If no callsite can be found, relax 921 /// the restriction to return the FunctionSamples at callsite location 922 /// \p Loc with the maximum total sample count. If \p Remapper or \p 923 /// FuncNameToProfNameMap is not nullptr, use them to find FunctionSamples 924 /// with equivalent name as \p CalleeName. 929 *FuncNameToProfNameMap =
nullptr)
const;
931boolempty()
const{
return TotalSamples == 0; }
933 /// Return the total number of samples collected inside the function. 936 /// For top-level functions, return the total number of branch samples that 937 /// have the function as the branch target (or 0 otherwise). This is the raw 938 /// data fetched from the profile. This should be equivalent to the sample of 939 /// the first instruction of the symbol. But as we directly get this info for 940 /// raw profile without referring to potentially inaccurate debug info, this 941 /// gives more accurate profile data and is preferred for standalone symbols. 944 /// Return an estimate of the sample count of the function entry basic block. 945 /// The function can be either a standalone symbol or an inlined function. 946 /// For Context-Sensitive profiles, this will prefer returning the head 947 /// samples (i.e. getHeadSamples()), if non-zero. Otherwise it estimates from 948 /// the function body's samples or callsite samples. 951// For CS profile, if we already have more accurate head samples 952// counted by branch sample from caller, use them as entry samples. 956// Use either BodySamples or CallsiteSamples which ever has the smaller 958if (!BodySamples.empty() &&
959 (CallsiteSamples.empty() ||
960 BodySamples.begin()->first < CallsiteSamples.begin()->first))
961 Count = BodySamples.begin()->second.getSamples();
962elseif (!CallsiteSamples.empty()) {
963// An indirect callsite may be promoted to several inlined direct calls. 964// We need to get the sum of them. 965for (
constauto &FuncSamples : CallsiteSamples.begin()->second)
966 Count += FuncSamples.second.getHeadSamplesEstimate();
968// Return at least 1 if total sample is not 0. 969return Count ? Count : TotalSamples > 0;
972 /// Return all the samples collected in the body of the function. 975 /// Return all the callsite samples collected in the body of the function. 977return CallsiteSamples;
980 /// Return the maximum of sample counts in a function body. When SkipCallSite 981 /// is false, which is the default, the return count includes samples in the 982 /// inlined functions. When SkipCallSite is true, the return count only 983 /// considers the body samples. 987 MaxCount = std::max(MaxCount, L.second.getSamples());
991for (
const FunctionSamplesMap::value_type &
F :
C.second)
992 MaxCount = std::max(MaxCount,
F.second.getMaxCountInside());
996 /// Merge the samples in \p Other into this one. 997 /// Optionally scale samples by \p Weight. 1003 Context =
Other.getContext();
1004if (FunctionHash == 0) {
1005// Set the function hash code for the target profile. 1006 FunctionHash =
Other.getFunctionHash();
1007 }
elseif (FunctionHash !=
Other.getFunctionHash()) {
1008// The two profiles coming with different valid hash codes indicates 1010// 1. They are same-named static functions from different compilation 1011// units (without using -unique-internal-linkage-names), or 1012// 2. They are really the same function but from different compilations. 1013// Let's bail out in either case for now, which means one profile is 1022for (
constauto &
I :
Other.getBodySamples()) {
1027for (
constauto &
I :
Other.getCallsiteSamples()) {
1030for (
constauto &Rec :
I.second)
1032 FSMap[Rec.first].merge(Rec.second, Weight));
1037 /// Recursively traverses all children, if the total sample count of the 1038 /// corresponding function is no less than \p Threshold, add its corresponding 1039 /// GUID to \p S. Also traverse the BodySamples to add hot CallTarget's GUID 1045if (TotalSamples <= Threshold)
1047auto IsDeclaration = [](
constFunction *
F) {
1048return !
F ||
F->isDeclaration();
1051// Add to the import list only when it's defined out of module. 1054// Import hot CallTargets, which may not be available in IR because full 1055// profile annotation cannot be done until backend compilation in ThinLTO. 1056for (
constauto &BS : BodySamples)
1057for (
constauto &TS : BS.second.getCallTargets())
1058if (TS.second > Threshold) {
1060if (IsDeclaration(Callee))
1061 S.
insert(TS.first.getHashCode());
1063for (
constauto &CS : CallsiteSamples)
1064for (
constauto &NameFS : CS.second)
1065 NameFS.second.findInlinedFunctions(S, SymbolMap, Threshold);
1068 /// Set the name of the function. 1073 /// Return the function name. 1076 /// Return the original function name. 1084assert(IRToProfileLocationMap ==
nullptr &&
"this should be set only once");
1085 IRToProfileLocationMap = LTLM;
1088 /// Return the canonical name for a function, taking into account 1089 /// suffix elision policy attributes. 1091constchar *AttrName =
"sample-profile-suffix-elision-policy";
1092auto Attr =
F.getFnAttribute(AttrName).getValueAsString();
1096 /// Name suffixes which canonicalization should handle to avoid 1097 /// profile mismatch. 1104// Note the sequence of the suffixes in the knownSuffixes array matters. 1105// If suffix "A" is appended after the suffix "B", "A" should be in front 1106// of "B" in knownSuffixes. 1108if (Attr ==
"" || Attr ==
"all")
1109return FnName.
split(
'.').first;
1110if (Attr ==
"selected") {
1112for (
constauto &Suf : KnownSuffixes) {
1114// If the profile contains ".__uniq." suffix, don't strip the 1115// suffix for names in the IR. 1118auto It = Cand.
rfind(Suffix);
1121auto Dit = Cand.
rfind(
'.');
1122if (Dit == It + Suffix.
size() - 1)
1123 Cand = Cand.
substr(0, It);
1129assert(
false &&
"internal error: unknown suffix elision policy");
1133 /// Translate \p Func into its original name. 1134 /// When profile doesn't use MD5, \p Func needs no translation. 1135 /// When profile uses MD5, \p Func in current FunctionSamples 1136 /// is actually GUID of the original function name. getFuncName will 1137 /// translate \p Func in current FunctionSamples into its original name 1138 /// by looking up in the function map GUIDToFuncNameMap. 1139 /// If the original name doesn't exist in the map, return empty StringRef. 1142return Func.stringRef();
1148 /// Returns the line offset to the start line of the subprogram. 1149 /// We assume that a single function will not exceed 65535 LOC. 1152 /// Returns a unique call site identifier for a given debug location of a call 1153 /// instruction. This is wrapper of two scenarios, the probe-based profile and 1154 /// regular profile, to hide implementation details from the sample loader and 1155 /// the context tracker. 1159 /// Returns a unique hash code for a combination of a callsite location and 1160 /// the callee function name. 1161 /// Guarantee MD5 and non-MD5 representation of the same function results in 1168 /// Get the FunctionSamples of the inline instance where DIL originates 1171 /// The FunctionSamples of the instruction (Machine or IR) associated to 1172 /// \p DIL is the inlined instance in which that instruction is coming from. 1173 /// We traverse the inline stack of that instruction, and match it with the 1174 /// tree nodes in the profile. 1176 /// \returns the FunctionSamples pointer to the inlined instance. 1177 /// If \p Remapper or \p FuncNameToProfNameMap is not nullptr, it will be used 1178 /// to find matching FunctionSamples with not exactly the same but equivalent 1184 *FuncNameToProfNameMap =
nullptr)
const;
1196 /// Whether the profile uses MD5 to represent string. 1199 /// Whether the profile contains any ".__uniq." suffix in a name. 1202 /// If this profile uses flow sensitive discriminators. 1205 /// GUIDToFuncNameMap saves the mapping from GUID to the symbol name, for 1206 /// all the function symbols defined or declared in current module. 1209 /// Return the GUID of the context's name. If the context is already using 1210 /// MD5, don't hash it again. 1215// Find all the names in the current FunctionSamples including names in 1216// all the inline instances and names of call targets. 1223 FunctionHash ==
Other.FunctionHash && Context ==
Other.Context &&
1224 TotalSamples ==
Other.TotalSamples &&
1225 TotalHeadSamples ==
Other.TotalHeadSamples &&
1226 BodySamples ==
Other.BodySamples &&
1227 CallsiteSamples ==
Other.CallsiteSamples;
1231return !(*
this ==
Other);
1235 /// CFG hash value for the function. 1238 /// Calling context for function profile 1241 /// Total number of samples collected inside this function. 1243 /// Samples are cumulative, they include all the samples collected 1244 /// inside this function and all its inlined callees. 1247 /// Total number of samples collected at the head of the function. 1248 /// This is an approximation of the number of calls made to this function 1252 /// Map instruction locations to collected samples. 1254 /// Each entry in this map contains the number of samples 1255 /// collected at the corresponding line offset. All line locations 1256 /// are an offset from the start of the function. 1259 /// Map call sites to collected samples for the called function. 1261 /// Each entry in this map corresponds to all the samples 1262 /// collected for the inlined function call at the given 1263 /// location. For example, given: 1271 /// If the bar() and baz() calls were inlined inside foo(), this 1272 /// map will contain two entries. One for all the samples collected 1273 /// in the call to bar() at line offset 1, the other for all the samples 1274 /// collected in the call to baz() at line offset 8. 1277 /// IR to profile location map generated by stale profile matching. 1279 /// Each entry is a mapping from the location on current build to the matched 1280 /// location in the "stale" profile. For example: 1281 /// Profiled source code: 1286 /// Current source code: 1288 /// 1 // Code change 1291 /// Supposing the stale profile matching algorithm generated the mapping [2 -> 1292 /// 1], the profile query using the location of bar on the IR which is 2 will 1293 /// be remapped to 1 and find the location of bar in the profile. 1297/// Get the proper representation of a string according to whether the 1298/// current Format uses MD5 to represent the string. 
1307/// This class provides operator overloads to the map container using MD5 as the 1308/// key type, so that existing code can still work in most cases using 1309/// SampleContext as key. 1310/// Note: when populating container, make sure to assign the SampleContext to 1311/// the mapped value immediately because the key no longer holds it. 1313 :
publicHashKeyMap<std::unordered_map, SampleContext, FunctionSamples> {
1315// Convenience method because this is being used in many places. Set the 1316// FunctionSamples' context if its newly inserted. 1320 Ret.first->second.setContext(Ctx);
1321return Ret.first->second;
1347 std::vector<NameFunctionSamples> &SortedProfiles);
1349/// Sort a LocationT->SampleT map by LocationT. 1351/// It produces a sorted list of <LocationT, SampleT> records by ascending 1352/// order of LocationT. 1359for (
constauto &
I : Samples)
1362returnA->first <
B->first;
1372/// SampleContextTrimmer implements helper functions to trim, merge cold 1373/// context profiles. It also supports context profile canonicalization to make 1374/// sure ProfileMap's key is consistent with FunctionSample's name/context. 1378// Trim and merge cold context profile when requested. TrimBaseProfileOnly 1379// should only be effective when TrimColdContext is true. On top of 1380// TrimColdContext, TrimBaseProfileOnly can be used to specify to trim all 1381// cold profiles or only cold base profiles. Trimming base profiles only is 1382// mainly to honor the preinliner decision. Note that when MergeColdContext is 1383// true, preinliner decision is not honored anyway so TrimBaseProfileOnly will 1386bool TrimColdContext,
1387bool MergeColdContext,
1389bool TrimBaseProfileOnly);
1395/// Helper class for profile conversion. 1397/// It supports full context-sensitive profile to nested profile conversion, 1398/// nested profile to flatten profile conversion, etc. 1402// Convert a full context-sensitive flat sample profile into a nested sample 1411// Map line+discriminator location to child frame 1413// Function name for current frame 1415// Function Samples for current frame 1417// Callsite location in parent context 1425bool ProfileIsCS =
false) {
1428 ProfileMap = std::move(TmpProfiles);
1433bool ProfileIsCS =
false) {
1435for (
constauto &
I : InputProfiles) {
1436// Retain the profile name and clear the full context for each function 1442for (
constauto &
I : InputProfiles)
1443 flattenNestedProfile(OutputProfiles,
I.second);
1450// To retain the context, checksum, attributes of the original profile, make 1451// a copy of it if no profile is found. 1453auto Ret = OutputProfiles.
try_emplace(Context, FS);
1456// Clear nested inlinees' samples for the flattened copy. These inlinees 1457// will have their own top-level entries after flattening. 1458Profile.removeAllCallsiteSamples();
1459// We recompute TotalSamples later, so here set to zero. 1468"There should be no inlinees' profiles after flattening.");
1470// TotalSamples might not be equal to the sum of all samples from 1471// BodySamples and CallsiteSamples. So here we use "TotalSamples = 1472// Original_TotalSamples - All_of_Callsite_TotalSamples + 1473// All_of_Callsite_HeadSamples" to compute the new TotalSamples. 1476for (
constauto &
I :
FS.getCallsiteSamples()) {
1477for (
constauto &Callee :
I.second) {
1478constauto &CalleeProfile =
Callee.second;
1480Profile.addBodySamples(
I.first.LineOffset,
I.first.Discriminator,
1481 CalleeProfile.getHeadSamplesEstimate());
1482// Add callsite sample. 1483Profile.addCalledTargetSamples(
1484I.first.LineOffset,
I.first.Discriminator,
1485 CalleeProfile.getFunction(),
1486 CalleeProfile.getHeadSamplesEstimate());
1487// Update total samples. 1488 TotalSamples = TotalSamples >= CalleeProfile.getTotalSamples()
1489 ? TotalSamples - CalleeProfile.getTotalSamples()
1491 TotalSamples += CalleeProfile.getHeadSamplesEstimate();
1492// Recursively convert callee profile. 1493 flattenNestedProfile(OutputProfiles, CalleeProfile);
1496Profile.addTotalSamples(TotalSamples);
1501// Nest all children profiles into the profile of Node. 1503 FrameNode *getOrCreateContextPath(
const SampleContext &Context);
1505 SampleProfileMap &ProfileMap;
1506 FrameNode RootFrame;
1509/// ProfileSymbolList records the list of function symbols shown up 1510/// in the binary used to generate the profile. It is useful 1511/// to discriminate a function being so cold as not to show up 1512/// in the profile and a function newly added. 1515 /// copy indicates whether we need to copy the underlying memory 1516 /// for the input Name. 1522 Syms.insert(
Name.copy(Allocator));
1532unsignedsize() {
return Syms.size(); }
1542// Determine whether or not to compress the symbol list when 1543// writing it into profile. The variable is unused when the symbol 1544// list is read from an existing profile. 1545bool ToCompress =
false;
1550}
// end namespace sampleprof 1552using namespacesampleprof;
1553// Provide DenseMapInfo for SampleContext. 1570// Prepend "__uniq" before the hash for tools like profilers to understand 1571// that this symbol is of internal linkage type. The "__uniq" is the 1572// pre-determined prefix that is used to tell tools that this symbol was 1573// created with -funique-internal-linkage-symbols and the tools can strip or 1574// keep the prefix as needed. 1582// Convert MD5hash to Decimal. Demangler suffixes can either contain 1583// numbers or characters but not both. 1585returntoString(IntHash,
/* Radix = */ 10,
/* Signed = */false)
1589}
// end namespace llvm 1591#endif// LLVM_PROFILEDATA_SAMPLEPROF_H This file defines the BumpPtrAllocator interface.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file defines the DenseSet and SmallDenseSet classes.
Provides ErrorOr<T> smart pointer.
Defines HashKeyMap template.
static cl::opt< unsigned > ColdCountThreshold("mfs-count-threshold", cl::desc("Minimum number of times a block must be executed to be retained."), cl::init(1), cl::Hidden)
Defines FunctionId class.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallVector class.
This file contains some functions that are useful when dealing with strings.
Class for arbitrary precision integers.
const T & back() const
back - Get the last element.
ArrayRef< T > take_front(size_t N=1) const
Return a copy of *this with only the first N elements.
size_t size() const
size - Get the array size.
ArrayRef< T > drop_back(size_t N=1) const
Drop the last N elements of the array.
bool empty() const
empty - Check if the array is empty.
Allocate memory in an ever growing pool, as if by bump-pointer.
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Implements a dense probed hash-table based set.
Represents either an error or a value T.
GUID getGUID() const
Return a 64-bit global unique ID constructed from global value name (i.e.
void update(ArrayRef< uint8_t > Data)
Updates the hash for the byte stream provided.
static void stringifyResult(MD5Result &Result, SmallVectorImpl< char > &Str)
Translates the bytes in Res to a hex string that is deposited into Str.
void final(MD5Result &Result)
Finishes off the hash and puts the result in Result.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
constexpr bool empty() const
empty - Check if the string is empty.
constexpr size_t size() const
size - Get the string size.
size_t rfind(char C, size_t From=npos) const
Search for the last character C in the string.
static constexpr size_t npos
Target - Wrapper for Target specific information.
The instances of the Type class are immutable: once they are created, they are never changed.
std::pair< iterator, bool > insert(const ValueT &V)
An opaque object representing a hash code.
This class implements an extremely fast bulk output stream that can only output to a stream.
This class represents a function that is read from a sample profile.
uint64_t getHashCode() const
Get hash code of this object.
std::string str() const
Convert to a string, usually for output purpose.
Representation of the samples collected for a function.
void setTotalSamples(uint64_t Num)
static bool ProfileIsPreInlined
void findInlinedFunctions(DenseSet< GlobalValue::GUID > &S, const HashKeyMap< std::unordered_map, FunctionId, Function * > &SymbolMap, uint64_t Threshold) const
Recursively traverses all children, if the total sample count of the corresponding function is no les...
bool operator!=(const FunctionSamples &Other) const
void updateTotalSamples()
void setHeadSamples(uint64_t Num)
const FunctionSamples * findFunctionSamplesAt(const LineLocation &Loc, StringRef CalleeName, SampleProfileReaderItaniumRemapper *Remapper, const HashKeyMap< std::unordered_map, FunctionId, FunctionId > *FuncNameToProfNameMap=nullptr) const
Returns a pointer to FunctionSamples at the given callsite location Loc with callee CalleeName.
sampleprof_error addTotalSamples(uint64_t Num, uint64_t Weight=1)
static constexpr const char * UniqSuffix
void updateCallsiteSamples()
static StringRef getCanonicalFnName(StringRef FnName, StringRef Attr="selected")
bool operator==(const FunctionSamples &Other) const
static constexpr const char * PartSuffix
static uint64_t getCallSiteHash(FunctionId Callee, const LineLocation &Callsite)
Returns a unique hash code for a combination of a callsite location and the callee function name.
const FunctionSamplesMap * findFunctionSamplesMapAt(const LineLocation &Loc) const
Returns the FunctionSamplesMap at the given Loc.
void removeAllCallsiteSamples()
uint64_t getMaxCountInside(bool SkipCallSite=false) const
Return the maximum of sample counts in a function body.
void removeTotalSamples(uint64_t Num)
uint64_t getHeadSamples() const
For top-level functions, return the total number of branch samples that have the function as the bran...
void setFunction(FunctionId NewFunctionID)
Set the name of the function.
ErrorOr< uint64_t > findSamplesAt(uint32_t LineOffset, uint32_t Discriminator) const
Return the number of samples collected at the given location.
ErrorOr< const SampleRecord::CallTargetMap & > findCallTargetMapAt(const LineLocation &CallSite) const
Returns the call target map collected at a given location specified by CallSite.
const LineLocation & mapIRLocToProfileLoc(const LineLocation &IRLoc) const
FunctionId getFunction() const
Return the function name.
uint64_t getFunctionHash() const
static constexpr const char * LLVMSuffix
Name suffixes which canonicalization should handle to avoid profile mismatch.
StringRef getFuncName(FunctionId Func) const
Translate Func into its original name.
sampleprof_error addHeadSamples(uint64_t Num, uint64_t Weight=1)
sampleprof_error addSampleRecord(LineLocation Location, const SampleRecord &SampleRecord, uint64_t Weight=1)
uint64_t removeCalledTargetAndBodySample(uint32_t LineOffset, uint32_t Discriminator, FunctionId Func)
DenseMap< uint64_t, StringRef > * GUIDToFuncNameMap
GUIDToFuncNameMap saves the mapping from GUID to the symbol name, for all the function symbols define...
sampleprof_error addCalledTargetSamples(uint32_t LineOffset, uint32_t Discriminator, FunctionId Func, uint64_t Num, uint64_t Weight=1)
FunctionSamplesMap & functionSamplesAt(const LineLocation &Loc)
Return the function samples at the given callsite location.
const FunctionSamples * findFunctionSamples(const DILocation *DIL, SampleProfileReaderItaniumRemapper *Remapper=nullptr, const HashKeyMap< std::unordered_map, FunctionId, FunctionId > *FuncNameToProfNameMap=nullptr) const
Get the FunctionSamples of the inline instance where DIL originates from.
static bool ProfileIsProbeBased
void setIRToProfileLocationMap(const LocToLocMap *LTLM)
static StringRef getCanonicalFnName(const Function &F)
Return the canonical name for a function, taking into account suffix elision policy attributes.
StringRef getFuncName() const
Return the original function name.
void findAllNames(DenseSet< FunctionId > &NameSet) const
sampleprof_error addBodySamples(uint32_t LineOffset, uint32_t Discriminator, uint64_t Num, uint64_t Weight=1)
static unsigned getOffset(const DILocation *DIL)
Returns the line offset to the start line of the subprogram.
void setContextSynthetic()
void setFunctionHash(uint64_t Hash)
static bool ProfileIsFS
If this profile uses flow sensitive discriminators.
ErrorOr< const SampleRecord::CallTargetMap & > findCallTargetMapAt(uint32_t LineOffset, uint32_t Discriminator) const
Returns the call target map collected at a given location.
SampleContext & getContext() const
static bool HasUniqSuffix
Whether the profile contains any ".__uniq." suffix in a name.
uint64_t getTotalSamples() const
Return the total number of samples collected inside the function.
void print(raw_ostream &OS=dbgs(), unsigned Indent=0) const
Print the samples collected for a function on stream OS.
sampleprof_error merge(const FunctionSamples &Other, uint64_t Weight=1)
Merge the samples in Other into this one.
FunctionSamples()=default
const CallsiteSampleMap & getCallsiteSamples() const
Return all the callsite samples collected in the body of the function.
void setContext(const SampleContext &FContext)
static LineLocation getCallSiteIdentifier(const DILocation *DIL, bool ProfileIsFS=false)
Returns a unique call site identifier for a given debug location of a call instruction.
uint64_t getHeadSamplesEstimate() const
Return an estimate of the sample count of the function entry basic block.
uint64_t getGUID() const
Return the GUID of the context's name.
const BodySampleMap & getBodySamples() const
Return all the samples collected in the body of the function.
static bool UseMD5
Whether the profile uses MD5 to represent string.
This class is a wrapper to associative container MapT<KeyT, ValueT> using the hash value of the origi...
std::pair< iterator, bool > try_emplace(const key_type &Hash, const original_key_type &Key, Ts &&...Args)
typename base_type::iterator iterator
decltype(hash_value(SampleContext())) key_type
size_t erase(const original_key_type &Ctx)
iterator find(const original_key_type &Key)
typename base_type::const_iterator const_iterator
Helper class for profile conversion.
static void flattenProfile(SampleProfileMap &ProfileMap, bool ProfileIsCS=false)
static void flattenProfile(const SampleProfileMap &InputProfiles, SampleProfileMap &OutputProfiles, bool ProfileIsCS=false)
ProfileSymbolList records the list of function symbols shown up in the binary used to generate the pr...
void setToCompress(bool TC)
void add(StringRef Name, bool Copy=false)
Copy indicates whether we need to copy the underlying memory for the input Name.
std::error_code write(raw_ostream &OS)
void dump(raw_ostream &OS=dbgs()) const
void merge(const ProfileSymbolList &List)
bool contains(StringRef Name)
std::error_code read(const uint8_t *Data, uint64_t ListSize)
SampleContextTrimmer implements helper functions to trim and merge cold context profiles.
SampleContextTrimmer(SampleProfileMap &Profiles)
void trimAndMergeColdContextProfiles(uint64_t ColdCountThreshold, bool TrimColdContext, bool MergeColdContext, uint32_t ColdContextFrameLength, bool TrimBaseProfileOnly)
static void createCtxVectorFromStr(StringRef ContextStr, SampleContextFrameVector &Context)
Create a context vector from a given context string and save it in Context.
bool operator==(const SampleContext &That) const
void setFunction(FunctionId NewFunctionID)
Set the name of the function and clear the current context.
SampleContext(SampleContextFrames Context, ContextStateMask CState=RawContext)
bool operator<(const SampleContext &That) const
SampleContext(StringRef ContextStr, std::list< SampleContextFrameVector > &CSNameTable, ContextStateMask CState=RawContext)
bool hasState(ContextStateMask S)
void clearState(ContextStateMask S)
SampleContextFrames getContextFrames() const
SampleContext(FunctionId Func)
bool isBaseContext() const
static void decodeContextString(StringRef ContextStr, FunctionId &Func, LineLocation &LineLoc)
static std::string getContextString(SampleContextFrames Context, bool IncludeLeafLineLocation=false)
bool operator!=(const SampleContext &That) const
void setState(ContextStateMask S)
void setAllAttributes(uint32_t A)
uint64_t getHashCode() const
void setContext(SampleContextFrames Context, ContextStateMask CState=RawContext)
FunctionId getFunction() const
uint32_t getAllAttributes()
void setAttribute(ContextAttributeMask A)
bool hasAttribute(ContextAttributeMask A)
std::string toString() const
SampleContext(StringRef Name)
bool isPrefixOf(const SampleContext &That) const
This class provides operator overloads to the map container using MD5 as the key type,...
iterator find(const SampleContext &Ctx)
mapped_type & create(const SampleContext &Ctx)
iterator erase(iterator It)
size_t erase(const key_type &Key)
const_iterator find(const SampleContext &Ctx) const
size_t erase(const SampleContext &Ctx)
SampleProfileReaderItaniumRemapper remaps the profile data from a sample profile data reader,...
Representation of a single sample record.
std::unordered_map< FunctionId, uint64_t > CallTargetMap
bool hasCalls() const
Return true if this sample record contains function calls.
sampleprof_error merge(const SampleRecord &Other, uint64_t Weight=1)
Merge the samples in Other into this record.
static const SortedCallTargetSet sortCallTargets(const CallTargetMap &Targets)
Sort call targets in descending order of call frequency.
const CallTargetMap & getCallTargets() const
std::set< CallTarget, CallTargetComparator > SortedCallTargetSet
uint64_t getSamples() const
uint64_t getCallTargetSum() const
uint64_t removeSamples(uint64_t S)
Decrease the number of samples for this record by S.
sampleprof_error addSamples(uint64_t S, uint64_t Weight=1)
Increment the number of samples for this record by S.
uint64_t removeCalledTarget(FunctionId F)
Remove called function from the call target map.
const SortedCallTargetSet getSortedCallTargets() const
static const CallTargetMap adjustCallTargets(const CallTargetMap &Targets, float DistributionFactor)
Prorate call targets by a distribution factor.
std::pair< FunctionId, uint64_t > CallTarget
bool operator!=(const SampleRecord &Other) const
bool operator==(const SampleRecord &Other) const
void print(raw_ostream &OS, unsigned Indent) const
Print the sample record to the stream OS indented by Indent.
sampleprof_error addCalledTarget(FunctionId F, uint64_t S, uint64_t Weight=1)
Add called function F with samples S.
Sort a LocationT->SampleT map by LocationT.
std::pair< const LocationT, SampleT > SamplesWithLoc
SampleSorter(const std::map< LocationT, SampleT > &Samples)
const SamplesWithLocList & get() const
SmallVector< const SamplesWithLoc *, 20 > SamplesWithLocList
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
static FunctionId getRepInFormat(StringRef Name)
Get the proper representation of a string according to whether the current Format uses MD5 to represe...
static void verifySecFlag(SecType Type, SecFlagType Flag)
ArrayRef< SampleContextFrame > SampleContextFrames
void sortFuncProfiles(const SampleProfileMap &ProfileMap, std::vector< NameFunctionSamples > &SortedProfiles)
static uint64_t SPMagic(SampleProfileFormat Format=SPF_Binary)
std::unordered_map< LineLocation, LineLocation, LineLocationHash > LocToLocMap
std::pair< hash_code, const FunctionSamples * > NameFunctionSamples
static void addSecFlag(SecHdrTableEntry &Entry, SecFlagType Flag)
static bool hasSecFlag(const SecHdrTableEntry &Entry, SecFlagType Flag)
std::map< LineLocation, FunctionSamplesMap > CallsiteSampleMap
@ ContextDuplicatedIntoBase
std::map< LineLocation, SampleRecord > BodySampleMap
@ SecFlagIsPreInlined
SecFlagIsPreInlined means this profile contains ShouldBeInlined contexts thus this is CS preinliner c...
@ SecFlagPartial
SecFlagPartial means the profile is for common/shared code.
@ SecFlagFSDiscriminator
SecFlagFSDiscriminator means this profile uses flow-sensitive discriminators.
@ SecFlagFullContext
SecFlagFullContext means this is a context-sensitive flat profile for CSSPGO.
static void removeSecFlag(SecHdrTableEntry &Entry, SecFlagType Flag)
std::map< FunctionId, FunctionSamples > FunctionSamplesMap
raw_ostream & operator<<(raw_ostream &OS, const FunctionId &Obj)
static std::string getSecName(SecType Type)
uint64_t hash_value(const FunctionId &Obj)
static uint64_t SPVersion()
This is an optimization pass for GlobalISel generic memory operations.
void stable_sort(R &&Range)
std::error_code make_error_code(BitcodeError E)
sampleprof_error mergeSampleProfErrors(sampleprof_error &Accumulator, sampleprof_error Result)
@ unsupported_writing_format
@ ostream_seek_unsupported
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
std::enable_if_t< std::is_unsigned_v< T >, T > SaturatingMultiplyAdd(T X, T Y, T A, bool *ResultOverflowed=nullptr)
Multiply two unsigned integers, X and Y, and add the unsigned integer, A to the product.
const std::error_category & sampleprof_category()
std::string getUniqueInternalLinkagePostfix(const StringRef &FName)
const char * toString(DWARFSectionKind Kind)
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
Implement std::hash so that hash_code can be used in STL containers.
static unsigned getHashValue(const SampleContext &Val)
static SampleContext getTombstoneKey()
static SampleContext getEmptyKey()
static bool isEqual(const SampleContext &LHS, const SampleContext &RHS)
An information struct used to provide DenseMap with the various necessary components for a given valu...
uint64_t operator()(const LineLocation &Loc) const
Represents the relative location of an instruction.
void print(raw_ostream &OS) const
LineLocation(uint32_t L, uint32_t D)
bool operator!=(const LineLocation &O) const
bool operator<(const LineLocation &O) const
uint64_t getHashCode() const
bool operator==(const LineLocation &O) const
FunctionSamples * FuncSamples
FrameNode(FunctionId FName=FunctionId(), FunctionSamples *FSamples=nullptr, LineLocation CallLoc={0, 0})
FrameNode * getOrCreateChildFrame(const LineLocation &CallSite, FunctionId CalleeName)
std::map< uint64_t, FrameNode > AllChildFrames
uint64_t operator()(const SampleContextFrameVector &S) const
bool operator==(const SampleContextFrame &That) const
SampleContextFrame(FunctionId Func, LineLocation Location)
bool operator!=(const SampleContextFrame &That) const
std::string toString(bool OutputLineLocation) const
uint64_t getHashCode() const
uint64_t operator()(const SampleContext &Context) const
bool operator()(const CallTarget &LHS, const CallTarget &RHS) const