1//===- BitcodeAnalyzer.cpp - Internal BitcodeAnalyzer implementation ------===// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 7//===----------------------------------------------------------------------===// 24/// Return a symbolic block name if known, otherwise return null. 25static std::optional<const char *>
28// Standard blocks for all bitcode files. 31return"BLOCKINFO_BLOCK";
35// Check to see if we have a blockinfo record for this block, with a name. 38if (!
Info->Name.empty())
39returnInfo->Name.c_str();
49return"OPERAND_BUNDLE_TAGS_BLOCK";
53return"PARAMATTR_BLOCK";
55return"PARAMATTR_GROUP_BLOCK_ID";
59return"CONSTANTS_BLOCK";
61return"FUNCTION_BLOCK";
63return"IDENTIFICATION_BLOCK_ID";
67return"METADATA_BLOCK";
69return"METADATA_KIND_BLOCK";
71return"METADATA_ATTACHMENT_BLOCK";
73return"USELIST_BLOCK_ID";
75return"GLOBALVAL_SUMMARY_BLOCK";
77return"FULL_LTO_GLOBALVAL_SUMMARY_BLOCK";
79return"MODULE_STRTAB_BLOCK";
87/// Return a symbolic code name if known, otherwise return null. 88static std::optional<const char *>
92// Standard blocks for all bitcode files. 103return"SETRECORDNAME";
109// Check to see if we have a blockinfo record for this record, with a name. 112for (
const std::pair<unsigned, std::string> &RN :
Info->RecordNames)
113if (RN.first == CodeID)
114return RN.second.c_str();
120#define STRINGIFY_CODE(PREFIX, CODE) \ 121 case bitc::PREFIX##_##CODE: \ 135STRINGIFY_CODE(MODULE_CODE, DEPLIB)
// Deprecated, present in old bitcode 157// FIXME: Should these be different? 227return"CST_CODE_BLOCKADDRESS";
395return"USELIST_CODE_DEFAULT";
397return"USELIST_CODE_BB";
405return"OPERAND_BUNDLE_TAG";
426OS <<
format(
"%.2f/%.2fB/%luW", Bits, Bits / 8, (
unsignedlong)(Bits / 32));
429OS <<
format(
"%lub/%.2fB/%luW", (
unsignedlong)Bits, (
double)Bits / 8,
430 (
unsignedlong)(Bits / 32));
434auto tryRead = [&Stream](
char &Dest,
size_tsize) ->
Error {
436 Dest = MaybeWord.get();
438return MaybeWord.takeError();
443if (
Error Err = tryRead(Signature[0], 8))
444return std::move(Err);
445if (
Error Err = tryRead(Signature[1], 8))
446return std::move(Err);
448// Autodetect the file contents, if it is one we know. 449if (Signature[0] ==
'C' && Signature[1] ==
'P') {
450if (
Error Err = tryRead(Signature[2], 8))
451return std::move(Err);
452if (
Error Err = tryRead(Signature[3], 8))
453return std::move(Err);
454if (Signature[2] ==
'C' && Signature[3] ==
'H')
456 }
elseif (Signature[0] ==
'D' && Signature[1] ==
'I') {
457if (
Error Err = tryRead(Signature[2], 8))
458return std::move(Err);
459if (
Error Err = tryRead(Signature[3], 8))
460return std::move(Err);
461if (Signature[2] ==
'A' && Signature[3] ==
'G')
463 }
elseif (Signature[0] ==
'R' && Signature[1] ==
'M') {
464if (
Error Err = tryRead(Signature[2], 8))
465return std::move(Err);
466if (
Error Err = tryRead(Signature[3], 8))
467return std::move(Err);
468if (Signature[2] ==
'R' && Signature[3] ==
'K')
471if (
Error Err = tryRead(Signature[2], 4))
472return std::move(Err);
473if (
Error Err = tryRead(Signature[3], 4))
474return std::move(Err);
475if (
Error Err = tryRead(Signature[4], 4))
476return std::move(Err);
477if (
Error Err = tryRead(Signature[5], 4))
478return std::move(Err);
479if (Signature[0] ==
'B' && Signature[1] ==
'C' && Signature[2] == 0x0 &&
480 Signature[3] == 0xC && Signature[4] == 0xE && Signature[5] == 0xD)
489constunsignedchar *BufPtr = (
constunsignedchar *)Bytes.
data();
490constunsignedchar *EndBufPtr = BufPtr + Bytes.
size();
492// If we have a wrapper header, parse it and ignore the non-bc file 493// contents. The magic number is 0x0B17C0DE stored in little endian. 496returnreportError(
"Invalid bitcode wrapper header");
505 O->OS <<
"<BITCODE_WRAPPER_HEADER" 510 <<
" CPUType=" <<
format_hex(CPUType, 10) <<
"/>\n";
514returnreportError(
"Invalid bitcode wrapper header");
517// Use the cursor modified by skipping the wrapper header. 536"Decoding metadata strings blob needs two record entries.");
538unsigned NumStrings =
Record[0];
539unsigned StringsOffset =
Record[1];
540OS <<
" num-strings = " << NumStrings <<
" {\n";
546if (
R.AtEndOfStream())
552if (Strings.size() <
Size)
558 Strings = Strings.drop_front(
Size);
559 }
while (--NumStrings);
566 std::optional<StringRef> BlockInfoBuffer)
569 BlockInfoStream.emplace(*BlockInfoBuffer);
573 std::optional<StringRef> CheckHash) {
579// Read block info from BlockInfoStream, if specified. 580// The block info must be a top-level block. 581if (BlockInfoStream) {
591returnreportError(
"Invalid record at top-level in block info file");
597 std::optional<BitstreamBlockInfo> NewBlockInfo;
600 .moveInto(NewBlockInfo))
603returnreportError(
"Malformed BlockInfoBlock in block info file");
604 BlockInfo = std::move(*NewBlockInfo);
613// Parse the top-level structure. We only allow blocks at the top-level. 625if (
Error E = parseBlock(MaybeBlockID.
get(), 0, O, CheckHash))
634 std::optional<StringRef> Filename) {
636// Print a summary of the read file. 639 O.OS <<
"of " << Filename->data() <<
":\n";
640 O.OS <<
" Total size: ";
643 O.OS <<
" Stream type: ";
644switch (CurStreamType) {
652 O.OS <<
"Clang Serialized AST\n";
655 O.OS <<
"Clang Serialized Diagnostics\n";
658 O.OS <<
"LLVM Remarks\n";
661 O.OS <<
" # Toplevel Blocks: " << NumTopBlocks <<
"\n";
664// Emit per-block stats. 665 O.OS <<
"Per-block Summary:\n";
666for (
constauto &Stat : BlockIDStats) {
667 O.OS <<
" Block ID #" << Stat.first;
668if (std::optional<const char *> BlockName =
670 O.OS <<
" (" << *BlockName <<
")";
673const PerBlockIDStats &
Stats = Stat.second;
674 O.OS <<
" Num Instances: " <<
Stats.NumInstances <<
"\n";
675 O.OS <<
" Total Size: ";
678double pct = (
Stats.NumBits * 100.0) / BufferSizeBits;
679 O.OS <<
" Percent of file: " <<
format(
"%2.4f%%", pct) <<
"\n";
680if (
Stats.NumInstances > 1) {
681 O.OS <<
" Average Size: ";
684 O.OS <<
" Tot/Avg SubBlocks: " <<
Stats.NumSubBlocks <<
"/" 685 <<
Stats.NumSubBlocks / (double)
Stats.NumInstances <<
"\n";
686 O.OS <<
" Tot/Avg Abbrevs: " <<
Stats.NumAbbrevs <<
"/" 687 <<
Stats.NumAbbrevs / (
double)
Stats.NumInstances <<
"\n";
688 O.OS <<
" Tot/Avg Records: " <<
Stats.NumRecords <<
"/" 689 <<
Stats.NumRecords / (double)
Stats.NumInstances <<
"\n";
691 O.OS <<
" Num SubBlocks: " <<
Stats.NumSubBlocks <<
"\n";
692 O.OS <<
" Num Abbrevs: " <<
Stats.NumAbbrevs <<
"\n";
693 O.OS <<
" Num Records: " <<
Stats.NumRecords <<
"\n";
695if (
Stats.NumRecords) {
696double pct = (
Stats.NumAbbreviatedRecords * 100.0) /
Stats.NumRecords;
697 O.OS <<
" Percent Abbrevs: " <<
format(
"%2.4f%%", pct) <<
"\n";
701// Print a histogram of the codes we see. 702if (O.Histogram && !
Stats.CodeFreq.empty()) {
703 std::vector<std::pair<unsigned, unsigned>> FreqPairs;
// <freq,code> 704for (
unsigned i = 0, e =
Stats.CodeFreq.size(); i != e; ++i)
705if (
unsigned Freq =
Stats.CodeFreq[i].NumInstances)
706 FreqPairs.push_back(std::make_pair(Freq, i));
708 std::reverse(FreqPairs.begin(), FreqPairs.end());
710 O.OS <<
"\tRecord Histogram:\n";
711 O.OS <<
"\t\t Count # Bits b/Rec % Abv Record Kind\n";
712for (
constauto &FreqPair : FreqPairs) {
713const PerRecordStats &RecStats =
Stats.CodeFreq[FreqPair.second];
715 O.OS <<
format(
"\t\t%7d %9lu", RecStats.NumInstances,
716 (
unsignedlong)RecStats.TotalBits);
718if (RecStats.NumInstances > 1)
720 (
double)RecStats.TotalBits / RecStats.NumInstances);
724if (RecStats.NumAbbrev)
725 O.OS <<
format(
" %7.2f", (
double)RecStats.NumAbbrev /
726 RecStats.NumInstances * 100);
731if (std::optional<const char *> CodeName =
GetCodeName(
732 FreqPair.second, Stat.first, BlockInfo, CurStreamType))
733 O.OS << *CodeName <<
"\n";
735 O.OS <<
"UnknownCode" << FreqPair.second <<
"\n";
742Error BitcodeAnalyzer::parseBlock(
unsigned BlockID,
unsigned IndentLevel,
743 std::optional<BCDumpOptions> O,
744 std::optional<StringRef> CheckHash) {
745 std::string Indent(IndentLevel * 2,
' ');
748// Get the statistics for this BlockID. 749 PerBlockIDStats &BlockStats = BlockIDStats[BlockID];
751 BlockStats.NumInstances++;
753// BLOCKINFO is a special part of the stream. 754bool DumpRecords = O.has_value();
756if (O && !O->DumpBlockinfo)
757 O->OS << Indent <<
"<BLOCKINFO_BLOCK/>\n";
758 std::optional<BitstreamBlockInfo> NewBlockInfo;
760 .moveInto(NewBlockInfo))
764 BlockInfo = std::move(*NewBlockInfo);
767// It's not really interesting to dump the contents of the blockinfo 768// block, so only do it if the user explicitly requests it. 769 DumpRecords = O && O->DumpBlockinfo;
772unsigned NumWords = 0;
776// Keep it for later, when we see a MODULE_HASH record 779 std::optional<const char *> BlockName;
781 O->OS << Indent <<
"<";
782if ((BlockName =
GetBlockName(BlockID, BlockInfo, CurStreamType)))
785 O->OS <<
"UnknownBlock" << BlockID;
787if (!O->Symbolic && BlockName)
788 O->OS <<
" BlockID=" << BlockID;
790 O->OS <<
" NumWords=" << NumWords
796// Keep the offset to the metadata index if seen. 799// Read all the records for this block. 816 BlockStats.NumBits += BlockBitEnd - BlockBitStart;
818O->OS << Indent <<
"</";
820O->OS << *BlockName <<
">\n";
822O->OS <<
"UnknownBlock" << BlockID <<
">\n";
829if (
Error E = parseBlock(
Entry.ID, IndentLevel + 1, O, CheckHash))
831 ++BlockStats.NumSubBlocks;
834// Don't include subblock sizes in the size of this block. 835 BlockBitStart += SubBlockBitEnd - SubBlockBitStart;
839// The interesting case. 846 ++BlockStats.NumAbbrevs;
852 ++BlockStats.NumRecords;
860// Increment the # occurrences of this code. 861if (BlockStats.CodeFreq.size() <= Code)
862 BlockStats.CodeFreq.resize(Code + 1);
863 BlockStats.CodeFreq[
Code].NumInstances++;
864 BlockStats.CodeFreq[
Code].TotalBits +=
867 BlockStats.CodeFreq[
Code].NumAbbrev++;
868 ++BlockStats.NumAbbreviatedRecords;
872O->OS << Indent <<
" <";
873 std::optional<const char *> CodeName =
874GetCodeName(Code, BlockID, BlockInfo, CurStreamType);
878O->OS <<
"UnknownCode" <<
Code;
879if (!
O->Symbolic && CodeName)
880O->OS <<
" codeid=" <<
Code;
886 Abbv = MaybeAbbv.
get();
887O->OS <<
" abbrevid=" <<
Entry.ID;
890for (
unsigned i = 0, e =
Record.size(); i != e; ++i)
891O->OS <<
" op" << i <<
"=" << (int64_t)
Record[i];
893// If we found a metadata index, let's verify that we had an offset 894// before and validate its forward reference offset was correct! 898O->OS <<
"(Invalid record)";
906if (MetadataIndexOffset == RecordStartBit)
909O->OS <<
"mismatch: " << MetadataIndexOffset <<
" vs " 910 << RecordStartBit <<
")";
914// If we found a module hash, let's verify that it matches! 920// Recompute the hash and compare it to the one in the bitcode 922 std::array<uint8_t, 20> Hash;
923 Hasher.
update(*CheckHash);
925intBlockSize = (CurrentRecordPos / 8) - BlockEntryPos;
928 Hash = Hasher.result();
930 std::array<uint8_t, 20> RecordedHash;
933assert(!(Val >> 32) &&
"Unexpected high bits set");
937if (Hash == RecordedHash)
940O->OS <<
" (!mismatch!)";
951assert(i + 2 == e &&
"Array op not second to last");
953bool ArrayIsPrintable =
true;
954for (
unsigned j = i - 1, je =
Record.size(); j != je; ++j) {
956 ArrayIsPrintable =
false;
962O->OS <<
" record string = '" << Str <<
"'";
969if (
Error E = decodeMetadataStringsBlob(Indent,
Record, Blob,
O->OS))
972O->OS <<
" blob data = ";
973if (
O->ShowBinaryBlobs) {
975O->OS.write_escaped(Blob,
/*hex=*/true) <<
"'";
977bool BlobIsPrintable =
true;
979if (!
isPrint(
static_cast<unsignedchar>(
C))) {
980 BlobIsPrintable =
false;
985O->OS <<
"'" << Blob <<
"'";
987O->OS <<
"unprintable, " << Blob.size() <<
" bytes.";
995// Make sure that we can skip the current record. 996if (
Error Err = Stream.JumpToBit(CurrentRecordPos))
1001return Skipped.takeError();
#define STRINGIFY_CODE(PREFIX, CODE)
static bool canDecodeBlob(unsigned Code, unsigned BlockID)
static std::optional< const char * > GetBlockName(unsigned BlockID, const BitstreamBlockInfo &BlockInfo, CurStreamTypeType CurStreamType)
Return a symbolic block name if known, otherwise return null.
static Expected< CurStreamTypeType > ReadSignature(BitstreamCursor &Stream)
static std::optional< const char * > GetCodeName(unsigned CodeID, unsigned BlockID, const BitstreamBlockInfo &BlockInfo, CurStreamTypeType CurStreamType)
Return a symbolic code name if known, otherwise return null.
static void printSize(raw_ostream &OS, double Bits)
static Expected< CurStreamTypeType > analyzeHeader(std::optional< BCDumpOptions > O, BitstreamCursor &Stream)
static Error reportError(StringRef Message)
Analysis containing CSE Info
#define FUNCTION(NAME, NARG, ROUND_MODE, INTRINSIC)
#define KIND(ENUM, FIELD)
#define ENTRY(ASMNAME, ENUM)
block placement Basic Block Placement Stats
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static const int BlockSize
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
BitCodeAbbrevOp - This describes one or more operands in an abbreviation.
BitCodeAbbrev - This class represents an abbreviation record.
unsigned getNumOperandInfos() const
const BitCodeAbbrevOp & getOperandInfo(unsigned N) const
BitcodeAnalyzer(StringRef Buffer, std::optional< StringRef > BlockInfoBuffer=std::nullopt)
Error analyze(std::optional< BCDumpOptions > O=std::nullopt, std::optional< StringRef > CheckHash=std::nullopt)
Analyze the bitcode file.
void printStats(BCDumpOptions O, std::optional< StringRef > Filename=std::nullopt)
Print stats about the bitcode file.
This class maintains the abbreviations read from a block info block.
const BlockInfo * getBlockInfo(unsigned BlockID) const
If there is block info for the specified ID, return it, otherwise return null.
This represents a position within a bitcode file, implemented on top of a SimpleBitstreamCursor.
Error JumpToBit(uint64_t BitNo)
Reset the stream to the specified bit number.
uint64_t GetCurrentBitNo() const
Return the bit # of the bit we are reading.
Expected< unsigned > ReadSubBlockID()
Having read the ENTER_SUBBLOCK code, read the BlockID for the block.
ArrayRef< uint8_t > getBitcodeBytes() const
Expected< word_t > Read(unsigned NumBits)
Expected< BitstreamEntry > advance(unsigned Flags=0)
Advance the current bitstream, returning the next entry in the stream.
Expected< const BitCodeAbbrev * > getAbbrev(unsigned AbbrevID)
Return the abbreviation for the specified AbbrevId.
void setBlockInfo(BitstreamBlockInfo *BI)
Set the block info to be used by this BitstreamCursor to interpret abbreviated records.
const uint8_t * getPointerToByte(uint64_t ByteNo, uint64_t NumBytes)
Get a pointer into the bitstream at the specified byte offset.
Expected< unsigned > readRecord(unsigned AbbrevID, SmallVectorImpl< uint64_t > &Vals, StringRef *Blob=nullptr)
Error EnterSubBlock(unsigned BlockID, unsigned *NumWordsP=nullptr)
Having read the ENTER_SUBBLOCK abbrevid, and enter the block.
@ AF_DontAutoprocessAbbrevs
If this flag is used, abbrev entries are returned just like normal records.
Error SkipBlock()
Having read the ENTER_SUBBLOCK abbrevid and a BlockID, skip over the body of this block.
uint64_t getCurrentByteNo() const
Expected< unsigned > ReadCode()
Expected< std::optional< BitstreamBlockInfo > > ReadBlockInfoBlock(bool ReadBlockInfoNames=false)
Read and return a block info block from the bitstream.
unsigned getAbbrevIDWidth() const
Return the number of bits used to encode an abbrev #.
This class represents an Operation in the Expression.
Lightweight error class with error context and mandatory checking.
static ErrorSuccess success()
Create a success value.
Tagged union holding either a T or a Error.
Error takeError()
Take ownership of the stored error.
reference get()
Returns a reference to the stored T value.
A class that wrap the SHA1 algorithm.
void update(ArrayRef< uint8_t > Data)
Digest more data.
This represents a position within a bitstream.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
constexpr bool empty() const
empty - Check if the string is empty.
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
StringRef slice(size_t Start, size_t End) const
Return a reference to the substring from [Start, End).
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
This class implements an extremely fast bulk output stream that can only output to a stream.
raw_ostream & write_escaped(StringRef Str, bool UseHexEscapes=false)
Output Str, turning '\', '\t', ' ', '"', and anything that doesn't satisfy llvm::isPrint into an esca...
@ C
The default llvm calling convention, compatible with C.
@ BLOCKINFO_CODE_BLOCKNAME
@ BLOCKINFO_CODE_SETRECORDNAME
@ PARAMATTR_GROUP_BLOCK_ID
@ IDENTIFICATION_BLOCK_ID
@ GLOBALVAL_SUMMARY_BLOCK_ID
@ FULL_LTO_GLOBALVAL_SUMMARY_BLOCK_ID
@ OPERAND_BUNDLE_TAGS_BLOCK_ID
@ BLOCKINFO_BLOCK_ID
BLOCKINFO_BLOCK is used to define metadata about blocks, for example, standard abbrevs that should be...
@ FIRST_APPLICATION_BLOCKID
@ DEFINE_ABBREV
DEFINE_ABBREV - Defines an abbrev for the current block.
@ PARAMATTR_CODE_ENTRY_OLD
@ PARAMATTR_GRP_CODE_ENTRY
NodeAddr< CodeNode * > Code
void write32be(void *P, uint32_t V)
uint32_t read32le(const void *P)
This is an optimization pass for GlobalISel generic memory operations.
void stable_sort(R &&Range)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Error createStringError(std::error_code EC, char const *Fmt, const Ts &... Vals)
Create formatted StringError object.
FormattedNumber format_hex(uint64_t N, unsigned Width, bool Upper=false)
format_hex - Output N as a fixed width hexadecimal.
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
CurStreamTypeType
CurStreamTypeType - A type for CurStreamType.
@ ClangSerializedDiagnosticsBitstream
@ ClangSerializedASTBitstream
bool SkipBitcodeWrapperHeader(const unsigned char *&BufPtr, const unsigned char *&BufEnd, bool VerifyBufferSize)
SkipBitcodeWrapperHeader - Some systems wrap bc files with a special header for padding or other reas...
bool isBitcodeWrapper(const unsigned char *BufPtr, const unsigned char *BufEnd)
isBitcodeWrapper - Return true if the given bytes are the magic bytes for an LLVM IR bitcode wrapper.
This contains information emitted to BLOCKINFO_BLOCK blocks.
When advancing through a bitstream cursor, each advance can discover a few different kinds of entries...