1//===--- RustDemangle.cpp ---------------------------------------*- C++ -*-===// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 7//===----------------------------------------------------------------------===// 9// This file defines a demangler for Rust v0 mangled symbols as specified in 10// https://rust-lang.github.io/rfcs/2603-rust-symbol-name-mangling-v0.html 12//===----------------------------------------------------------------------===// 27usingllvm::itanium_demangle::OutputBuffer;
28usingllvm::itanium_demangle::ScopedOverride;
29usingllvm::itanium_demangle::starts_with;
37bool empty()
const{
returnName.empty(); }
69enum class LeaveGenericsOpen {
75// Maximum recursion level. Used to avoid stack overflow. 76size_t MaxRecursionLevel;
77// Current recursion level. 80// Input string that is being demangled with "_R" prefix removed. 81 std::string_view Input;
82// Position in the input string. 84// When true, print methods append the output to the stream. 85// When false, the output is suppressed. 87// True if an error occurred. 96booldemangle(std::string_view MangledName);
99bool demanglePath(IsInType
Type,
100 LeaveGenericsOpen LeaveOpen = LeaveGenericsOpen::No);
101void demangleImplPath(IsInType InType);
102void demangleGenericArg();
105void demangleDynBounds();
106void demangleDynTrait();
107void demangleOptionalBinder();
109void demangleConstInt();
110void demangleConstBool();
111void demangleConstChar();
113template <
typename Callable>
void demangleBackref(Callable Demangler) {
114uint64_t Backref = parseBase62Number();
115if (
Error || Backref >= Position) {
129uint64_t parseOptionalBase62Number(
char Tag);
132uint64_t parseHexNumber(std::string_view &HexDigits);
135voidprint(std::string_view S);
137void printBasicType(BasicType);
139void printIdentifier(Identifier Ident);
143bool consumeIf(
char Prefix);
152// Return early if mangled name doesn't look like a Rust symbol. 153if (MangledName.empty() || !
starts_with(MangledName,
"_R"))
157if (!
D.demangle(MangledName)) {
158 std::free(
D.Output.getBuffer());
164returnD.Output.getBuffer();
167Demangler::Demangler(
size_t MaxRecursionLevel)
168 : MaxRecursionLevel(MaxRecursionLevel) {}
/// Returns true if C is an ASCII decimal digit: <0-9>.
static inline bool isDigit(const char C) { return '0' <= C && C <= '9'; }
173return (
'0' <=
C &&
C <=
'9') || (
'a' <=
C &&
C <=
'f');
/// Returns true if C is an ASCII lowercase letter: <a-z>.
static inline bool isLower(const char C) { return 'a' <= C && C <= 'z'; }
/// Returns true if C is an ASCII uppercase letter: <A-Z>.
static inline bool isUpper(const char C) { return 'A' <= C && C <= 'Z'; }
180/// Returns true if C is a valid mangled character: <0-9a-zA-Z_>. 185// Demangles Rust v0 mangled symbol. Returns true when successful, and false 186// otherwise. The demangled symbol is stored in Output field. It is 187// responsibility of the caller to free the memory behind the output stream. 189// <symbol-name> = "_R" <path> [<instantiating-crate>] 190bool Demangler::demangle(std::string_view Mangled) {
203 Input = Dot == std::string_view::npos ?
Mangled :
Mangled.substr(0, Dot);
205 demanglePath(IsInType::No);
207if (Position != Input.size()) {
209 demanglePath(IsInType::No);
212if (Position != Input.size())
215if (Dot != std::string_view::npos) {
224// Demangles a path. InType indicates whether a path is inside a type. When 225// LeaveOpen is true, a closing `>` after generic arguments is omitted from the 226// output. Return value indicates whether generics arguments have been left 229// <path> = "C" <identifier> // crate root 230// | "M" <impl-path> <type> // <T> (inherent impl) 231// | "X" <impl-path> <type> <path> // <T as Trait> (trait impl) 232// | "Y" <type> <path> // <T as Trait> (trait definition) 233// | "N" <ns> <path> <identifier> // ...::ident (nested path) 234// | "I" <path> {<generic-arg>} "E" // ...<T, U> (generic args) 236// <identifier> = [<disambiguator>] <undisambiguated-identifier> 237// <ns> = "C" // closure 239// | <A-Z> // other special namespaces 240// | <a-z> // internal namespaces 241bool Demangler::demanglePath(IsInType InType, LeaveGenericsOpen LeaveOpen) {
242if (
Error || RecursionLevel >= MaxRecursionLevel) {
250 parseOptionalBase62Number(
's');
251 printIdentifier(parseIdentifier());
255 demangleImplPath(InType);
262 demangleImplPath(InType);
266 demanglePath(IsInType::Yes);
274 demanglePath(IsInType::Yes);
284 demanglePath(InType);
286uint64_t Disambiguator = parseOptionalBase62Number(
's');
300 printIdentifier(Ident);
303 printDecimalNumber(Disambiguator);
306// Implementation internal namespaces. 309 printIdentifier(Ident);
315 demanglePath(InType);
316// Omit "::" when in a type, where it is optional. 317if (InType == IsInType::No)
320for (
size_tI = 0; !
Error && !consumeIf(
'E'); ++
I) {
323 demangleGenericArg();
325if (LeaveOpen == LeaveGenericsOpen::Yes)
333 demangleBackref([&] { IsOpen = demanglePath(InType, LeaveOpen); });
344// <impl-path> = [<disambiguator>] <path> 345// <disambiguator> = "s" <base-62-number> 346void Demangler::demangleImplPath(IsInType InType) {
348 parseOptionalBase62Number(
's');
349 demanglePath(InType);
352// <generic-arg> = <lifetime> 355// <lifetime> = "L" <base-62-number> 356void Demangler::demangleGenericArg() {
358 printLifetime(parseBase62Number());
359elseif (consumeIf(
'K'))
365// <basic-type> = "a" // i8 385// | "p" // placeholder (e.g. for generic params), shown as _ 392Type = BasicType::Bool;
395Type = BasicType::Char;
398Type = BasicType::F64;
401Type = BasicType::Str;
404Type = BasicType::F32;
410Type = BasicType::ISize;
413Type = BasicType::USize;
416Type = BasicType::I32;
419Type = BasicType::U32;
422Type = BasicType::I128;
425Type = BasicType::U128;
428Type = BasicType::Placeholder;
431Type = BasicType::I16;
434Type = BasicType::U16;
437Type = BasicType::Unit;
440Type = BasicType::Variadic;
443Type = BasicType::I64;
446Type = BasicType::U64;
449Type = BasicType::Never;
456void Demangler::printBasicType(BasicType
Type) {
479case BasicType::ISize:
497case BasicType::USize:
509case BasicType::Placeholder:
515case BasicType::Variadic:
518case BasicType::Never:
524// <type> = | <basic-type> 525// | <path> // named type 526// | "A" <type> <const> // [T; N] 527// | "S" <type> // [T] 528// | "T" {<type>} "E" // (T1, T2, T3, ...) 529// | "R" [<lifetime>] <type> // &T 530// | "Q" [<lifetime>] <type> // &mut T 531// | "P" <type> // *const T 532// | "O" <type> // *mut T 533// | "F" <fn-sig> // fn(...) -> ... 534// | "D" <dyn-bounds> <lifetime> // dyn Trait<Assoc = X> + Send + 'a 535// | <backref> // backref 536void Demangler::demangleType() {
537if (
Error || RecursionLevel >= MaxRecursionLevel) {
543size_t Start = Position;
547return printBasicType(
Type);
565for (; !
Error && !consumeIf(
'E'); ++
I) {
579if (
auto Lifetime = parseBase62Number()) {
580 printLifetime(Lifetime);
602if (
auto Lifetime = parseBase62Number()) {
604 printLifetime(Lifetime);
611 demangleBackref([&] { demangleType(); });
615 demanglePath(IsInType::Yes);
620// <fn-sig> := [<binder>] ["U"] ["K" <abi>] {<type>} "E" <type> 622// | <undisambiguated-identifier> 623void Demangler::demangleFnSig() {
625 demangleOptionalBinder();
638for (
charC : Ident.Name) {
639// When mangling ABI string, the "-" is replaced with "_". 649for (
size_tI = 0; !
Error && !consumeIf(
'E'); ++
I) {
657// Skip the unit type from the output. 664// <dyn-bounds> = [<binder>] {<dyn-trait>} "E" 665void Demangler::demangleDynBounds() {
668 demangleOptionalBinder();
669for (
size_tI = 0; !
Error && !consumeIf(
'E'); ++
I) {
676// <dyn-trait> = <path> {<dyn-trait-assoc-binding>} 677// <dyn-trait-assoc-binding> = "p" <undisambiguated-identifier> <type> 678void Demangler::demangleDynTrait() {
679bool IsOpen = demanglePath(IsInType::Yes, LeaveGenericsOpen::Yes);
680while (!
Error && consumeIf(
'p')) {
695// Demangles optional binder and updates the number of bound lifetimes. 697// <binder> = "G" <base-62-number> 698void Demangler::demangleOptionalBinder() {
699uint64_t Binder = parseOptionalBase62Number(
'G');
700if (
Error || Binder == 0)
703// In valid inputs each bound lifetime is referenced later. Referencing a 704// lifetime requires at least one byte of input. Reject inputs that are too 705// short to reference all bound lifetimes. Otherwise demangling of invalid 706// binders could generate excessive amounts of output. 707if (Binder >= Input.size() - BoundLifetimes) {
713for (
size_tI = 0;
I != Binder; ++
I) {
722// <const> = <basic-type> <const-data> 723// | "p" // placeholder 725void Demangler::demangleConst() {
726if (
Error || RecursionLevel >= MaxRecursionLevel) {
741case BasicType::ISize:
747case BasicType::USize:
756case BasicType::Placeholder:
764 demangleBackref([&] { demangleConst(); });
770// <const-data> = ["n"] <hex-number> 771void Demangler::demangleConstInt() {
775 std::string_view HexDigits;
777if (HexDigits.size() <= 16) {
778 printDecimalNumber(
Value);
785// <const-data> = "0_" // false 787void Demangler::demangleConstBool() {
788 std::string_view HexDigits;
789 parseHexNumber(HexDigits);
792elseif (HexDigits ==
"1")
798/// Returns true if CodePoint represents a printable ASCII character. 800return 0x20 <= CodePoint && CodePoint <= 0x7e;
803// <const-data> = <hex-number> 804void Demangler::demangleConstChar() {
805 std::string_view HexDigits;
806uint64_t CodePoint = parseHexNumber(HexDigits);
807if (
Error || HexDigits.size() > 6) {
846// <undisambiguated-identifier> = ["u"] <decimal-number> ["_"] <bytes> 848bool Punycode = consumeIf(
'u');
849uint64_t Bytes = parseDecimalNumber();
851// Underscore resolves the ambiguity when identifier starts with a decimal 852// digit or another underscore. 855if (
Error || Bytes > Input.size() - Position) {
859 std::string_view S = Input.substr(Position, Bytes);
862if (!std::all_of(S.begin(), S.end(),
isValid)) {
870// Parses optional base 62 number. The presence of a number is determined using 871// Tag. Returns 0 when tag is absent and parsed value + 1 otherwise 873// This function is intended for parsing disambiguators and binders which when 874// not present have their value interpreted as 0, and otherwise as decoded 875// value + 1. For example for binders, value for "G_" is 1, for "G0_" value is 876// 2. When "G" is absent value is 0. 877uint64_t Demangler::parseOptionalBase62Number(
char Tag) {
882if (
Error || !addAssign(
N, 1))
888// Parses base 62 number with <0-9a-zA-Z> as digits. Number is terminated by 889// "_". All values are offset by 1, so that "_" encodes 0, "0_" encodes 1, 890// "1_" encodes 2, etc. 892// <base-62-number> = {<0-9a-zA-Z>} "_" 893uint64_t Demangler::parseBase62Number() {
908 Digit = 10 + (
C -
'a');
910 Digit = 10 + 26 + (
C -
'A');
916if (!mulAssign(
Value, 62))
919if (!addAssign(
Value, Digit))
923if (!addAssign(
Value, 1))
929// Parses a decimal number that had been encoded without any leading zeros. 931// <decimal-number> = "0" 933uint64_t Demangler::parseDecimalNumber() {
948if (!mulAssign(
Value, 10)) {
961// Parses a hexadecimal number with <0-9a-f> as digits. Returns the parsed 962// value and stores hex digits in HexDigits. The return value is unspecified if 963// HexDigits.size() > 16. 965// <hex-number> = "0_" 966// | <1-9a-f> {<0-9a-f>} "_" 967uint64_t Demangler::parseHexNumber(std::string_view &HexDigits) {
968size_t Start = Position;
978while (!
Error && !consumeIf(
'_')) {
983elseif (
'a' <=
C &&
C <=
'f')
991 HexDigits = std::string_view();
995size_tEnd = Position - 1;
997 HexDigits = Input.substr(Start,
End - Start);
1001void Demangler::print(
charC) {
1008void Demangler::print(std::string_view S) {
1015void Demangler::printDecimalNumber(
uint64_tN) {
1022// Prints a lifetime. An index 0 always represents an erased lifetime. Indices 1023// starting from 1, are De Bruijn indices, referring to higher-ranked lifetimes 1024// bound by one of the enclosing binders. 1025void Demangler::printLifetime(
uint64_t Index) {
1031if (Index - 1 >= BoundLifetimes) {
1043 printDecimalNumber(
Depth - 26 + 1);
1047staticinlinebool decodePunycodeDigit(
charC,
size_t &
Value) {
1061staticvoid removeNullBytes(
OutputBuffer &Output,
size_t StartIdx) {
1063char *Start = Buffer + StartIdx;
1068// Encodes code point as UTF-8 and stores results in Output. Returns false if 1069// CodePoint is not a valid unicode scalar value. 1070staticinlineboolencodeUTF8(
size_t CodePoint,
char *Output) {
1071if (0xD800 <= CodePoint && CodePoint <= 0xDFFF)
1074if (CodePoint <= 0x7F) {
1075 Output[0] = CodePoint;
1079if (CodePoint <= 0x7FF) {
1080 Output[0] = 0xC0 | ((CodePoint >> 6) & 0x3F);
1081 Output[1] = 0x80 | (CodePoint & 0x3F);
1085if (CodePoint <= 0xFFFF) {
1086 Output[0] = 0xE0 | (CodePoint >> 12);
1087 Output[1] = 0x80 | ((CodePoint >> 6) & 0x3F);
1088 Output[2] = 0x80 | (CodePoint & 0x3F);
1092if (CodePoint <= 0x10FFFF) {
1093 Output[0] = 0xF0 | (CodePoint >> 18);
1094 Output[1] = 0x80 | ((CodePoint >> 12) & 0x3F);
1095 Output[2] = 0x80 | ((CodePoint >> 6) & 0x3F);
1096 Output[3] = 0x80 | (CodePoint & 0x3F);
1103// Decodes string encoded using punycode and appends results to Output. 1104// Returns true if decoding was successful. 1105staticbool decodePunycode(std::string_view Input,
OutputBuffer &Output) {
1109// Rust uses an underscore as a delimiter. 1110size_t DelimiterPos = std::string_view::npos;
1111for (
size_tI = 0;
I != Input.size(); ++
I)
1115if (DelimiterPos != std::string_view::npos) {
1116// Copy basic code points before the last delimiter to the output. 1117for (; InputIdx != DelimiterPos; ++InputIdx) {
1118charC = Input[InputIdx];
1121// Code points are padded with zeros while decoding is in progress. 1123 Output += std::string_view(
UTF8, 4);
1125// Skip over the delimiter. 1137auto Adapt = [&](
size_t Delta,
size_t NumPoints) {
1139 Delta += Delta / NumPoints;
1143while (Delta > (
Base - TMin) * TMax / 2) {
1144 Delta /=
Base - TMin;
1147returnK + (((
Base - TMin + 1) * Delta) / (Delta + Skew));
1150// Main decoding loop. 1151for (
size_tI = 0; InputIdx != Input.size();
I += 1) {
1154size_tMax = std::numeric_limits<size_t>::max();
1156if (InputIdx == Input.size())
1158charC = Input[InputIdx++];
1160if (!decodePunycodeDigit(
C, Digit))
1163if (Digit > (Max -
I) / W)
1170elseif (K >= Bias + TMax)
1178if (W > Max / (
Base -
T))
1183 Bias = Adapt(
I - OldI, NumPoints);
1185if (
I / NumPoints > Max -
N)
1190// Insert N at position I in the output. 1197 removeNullBytes(Output, OutputSize);
1201void Demangler::printIdentifier(Identifier Ident) {
1205if (Ident.Punycode) {
1206if (!decodePunycode(Ident.Name, Output))
1213char Demangler::look()
const{
1214if (
Error || Position >= Input.size())
1217return Input[Position];
1220char Demangler::consume() {
1221if (
Error || Position >= Input.size()) {
1226return Input[Position++];
1229bool Demangler::consumeIf(
char Prefix) {
1230if (
Error || Position >= Input.size() || Input[Position] != Prefix)
1237/// Computes A + B. When computation wraps around sets the error and returns 1238/// false. Otherwise assigns the result to A and returns true. 1240if (
A > std::numeric_limits<uint64_t>::max() -
B) {
1249/// Computes A * B. When computation wraps around sets the error and returns 1250/// false. Otherwise assigns the result to A and returns true. 1252if (
B != 0 &&
A > std::numeric_limits<uint64_t>::max() /
B) {
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
itanium_demangle::ManglingParser< DefaultAllocator > Demangler
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
static bool isDigit(const char C)
static bool isAsciiPrintable(uint64_t CodePoint)
Returns true if CodePoint represents a printable ASCII character.
static bool isHexDigit(const char C)
static bool isLower(const char C)
static bool parseBasicType(char C, BasicType &Type)
static bool isUpper(const char C)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
DEMANGLE_NAMESPACE_BEGIN bool starts_with(std::string_view self, char C) noexcept
static void encodeUTF8(uint32_t UnicodeScalarValue, SmallVectorImpl< char > &Result)
encodeUTF8 - Encode UnicodeScalarValue in UTF-8 and append it to result.
void setCurrentPosition(size_t NewPos)
size_t getCurrentPosition() const
void insert(size_t Pos, const char *S, size_t N)
Lightweight error class with error context and mandatory checking.
The instances of the Type class are immutable: once they are created, they are never changed.
LLVM Value Representation.
@ C
The default llvm calling convention, compatible with C.
Print(const T &, const DataFlowGraph &) -> Print< T >
This is an optimization pass for GlobalISel generic memory operations.
@ Never
Never set the bit.
char * rustDemangle(std::string_view MangledName)
std::string demangle(std::string_view MangledName)
Attempt to demangle a string using different demangling schemes.