Movatterモバイル変換


[0]ホーム

URL:


LLVM 20.0.0git
RustDemangle.cpp
Go to the documentation of this file.
//===--- RustDemangle.cpp ---------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines a demangler for Rust v0 mangled symbols as specified in
// https://rust-lang.github.io/rfcs/2603-rust-symbol-name-mangling-v0.html
//
//===----------------------------------------------------------------------===//
13
14#include "llvm/Demangle/Demangle.h"
15#include "llvm/Demangle/StringViewExtras.h"
16#include "llvm/Demangle/Utility.h"
17
18#include <algorithm>
19#include <cassert>
20#include <cstdint>
21#include <cstring>
22#include <limits>
23#include <string_view>
24
25using namespacellvm;
26
27usingllvm::itanium_demangle::OutputBuffer;
28usingllvm::itanium_demangle::ScopedOverride;
29usingllvm::itanium_demangle::starts_with;
30
31namespace{
32
/// A parsed identifier: the raw bytes taken from the mangled input together
/// with a flag recording whether the "u" (punycode) prefix was present.
struct Identifier {
  /// Bytes of the identifier, exactly as they appear in the input.
  std::string_view Name;
  /// True when the identifier was introduced by the "u" prefix.
  bool Punycode;

  /// Returns true when the identifier holds no bytes.
  bool empty() const { return Name.size() == 0; }
};
39
/// Basic types recognised by the demangler. Each one is encoded by a single
/// lowercase letter in the mangled name.
enum class BasicType {
  // Boolean and character.
  Bool,
  Char,
  // Signed integers.
  I8,
  I16,
  I32,
  I64,
  I128,
  ISize,
  // Unsigned integers.
  U8,
  U16,
  U32,
  U64,
  U128,
  USize,
  // Floating point.
  F32,
  F64,
  // Remaining basic types: str, `_`, `()`, `...` and `!`.
  Str,
  Placeholder,
  Unit,
  Variadic,
  Never,
};
63
/// Tells demanglePath whether the path being demangled appears inside a type.
enum class IsInType {
  No,
  Yes,
};
68
/// Tells demanglePath whether the closing `>` of a generic argument list
/// should be omitted from the output.
enum class LeaveGenericsOpen {
  No,
  Yes,
};
73
74classDemangler {
75// Maximum recursion level. Used to avoid stack overflow.
76size_t MaxRecursionLevel;
77// Current recursion level.
78size_t RecursionLevel;
79size_t BoundLifetimes;
80// Input string that is being demangled with "_R" prefix removed.
81 std::string_view Input;
82// Position in the input string.
83size_t Position;
84// When true, print methods append the output to the stream.
85// When false, the output is suppressed.
86boolPrint;
87// True if an error occurred.
88boolError;
89
90public:
91// Demangled output.
92OutputBuffer Output;
93
94Demangler(size_t MaxRecursionLevel = 500);
95
96booldemangle(std::string_view MangledName);
97
98private:
99bool demanglePath(IsInTypeType,
100 LeaveGenericsOpen LeaveOpen = LeaveGenericsOpen::No);
101void demangleImplPath(IsInType InType);
102void demangleGenericArg();
103void demangleType();
104void demangleFnSig();
105void demangleDynBounds();
106void demangleDynTrait();
107void demangleOptionalBinder();
108void demangleConst();
109void demangleConstInt();
110void demangleConstBool();
111void demangleConstChar();
112
113template <typename Callable>void demangleBackref(Callable Demangler) {
114uint64_t Backref = parseBase62Number();
115if (Error || Backref >= Position) {
116Error =true;
117return;
118 }
119
120if (!Print)
121return;
122
123ScopedOverride<size_t> SavePosition(Position, Position);
124 Position = Backref;
125Demangler();
126 }
127
128Identifier parseIdentifier();
129uint64_t parseOptionalBase62Number(char Tag);
130uint64_t parseBase62Number();
131uint64_t parseDecimalNumber();
132uint64_t parseHexNumber(std::string_view &HexDigits);
133
134voidprint(charC);
135voidprint(std::string_view S);
136void printDecimalNumber(uint64_tN);
137void printBasicType(BasicType);
138void printLifetime(uint64_t Index);
139void printIdentifier(Identifier Ident);
140
141char look()const;
142charconsume();
143bool consumeIf(char Prefix);
144
145bool addAssign(uint64_t &A,uint64_tB);
146bool mulAssign(uint64_t &A,uint64_tB);
147};
148
149}// namespace
150
151char *llvm::rustDemangle(std::string_view MangledName) {
152// Return early if mangled name doesn't look like a Rust symbol.
153if (MangledName.empty() || !starts_with(MangledName,"_R"))
154returnnullptr;
155
156DemanglerD;
157if (!D.demangle(MangledName)) {
158 std::free(D.Output.getBuffer());
159returnnullptr;
160 }
161
162D.Output +='\0';
163
164returnD.Output.getBuffer();
165}
166
167Demangler::Demangler(size_t MaxRecursionLevel)
168 : MaxRecursionLevel(MaxRecursionLevel) {}
169
/// Returns true if C is a decimal digit: <0-9>.
static inline bool isDigit(const char C) { return C >= '0' && C <= '9'; }
171
/// Returns true if C is a lowercase hexadecimal digit: <0-9a-f>.
static inline bool isHexDigit(const char C) {
  const bool IsDecimal = C >= '0' && C <= '9';
  const bool IsLetter = C >= 'a' && C <= 'f';
  return IsDecimal || IsLetter;
}
175
/// Returns true if C is a lowercase ASCII letter: <a-z>.
static inline bool isLower(const char C) { return C >= 'a' && C <= 'z'; }
177
/// Returns true if C is an uppercase ASCII letter: <A-Z>.
static inline bool isUpper(const char C) { return C >= 'A' && C <= 'Z'; }
179
180/// Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
181staticinlineboolisValid(constcharC) {
182returnisDigit(C) ||isLower(C) ||isUpper(C) ||C =='_';
183}
184
185// Demangles Rust v0 mangled symbol. Returns true when successful, and false
186// otherwise. The demangled symbol is stored in Output field. It is
187// responsibility of the caller to free the memory behind the output stream.
188//
189// <symbol-name> = "_R" <path> [<instantiating-crate>]
190bool Demangler::demangle(std::string_view Mangled) {
191 Position = 0;
192Error =false;
193Print =true;
194 RecursionLevel = 0;
195 BoundLifetimes = 0;
196
197if (!starts_with(Mangled,"_R")) {
198Error =true;
199returnfalse;
200 }
201Mangled.remove_prefix(2);
202size_t Dot =Mangled.find('.');
203 Input = Dot == std::string_view::npos ?Mangled :Mangled.substr(0, Dot);
204
205 demanglePath(IsInType::No);
206
207if (Position != Input.size()) {
208ScopedOverride<bool> SavePrint(Print,false);
209 demanglePath(IsInType::No);
210 }
211
212if (Position != Input.size())
213Error =true;
214
215if (Dot != std::string_view::npos) {
216print(" (");
217print(Mangled.substr(Dot));
218print(")");
219 }
220
221return !Error;
222}
223
224// Demangles a path. InType indicates whether a path is inside a type. When
225// LeaveOpen is true, a closing `>` after generic arguments is omitted from the
226// output. Return value indicates whether generics arguments have been left
227// open.
228//
229// <path> = "C" <identifier> // crate root
230// | "M" <impl-path> <type> // <T> (inherent impl)
231// | "X" <impl-path> <type> <path> // <T as Trait> (trait impl)
232// | "Y" <type> <path> // <T as Trait> (trait definition)
233// | "N" <ns> <path> <identifier> // ...::ident (nested path)
234// | "I" <path> {<generic-arg>} "E" // ...<T, U> (generic args)
235// | <backref>
236// <identifier> = [<disambiguator>] <undisambiguated-identifier>
237// <ns> = "C" // closure
238// | "S" // shim
239// | <A-Z> // other special namespaces
240// | <a-z> // internal namespaces
241bool Demangler::demanglePath(IsInType InType, LeaveGenericsOpen LeaveOpen) {
242if (Error || RecursionLevel >= MaxRecursionLevel) {
243Error =true;
244returnfalse;
245 }
246ScopedOverride<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1);
247
248switch (consume()) {
249case'C': {
250 parseOptionalBase62Number('s');
251 printIdentifier(parseIdentifier());
252break;
253 }
254case'M': {
255 demangleImplPath(InType);
256print("<");
257 demangleType();
258print(">");
259break;
260 }
261case'X': {
262 demangleImplPath(InType);
263print("<");
264 demangleType();
265print(" as ");
266 demanglePath(IsInType::Yes);
267print(">");
268break;
269 }
270case'Y': {
271print("<");
272 demangleType();
273print(" as ");
274 demanglePath(IsInType::Yes);
275print(">");
276break;
277 }
278case'N': {
279char NS =consume();
280if (!isLower(NS) && !isUpper(NS)) {
281Error =true;
282break;
283 }
284 demanglePath(InType);
285
286uint64_t Disambiguator = parseOptionalBase62Number('s');
287Identifier Ident = parseIdentifier();
288
289if (isUpper(NS)) {
290// Special namespaces
291print("::{");
292if (NS =='C')
293print("closure");
294elseif (NS =='S')
295print("shim");
296else
297print(NS);
298if (!Ident.empty()) {
299print(":");
300 printIdentifier(Ident);
301 }
302print('#');
303 printDecimalNumber(Disambiguator);
304print('}');
305 }else {
306// Implementation internal namespaces.
307if (!Ident.empty()) {
308print("::");
309 printIdentifier(Ident);
310 }
311 }
312break;
313 }
314case'I': {
315 demanglePath(InType);
316// Omit "::" when in a type, where it is optional.
317if (InType == IsInType::No)
318print("::");
319print("<");
320for (size_tI = 0; !Error && !consumeIf('E'); ++I) {
321if (I > 0)
322print(", ");
323 demangleGenericArg();
324 }
325if (LeaveOpen == LeaveGenericsOpen::Yes)
326returntrue;
327else
328print(">");
329break;
330 }
331case'B': {
332bool IsOpen =false;
333 demangleBackref([&] { IsOpen = demanglePath(InType, LeaveOpen); });
334return IsOpen;
335 }
336default:
337Error =true;
338break;
339 }
340
341returnfalse;
342}
343
344// <impl-path> = [<disambiguator>] <path>
345// <disambiguator> = "s" <base-62-number>
346void Demangler::demangleImplPath(IsInType InType) {
347ScopedOverride<bool> SavePrint(Print,false);
348 parseOptionalBase62Number('s');
349 demanglePath(InType);
350}
351
352// <generic-arg> = <lifetime>
353// | <type>
354// | "K" <const>
355// <lifetime> = "L" <base-62-number>
356void Demangler::demangleGenericArg() {
357if (consumeIf('L'))
358 printLifetime(parseBase62Number());
359elseif (consumeIf('K'))
360 demangleConst();
361else
362 demangleType();
363}
364
365// <basic-type> = "a" // i8
366// | "b" // bool
367// | "c" // char
368// | "d" // f64
369// | "e" // str
370// | "f" // f32
371// | "h" // u8
372// | "i" // isize
373// | "j" // usize
374// | "l" // i32
375// | "m" // u32
376// | "n" // i128
377// | "o" // u128
378// | "s" // i16
379// | "t" // u16
380// | "u" // ()
381// | "v" // ...
382// | "x" // i64
383// | "y" // u64
384// | "z" // !
385// | "p" // placeholder (e.g. for generic params), shown as _
386staticboolparseBasicType(charC, BasicType &Type) {
387switch (C) {
388case'a':
389Type = BasicType::I8;
390returntrue;
391case'b':
392Type = BasicType::Bool;
393returntrue;
394case'c':
395Type = BasicType::Char;
396returntrue;
397case'd':
398Type = BasicType::F64;
399returntrue;
400case'e':
401Type = BasicType::Str;
402returntrue;
403case'f':
404Type = BasicType::F32;
405returntrue;
406case'h':
407Type = BasicType::U8;
408returntrue;
409case'i':
410Type = BasicType::ISize;
411returntrue;
412case'j':
413Type = BasicType::USize;
414returntrue;
415case'l':
416Type = BasicType::I32;
417returntrue;
418case'm':
419Type = BasicType::U32;
420returntrue;
421case'n':
422Type = BasicType::I128;
423returntrue;
424case'o':
425Type = BasicType::U128;
426returntrue;
427case'p':
428Type = BasicType::Placeholder;
429returntrue;
430case's':
431Type = BasicType::I16;
432returntrue;
433case't':
434Type = BasicType::U16;
435returntrue;
436case'u':
437Type = BasicType::Unit;
438returntrue;
439case'v':
440Type = BasicType::Variadic;
441returntrue;
442case'x':
443Type = BasicType::I64;
444returntrue;
445case'y':
446Type = BasicType::U64;
447returntrue;
448case'z':
449Type = BasicType::Never;
450returntrue;
451default:
452returnfalse;
453 }
454}
455
456void Demangler::printBasicType(BasicTypeType) {
457switch (Type) {
458case BasicType::Bool:
459print("bool");
460break;
461case BasicType::Char:
462print("char");
463break;
464case BasicType::I8:
465print("i8");
466break;
467case BasicType::I16:
468print("i16");
469break;
470case BasicType::I32:
471print("i32");
472break;
473case BasicType::I64:
474print("i64");
475break;
476case BasicType::I128:
477print("i128");
478break;
479case BasicType::ISize:
480print("isize");
481break;
482case BasicType::U8:
483print("u8");
484break;
485case BasicType::U16:
486print("u16");
487break;
488case BasicType::U32:
489print("u32");
490break;
491case BasicType::U64:
492print("u64");
493break;
494case BasicType::U128:
495print("u128");
496break;
497case BasicType::USize:
498print("usize");
499break;
500case BasicType::F32:
501print("f32");
502break;
503case BasicType::F64:
504print("f64");
505break;
506case BasicType::Str:
507print("str");
508break;
509case BasicType::Placeholder:
510print("_");
511break;
512case BasicType::Unit:
513print("()");
514break;
515case BasicType::Variadic:
516print("...");
517break;
518case BasicType::Never:
519print("!");
520break;
521 }
522}
523
524// <type> = | <basic-type>
525// | <path> // named type
526// | "A" <type> <const> // [T; N]
527// | "S" <type> // [T]
528// | "T" {<type>} "E" // (T1, T2, T3, ...)
529// | "R" [<lifetime>] <type> // &T
530// | "Q" [<lifetime>] <type> // &mut T
531// | "P" <type> // *const T
532// | "O" <type> // *mut T
533// | "F" <fn-sig> // fn(...) -> ...
534// | "D" <dyn-bounds> <lifetime> // dyn Trait<Assoc = X> + Send + 'a
535// | <backref> // backref
536void Demangler::demangleType() {
537if (Error || RecursionLevel >= MaxRecursionLevel) {
538Error =true;
539return;
540 }
541ScopedOverride<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1);
542
543size_t Start = Position;
544charC =consume();
545 BasicTypeType;
546if (parseBasicType(C,Type))
547return printBasicType(Type);
548
549switch (C) {
550case'A':
551print("[");
552 demangleType();
553print("; ");
554 demangleConst();
555print("]");
556break;
557case'S':
558print("[");
559 demangleType();
560print("]");
561break;
562case'T': {
563print("(");
564size_tI = 0;
565for (; !Error && !consumeIf('E'); ++I) {
566if (I > 0)
567print(", ");
568 demangleType();
569 }
570if (I == 1)
571print(",");
572print(")");
573break;
574 }
575case'R':
576case'Q':
577print('&');
578if (consumeIf('L')) {
579if (auto Lifetime = parseBase62Number()) {
580 printLifetime(Lifetime);
581print(' ');
582 }
583 }
584if (C =='Q')
585print("mut ");
586 demangleType();
587break;
588case'P':
589print("*const ");
590 demangleType();
591break;
592case'O':
593print("*mut ");
594 demangleType();
595break;
596case'F':
597 demangleFnSig();
598break;
599case'D':
600 demangleDynBounds();
601if (consumeIf('L')) {
602if (auto Lifetime = parseBase62Number()) {
603print(" + ");
604 printLifetime(Lifetime);
605 }
606 }else {
607Error =true;
608 }
609break;
610case'B':
611 demangleBackref([&] { demangleType(); });
612break;
613default:
614 Position = Start;
615 demanglePath(IsInType::Yes);
616break;
617 }
618}
619
620// <fn-sig> := [<binder>] ["U"] ["K" <abi>] {<type>} "E" <type>
621// <abi> = "C"
622// | <undisambiguated-identifier>
623void Demangler::demangleFnSig() {
624ScopedOverride<size_t> SaveBoundLifetimes(BoundLifetimes, BoundLifetimes);
625 demangleOptionalBinder();
626
627if (consumeIf('U'))
628print("unsafe ");
629
630if (consumeIf('K')) {
631print("extern \"");
632if (consumeIf('C')) {
633print("C");
634 }else {
635Identifier Ident = parseIdentifier();
636if (Ident.Punycode)
637Error =true;
638for (charC : Ident.Name) {
639// When mangling ABI string, the "-" is replaced with "_".
640if (C =='_')
641C ='-';
642print(C);
643 }
644 }
645print("\" ");
646 }
647
648print("fn(");
649for (size_tI = 0; !Error && !consumeIf('E'); ++I) {
650if (I > 0)
651print(", ");
652 demangleType();
653 }
654print(")");
655
656if (consumeIf('u')) {
657// Skip the unit type from the output.
658 }else {
659print(" -> ");
660 demangleType();
661 }
662}
663
664// <dyn-bounds> = [<binder>] {<dyn-trait>} "E"
665void Demangler::demangleDynBounds() {
666ScopedOverride<size_t> SaveBoundLifetimes(BoundLifetimes, BoundLifetimes);
667print("dyn ");
668 demangleOptionalBinder();
669for (size_tI = 0; !Error && !consumeIf('E'); ++I) {
670if (I > 0)
671print(" + ");
672 demangleDynTrait();
673 }
674}
675
676// <dyn-trait> = <path> {<dyn-trait-assoc-binding>}
677// <dyn-trait-assoc-binding> = "p" <undisambiguated-identifier> <type>
678void Demangler::demangleDynTrait() {
679bool IsOpen = demanglePath(IsInType::Yes, LeaveGenericsOpen::Yes);
680while (!Error && consumeIf('p')) {
681if (!IsOpen) {
682 IsOpen =true;
683print('<');
684 }else {
685print(", ");
686 }
687print(parseIdentifier().Name);
688print(" = ");
689 demangleType();
690 }
691if (IsOpen)
692print(">");
693}
694
695// Demangles optional binder and updates the number of bound lifetimes.
696//
697// <binder> = "G" <base-62-number>
698void Demangler::demangleOptionalBinder() {
699uint64_t Binder = parseOptionalBase62Number('G');
700if (Error || Binder == 0)
701return;
702
703// In valid inputs each bound lifetime is referenced later. Referencing a
704// lifetime requires at least one byte of input. Reject inputs that are too
705// short to reference all bound lifetimes. Otherwise demangling of invalid
706// binders could generate excessive amounts of output.
707if (Binder >= Input.size() - BoundLifetimes) {
708Error =true;
709return;
710 }
711
712print("for<");
713for (size_tI = 0;I != Binder; ++I) {
714 BoundLifetimes += 1;
715if (I > 0)
716print(", ");
717 printLifetime(1);
718 }
719print("> ");
720}
721
722// <const> = <basic-type> <const-data>
723// | "p" // placeholder
724// | <backref>
725void Demangler::demangleConst() {
726if (Error || RecursionLevel >= MaxRecursionLevel) {
727Error =true;
728return;
729 }
730ScopedOverride<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1);
731
732charC =consume();
733 BasicTypeType;
734if (parseBasicType(C,Type)) {
735switch (Type) {
736case BasicType::I8:
737case BasicType::I16:
738case BasicType::I32:
739case BasicType::I64:
740case BasicType::I128:
741case BasicType::ISize:
742case BasicType::U8:
743case BasicType::U16:
744case BasicType::U32:
745case BasicType::U64:
746case BasicType::U128:
747case BasicType::USize:
748 demangleConstInt();
749break;
750case BasicType::Bool:
751 demangleConstBool();
752break;
753case BasicType::Char:
754 demangleConstChar();
755break;
756case BasicType::Placeholder:
757print('_');
758break;
759default:
760Error =true;
761break;
762 }
763 }elseif (C =='B') {
764 demangleBackref([&] { demangleConst(); });
765 }else {
766Error =true;
767 }
768}
769
770// <const-data> = ["n"] <hex-number>
771void Demangler::demangleConstInt() {
772if (consumeIf('n'))
773print('-');
774
775 std::string_view HexDigits;
776uint64_tValue = parseHexNumber(HexDigits);
777if (HexDigits.size() <= 16) {
778 printDecimalNumber(Value);
779 }else {
780print("0x");
781print(HexDigits);
782 }
783}
784
785// <const-data> = "0_" // false
786// | "1_" // true
787void Demangler::demangleConstBool() {
788 std::string_view HexDigits;
789 parseHexNumber(HexDigits);
790if (HexDigits =="0")
791print("false");
792elseif (HexDigits =="1")
793print("true");
794else
795Error =true;
796}
797
/// Returns true if CodePoint is a printable ASCII character, i.e. lies in the
/// range from 0x20 (space) to 0x7e (tilde), both inclusive.
static bool isAsciiPrintable(uint64_t CodePoint) {
  const uint64_t FirstPrintable = 0x20;
  const uint64_t LastPrintable = 0x7e;
  return CodePoint >= FirstPrintable && CodePoint <= LastPrintable;
}
802
803// <const-data> = <hex-number>
804void Demangler::demangleConstChar() {
805 std::string_view HexDigits;
806uint64_t CodePoint = parseHexNumber(HexDigits);
807if (Error || HexDigits.size() > 6) {
808Error =true;
809return;
810 }
811
812print("'");
813switch (CodePoint) {
814case'\t':
815print(R"(\t)");
816break;
817case'\r':
818print(R"(\r)");
819break;
820case'\n':
821print(R"(\n)");
822break;
823case'\\':
824print(R"(\\)");
825break;
826case'"':
827print(R"(")");
828break;
829case'\'':
830print(R"(\')");
831break;
832default:
833if (isAsciiPrintable(CodePoint)) {
834charC = CodePoint;
835print(C);
836 }else {
837print(R"(\u{)");
838print(HexDigits);
839print('}');
840 }
841break;
842 }
843print('\'');
844}
845
846// <undisambiguated-identifier> = ["u"] <decimal-number> ["_"] <bytes>
847Identifier Demangler::parseIdentifier() {
848bool Punycode = consumeIf('u');
849uint64_t Bytes = parseDecimalNumber();
850
851// Underscore resolves the ambiguity when identifier starts with a decimal
852// digit or another underscore.
853 consumeIf('_');
854
855if (Error || Bytes > Input.size() - Position) {
856Error =true;
857return {};
858 }
859 std::string_view S = Input.substr(Position, Bytes);
860 Position += Bytes;
861
862if (!std::all_of(S.begin(), S.end(),isValid)) {
863Error =true;
864return {};
865 }
866
867return {S, Punycode};
868}
869
870// Parses optional base 62 number. The presence of a number is determined using
871// Tag. Returns 0 when tag is absent and parsed value + 1 otherwise
872//
873// This function is intended for parsing disambiguators and binders which when
874// not present have their value interpreted as 0, and otherwise as decoded
875// value + 1. For example for binders, value for "G_" is 1, for "G0_" value is
876// 2. When "G" is absent value is 0.
877uint64_t Demangler::parseOptionalBase62Number(char Tag) {
878if (!consumeIf(Tag))
879return 0;
880
881uint64_tN = parseBase62Number();
882if (Error || !addAssign(N, 1))
883return 0;
884
885returnN;
886}
887
888// Parses base 62 number with <0-9a-zA-Z> as digits. Number is terminated by
889// "_". All values are offset by 1, so that "_" encodes 0, "0_" encodes 1,
890// "1_" encodes 2, etc.
891//
892// <base-62-number> = {<0-9a-zA-Z>} "_"
893uint64_t Demangler::parseBase62Number() {
894if (consumeIf('_'))
895return 0;
896
897uint64_tValue = 0;
898
899while (true) {
900uint64_t Digit;
901charC =consume();
902
903if (C =='_') {
904break;
905 }elseif (isDigit(C)) {
906 Digit =C -'0';
907 }elseif (isLower(C)) {
908 Digit = 10 + (C -'a');
909 }elseif (isUpper(C)) {
910 Digit = 10 + 26 + (C -'A');
911 }else {
912Error =true;
913return 0;
914 }
915
916if (!mulAssign(Value, 62))
917return 0;
918
919if (!addAssign(Value, Digit))
920return 0;
921 }
922
923if (!addAssign(Value, 1))
924return 0;
925
926returnValue;
927}
928
929// Parses a decimal number that had been encoded without any leading zeros.
930//
931// <decimal-number> = "0"
932// | <1-9> {<0-9>}
933uint64_t Demangler::parseDecimalNumber() {
934charC = look();
935if (!isDigit(C)) {
936Error =true;
937return 0;
938 }
939
940if (C =='0') {
941consume();
942return 0;
943 }
944
945uint64_tValue = 0;
946
947while (isDigit(look())) {
948if (!mulAssign(Value, 10)) {
949Error =true;
950return 0;
951 }
952
953uint64_tD =consume() -'0';
954if (!addAssign(Value,D))
955return 0;
956 }
957
958returnValue;
959}
960
961// Parses a hexadecimal number with <0-9a-f> as a digits. Returns the parsed
962// value and stores hex digits in HexDigits. The return value is unspecified if
963// HexDigits.size() > 16.
964//
965// <hex-number> = "0_"
966// | <1-9a-f> {<0-9a-f>} "_"
967uint64_t Demangler::parseHexNumber(std::string_view &HexDigits) {
968size_t Start = Position;
969uint64_tValue = 0;
970
971if (!isHexDigit(look()))
972Error =true;
973
974if (consumeIf('0')) {
975if (!consumeIf('_'))
976Error =true;
977 }else {
978while (!Error && !consumeIf('_')) {
979charC =consume();
980Value *= 16;
981if (isDigit(C))
982Value +=C -'0';
983elseif ('a' <=C &&C <='f')
984Value += 10 + (C -'a');
985else
986Error =true;
987 }
988 }
989
990if (Error) {
991 HexDigits = std::string_view();
992return 0;
993 }
994
995size_tEnd = Position - 1;
996assert(Start <End);
997 HexDigits = Input.substr(Start,End - Start);
998returnValue;
999}
1000
1001void Demangler::print(charC) {
1002if (Error || !Print)
1003return;
1004
1005 Output +=C;
1006}
1007
1008void Demangler::print(std::string_view S) {
1009if (Error || !Print)
1010return;
1011
1012 Output += S;
1013}
1014
1015void Demangler::printDecimalNumber(uint64_tN) {
1016if (Error || !Print)
1017return;
1018
1019 Output <<N;
1020}
1021
1022// Prints a lifetime. An index 0 always represents an erased lifetime. Indices
1023// starting from 1, are De Bruijn indices, referring to higher-ranked lifetimes
1024// bound by one of the enclosing binders.
1025void Demangler::printLifetime(uint64_t Index) {
1026if (Index == 0) {
1027print("'_");
1028return;
1029 }
1030
1031if (Index - 1 >= BoundLifetimes) {
1032Error =true;
1033return;
1034 }
1035
1036uint64_tDepth = BoundLifetimes -Index;
1037print('\'');
1038if (Depth < 26) {
1039charC ='a' +Depth;
1040print(C);
1041 }else {
1042print('z');
1043 printDecimalNumber(Depth - 26 + 1);
1044 }
1045}
1046
1047staticinlinebool decodePunycodeDigit(charC,size_t &Value) {
1048if (isLower(C)) {
1049Value =C -'a';
1050returntrue;
1051 }
1052
1053if (isDigit(C)) {
1054Value = 26 + (C -'0');
1055returntrue;
1056 }
1057
1058returnfalse;
1059}
1060
1061staticvoid removeNullBytes(OutputBuffer &Output,size_t StartIdx) {
1062char *Buffer = Output.getBuffer();
1063char *Start = Buffer + StartIdx;
1064char *End = Buffer + Output.getCurrentPosition();
1065 Output.setCurrentPosition(std::remove(Start,End,'\0') - Buffer);
1066}
1067
// Encodes CodePoint as UTF-8 and stores the one to four resulting bytes in
// Output; the remaining bytes of Output are not written. Returns false, and
// writes nothing, if CodePoint is not a valid unicode scalar value.
static inline bool encodeUTF8(size_t CodePoint, char *Output) {
  // Surrogates are not scalar values.
  if (CodePoint >= 0xD800 && CodePoint <= 0xDFFF)
    return false;
  if (CodePoint > 0x10FFFF)
    return false;

  // Builds a continuation byte from the six bits of CodePoint at Shift.
  const auto Continuation = [CodePoint](unsigned Shift) {
    return static_cast<char>(0x80 | ((CodePoint >> Shift) & 0x3F));
  };

  if (CodePoint <= 0x7F) {
    Output[0] = static_cast<char>(CodePoint);
  } else if (CodePoint <= 0x7FF) {
    Output[0] = static_cast<char>(0xC0 | (CodePoint >> 6));
    Output[1] = Continuation(0);
  } else if (CodePoint <= 0xFFFF) {
    Output[0] = static_cast<char>(0xE0 | (CodePoint >> 12));
    Output[1] = Continuation(6);
    Output[2] = Continuation(0);
  } else {
    Output[0] = static_cast<char>(0xF0 | (CodePoint >> 18));
    Output[1] = Continuation(12);
    Output[2] = Continuation(6);
    Output[3] = Continuation(0);
  }
  return true;
}
1102
1103// Decodes string encoded using punycode and appends results to Output.
1104// Returns true if decoding was successful.
1105staticbool decodePunycode(std::string_view Input,OutputBuffer &Output) {
1106size_t OutputSize = Output.getCurrentPosition();
1107size_t InputIdx = 0;
1108
1109// Rust uses an underscore as a delimiter.
1110size_t DelimiterPos = std::string_view::npos;
1111for (size_tI = 0;I != Input.size(); ++I)
1112if (Input[I] =='_')
1113 DelimiterPos =I;
1114
1115if (DelimiterPos != std::string_view::npos) {
1116// Copy basic code points before the last delimiter to the output.
1117for (; InputIdx != DelimiterPos; ++InputIdx) {
1118charC = Input[InputIdx];
1119if (!isValid(C))
1120returnfalse;
1121// Code points are padded with zeros while decoding is in progress.
1122charUTF8[4] = {C};
1123 Output += std::string_view(UTF8, 4);
1124 }
1125// Skip over the delimiter.
1126 ++InputIdx;
1127 }
1128
1129size_tBase = 36;
1130size_t Skew = 38;
1131size_t Bias = 72;
1132size_tN = 0x80;
1133size_t TMin = 1;
1134size_t TMax = 26;
1135size_t Damp = 700;
1136
1137auto Adapt = [&](size_t Delta,size_t NumPoints) {
1138 Delta /= Damp;
1139 Delta += Delta / NumPoints;
1140 Damp = 2;
1141
1142size_tK = 0;
1143while (Delta > (Base - TMin) * TMax / 2) {
1144 Delta /=Base - TMin;
1145K +=Base;
1146 }
1147returnK + (((Base - TMin + 1) * Delta) / (Delta + Skew));
1148 };
1149
1150// Main decoding loop.
1151for (size_tI = 0; InputIdx != Input.size();I += 1) {
1152size_t OldI =I;
1153size_tW = 1;
1154size_tMax = std::numeric_limits<size_t>::max();
1155for (size_t K =Base;true;K +=Base) {
1156if (InputIdx == Input.size())
1157returnfalse;
1158charC = Input[InputIdx++];
1159size_t Digit = 0;
1160if (!decodePunycodeDigit(C, Digit))
1161returnfalse;
1162
1163if (Digit > (Max -I) / W)
1164returnfalse;
1165I += Digit *W;
1166
1167size_tT;
1168if (K <= Bias)
1169T = TMin;
1170elseif (K >= Bias + TMax)
1171T = TMax;
1172else
1173T =K - Bias;
1174
1175if (Digit <T)
1176break;
1177
1178if (W > Max / (Base -T))
1179returnfalse;
1180W *= (Base -T);
1181 }
1182size_t NumPoints = (Output.getCurrentPosition() - OutputSize) / 4 + 1;
1183 Bias = Adapt(I - OldI, NumPoints);
1184
1185if (I / NumPoints > Max -N)
1186returnfalse;
1187N +=I / NumPoints;
1188I =I % NumPoints;
1189
1190// Insert N at position I in the output.
1191charUTF8[4] = {};
1192if (!encodeUTF8(N,UTF8))
1193returnfalse;
1194 Output.insert(OutputSize +I * 4,UTF8, 4);
1195 }
1196
1197 removeNullBytes(Output, OutputSize);
1198returntrue;
1199}
1200
1201void Demangler::printIdentifier(Identifier Ident) {
1202if (Error || !Print)
1203return;
1204
1205if (Ident.Punycode) {
1206if (!decodePunycode(Ident.Name, Output))
1207Error =true;
1208 }else {
1209print(Ident.Name);
1210 }
1211}
1212
1213char Demangler::look() const{
1214if (Error || Position >= Input.size())
1215return 0;
1216
1217return Input[Position];
1218}
1219
1220char Demangler::consume() {
1221if (Error || Position >= Input.size()) {
1222Error =true;
1223return 0;
1224 }
1225
1226return Input[Position++];
1227}
1228
1229bool Demangler::consumeIf(char Prefix) {
1230if (Error || Position >= Input.size() || Input[Position] != Prefix)
1231returnfalse;
1232
1233 Position += 1;
1234returntrue;
1235}
1236
1237/// Computes A + B. When computation wraps around sets the error and returns
1238/// false. Otherwise assigns the result to A and returns true.
1239bool Demangler::addAssign(uint64_t &A,uint64_tB) {
1240if (A > std::numeric_limits<uint64_t>::max() -B) {
1241Error =true;
1242returnfalse;
1243 }
1244
1245A +=B;
1246returntrue;
1247}
1248
1249/// Computes A * B. When computation wraps around sets the error and returns
1250/// false. Otherwise assigns the result to A and returns true.
1251bool Demangler::mulAssign(uint64_t &A,uint64_tB) {
1252if (B != 0 &&A > std::numeric_limits<uint64_t>::max() /B) {
1253Error =true;
1254returnfalse;
1255 }
1256
1257A *=B;
1258returntrue;
1259}
F64
static const LLT F64
Definition:AMDGPULegalizerInfo.cpp:288
F32
static const LLT F32
Definition:AMDGPULegalizerInfo.cpp:286
print
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
Definition:ArchiveWriter.cpp:205
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
A
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
D
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
Utility.h
Demangle.h
Name
std::string Name
Definition:ELFObjHandler.cpp:77
End
bool End
Definition:ELF_riscv.cpp:480
Demangler
itanium_demangle::ManglingParser< DefaultAllocator > Demangler
Definition:ItaniumDemangle.cpp:367
I
#define I(x, y, z)
Definition:MD5.cpp:58
isValid
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
Definition:RustDemangle.cpp:181
isDigit
static bool isDigit(const char C)
Definition:RustDemangle.cpp:170
isAsciiPrintable
static bool isAsciiPrintable(uint64_t CodePoint)
Returns true if CodePoint represents a printable ASCII character.
Definition:RustDemangle.cpp:799
isHexDigit
static bool isHexDigit(const char C)
Definition:RustDemangle.cpp:172
isLower
static bool isLower(const char C)
Definition:RustDemangle.cpp:176
parseBasicType
static bool parseBasicType(char C, BasicType &Type)
Definition:RustDemangle.cpp:386
isUpper
static bool isUpper(const char C)
Definition:RustDemangle.cpp:178
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
StringViewExtras.h
starts_with
DEMANGLE_NAMESPACE_BEGIN bool starts_with(std::string_view self, char C) noexcept
Definition:StringViewExtras.h:24
Bool
@ Bool
Definition:TargetLibraryInfo.cpp:62
encodeUTF8
static void encodeUTF8(uint32_t UnicodeScalarValue, SmallVectorImpl< char > &Result)
encodeUTF8 - Encode UnicodeScalarValue in UTF-8 and append it to result.
Definition:YAMLParser.cpp:579
OutputBuffer
Definition:Utility.h:32
OutputBuffer::getBuffer
char * getBuffer()
Definition:Utility.h:180
OutputBuffer::setCurrentPosition
void setCurrentPosition(size_t NewPos)
Definition:Utility.h:171
OutputBuffer::getCurrentPosition
size_t getCurrentPosition() const
Definition:Utility.h:170
OutputBuffer::insert
void insert(size_t Pos, const char *S, size_t N)
Definition:Utility.h:160
ScopedOverride
Definition:Utility.h:185
T
llvm::Error
Lightweight error class with error context and mandatory checking.
Definition:Error.h:160
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition:Type.h:45
llvm::Value
LLVM Value Representation.
Definition:Value.h:74
uint64_t
llvm::AMDGPU::HSAMD::ValueType::U64
@ U64
llvm::AMDGPU::HSAMD::ValueType::I8
@ I8
llvm::AMDGPU::HSAMD::ValueType::U8
@ U8
llvm::AMDGPU::HSAMD::ValueType::I16
@ I16
llvm::AMDGPU::HSAMD::ValueType::U32
@ U32
llvm::AMDGPU::HSAMD::ValueType::U16
@ U16
llvm::CallingConv::C
@ C
The default llvm calling convention, compatible with C.
Definition:CallingConv.h:34
llvm::GraphProgram::Name
Name
Definition:GraphWriter.h:50
llvm::M68k::MemAddrModeKind::K
@ K
llvm::MCID::Variadic
@ Variadic
Definition:MCInstrDesc.h:150
llvm::RISCVFenceField::W
@ W
Definition:RISCVBaseInfo.h:374
llvm::codegenoptions::DebugTemplateNamesKind::Mangled
@ Mangled
llvm::dwarf::Index
Index
Definition:Dwarf.h:882
llvm::object::Identifier
@ Identifier
Definition:COFFModuleDefinition.cpp:34
llvm::pdb::PDB_BuiltinType::Char
@ Char
llvm::pdb::DbgHeaderType::Max
@ Max
llvm::rdf::Print
Print(const T &, const DataFlowGraph &) -> Print< T >
llvm::sampleprof::Base
@ Base
Definition:Discriminator.h:58
llvm::wasm::ValType::I32
@ I32
llvm::wasm::ValType::I64
@ I64
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition:AddressRanges.h:18
llvm::Depth
@ Depth
Definition:SIMachineScheduler.h:36
llvm::AtomicOrderingCABI::consume
@ consume
llvm::SwiftAsyncFramePointerMode::Never
@ Never
Never set the bit.
llvm::rustDemangle
char * rustDemangle(std::string_view MangledName)
Definition:RustDemangle.cpp:151
llvm::PrevailingType::Yes
@ Yes
llvm::UTF8
unsigned char UTF8
Definition:ConvertUTF.h:130
llvm::InlinerFunctionImportStatsOpts::No
@ No
llvm::demangle
std::string demangle(std::string_view MangledName)
Attempt to demangle a string using different demangling schemes.
Definition:Demangle.cpp:20
N
#define N

Generated on Thu Jul 17 2025 12:04:43 for LLVM by doxygen 1.9.6
[8]ページ先頭

©2009-2025 Movatter.jp