- Notifications
You must be signed in to change notification settings - Fork 14.5k
[KeyInstr] Inline asm atoms #149076
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base:main
Are you sure you want to change the base?
[KeyInstr] Inline asm atoms #149076
Conversation
@llvm/pr-subscribers-clang-codegen Author: Orlando Cazalet-Hyams (OCHyams) Changes: Copied an aarch64 test as it was the only one I could find that exercised both results-store codepaths. Full diff: https://github.com/llvm/llvm-project/pull/149076.diff 2 Files Affected:
diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cppindex e0650067b9547..656b3a8ea067a 100644--- a/clang/lib/CodeGen/CGStmt.cpp+++ b/clang/lib/CodeGen/CGStmt.cpp@@ -2672,6 +2672,9 @@ static void UpdateAsmCallInst(llvm::CallBase &Result, bool HasSideEffect, llvm::ConstantAsMetadata::get(Loc))); }+ // Make inline-asm calls Key for the debug info feature Key Instructions.+ CGF.addInstToNewSourceAtom(&Result, nullptr);+ if (!NoConvergent && CGF.getLangOpts().assumeFunctionsAreConvergent()) // Conservatively, mark all inline asm blocks in CUDA or OpenCL as // convergent (meaning, they may call an intrinsically convergent op, such@@ -2750,6 +2753,7 @@ EmitAsmStores(CodeGenFunction &CGF, const AsmStmt &S, } }+ ApplyAtomGroup Grp(CGF.getDebugInfo()); LValue Dest = ResultRegDests[i]; // ResultTypeRequiresCast elements correspond to the first // ResultTypeRequiresCast.size() elements of RegResults.@@ -2757,7 +2761,8 @@ EmitAsmStores(CodeGenFunction &CGF, const AsmStmt &S, unsigned Size = CGF.getContext().getTypeSize(ResultRegQualTys[i]); Address A = Dest.getAddress().withElementType(ResultRegTypes[i]); if (CGF.getTargetHooks().isScalarizableAsmOperand(CGF, TruncTy)) {- Builder.CreateStore(Tmp, A);+ llvm::StoreInst *S = Builder.CreateStore(Tmp, A);+ CGF.addInstToCurrentSourceAtom(S, S->getValueOperand()); continue; }diff --git a/clang/test/DebugInfo/KeyInstructions/asm.c b/clang/test/DebugInfo/KeyInstructions/asm.cnew file mode 100644index 0000000000000..0fb697554f5fc--- /dev/null+++ b/clang/test/DebugInfo/KeyInstructions/asm.c@@ -0,0 +1,83 @@+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5+// RUN: %clang_cc1 -triple aarch64 -target-feature +ls64 -O0 -emit-llvm -x c %s -o - -gkey-instructions -debug-info-kind=limited -gno-column-info | FileCheck %s+// Partially copied from clang/test/CodeGen/AArch64/ls64-inline-asm.c++// Check the inline asm call and result store are Key and distinct atoms.++struct foo { 
unsigned long long x[8]; };+// CHECK-LABEL: define dso_local void @load(+// CHECK-SAME: ptr noundef [[OUTPUT:%.*]], ptr noundef [[ADDR:%.*]]) #[[ATTR0:[0-9]+]] !dbg [[DBG5:![0-9]+]] {+// CHECK-NEXT: [[ENTRY:.*:]]+// CHECK-NEXT: [[OUTPUT_ADDR:%.*]] = alloca ptr, align 8+// CHECK-NEXT: [[ADDR_ADDR:%.*]] = alloca ptr, align 8+// CHECK-NEXT: store ptr [[OUTPUT]], ptr [[OUTPUT_ADDR]], align 8+// CHECK-NEXT: #dbg_declare(ptr [[OUTPUT_ADDR]], [[META19:![0-9]+]], !DIExpression(), [[META20:![0-9]+]])+// CHECK-NEXT: store ptr [[ADDR]], ptr [[ADDR_ADDR]], align 8+// CHECK-NEXT: #dbg_declare(ptr [[ADDR_ADDR]], [[META21:![0-9]+]], !DIExpression(), [[META20]])+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[OUTPUT_ADDR]], align 8, !dbg [[DBG22:![0-9]+]]+// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 8, !dbg [[DBG22]]+// CHECK-NEXT: [[TMP2:%.*]] = call i512 asm sideeffect "ld64b $0,[$1]", "=r,r,~{memory}"(ptr [[TMP1]]) #[[ATTR1:[0-9]+]], !dbg [[DBG23:![0-9]+]], !srcloc [[META24:![0-9]+]]+// CHECK-NEXT: store i512 [[TMP2]], ptr [[TMP0]], align 8, !dbg [[DBG25:![0-9]+]]+// CHECK-NEXT: ret void, !dbg [[DBG26:![0-9]+]]+//+void load(struct foo *output, void *addr) {+ __asm__ volatile ("ld64b %0,[%1]" : "=r" (*output) : "r" (addr) : "memory");+}++// CHECK-LABEL: define dso_local void @load2(+// CHECK-SAME: ptr noundef [[OUTPUT:%.*]], ptr noundef [[ADDR:%.*]]) #[[ATTR0]] !dbg [[DBG27:![0-9]+]] {+// CHECK-NEXT: [[ENTRY:.*:]]+// CHECK-NEXT: [[OUTPUT_ADDR:%.*]] = alloca ptr, align 8+// CHECK-NEXT: [[ADDR_ADDR:%.*]] = alloca ptr, align 8+// CHECK-NEXT: store ptr [[OUTPUT]], ptr [[OUTPUT_ADDR]], align 8+// CHECK-NEXT: #dbg_declare(ptr [[OUTPUT_ADDR]], [[META32:![0-9]+]], !DIExpression(), [[META33:![0-9]+]])+// CHECK-NEXT: store ptr [[ADDR]], ptr [[ADDR_ADDR]], align 8+// CHECK-NEXT: #dbg_declare(ptr [[ADDR_ADDR]], [[META34:![0-9]+]], !DIExpression(), [[META33]])+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[OUTPUT_ADDR]], align 8, !dbg [[DBG35:![0-9]+]]+// CHECK-NEXT: 
[[TMP1:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 8, !dbg [[DBG35]]+// CHECK-NEXT: [[TMP2:%.*]] = call i32 asm sideeffect "ld64b $0,[$1]", "=r,r,~{memory}"(ptr [[TMP1]]) #[[ATTR1]], !dbg [[DBG36:![0-9]+]], !srcloc [[META37:![0-9]+]]+// CHECK-NEXT: store i32 [[TMP2]], ptr [[TMP0]], align 4, !dbg [[DBG38:![0-9]+]]+// CHECK-NEXT: ret void, !dbg [[DBG39:![0-9]+]]+//+void load2(int *output, void *addr) {+ __asm__ volatile ("ld64b %0,[%1]" : "=r" (*output) : "r" (addr) : "memory");+}+//.+// CHECK: [[META0:![0-9]+]] = distinct !DICompileUnit(language: DW_LANG_C11, file: [[META1:![0-9]+]], producer: "{{.*}}clang version {{.*}}", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None)+// CHECK: [[META1]] = !DIFile(filename: "{{.*}}<stdin>", directory: {{.*}})+// CHECK: [[DBG5]] = distinct !DISubprogram(name: "load", scope: [[META6:![0-9]+]], file: [[META6]], line: 23, type: [[META7:![0-9]+]], scopeLine: 23, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: [[META0]], retainedNodes: [[META18:![0-9]+]], keyInstructions: true)+// CHECK: [[META6]] = !DIFile(filename: "{{.*}}asm.c", directory: {{.*}})+// CHECK: [[META7]] = !DISubroutineType(types: [[META8:![0-9]+]])+// CHECK: [[META8]] = !{null, [[META9:![0-9]+]], [[META17:![0-9]+]]}+// CHECK: [[META9]] = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: [[META10:![0-9]+]], size: 64)+// CHECK: [[META10]] = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "foo", file: [[META6]], line: 7, size: 512, elements: [[META11:![0-9]+]])+// CHECK: [[META11]] = !{[[META12:![0-9]+]]}+// CHECK: [[META12]] = !DIDerivedType(tag: DW_TAG_member, name: "x", scope: [[META10]], file: [[META6]], line: 7, baseType: [[META13:![0-9]+]], size: 512)+// CHECK: [[META13]] = !DICompositeType(tag: DW_TAG_array_type, baseType: [[META14:![0-9]+]], size: 512, elements: [[META15:![0-9]+]])+// CHECK: [[META14]] = !DIBasicType(name: "unsigned long long", size: 64, encoding: 
DW_ATE_unsigned)+// CHECK: [[META15]] = !{[[META16:![0-9]+]]}+// CHECK: [[META16]] = !DISubrange(count: 8)+// CHECK: [[META17]] = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: null, size: 64)+// CHECK: [[META18]] = !{}+// CHECK: [[META19]] = !DILocalVariable(name: "output", arg: 1, scope: [[DBG5]], file: [[META6]], line: 23, type: [[META9]])+// CHECK: [[META20]] = !DILocation(line: 23, scope: [[DBG5]])+// CHECK: [[META21]] = !DILocalVariable(name: "addr", arg: 2, scope: [[DBG5]], file: [[META6]], line: 23, type: [[META17]])+// CHECK: [[DBG22]] = !DILocation(line: 24, scope: [[DBG5]])+// CHECK: [[DBG23]] = !DILocation(line: 24, scope: [[DBG5]], atomGroup: 1, atomRank: 1)+// CHECK: [[META24]] = !{i64 1663}+// CHECK: [[DBG25]] = !DILocation(line: 24, scope: [[DBG5]], atomGroup: 2, atomRank: 1)+// CHECK: [[DBG26]] = !DILocation(line: 25, scope: [[DBG5]], atomGroup: 3, atomRank: 1)+// CHECK: [[DBG27]] = distinct !DISubprogram(name: "load2", scope: [[META6]], file: [[META6]], line: 42, type: [[META28:![0-9]+]], scopeLine: 42, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: [[META0]], retainedNodes: [[META18]], keyInstructions: true)+// CHECK: [[META28]] = !DISubroutineType(types: [[META29:![0-9]+]])+// CHECK: [[META29]] = !{null, [[META30:![0-9]+]], [[META17]]}+// CHECK: [[META30]] = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: [[META31:![0-9]+]], size: 64)+// CHECK: [[META31]] = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)+// CHECK: [[META32]] = !DILocalVariable(name: "output", arg: 1, scope: [[DBG27]], file: [[META6]], line: 42, type: [[META30]])+// CHECK: [[META33]] = !DILocation(line: 42, scope: [[DBG27]])+// CHECK: [[META34]] = !DILocalVariable(name: "addr", arg: 2, scope: [[DBG27]], file: [[META6]], line: 42, type: [[META17]])+// CHECK: [[DBG35]] = !DILocation(line: 43, scope: [[DBG27]])+// CHECK: [[DBG36]] = !DILocation(line: 43, scope: [[DBG27]], atomGroup: 1, atomRank: 1)+// CHECK: [[META37]] = !{i64 2918}+// CHECK: 
[[DBG38]] = !DILocation(line: 43, scope: [[DBG27]], atomGroup: 2, atomRank: 1)+// CHECK: [[DBG39]] = !DILocation(line: 44, scope: [[DBG27]], atomGroup: 3, atomRank: 1)+//. |
@llvm/pr-subscribers-clang Author: Orlando Cazalet-Hyams (OCHyams) Changes: Copied an aarch64 test as it was the only one I could find that exercised both results-store codepaths. Full diff: https://github.com/llvm/llvm-project/pull/149076.diff 2 Files Affected:
diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cppindex e0650067b9547..656b3a8ea067a 100644--- a/clang/lib/CodeGen/CGStmt.cpp+++ b/clang/lib/CodeGen/CGStmt.cpp@@ -2672,6 +2672,9 @@ static void UpdateAsmCallInst(llvm::CallBase &Result, bool HasSideEffect, llvm::ConstantAsMetadata::get(Loc))); }+ // Make inline-asm calls Key for the debug info feature Key Instructions.+ CGF.addInstToNewSourceAtom(&Result, nullptr);+ if (!NoConvergent && CGF.getLangOpts().assumeFunctionsAreConvergent()) // Conservatively, mark all inline asm blocks in CUDA or OpenCL as // convergent (meaning, they may call an intrinsically convergent op, such@@ -2750,6 +2753,7 @@ EmitAsmStores(CodeGenFunction &CGF, const AsmStmt &S, } }+ ApplyAtomGroup Grp(CGF.getDebugInfo()); LValue Dest = ResultRegDests[i]; // ResultTypeRequiresCast elements correspond to the first // ResultTypeRequiresCast.size() elements of RegResults.@@ -2757,7 +2761,8 @@ EmitAsmStores(CodeGenFunction &CGF, const AsmStmt &S, unsigned Size = CGF.getContext().getTypeSize(ResultRegQualTys[i]); Address A = Dest.getAddress().withElementType(ResultRegTypes[i]); if (CGF.getTargetHooks().isScalarizableAsmOperand(CGF, TruncTy)) {- Builder.CreateStore(Tmp, A);+ llvm::StoreInst *S = Builder.CreateStore(Tmp, A);+ CGF.addInstToCurrentSourceAtom(S, S->getValueOperand()); continue; }diff --git a/clang/test/DebugInfo/KeyInstructions/asm.c b/clang/test/DebugInfo/KeyInstructions/asm.cnew file mode 100644index 0000000000000..0fb697554f5fc--- /dev/null+++ b/clang/test/DebugInfo/KeyInstructions/asm.c@@ -0,0 +1,83 @@+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5+// RUN: %clang_cc1 -triple aarch64 -target-feature +ls64 -O0 -emit-llvm -x c %s -o - -gkey-instructions -debug-info-kind=limited -gno-column-info | FileCheck %s+// Partially copied from clang/test/CodeGen/AArch64/ls64-inline-asm.c++// Check the inline asm call and result store are Key and distinct atoms.++struct foo { 
unsigned long long x[8]; };+// CHECK-LABEL: define dso_local void @load(+// CHECK-SAME: ptr noundef [[OUTPUT:%.*]], ptr noundef [[ADDR:%.*]]) #[[ATTR0:[0-9]+]] !dbg [[DBG5:![0-9]+]] {+// CHECK-NEXT: [[ENTRY:.*:]]+// CHECK-NEXT: [[OUTPUT_ADDR:%.*]] = alloca ptr, align 8+// CHECK-NEXT: [[ADDR_ADDR:%.*]] = alloca ptr, align 8+// CHECK-NEXT: store ptr [[OUTPUT]], ptr [[OUTPUT_ADDR]], align 8+// CHECK-NEXT: #dbg_declare(ptr [[OUTPUT_ADDR]], [[META19:![0-9]+]], !DIExpression(), [[META20:![0-9]+]])+// CHECK-NEXT: store ptr [[ADDR]], ptr [[ADDR_ADDR]], align 8+// CHECK-NEXT: #dbg_declare(ptr [[ADDR_ADDR]], [[META21:![0-9]+]], !DIExpression(), [[META20]])+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[OUTPUT_ADDR]], align 8, !dbg [[DBG22:![0-9]+]]+// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 8, !dbg [[DBG22]]+// CHECK-NEXT: [[TMP2:%.*]] = call i512 asm sideeffect "ld64b $0,[$1]", "=r,r,~{memory}"(ptr [[TMP1]]) #[[ATTR1:[0-9]+]], !dbg [[DBG23:![0-9]+]], !srcloc [[META24:![0-9]+]]+// CHECK-NEXT: store i512 [[TMP2]], ptr [[TMP0]], align 8, !dbg [[DBG25:![0-9]+]]+// CHECK-NEXT: ret void, !dbg [[DBG26:![0-9]+]]+//+void load(struct foo *output, void *addr) {+ __asm__ volatile ("ld64b %0,[%1]" : "=r" (*output) : "r" (addr) : "memory");+}++// CHECK-LABEL: define dso_local void @load2(+// CHECK-SAME: ptr noundef [[OUTPUT:%.*]], ptr noundef [[ADDR:%.*]]) #[[ATTR0]] !dbg [[DBG27:![0-9]+]] {+// CHECK-NEXT: [[ENTRY:.*:]]+// CHECK-NEXT: [[OUTPUT_ADDR:%.*]] = alloca ptr, align 8+// CHECK-NEXT: [[ADDR_ADDR:%.*]] = alloca ptr, align 8+// CHECK-NEXT: store ptr [[OUTPUT]], ptr [[OUTPUT_ADDR]], align 8+// CHECK-NEXT: #dbg_declare(ptr [[OUTPUT_ADDR]], [[META32:![0-9]+]], !DIExpression(), [[META33:![0-9]+]])+// CHECK-NEXT: store ptr [[ADDR]], ptr [[ADDR_ADDR]], align 8+// CHECK-NEXT: #dbg_declare(ptr [[ADDR_ADDR]], [[META34:![0-9]+]], !DIExpression(), [[META33]])+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[OUTPUT_ADDR]], align 8, !dbg [[DBG35:![0-9]+]]+// CHECK-NEXT: 
[[TMP1:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 8, !dbg [[DBG35]]+// CHECK-NEXT: [[TMP2:%.*]] = call i32 asm sideeffect "ld64b $0,[$1]", "=r,r,~{memory}"(ptr [[TMP1]]) #[[ATTR1]], !dbg [[DBG36:![0-9]+]], !srcloc [[META37:![0-9]+]]+// CHECK-NEXT: store i32 [[TMP2]], ptr [[TMP0]], align 4, !dbg [[DBG38:![0-9]+]]+// CHECK-NEXT: ret void, !dbg [[DBG39:![0-9]+]]+//+void load2(int *output, void *addr) {+ __asm__ volatile ("ld64b %0,[%1]" : "=r" (*output) : "r" (addr) : "memory");+}+//.+// CHECK: [[META0:![0-9]+]] = distinct !DICompileUnit(language: DW_LANG_C11, file: [[META1:![0-9]+]], producer: "{{.*}}clang version {{.*}}", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None)+// CHECK: [[META1]] = !DIFile(filename: "{{.*}}<stdin>", directory: {{.*}})+// CHECK: [[DBG5]] = distinct !DISubprogram(name: "load", scope: [[META6:![0-9]+]], file: [[META6]], line: 23, type: [[META7:![0-9]+]], scopeLine: 23, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: [[META0]], retainedNodes: [[META18:![0-9]+]], keyInstructions: true)+// CHECK: [[META6]] = !DIFile(filename: "{{.*}}asm.c", directory: {{.*}})+// CHECK: [[META7]] = !DISubroutineType(types: [[META8:![0-9]+]])+// CHECK: [[META8]] = !{null, [[META9:![0-9]+]], [[META17:![0-9]+]]}+// CHECK: [[META9]] = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: [[META10:![0-9]+]], size: 64)+// CHECK: [[META10]] = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "foo", file: [[META6]], line: 7, size: 512, elements: [[META11:![0-9]+]])+// CHECK: [[META11]] = !{[[META12:![0-9]+]]}+// CHECK: [[META12]] = !DIDerivedType(tag: DW_TAG_member, name: "x", scope: [[META10]], file: [[META6]], line: 7, baseType: [[META13:![0-9]+]], size: 512)+// CHECK: [[META13]] = !DICompositeType(tag: DW_TAG_array_type, baseType: [[META14:![0-9]+]], size: 512, elements: [[META15:![0-9]+]])+// CHECK: [[META14]] = !DIBasicType(name: "unsigned long long", size: 64, encoding: 
DW_ATE_unsigned)+// CHECK: [[META15]] = !{[[META16:![0-9]+]]}+// CHECK: [[META16]] = !DISubrange(count: 8)+// CHECK: [[META17]] = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: null, size: 64)+// CHECK: [[META18]] = !{}+// CHECK: [[META19]] = !DILocalVariable(name: "output", arg: 1, scope: [[DBG5]], file: [[META6]], line: 23, type: [[META9]])+// CHECK: [[META20]] = !DILocation(line: 23, scope: [[DBG5]])+// CHECK: [[META21]] = !DILocalVariable(name: "addr", arg: 2, scope: [[DBG5]], file: [[META6]], line: 23, type: [[META17]])+// CHECK: [[DBG22]] = !DILocation(line: 24, scope: [[DBG5]])+// CHECK: [[DBG23]] = !DILocation(line: 24, scope: [[DBG5]], atomGroup: 1, atomRank: 1)+// CHECK: [[META24]] = !{i64 1663}+// CHECK: [[DBG25]] = !DILocation(line: 24, scope: [[DBG5]], atomGroup: 2, atomRank: 1)+// CHECK: [[DBG26]] = !DILocation(line: 25, scope: [[DBG5]], atomGroup: 3, atomRank: 1)+// CHECK: [[DBG27]] = distinct !DISubprogram(name: "load2", scope: [[META6]], file: [[META6]], line: 42, type: [[META28:![0-9]+]], scopeLine: 42, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: [[META0]], retainedNodes: [[META18]], keyInstructions: true)+// CHECK: [[META28]] = !DISubroutineType(types: [[META29:![0-9]+]])+// CHECK: [[META29]] = !{null, [[META30:![0-9]+]], [[META17]]}+// CHECK: [[META30]] = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: [[META31:![0-9]+]], size: 64)+// CHECK: [[META31]] = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)+// CHECK: [[META32]] = !DILocalVariable(name: "output", arg: 1, scope: [[DBG27]], file: [[META6]], line: 42, type: [[META30]])+// CHECK: [[META33]] = !DILocation(line: 42, scope: [[DBG27]])+// CHECK: [[META34]] = !DILocalVariable(name: "addr", arg: 2, scope: [[DBG27]], file: [[META6]], line: 42, type: [[META17]])+// CHECK: [[DBG35]] = !DILocation(line: 43, scope: [[DBG27]])+// CHECK: [[DBG36]] = !DILocation(line: 43, scope: [[DBG27]], atomGroup: 1, atomRank: 1)+// CHECK: [[META37]] = !{i64 2918}+// CHECK: 
[[DBG38]] = !DILocation(line: 43, scope: [[DBG27]], atomGroup: 2, atomRank: 1)+// CHECK: [[DBG39]] = !DILocation(line: 44, scope: [[DBG27]], atomGroup: 3, atomRank: 1)+//. |
Copied an aarch64 test as it was the only one I could find that exercised both results-store codepaths.