- Notifications
You must be signed in to change notification settings - Fork 14.5k
[AArch64] Allow splitting bitmasks for ANDS. #149095
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base:main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-backend-aarch64 Author: Ricardo Jesus (rj-jesus) Changes: Fixes #148987. Full diff: https://github.com/llvm/llvm-project/pull/149095.diff 2 Files Affected:
diff --git a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cppindex 0ddd17cee1344..683692c4ecf20 100644--- a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp+++ b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp@@ -8,8 +8,8 @@ // // This pass performs below peephole optimizations on MIR level. //-// 1. MOVi32imm + ANDWrr ==> ANDWri + ANDWri-// MOVi64imm + ANDXrr ==> ANDXri + ANDXri+// 1. MOVi32imm + (ANDS?|EOR|ORR)Wrr ==> (AND|EOR|ORR)Wri + (ANDS?|EOR|ORR)Wri+// MOVi64imm + (ANDS?|EOR|ORR)Xrr ==> (AND|EOR|ORR)Xri + (ANDS?|EOR|ORR)Xri // // 2. MOVi32imm + ADDWrr ==> ADDWRi + ADDWRi // MOVi64imm + ADDXrr ==> ANDXri + ANDXri@@ -126,7 +126,7 @@ struct AArch64MIPeepholeOpt : public MachineFunctionPass { bool visitADDSSUBS(OpcodePair PosOpcs, OpcodePair NegOpcs, MachineInstr &MI); template <typename T>- bool visitAND(unsigned Opc, MachineInstr &MI);+ bool visitLogic(unsigned Opc, MachineInstr &MI, unsigned OtherOpc = 0); bool visitORR(MachineInstr &MI); bool visitCSEL(MachineInstr &MI); bool visitINSERT(MachineInstr &MI);@@ -194,24 +194,24 @@ static bool splitBitmaskImm(T Imm, unsigned RegSize, T &Imm1Enc, T &Imm2Enc) { } template <typename T>-bool AArch64MIPeepholeOpt::visitAND(- unsigned Opc, MachineInstr &MI) {+bool AArch64MIPeepholeOpt::visitLogic(unsigned Opc, MachineInstr &MI,+ unsigned OtherOpc) { // Try below transformation. //- // MOVi32imm + ANDWrr ==> ANDWri + ANDWri- // MOVi64imm + ANDXrr ==> ANDXri + ANDXri+ // MOVi32imm + (ANDS?|EOR|ORR)Wrr ==> (AND|EOR|ORR)Wri + (ANDS?|EOR|ORR)Wri+ // MOVi64imm + (ANDS?|EOR|ORR)Xrr ==> (AND|EOR|ORR)Xri + (ANDS?|EOR|ORR)Xri // // The mov pseudo instruction could be expanded to multiple mov instructions // later. Let's try to split the constant operand of mov instruction into two- // bitmask immediates. It makes only two AND instructions instead of multiple- // mov + and instructions.+ // bitmask immediates. 
It makes only two logic instructions instead of+ // multiple mov + logic instructions. return splitTwoPartImm<T>( MI,- [Opc](T Imm, unsigned RegSize, T &Imm0,- T &Imm1) -> std::optional<OpcodePair> {+ [Opc, OtherOpc](T Imm, unsigned RegSize, T &Imm0,+ T &Imm1) -> std::optional<OpcodePair> { if (splitBitmaskImm(Imm, RegSize, Imm0, Imm1))- return std::make_pair(Opc, Opc);+ return std::make_pair(Opc, !OtherOpc ? Opc : OtherOpc); return std::nullopt; }, [&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0,@@ -859,10 +859,28 @@ bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) { Changed |= visitINSERT(MI); break; case AArch64::ANDWrr:- Changed |= visitAND<uint32_t>(AArch64::ANDWri, MI);+ Changed |= visitLogic<uint32_t>(AArch64::ANDWri, MI); break; case AArch64::ANDXrr:- Changed |= visitAND<uint64_t>(AArch64::ANDXri, MI);+ Changed |= visitLogic<uint64_t>(AArch64::ANDXri, MI);+ break;+ case AArch64::ANDSWrr:+ Changed |= visitLogic<uint32_t>(AArch64::ANDWri, MI, AArch64::ANDSWri);+ break;+ case AArch64::ANDSXrr:+ Changed |= visitLogic<uint64_t>(AArch64::ANDXri, MI, AArch64::ANDSXri);+ break;+ case AArch64::EORWrr:+ Changed |= visitLogic<uint32_t>(AArch64::EORWri, MI);+ break;+ case AArch64::EORXrr:+ Changed |= visitLogic<uint64_t>(AArch64::EORXri, MI);+ break;+ case AArch64::ORRWrr:+ Changed |= visitLogic<uint32_t>(AArch64::ORRWri, MI);+ break;+ case AArch64::ORRXrr:+ Changed |= visitLogic<uint64_t>(AArch64::ORRXri, MI); break; case AArch64::ORRWrs: Changed |= visitORR(MI);diff --git a/llvm/test/CodeGen/AArch64/aarch64-split-and-bitmask-immediate.ll b/llvm/test/CodeGen/AArch64/aarch64-split-logic-bitmask-immediate.llsimilarity index 52%rename from llvm/test/CodeGen/AArch64/aarch64-split-and-bitmask-immediate.llrename to llvm/test/CodeGen/AArch64/aarch64-split-logic-bitmask-immediate.llindex e31c9a072dc4b..4245eb7ce9418 100644--- a/llvm/test/CodeGen/AArch64/aarch64-split-and-bitmask-immediate.ll+++ 
b/llvm/test/CodeGen/AArch64/aarch64-split-logic-bitmask-immediate.ll@@ -263,3 +263,282 @@ entry: %conv = zext i1 %cmp to i8 ret i8 %conv }++; Test ANDS.+define i32 @test1_ands(i32 %a) {+; CHECK-LABEL: test1_ands:+; CHECK: // %bb.0: // %entry+; CHECK-NEXT: and w8, w0, #0x3ffc00+; CHECK-NEXT: ands w8, w8, #0xffe007ff+; CHECK-NEXT: csel w0, w8, wzr, eq+; CHECK-NEXT: ret+entry:+ %ands = and i32 %a, 2098176+ %c = icmp eq i32 %ands, 0+ %r = select i1 %c, i32 %ands, i32 0+ ret i32 %r+}++; This constant should not be split because it can be handled by one mov.+define i32 @test2_ands(i32 %a) {+; CHECK-LABEL: test2_ands:+; CHECK: // %bb.0: // %entry+; CHECK-NEXT: mov w8, #135 // =0x87+; CHECK-NEXT: ands w8, w0, w8+; CHECK-NEXT: csel w0, w8, wzr, eq+; CHECK-NEXT: ret+entry:+ %ands = and i32 %a, 135+ %c = icmp eq i32 %ands, 0+ %r = select i1 %c, i32 %ands, i32 0+ ret i32 %r+}++; This constant should not be split because the split immediate is not valid+; bitmask immediate.+define i32 @test3_ands(i32 %a) {+; CHECK-LABEL: test3_ands:+; CHECK: // %bb.0: // %entry+; CHECK-NEXT: mov w8, #1024 // =0x400+; CHECK-NEXT: movk w8, #33, lsl #16+; CHECK-NEXT: ands w8, w0, w8+; CHECK-NEXT: csel w0, w8, wzr, eq+; CHECK-NEXT: ret+entry:+ %ands = and i32 %a, 2163712+ %c = icmp eq i32 %ands, 0+ %r = select i1 %c, i32 %ands, i32 0+ ret i32 %r+}++define i64 @test4_ands(i64 %a) {+; CHECK-LABEL: test4_ands:+; CHECK: // %bb.0: // %entry+; CHECK-NEXT: and x8, x0, #0x3ffc00+; CHECK-NEXT: ands x8, x8, #0xffffffffffe007ff+; CHECK-NEXT: csel x0, x8, xzr, eq+; CHECK-NEXT: ret+entry:+ %ands = and i64 %a, 2098176+ %c = icmp eq i64 %ands, 0+ %r = select i1 %c, i64 %ands, i64 0+ ret i64 %r+}++define i64 @test5_ands(i64 %a) {+; CHECK-LABEL: test5_ands:+; CHECK: // %bb.0: // %entry+; CHECK-NEXT: and x8, x0, #0x3ffffc000+; CHECK-NEXT: ands x8, x8, #0xfffffffe00007fff+; CHECK-NEXT: csel x0, x8, xzr, eq+; CHECK-NEXT: ret+entry:+ %ands = and i64 %a, 8589950976+ %c = icmp eq i64 %ands, 0+ %r = select i1 %c, i64 
%ands, i64 0+ ret i64 %r+}++; This constant should not be split because it can be handled by one mov.+define i64 @test6_ands(i64 %a) {+; CHECK-LABEL: test6_ands:+; CHECK: // %bb.0: // %entry+; CHECK-NEXT: mov w8, #135 // =0x87+; CHECK-NEXT: ands x8, x0, x8+; CHECK-NEXT: csel x0, x8, xzr, eq+; CHECK-NEXT: ret+entry:+ %ands = and i64 %a, 135+ %c = icmp eq i64 %ands, 0+ %r = select i1 %c, i64 %ands, i64 0+ ret i64 %r+}++; This constant should not be split because the split immediate is not valid+; bitmask immediate.+define i64 @test7_ands(i64 %a) {+; CHECK-LABEL: test7_ands:+; CHECK: // %bb.0: // %entry+; CHECK-NEXT: mov w8, #1024 // =0x400+; CHECK-NEXT: movk w8, #33, lsl #16+; CHECK-NEXT: ands x8, x0, x8+; CHECK-NEXT: csel x0, x8, xzr, eq+; CHECK-NEXT: ret+entry:+ %ands = and i64 %a, 2163712+ %c = icmp eq i64 %ands, 0+ %r = select i1 %c, i64 %ands, i64 0+ ret i64 %r+}++; Test EOR.+define i32 @test1_eor(i32 %a) {+; CHECK-LABEL: test1_eor:+; CHECK: // %bb.0: // %entry+; CHECK-NEXT: eor w8, w0, #0x3ffc00+; CHECK-NEXT: eor w0, w8, #0xffe007ff+; CHECK-NEXT: ret+entry:+ %eor = xor i32 %a, 2098176+ ret i32 %eor+}++; This constant should not be split because it can be handled by one mov.+define i32 @test2_eor(i32 %a) {+; CHECK-LABEL: test2_eor:+; CHECK: // %bb.0: // %entry+; CHECK-NEXT: mov w8, #135 // =0x87+; CHECK-NEXT: eor w0, w0, w8+; CHECK-NEXT: ret+entry:+ %eor = xor i32 %a, 135+ ret i32 %eor+}++; This constant should not be split because the split immediate is not valid+; bitmask immediate.+define i32 @test3_eor(i32 %a) {+; CHECK-LABEL: test3_eor:+; CHECK: // %bb.0: // %entry+; CHECK-NEXT: mov w8, #1024 // =0x400+; CHECK-NEXT: movk w8, #33, lsl #16+; CHECK-NEXT: eor w0, w0, w8+; CHECK-NEXT: ret+entry:+ %eor = xor i32 %a, 2163712+ ret i32 %eor+}++define i64 @test4_eor(i64 %a) {+; CHECK-LABEL: test4_eor:+; CHECK: // %bb.0: // %entry+; CHECK-NEXT: eor x8, x0, #0x3ffc00+; CHECK-NEXT: eor x0, x8, #0xffffffffffe007ff+; CHECK-NEXT: ret+entry:+ %eor = xor i64 %a, 2098176+ ret 
i64 %eor+}++define i64 @test5_eor(i64 %a) {+; CHECK-LABEL: test5_eor:+; CHECK: // %bb.0: // %entry+; CHECK-NEXT: eor x8, x0, #0x3ffffc000+; CHECK-NEXT: eor x0, x8, #0xfffffffe00007fff+; CHECK-NEXT: ret+entry:+ %eor = xor i64 %a, 8589950976+ ret i64 %eor+}++; This constant should not be split because it can be handled by one mov.+define i64 @test6_eor(i64 %a) {+; CHECK-LABEL: test6_eor:+; CHECK: // %bb.0: // %entry+; CHECK-NEXT: mov w8, #135 // =0x87+; CHECK-NEXT: eor x0, x0, x8+; CHECK-NEXT: ret+entry:+ %eor = xor i64 %a, 135+ ret i64 %eor+}++; This constant should not be split because the split immediate is not valid+; bitmask immediate.+define i64 @test7_eor(i64 %a) {+; CHECK-LABEL: test7_eor:+; CHECK: // %bb.0: // %entry+; CHECK-NEXT: mov w8, #1024 // =0x400+; CHECK-NEXT: movk w8, #33, lsl #16+; CHECK-NEXT: eor x0, x0, x8+; CHECK-NEXT: ret+entry:+ %eor = xor i64 %a, 2163712+ ret i64 %eor+}++; Test ORR.+define i32 @test1_or(i32 %a) {+; CHECK-LABEL: test1_or:+; CHECK: // %bb.0: // %entry+; CHECK-NEXT: orr w8, w0, #0x3ffc00+; CHECK-NEXT: orr w0, w8, #0xffe007ff+; CHECK-NEXT: ret+entry:+ %or = or i32 %a, 2098176+ ret i32 %or+}++; This constant should not be split because it can be handled by one mov.+define i32 @test2_or(i32 %a) {+; CHECK-LABEL: test2_or:+; CHECK: // %bb.0: // %entry+; CHECK-NEXT: mov w8, #135 // =0x87+; CHECK-NEXT: orr w0, w0, w8+; CHECK-NEXT: ret+entry:+ %or = or i32 %a, 135+ ret i32 %or+}++; This constant should not be split because the split immediate is not valid+; bitmask immediate.+define i32 @test3_or(i32 %a) {+; CHECK-LABEL: test3_or:+; CHECK: // %bb.0: // %entry+; CHECK-NEXT: mov w8, #1024 // =0x400+; CHECK-NEXT: movk w8, #33, lsl #16+; CHECK-NEXT: orr w0, w0, w8+; CHECK-NEXT: ret+entry:+ %or = or i32 %a, 2163712+ ret i32 %or+}++define i64 @test4_or(i64 %a) {+; CHECK-LABEL: test4_or:+; CHECK: // %bb.0: // %entry+; CHECK-NEXT: orr x8, x0, #0x3ffc00+; CHECK-NEXT: orr x0, x8, #0xffffffffffe007ff+; CHECK-NEXT: ret+entry:+ %or = or i64 %a, 
2098176+ ret i64 %or+}++define i64 @test5_or(i64 %a) {+; CHECK-LABEL: test5_or:+; CHECK: // %bb.0: // %entry+; CHECK-NEXT: orr x8, x0, #0x3ffffc000+; CHECK-NEXT: orr x0, x8, #0xfffffffe00007fff+; CHECK-NEXT: ret+entry:+ %or = or i64 %a, 8589950976+ ret i64 %or+}++; This constant should not be split because it can be handled by one mov.+define i64 @test6_or(i64 %a) {+; CHECK-LABEL: test6_or:+; CHECK: // %bb.0: // %entry+; CHECK-NEXT: mov w8, #135 // =0x87+; CHECK-NEXT: orr x0, x0, x8+; CHECK-NEXT: ret+entry:+ %or = or i64 %a, 135+ ret i64 %or+}++; This constant should not be split because the split immediate is not valid+; bitmask immediate.+define i64 @test7_or(i64 %a) {+; CHECK-LABEL: test7_or:+; CHECK: // %bb.0: // %entry+; CHECK-NEXT: mov w8, #1024 // =0x400+; CHECK-NEXT: movk w8, #33, lsl #16+; CHECK-NEXT: orr x0, x0, x8+; CHECK-NEXT: ret+entry:+ %or = or i64 %a, 2163712+ ret i64 %or+} |
@@ -263,3 +263,282 @@ entry: | |||
%conv = zext i1 %cmp to i8 | |||
ret i8 %conv | |||
} | |||
; Test ANDS. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The new tests are based on the simple preexisting AND tests.
Uh oh!
There was an error while loading. Please reload this page.
These are based on the simple preexisting AND tests.
We already do this for AND; we can reuse the same infrastructure for ANDS so long as the second instruction of the pair is ANDS.
3473c8a
to 2543774
Compare
I've narrowed the scope of this PR to ANDS and will look into adding support for EOR and ORR separately.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this sounds OK. If you can update the test cases a bit then it LGTM.
Uh oh!
There was an error while loading. Please reload this page.
Uh oh!
There was an error while loading. Please reload this page.
This is already done for AND; we can reuse its infrastructure for
ANDS so long as the second instruction of the pair is ANDS.