[llvm] [AArch64] Allow splitting bitmasks for ANDS, EOR and ORR. (PR #149095)
Ricardo Jesus via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 16 06:13:50 PDT 2025
https://github.com/rj-jesus created https://github.com/llvm/llvm-project/pull/149095
Fixes #148987.
>From 8ae4c47f72c425f94949ef827f69e3b080446667 Mon Sep 17 00:00:00 2001
From: Ricardo Jesus <rjj at nvidia.com>
Date: Wed, 16 Jul 2025 03:05:41 -0700
Subject: [PATCH 1/2] Add tests.
These are based on the simple preexisting AND tests.
---
... aarch64-split-logic-bitmask-immediate.ll} | 288 ++++++++++++++++++
1 file changed, 288 insertions(+)
rename llvm/test/CodeGen/AArch64/{aarch64-split-and-bitmask-immediate.ll => aarch64-split-logic-bitmask-immediate.ll} (51%)
diff --git a/llvm/test/CodeGen/AArch64/aarch64-split-and-bitmask-immediate.ll b/llvm/test/CodeGen/AArch64/aarch64-split-logic-bitmask-immediate.ll
similarity index 51%
rename from llvm/test/CodeGen/AArch64/aarch64-split-and-bitmask-immediate.ll
rename to llvm/test/CodeGen/AArch64/aarch64-split-logic-bitmask-immediate.ll
index e31c9a072dc4b..d3fe968f32099 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-split-and-bitmask-immediate.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-split-logic-bitmask-immediate.ll
@@ -263,3 +263,291 @@ entry:
%conv = zext i1 %cmp to i8
ret i8 %conv
}
+
+; Test ANDS.
+define i32 @test1_ands(i32 %a) {
+; CHECK-LABEL: test1_ands:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov w8, #1024 // =0x400
+; CHECK-NEXT: movk w8, #32, lsl #16
+; CHECK-NEXT: ands w8, w0, w8
+; CHECK-NEXT: csel w0, w8, wzr, eq
+; CHECK-NEXT: ret
+entry:
+ %ands = and i32 %a, 2098176
+ %c = icmp eq i32 %ands, 0
+ %r = select i1 %c, i32 %ands, i32 0
+ ret i32 %r
+}
+
+; This constant should not be split because it can be handled by one mov.
+define i32 @test2_ands(i32 %a) {
+; CHECK-LABEL: test2_ands:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov w8, #135 // =0x87
+; CHECK-NEXT: ands w8, w0, w8
+; CHECK-NEXT: csel w0, w8, wzr, eq
+; CHECK-NEXT: ret
+entry:
+ %ands = and i32 %a, 135
+ %c = icmp eq i32 %ands, 0
+ %r = select i1 %c, i32 %ands, i32 0
+ ret i32 %r
+}
+
+; This constant should not be split because the split immediate is not a valid
+; bitmask immediate.
+define i32 @test3_ands(i32 %a) {
+; CHECK-LABEL: test3_ands:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov w8, #1024 // =0x400
+; CHECK-NEXT: movk w8, #33, lsl #16
+; CHECK-NEXT: ands w8, w0, w8
+; CHECK-NEXT: csel w0, w8, wzr, eq
+; CHECK-NEXT: ret
+entry:
+ %ands = and i32 %a, 2163712
+ %c = icmp eq i32 %ands, 0
+ %r = select i1 %c, i32 %ands, i32 0
+ ret i32 %r
+}
+
+define i64 @test4_ands(i64 %a) {
+; CHECK-LABEL: test4_ands:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov w8, #1024 // =0x400
+; CHECK-NEXT: movk w8, #32, lsl #16
+; CHECK-NEXT: ands x8, x0, x8
+; CHECK-NEXT: csel x0, x8, xzr, eq
+; CHECK-NEXT: ret
+entry:
+ %ands = and i64 %a, 2098176
+ %c = icmp eq i64 %ands, 0
+ %r = select i1 %c, i64 %ands, i64 0
+ ret i64 %r
+}
+
+define i64 @test5_ands(i64 %a) {
+; CHECK-LABEL: test5_ands:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov x8, #16384 // =0x4000
+; CHECK-NEXT: movk x8, #2, lsl #32
+; CHECK-NEXT: ands x8, x0, x8
+; CHECK-NEXT: csel x0, x8, xzr, eq
+; CHECK-NEXT: ret
+entry:
+ %ands = and i64 %a, 8589950976
+ %c = icmp eq i64 %ands, 0
+ %r = select i1 %c, i64 %ands, i64 0
+ ret i64 %r
+}
+
+; This constant should not be split because it can be handled by one mov.
+define i64 @test6_ands(i64 %a) {
+; CHECK-LABEL: test6_ands:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov w8, #135 // =0x87
+; CHECK-NEXT: ands x8, x0, x8
+; CHECK-NEXT: csel x0, x8, xzr, eq
+; CHECK-NEXT: ret
+entry:
+ %ands = and i64 %a, 135
+ %c = icmp eq i64 %ands, 0
+ %r = select i1 %c, i64 %ands, i64 0
+ ret i64 %r
+}
+
+; This constant should not be split because the split immediate is not a valid
+; bitmask immediate.
+define i64 @test7_ands(i64 %a) {
+; CHECK-LABEL: test7_ands:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov w8, #1024 // =0x400
+; CHECK-NEXT: movk w8, #33, lsl #16
+; CHECK-NEXT: ands x8, x0, x8
+; CHECK-NEXT: csel x0, x8, xzr, eq
+; CHECK-NEXT: ret
+entry:
+ %ands = and i64 %a, 2163712
+ %c = icmp eq i64 %ands, 0
+ %r = select i1 %c, i64 %ands, i64 0
+ ret i64 %r
+}
+
+; Test EOR.
+define i32 @test1_eor(i32 %a) {
+; CHECK-LABEL: test1_eor:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov w8, #1024 // =0x400
+; CHECK-NEXT: movk w8, #32, lsl #16
+; CHECK-NEXT: eor w0, w0, w8
+; CHECK-NEXT: ret
+entry:
+ %eor = xor i32 %a, 2098176
+ ret i32 %eor
+}
+
+; This constant should not be split because it can be handled by one mov.
+define i32 @test2_eor(i32 %a) {
+; CHECK-LABEL: test2_eor:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov w8, #135 // =0x87
+; CHECK-NEXT: eor w0, w0, w8
+; CHECK-NEXT: ret
+entry:
+ %eor = xor i32 %a, 135
+ ret i32 %eor
+}
+
+; This constant should not be split because the split immediate is not a valid
+; bitmask immediate.
+define i32 @test3_eor(i32 %a) {
+; CHECK-LABEL: test3_eor:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov w8, #1024 // =0x400
+; CHECK-NEXT: movk w8, #33, lsl #16
+; CHECK-NEXT: eor w0, w0, w8
+; CHECK-NEXT: ret
+entry:
+ %eor = xor i32 %a, 2163712
+ ret i32 %eor
+}
+
+define i64 @test4_eor(i64 %a) {
+; CHECK-LABEL: test4_eor:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov w8, #1024 // =0x400
+; CHECK-NEXT: movk w8, #32, lsl #16
+; CHECK-NEXT: eor x0, x0, x8
+; CHECK-NEXT: ret
+entry:
+ %eor = xor i64 %a, 2098176
+ ret i64 %eor
+}
+
+define i64 @test5_eor(i64 %a) {
+; CHECK-LABEL: test5_eor:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov x8, #16384 // =0x4000
+; CHECK-NEXT: movk x8, #2, lsl #32
+; CHECK-NEXT: eor x0, x0, x8
+; CHECK-NEXT: ret
+entry:
+ %eor = xor i64 %a, 8589950976
+ ret i64 %eor
+}
+
+; This constant should not be split because it can be handled by one mov.
+define i64 @test6_eor(i64 %a) {
+; CHECK-LABEL: test6_eor:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov w8, #135 // =0x87
+; CHECK-NEXT: eor x0, x0, x8
+; CHECK-NEXT: ret
+entry:
+ %eor = xor i64 %a, 135
+ ret i64 %eor
+}
+
+; This constant should not be split because the split immediate is not a valid
+; bitmask immediate.
+define i64 @test7_eor(i64 %a) {
+; CHECK-LABEL: test7_eor:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov w8, #1024 // =0x400
+; CHECK-NEXT: movk w8, #33, lsl #16
+; CHECK-NEXT: eor x0, x0, x8
+; CHECK-NEXT: ret
+entry:
+ %eor = xor i64 %a, 2163712
+ ret i64 %eor
+}
+
+; Test ORR.
+define i32 @test1_or(i32 %a) {
+; CHECK-LABEL: test1_or:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov w8, #1024 // =0x400
+; CHECK-NEXT: movk w8, #32, lsl #16
+; CHECK-NEXT: orr w0, w0, w8
+; CHECK-NEXT: ret
+entry:
+ %or = or i32 %a, 2098176
+ ret i32 %or
+}
+
+; This constant should not be split because it can be handled by one mov.
+define i32 @test2_or(i32 %a) {
+; CHECK-LABEL: test2_or:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov w8, #135 // =0x87
+; CHECK-NEXT: orr w0, w0, w8
+; CHECK-NEXT: ret
+entry:
+ %or = or i32 %a, 135
+ ret i32 %or
+}
+
+; This constant should not be split because the split immediate is not a valid
+; bitmask immediate.
+define i32 @test3_or(i32 %a) {
+; CHECK-LABEL: test3_or:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov w8, #1024 // =0x400
+; CHECK-NEXT: movk w8, #33, lsl #16
+; CHECK-NEXT: orr w0, w0, w8
+; CHECK-NEXT: ret
+entry:
+ %or = or i32 %a, 2163712
+ ret i32 %or
+}
+
+define i64 @test4_or(i64 %a) {
+; CHECK-LABEL: test4_or:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov w8, #1024 // =0x400
+; CHECK-NEXT: movk w8, #32, lsl #16
+; CHECK-NEXT: orr x0, x0, x8
+; CHECK-NEXT: ret
+entry:
+ %or = or i64 %a, 2098176
+ ret i64 %or
+}
+
+define i64 @test5_or(i64 %a) {
+; CHECK-LABEL: test5_or:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov x8, #16384 // =0x4000
+; CHECK-NEXT: movk x8, #2, lsl #32
+; CHECK-NEXT: orr x0, x0, x8
+; CHECK-NEXT: ret
+entry:
+ %or = or i64 %a, 8589950976
+ ret i64 %or
+}
+
+; This constant should not be split because it can be handled by one mov.
+define i64 @test6_or(i64 %a) {
+; CHECK-LABEL: test6_or:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov w8, #135 // =0x87
+; CHECK-NEXT: orr x0, x0, x8
+; CHECK-NEXT: ret
+entry:
+ %or = or i64 %a, 135
+ ret i64 %or
+}
+
+; This constant should not be split because the split immediate is not a valid
+; bitmask immediate.
+define i64 @test7_or(i64 %a) {
+; CHECK-LABEL: test7_or:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov w8, #1024 // =0x400
+; CHECK-NEXT: movk w8, #33, lsl #16
+; CHECK-NEXT: orr x0, x0, x8
+; CHECK-NEXT: ret
+entry:
+ %or = or i64 %a, 2163712
+ ret i64 %or
+}
>From 3473c8a5314132461095dd5c74127105f1a112b7 Mon Sep 17 00:00:00 2001
From: Ricardo Jesus <rjj at nvidia.com>
Date: Wed, 16 Jul 2025 03:07:27 -0700
Subject: [PATCH 2/2] [AArch64] Allow splitting bitmasks for ANDS, EOR and ORR.
Fixes #148987.
---
.../Target/AArch64/AArch64MIPeepholeOpt.cpp | 46 +++++++++++++------
.../aarch64-split-logic-bitmask-immediate.ll | 45 ++++++++----------
2 files changed, 50 insertions(+), 41 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
index 0ddd17cee1344..683692c4ecf20 100644
--- a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
+++ b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
@@ -8,8 +8,8 @@
//
// This pass performs below peephole optimizations on MIR level.
//
-// 1. MOVi32imm + ANDWrr ==> ANDWri + ANDWri
-// MOVi64imm + ANDXrr ==> ANDXri + ANDXri
+// 1. MOVi32imm + (ANDS?|EOR|ORR)Wrr ==> (AND|EOR|ORR)Wri + (ANDS?|EOR|ORR)Wri
+// MOVi64imm + (ANDS?|EOR|ORR)Xrr ==> (AND|EOR|ORR)Xri + (ANDS?|EOR|ORR)Xri
//
// 2. MOVi32imm + ADDWrr ==> ADDWRi + ADDWRi
// MOVi64imm + ADDXrr ==> ANDXri + ANDXri
@@ -126,7 +126,7 @@ struct AArch64MIPeepholeOpt : public MachineFunctionPass {
bool visitADDSSUBS(OpcodePair PosOpcs, OpcodePair NegOpcs, MachineInstr &MI);
template <typename T>
- bool visitAND(unsigned Opc, MachineInstr &MI);
+ bool visitLogic(unsigned Opc, MachineInstr &MI, unsigned OtherOpc = 0);
bool visitORR(MachineInstr &MI);
bool visitCSEL(MachineInstr &MI);
bool visitINSERT(MachineInstr &MI);
@@ -194,24 +194,24 @@ static bool splitBitmaskImm(T Imm, unsigned RegSize, T &Imm1Enc, T &Imm2Enc) {
}
template <typename T>
-bool AArch64MIPeepholeOpt::visitAND(
- unsigned Opc, MachineInstr &MI) {
+bool AArch64MIPeepholeOpt::visitLogic(unsigned Opc, MachineInstr &MI,
+ unsigned OtherOpc) {
// Try below transformation.
//
- // MOVi32imm + ANDWrr ==> ANDWri + ANDWri
- // MOVi64imm + ANDXrr ==> ANDXri + ANDXri
+ // MOVi32imm + (ANDS?|EOR|ORR)Wrr ==> (AND|EOR|ORR)Wri + (ANDS?|EOR|ORR)Wri
+ // MOVi64imm + (ANDS?|EOR|ORR)Xrr ==> (AND|EOR|ORR)Xri + (ANDS?|EOR|ORR)Xri
//
// The mov pseudo instruction could be expanded to multiple mov instructions
// later. Let's try to split the constant operand of mov instruction into two
- // bitmask immediates. It makes only two AND instructions instead of multiple
- // mov + and instructions.
+ // bitmask immediates. It makes only two logic instructions instead of
+ // multiple mov + logic instructions.
return splitTwoPartImm<T>(
MI,
- [Opc](T Imm, unsigned RegSize, T &Imm0,
- T &Imm1) -> std::optional<OpcodePair> {
+ [Opc, OtherOpc](T Imm, unsigned RegSize, T &Imm0,
+ T &Imm1) -> std::optional<OpcodePair> {
if (splitBitmaskImm(Imm, RegSize, Imm0, Imm1))
- return std::make_pair(Opc, Opc);
+ return std::make_pair(Opc, !OtherOpc ? Opc : OtherOpc);
return std::nullopt;
},
[&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0,
@@ -859,10 +859,28 @@ bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
Changed |= visitINSERT(MI);
break;
case AArch64::ANDWrr:
- Changed |= visitAND<uint32_t>(AArch64::ANDWri, MI);
+ Changed |= visitLogic<uint32_t>(AArch64::ANDWri, MI);
break;
case AArch64::ANDXrr:
- Changed |= visitAND<uint64_t>(AArch64::ANDXri, MI);
+ Changed |= visitLogic<uint64_t>(AArch64::ANDXri, MI);
+ break;
+ case AArch64::ANDSWrr:
+ Changed |= visitLogic<uint32_t>(AArch64::ANDWri, MI, AArch64::ANDSWri);
+ break;
+ case AArch64::ANDSXrr:
+ Changed |= visitLogic<uint64_t>(AArch64::ANDXri, MI, AArch64::ANDSXri);
+ break;
+ case AArch64::EORWrr:
+ Changed |= visitLogic<uint32_t>(AArch64::EORWri, MI);
+ break;
+ case AArch64::EORXrr:
+ Changed |= visitLogic<uint64_t>(AArch64::EORXri, MI);
+ break;
+ case AArch64::ORRWrr:
+ Changed |= visitLogic<uint32_t>(AArch64::ORRWri, MI);
+ break;
+ case AArch64::ORRXrr:
+ Changed |= visitLogic<uint64_t>(AArch64::ORRXri, MI);
break;
case AArch64::ORRWrs:
Changed |= visitORR(MI);
diff --git a/llvm/test/CodeGen/AArch64/aarch64-split-logic-bitmask-immediate.ll b/llvm/test/CodeGen/AArch64/aarch64-split-logic-bitmask-immediate.ll
index d3fe968f32099..4245eb7ce9418 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-split-logic-bitmask-immediate.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-split-logic-bitmask-immediate.ll
@@ -268,9 +268,8 @@ entry:
define i32 @test1_ands(i32 %a) {
; CHECK-LABEL: test1_ands:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov w8, #1024 // =0x400
-; CHECK-NEXT: movk w8, #32, lsl #16
-; CHECK-NEXT: ands w8, w0, w8
+; CHECK-NEXT: and w8, w0, #0x3ffc00
+; CHECK-NEXT: ands w8, w8, #0xffe007ff
; CHECK-NEXT: csel w0, w8, wzr, eq
; CHECK-NEXT: ret
entry:
@@ -315,9 +314,8 @@ entry:
define i64 @test4_ands(i64 %a) {
; CHECK-LABEL: test4_ands:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov w8, #1024 // =0x400
-; CHECK-NEXT: movk w8, #32, lsl #16
-; CHECK-NEXT: ands x8, x0, x8
+; CHECK-NEXT: and x8, x0, #0x3ffc00
+; CHECK-NEXT: ands x8, x8, #0xffffffffffe007ff
; CHECK-NEXT: csel x0, x8, xzr, eq
; CHECK-NEXT: ret
entry:
@@ -330,9 +328,8 @@ entry:
define i64 @test5_ands(i64 %a) {
; CHECK-LABEL: test5_ands:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov x8, #16384 // =0x4000
-; CHECK-NEXT: movk x8, #2, lsl #32
-; CHECK-NEXT: ands x8, x0, x8
+; CHECK-NEXT: and x8, x0, #0x3ffffc000
+; CHECK-NEXT: ands x8, x8, #0xfffffffe00007fff
; CHECK-NEXT: csel x0, x8, xzr, eq
; CHECK-NEXT: ret
entry:
@@ -378,9 +375,8 @@ entry:
define i32 @test1_eor(i32 %a) {
; CHECK-LABEL: test1_eor:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov w8, #1024 // =0x400
-; CHECK-NEXT: movk w8, #32, lsl #16
-; CHECK-NEXT: eor w0, w0, w8
+; CHECK-NEXT: eor w8, w0, #0x3ffc00
+; CHECK-NEXT: eor w0, w8, #0xffe007ff
; CHECK-NEXT: ret
entry:
%eor = xor i32 %a, 2098176
@@ -416,9 +412,8 @@ entry:
define i64 @test4_eor(i64 %a) {
; CHECK-LABEL: test4_eor:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov w8, #1024 // =0x400
-; CHECK-NEXT: movk w8, #32, lsl #16
-; CHECK-NEXT: eor x0, x0, x8
+; CHECK-NEXT: eor x8, x0, #0x3ffc00
+; CHECK-NEXT: eor x0, x8, #0xffffffffffe007ff
; CHECK-NEXT: ret
entry:
%eor = xor i64 %a, 2098176
@@ -428,9 +423,8 @@ entry:
define i64 @test5_eor(i64 %a) {
; CHECK-LABEL: test5_eor:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov x8, #16384 // =0x4000
-; CHECK-NEXT: movk x8, #2, lsl #32
-; CHECK-NEXT: eor x0, x0, x8
+; CHECK-NEXT: eor x8, x0, #0x3ffffc000
+; CHECK-NEXT: eor x0, x8, #0xfffffffe00007fff
; CHECK-NEXT: ret
entry:
%eor = xor i64 %a, 8589950976
@@ -467,9 +461,8 @@ entry:
define i32 @test1_or(i32 %a) {
; CHECK-LABEL: test1_or:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov w8, #1024 // =0x400
-; CHECK-NEXT: movk w8, #32, lsl #16
-; CHECK-NEXT: orr w0, w0, w8
+; CHECK-NEXT: orr w8, w0, #0x3ffc00
+; CHECK-NEXT: orr w0, w8, #0xffe007ff
; CHECK-NEXT: ret
entry:
%or = or i32 %a, 2098176
@@ -505,9 +498,8 @@ entry:
define i64 @test4_or(i64 %a) {
; CHECK-LABEL: test4_or:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov w8, #1024 // =0x400
-; CHECK-NEXT: movk w8, #32, lsl #16
-; CHECK-NEXT: orr x0, x0, x8
+; CHECK-NEXT: orr x8, x0, #0x3ffc00
+; CHECK-NEXT: orr x0, x8, #0xffffffffffe007ff
; CHECK-NEXT: ret
entry:
%or = or i64 %a, 2098176
@@ -517,9 +509,8 @@ entry:
define i64 @test5_or(i64 %a) {
; CHECK-LABEL: test5_or:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov x8, #16384 // =0x4000
-; CHECK-NEXT: movk x8, #2, lsl #32
-; CHECK-NEXT: orr x0, x0, x8
+; CHECK-NEXT: orr x8, x0, #0x3ffffc000
+; CHECK-NEXT: orr x0, x8, #0xfffffffe00007fff
; CHECK-NEXT: ret
entry:
%or = or i64 %a, 8589950976
More information about the llvm-commits
mailing list