[llvm] [AArch64] Allow splitting bitmasks for ANDS, EOR and ORR. (PR #149095)
Ricardo Jesus via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 17 08:55:29 PDT 2025
https://github.com/rj-jesus updated https://github.com/llvm/llvm-project/pull/149095
>From fcba958dea86b9134c29553fda66c3b603b4221a Mon Sep 17 00:00:00 2001
From: Ricardo Jesus <rjj at nvidia.com>
Date: Wed, 16 Jul 2025 03:05:41 -0700
Subject: [PATCH 1/2] Add tests.
These are based on the simple preexisting AND tests.
---
.../aarch64-split-and-bitmask-immediate.ll | 110 ++++++++++++++++++
1 file changed, 110 insertions(+)
diff --git a/llvm/test/CodeGen/AArch64/aarch64-split-and-bitmask-immediate.ll b/llvm/test/CodeGen/AArch64/aarch64-split-and-bitmask-immediate.ll
index e31c9a072dc4b..79a97bb805a20 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-split-and-bitmask-immediate.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-split-and-bitmask-immediate.ll
@@ -263,3 +263,113 @@ entry:
%conv = zext i1 %cmp to i8
ret i8 %conv
}
+
+; Test ANDS.
+define i32 @test1_ands(i32 %a) {
+; CHECK-LABEL: test1_ands:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov w8, #1024 // =0x400
+; CHECK-NEXT: movk w8, #32, lsl #16
+; CHECK-NEXT: ands w8, w0, w8
+; CHECK-NEXT: csel w0, w8, wzr, eq
+; CHECK-NEXT: ret
+entry:
+ %ands = and i32 %a, 2098176
+ %c = icmp eq i32 %ands, 0
+ %r = select i1 %c, i32 %ands, i32 0
+ ret i32 %r
+}
+
+; This constant should not be split because it can be handled by one mov.
+define i32 @test2_ands(i32 %a) {
+; CHECK-LABEL: test2_ands:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov w8, #135 // =0x87
+; CHECK-NEXT: ands w8, w0, w8
+; CHECK-NEXT: csel w0, w8, wzr, eq
+; CHECK-NEXT: ret
+entry:
+ %ands = and i32 %a, 135
+ %c = icmp eq i32 %ands, 0
+ %r = select i1 %c, i32 %ands, i32 0
+ ret i32 %r
+}
+
+; This constant should not be split because the split immediate is not valid
+; bitmask immediate.
+define i32 @test3_ands(i32 %a) {
+; CHECK-LABEL: test3_ands:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov w8, #1024 // =0x400
+; CHECK-NEXT: movk w8, #33, lsl #16
+; CHECK-NEXT: ands w8, w0, w8
+; CHECK-NEXT: csel w0, w8, wzr, eq
+; CHECK-NEXT: ret
+entry:
+ %ands = and i32 %a, 2163712
+ %c = icmp eq i32 %ands, 0
+ %r = select i1 %c, i32 %ands, i32 0
+ ret i32 %r
+}
+
+define i64 @test4_ands(i64 %a) {
+; CHECK-LABEL: test4_ands:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov w8, #1024 // =0x400
+; CHECK-NEXT: movk w8, #32, lsl #16
+; CHECK-NEXT: ands x8, x0, x8
+; CHECK-NEXT: csel x0, x8, xzr, eq
+; CHECK-NEXT: ret
+entry:
+ %ands = and i64 %a, 2098176
+ %c = icmp eq i64 %ands, 0
+ %r = select i1 %c, i64 %ands, i64 0
+ ret i64 %r
+}
+
+define i64 @test5_ands(i64 %a) {
+; CHECK-LABEL: test5_ands:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov x8, #16384 // =0x4000
+; CHECK-NEXT: movk x8, #2, lsl #32
+; CHECK-NEXT: ands x8, x0, x8
+; CHECK-NEXT: csel x0, x8, xzr, eq
+; CHECK-NEXT: ret
+entry:
+ %ands = and i64 %a, 8589950976
+ %c = icmp eq i64 %ands, 0
+ %r = select i1 %c, i64 %ands, i64 0
+ ret i64 %r
+}
+
+; This constant should not be split because it can be handled by one mov.
+define i64 @test6_ands(i64 %a) {
+; CHECK-LABEL: test6_ands:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov w8, #135 // =0x87
+; CHECK-NEXT: ands x8, x0, x8
+; CHECK-NEXT: csel x0, x8, xzr, eq
+; CHECK-NEXT: ret
+entry:
+ %ands = and i64 %a, 135
+ %c = icmp eq i64 %ands, 0
+ %r = select i1 %c, i64 %ands, i64 0
+ ret i64 %r
+}
+
+; This constant should not be split because the split immediate is not valid
+; bitmask immediate.
+define i64 @test7_ands(i64 %a) {
+; CHECK-LABEL: test7_ands:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov w8, #1024 // =0x400
+; CHECK-NEXT: movk w8, #33, lsl #16
+; CHECK-NEXT: ands x8, x0, x8
+; CHECK-NEXT: csel x0, x8, xzr, eq
+; CHECK-NEXT: ret
+entry:
+ %ands = and i64 %a, 2163712
+ %c = icmp eq i64 %ands, 0
+ %r = select i1 %c, i64 %ands, i64 0
+ ret i64 %r
+}
>From 2543774266baf9f154a7bdc7c65fc8b4fba070d8 Mon Sep 17 00:00:00 2001
From: Ricardo Jesus <rjj at nvidia.com>
Date: Wed, 16 Jul 2025 03:07:27 -0700
Subject: [PATCH 2/2] [AArch64] Allow splitting bitmasks for ANDS.
We already do this for AND; we can reuse the same infrastructure for
ANDS so long as the second instruction of the pair is ANDS.
---
.../Target/AArch64/AArch64MIPeepholeOpt.cpp | 28 +++++++++++--------
.../aarch64-split-and-bitmask-immediate.ll | 15 ++++------
2 files changed, 23 insertions(+), 20 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
index 0ddd17cee1344..392311ddbbf76 100644
--- a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
+++ b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
@@ -8,8 +8,8 @@
//
// This pass performs below peephole optimizations on MIR level.
//
-// 1. MOVi32imm + ANDWrr ==> ANDWri + ANDWri
-// MOVi64imm + ANDXrr ==> ANDXri + ANDXri
+// 1. MOVi32imm + ANDS?Wrr ==> ANDWri + ANDS?Wri
+// MOVi64imm + ANDS?Xrr ==> ANDXri + ANDS?Xri
//
// 2. MOVi32imm + ADDWrr ==> ADDWRi + ADDWRi
// MOVi64imm + ADDXrr ==> ANDXri + ANDXri
@@ -126,7 +126,7 @@ struct AArch64MIPeepholeOpt : public MachineFunctionPass {
bool visitADDSSUBS(OpcodePair PosOpcs, OpcodePair NegOpcs, MachineInstr &MI);
template <typename T>
- bool visitAND(unsigned Opc, MachineInstr &MI);
+ bool visitAND(unsigned Opc, MachineInstr &MI, unsigned OtherOpc = 0);
bool visitORR(MachineInstr &MI);
bool visitCSEL(MachineInstr &MI);
bool visitINSERT(MachineInstr &MI);
@@ -194,24 +194,24 @@ static bool splitBitmaskImm(T Imm, unsigned RegSize, T &Imm1Enc, T &Imm2Enc) {
}
template <typename T>
-bool AArch64MIPeepholeOpt::visitAND(
- unsigned Opc, MachineInstr &MI) {
+bool AArch64MIPeepholeOpt::visitAND(unsigned Opc, MachineInstr &MI,
+ unsigned OtherOpc) {
// Try below transformation.
//
- // MOVi32imm + ANDWrr ==> ANDWri + ANDWri
- // MOVi64imm + ANDXrr ==> ANDXri + ANDXri
+ // MOVi32imm + ANDS?Wrr ==> ANDWri + ANDS?Wri
+ // MOVi64imm + ANDS?Xrr ==> ANDXri + ANDS?Xri
//
// The mov pseudo instruction could be expanded to multiple mov instructions
// later. Let's try to split the constant operand of mov instruction into two
// bitmask immediates. It makes only two AND instructions instead of multiple
- // mov + and instructions.
+ // mov + AND instructions.
return splitTwoPartImm<T>(
MI,
- [Opc](T Imm, unsigned RegSize, T &Imm0,
- T &Imm1) -> std::optional<OpcodePair> {
+ [Opc, OtherOpc](T Imm, unsigned RegSize, T &Imm0,
+ T &Imm1) -> std::optional<OpcodePair> {
if (splitBitmaskImm(Imm, RegSize, Imm0, Imm1))
- return std::make_pair(Opc, Opc);
+ return std::make_pair(Opc, !OtherOpc ? Opc : OtherOpc);
return std::nullopt;
},
[&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0,
@@ -864,6 +864,12 @@ bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
case AArch64::ANDXrr:
Changed |= visitAND<uint64_t>(AArch64::ANDXri, MI);
break;
+ case AArch64::ANDSWrr:
+ Changed |= visitAND<uint32_t>(AArch64::ANDWri, MI, AArch64::ANDSWri);
+ break;
+ case AArch64::ANDSXrr:
+ Changed |= visitAND<uint64_t>(AArch64::ANDXri, MI, AArch64::ANDSXri);
+ break;
case AArch64::ORRWrs:
Changed |= visitORR(MI);
break;
diff --git a/llvm/test/CodeGen/AArch64/aarch64-split-and-bitmask-immediate.ll b/llvm/test/CodeGen/AArch64/aarch64-split-and-bitmask-immediate.ll
index 79a97bb805a20..062f420d8c453 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-split-and-bitmask-immediate.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-split-and-bitmask-immediate.ll
@@ -268,9 +268,8 @@ entry:
define i32 @test1_ands(i32 %a) {
; CHECK-LABEL: test1_ands:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov w8, #1024 // =0x400
-; CHECK-NEXT: movk w8, #32, lsl #16
-; CHECK-NEXT: ands w8, w0, w8
+; CHECK-NEXT: and w8, w0, #0x3ffc00
+; CHECK-NEXT: ands w8, w8, #0xffe007ff
; CHECK-NEXT: csel w0, w8, wzr, eq
; CHECK-NEXT: ret
entry:
@@ -315,9 +314,8 @@ entry:
define i64 @test4_ands(i64 %a) {
; CHECK-LABEL: test4_ands:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov w8, #1024 // =0x400
-; CHECK-NEXT: movk w8, #32, lsl #16
-; CHECK-NEXT: ands x8, x0, x8
+; CHECK-NEXT: and x8, x0, #0x3ffc00
+; CHECK-NEXT: ands x8, x8, #0xffffffffffe007ff
; CHECK-NEXT: csel x0, x8, xzr, eq
; CHECK-NEXT: ret
entry:
@@ -330,9 +328,8 @@ entry:
define i64 @test5_ands(i64 %a) {
; CHECK-LABEL: test5_ands:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov x8, #16384 // =0x4000
-; CHECK-NEXT: movk x8, #2, lsl #32
-; CHECK-NEXT: ands x8, x0, x8
+; CHECK-NEXT: and x8, x0, #0x3ffffc000
+; CHECK-NEXT: ands x8, x8, #0xfffffffe00007fff
; CHECK-NEXT: csel x0, x8, xzr, eq
; CHECK-NEXT: ret
entry:
More information about the llvm-commits
mailing list