[llvm] [AArch64] Allow splitting bitmasks for ANDS, EOR and ORR. (PR #149095)

Ricardo Jesus via llvm-commits llvm-commits at lists.llvm.org
Thu Jul 17 08:55:29 PDT 2025


https://github.com/rj-jesus updated https://github.com/llvm/llvm-project/pull/149095

>From fcba958dea86b9134c29553fda66c3b603b4221a Mon Sep 17 00:00:00 2001
From: Ricardo Jesus <rjj at nvidia.com>
Date: Wed, 16 Jul 2025 03:05:41 -0700
Subject: [PATCH 1/2] Add tests.

These are based on the simple preexisting AND tests.
---
 .../aarch64-split-and-bitmask-immediate.ll    | 110 ++++++++++++++++++
 1 file changed, 110 insertions(+)

diff --git a/llvm/test/CodeGen/AArch64/aarch64-split-and-bitmask-immediate.ll b/llvm/test/CodeGen/AArch64/aarch64-split-and-bitmask-immediate.ll
index e31c9a072dc4b..79a97bb805a20 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-split-and-bitmask-immediate.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-split-and-bitmask-immediate.ll
@@ -263,3 +263,113 @@ entry:
   %conv = zext i1 %cmp to i8
   ret i8 %conv
 }
+
+; Test ANDS.
+define i32 @test1_ands(i32 %a) {
+; CHECK-LABEL: test1_ands:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, #1024 // =0x400
+; CHECK-NEXT:    movk w8, #32, lsl #16
+; CHECK-NEXT:    ands w8, w0, w8
+; CHECK-NEXT:    csel w0, w8, wzr, eq
+; CHECK-NEXT:    ret
+entry:
+  %ands = and i32 %a, 2098176
+  %c = icmp eq i32 %ands, 0
+  %r = select i1 %c, i32 %ands, i32 0
+  ret i32 %r
+}
+
+; This constant should not be split because it can be handled by one mov.
+define i32 @test2_ands(i32 %a) {
+; CHECK-LABEL: test2_ands:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, #135 // =0x87
+; CHECK-NEXT:    ands w8, w0, w8
+; CHECK-NEXT:    csel w0, w8, wzr, eq
+; CHECK-NEXT:    ret
+entry:
+  %ands = and i32 %a, 135
+  %c = icmp eq i32 %ands, 0
+  %r = select i1 %c, i32 %ands, i32 0
+  ret i32 %r
+}
+
+; This constant should not be split because the split immediate is not a
+; valid bitmask immediate.
+define i32 @test3_ands(i32 %a) {
+; CHECK-LABEL: test3_ands:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, #1024 // =0x400
+; CHECK-NEXT:    movk w8, #33, lsl #16
+; CHECK-NEXT:    ands w8, w0, w8
+; CHECK-NEXT:    csel w0, w8, wzr, eq
+; CHECK-NEXT:    ret
+entry:
+  %ands = and i32 %a, 2163712
+  %c = icmp eq i32 %ands, 0
+  %r = select i1 %c, i32 %ands, i32 0
+  ret i32 %r
+}
+
+define i64 @test4_ands(i64 %a) {
+; CHECK-LABEL: test4_ands:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, #1024 // =0x400
+; CHECK-NEXT:    movk w8, #32, lsl #16
+; CHECK-NEXT:    ands x8, x0, x8
+; CHECK-NEXT:    csel x0, x8, xzr, eq
+; CHECK-NEXT:    ret
+entry:
+  %ands = and i64 %a, 2098176
+  %c = icmp eq i64 %ands, 0
+  %r = select i1 %c, i64 %ands, i64 0
+  ret i64 %r
+}
+
+define i64 @test5_ands(i64 %a) {
+; CHECK-LABEL: test5_ands:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov x8, #16384 // =0x4000
+; CHECK-NEXT:    movk x8, #2, lsl #32
+; CHECK-NEXT:    ands x8, x0, x8
+; CHECK-NEXT:    csel x0, x8, xzr, eq
+; CHECK-NEXT:    ret
+entry:
+  %ands = and i64 %a, 8589950976
+  %c = icmp eq i64 %ands, 0
+  %r = select i1 %c, i64 %ands, i64 0
+  ret i64 %r
+}
+
+; This constant should not be split because it can be handled by one mov.
+define i64 @test6_ands(i64 %a) {
+; CHECK-LABEL: test6_ands:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, #135 // =0x87
+; CHECK-NEXT:    ands x8, x0, x8
+; CHECK-NEXT:    csel x0, x8, xzr, eq
+; CHECK-NEXT:    ret
+entry:
+  %ands = and i64 %a, 135
+  %c = icmp eq i64 %ands, 0
+  %r = select i1 %c, i64 %ands, i64 0
+  ret i64 %r
+}
+
+; This constant should not be split because the split immediate is not a
+; valid bitmask immediate.
+define i64 @test7_ands(i64 %a) {
+; CHECK-LABEL: test7_ands:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, #1024 // =0x400
+; CHECK-NEXT:    movk w8, #33, lsl #16
+; CHECK-NEXT:    ands x8, x0, x8
+; CHECK-NEXT:    csel x0, x8, xzr, eq
+; CHECK-NEXT:    ret
+entry:
+  %ands = and i64 %a, 2163712
+  %c = icmp eq i64 %ands, 0
+  %r = select i1 %c, i64 %ands, i64 0
+  ret i64 %r
+}

>From 2543774266baf9f154a7bdc7c65fc8b4fba070d8 Mon Sep 17 00:00:00 2001
From: Ricardo Jesus <rjj at nvidia.com>
Date: Wed, 16 Jul 2025 03:07:27 -0700
Subject: [PATCH 2/2] [AArch64] Allow splitting bitmasks for ANDS.

We already do this for AND; we can reuse the same infrastructure for
ANDS so long as the second instruction of the pair is ANDS.
---
 .../Target/AArch64/AArch64MIPeepholeOpt.cpp   | 28 +++++++++++--------
 .../aarch64-split-and-bitmask-immediate.ll    | 15 ++++------
 2 files changed, 23 insertions(+), 20 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
index 0ddd17cee1344..392311ddbbf76 100644
--- a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
+++ b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
@@ -8,8 +8,8 @@
 //
 // This pass performs below peephole optimizations on MIR level.
 //
-// 1. MOVi32imm + ANDWrr ==> ANDWri + ANDWri
-//    MOVi64imm + ANDXrr ==> ANDXri + ANDXri
+// 1. MOVi32imm + ANDS?Wrr ==> ANDWri + ANDS?Wri
+//    MOVi64imm + ANDS?Xrr ==> ANDXri + ANDS?Xri
 //
 // 2. MOVi32imm + ADDWrr ==> ADDWRi + ADDWRi
 //    MOVi64imm + ADDXrr ==> ANDXri + ANDXri
@@ -126,7 +126,7 @@ struct AArch64MIPeepholeOpt : public MachineFunctionPass {
   bool visitADDSSUBS(OpcodePair PosOpcs, OpcodePair NegOpcs, MachineInstr &MI);
 
   template <typename T>
-  bool visitAND(unsigned Opc, MachineInstr &MI);
+  bool visitAND(unsigned Opc, MachineInstr &MI, unsigned OtherOpc = 0);
   bool visitORR(MachineInstr &MI);
   bool visitCSEL(MachineInstr &MI);
   bool visitINSERT(MachineInstr &MI);
@@ -194,24 +194,24 @@ static bool splitBitmaskImm(T Imm, unsigned RegSize, T &Imm1Enc, T &Imm2Enc) {
 }
 
 template <typename T>
-bool AArch64MIPeepholeOpt::visitAND(
-    unsigned Opc, MachineInstr &MI) {
+bool AArch64MIPeepholeOpt::visitAND(unsigned Opc, MachineInstr &MI,
+                                    unsigned OtherOpc) {
   // Try below transformation.
   //
-  // MOVi32imm + ANDWrr ==> ANDWri + ANDWri
-  // MOVi64imm + ANDXrr ==> ANDXri + ANDXri
+  // MOVi32imm + ANDS?Wrr ==> ANDWri + ANDS?Wri
+  // MOVi64imm + ANDS?Xrr ==> ANDXri + ANDS?Xri
   //
   // The mov pseudo instruction could be expanded to multiple mov instructions
   // later. Let's try to split the constant operand of mov instruction into two
   // bitmask immediates. It makes only two AND instructions instead of multiple
-  // mov + and instructions.
+  // mov + AND instructions.
 
   return splitTwoPartImm<T>(
       MI,
-      [Opc](T Imm, unsigned RegSize, T &Imm0,
-            T &Imm1) -> std::optional<OpcodePair> {
+      [Opc, OtherOpc](T Imm, unsigned RegSize, T &Imm0,
+                      T &Imm1) -> std::optional<OpcodePair> {
         if (splitBitmaskImm(Imm, RegSize, Imm0, Imm1))
-          return std::make_pair(Opc, Opc);
+          return std::make_pair(Opc, !OtherOpc ? Opc : OtherOpc);
         return std::nullopt;
       },
       [&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0,
@@ -864,6 +864,12 @@ bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
       case AArch64::ANDXrr:
         Changed |= visitAND<uint64_t>(AArch64::ANDXri, MI);
         break;
+      case AArch64::ANDSWrr:
+        Changed |= visitAND<uint32_t>(AArch64::ANDWri, MI, AArch64::ANDSWri);
+        break;
+      case AArch64::ANDSXrr:
+        Changed |= visitAND<uint64_t>(AArch64::ANDXri, MI, AArch64::ANDSXri);
+        break;
       case AArch64::ORRWrs:
         Changed |= visitORR(MI);
         break;
diff --git a/llvm/test/CodeGen/AArch64/aarch64-split-and-bitmask-immediate.ll b/llvm/test/CodeGen/AArch64/aarch64-split-and-bitmask-immediate.ll
index 79a97bb805a20..062f420d8c453 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-split-and-bitmask-immediate.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-split-and-bitmask-immediate.ll
@@ -268,9 +268,8 @@ entry:
 define i32 @test1_ands(i32 %a) {
 ; CHECK-LABEL: test1_ands:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    mov w8, #1024 // =0x400
-; CHECK-NEXT:    movk w8, #32, lsl #16
-; CHECK-NEXT:    ands w8, w0, w8
+; CHECK-NEXT:    and w8, w0, #0x3ffc00
+; CHECK-NEXT:    ands w8, w8, #0xffe007ff
 ; CHECK-NEXT:    csel w0, w8, wzr, eq
 ; CHECK-NEXT:    ret
 entry:
@@ -315,9 +314,8 @@ entry:
 define i64 @test4_ands(i64 %a) {
 ; CHECK-LABEL: test4_ands:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    mov w8, #1024 // =0x400
-; CHECK-NEXT:    movk w8, #32, lsl #16
-; CHECK-NEXT:    ands x8, x0, x8
+; CHECK-NEXT:    and x8, x0, #0x3ffc00
+; CHECK-NEXT:    ands x8, x8, #0xffffffffffe007ff
 ; CHECK-NEXT:    csel x0, x8, xzr, eq
 ; CHECK-NEXT:    ret
 entry:
@@ -330,9 +328,8 @@ entry:
 define i64 @test5_ands(i64 %a) {
 ; CHECK-LABEL: test5_ands:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    mov x8, #16384 // =0x4000
-; CHECK-NEXT:    movk x8, #2, lsl #32
-; CHECK-NEXT:    ands x8, x0, x8
+; CHECK-NEXT:    and x8, x0, #0x3ffffc000
+; CHECK-NEXT:    ands x8, x8, #0xfffffffe00007fff
 ; CHECK-NEXT:    csel x0, x8, xzr, eq
 ; CHECK-NEXT:    ret
 entry:



More information about the llvm-commits mailing list