[llvm] [AArch64] Allow splitting bitmasks for EOR/ORR. (PR #150394)
Ricardo Jesus via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 6 04:05:59 PDT 2025
https://github.com/rj-jesus updated https://github.com/llvm/llvm-project/pull/150394
>From cfce02ae3b30417d34c328f342b5de874534731e Mon Sep 17 00:00:00 2001
From: Ricardo Jesus <rjj at nvidia.com>
Date: Wed, 23 Jul 2025 08:47:13 -0700
Subject: [PATCH 1/4] Add tests.
---
... aarch64-split-logic-bitmask-immediate.ll} | 178 ++++++++++++++++++
1 file changed, 178 insertions(+)
rename llvm/test/CodeGen/AArch64/{aarch64-split-and-bitmask-immediate.ll => aarch64-split-logic-bitmask-immediate.ll} (70%)
diff --git a/llvm/test/CodeGen/AArch64/aarch64-split-and-bitmask-immediate.ll b/llvm/test/CodeGen/AArch64/aarch64-split-logic-bitmask-immediate.ll
similarity index 70%
rename from llvm/test/CodeGen/AArch64/aarch64-split-and-bitmask-immediate.ll
rename to llvm/test/CodeGen/AArch64/aarch64-split-logic-bitmask-immediate.ll
index 113eb14ca4803..3644a78e2b713 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-split-and-bitmask-immediate.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-split-logic-bitmask-immediate.ll
@@ -370,3 +370,181 @@ entry:
%r = select i1 %c, i64 %a, i64 %ands
ret i64 %r
}
+
+; Test EOR.
+define i32 @test1_eor(i32 %a) {
+; CHECK-LABEL: test1_eor:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov w8, #1024 // =0x400
+; CHECK-NEXT: movk w8, #32, lsl #16
+; CHECK-NEXT: eor w0, w0, w8
+; CHECK-NEXT: ret
+entry:
+ %eor = xor i32 %a, 2098176
+ ret i32 %eor
+}
+
+; This constant should not be split because it can be handled by one mov.
+define i32 @test2_eor(i32 %a) {
+; CHECK-LABEL: test2_eor:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov w8, #135 // =0x87
+; CHECK-NEXT: eor w0, w0, w8
+; CHECK-NEXT: ret
+entry:
+ %eor = xor i32 %a, 135
+ ret i32 %eor
+}
+
+; This constant should not be split because the split immediate is not a valid
+; bitmask immediate.
+define i32 @test3_eor(i32 %a) {
+; CHECK-LABEL: test3_eor:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov w8, #1024 // =0x400
+; CHECK-NEXT: movk w8, #33, lsl #16
+; CHECK-NEXT: eor w0, w0, w8
+; CHECK-NEXT: ret
+entry:
+ %eor = xor i32 %a, 2163712
+ ret i32 %eor
+}
+
+define i64 @test4_eor(i64 %a) {
+; CHECK-LABEL: test4_eor:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov w8, #1024 // =0x400
+; CHECK-NEXT: movk w8, #32, lsl #16
+; CHECK-NEXT: eor x0, x0, x8
+; CHECK-NEXT: ret
+entry:
+ %eor = xor i64 %a, 2098176
+ ret i64 %eor
+}
+
+define i64 @test5_eor(i64 %a) {
+; CHECK-LABEL: test5_eor:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov x8, #16384 // =0x4000
+; CHECK-NEXT: movk x8, #2, lsl #32
+; CHECK-NEXT: eor x0, x0, x8
+; CHECK-NEXT: ret
+entry:
+ %eor = xor i64 %a, 8589950976
+ ret i64 %eor
+}
+
+; This constant should not be split because it can be handled by one mov.
+define i64 @test6_eor(i64 %a) {
+; CHECK-LABEL: test6_eor:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov w8, #135 // =0x87
+; CHECK-NEXT: eor x0, x0, x8
+; CHECK-NEXT: ret
+entry:
+ %eor = xor i64 %a, 135
+ ret i64 %eor
+}
+
+; This constant should not be split because the split immediate is not a valid
+; bitmask immediate.
+define i64 @test7_eor(i64 %a) {
+; CHECK-LABEL: test7_eor:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov w8, #1024 // =0x400
+; CHECK-NEXT: movk w8, #33, lsl #16
+; CHECK-NEXT: eor x0, x0, x8
+; CHECK-NEXT: ret
+entry:
+ %eor = xor i64 %a, 2163712
+ ret i64 %eor
+}
+
+; Test ORR.
+define i32 @test1_orr(i32 %a) {
+; CHECK-LABEL: test1_orr:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov w8, #1024 // =0x400
+; CHECK-NEXT: movk w8, #32, lsl #16
+; CHECK-NEXT: orr w0, w0, w8
+; CHECK-NEXT: ret
+entry:
+ %orr = or i32 %a, 2098176
+ ret i32 %orr
+}
+
+; This constant should not be split because it can be handled by one mov.
+define i32 @test2_orr(i32 %a) {
+; CHECK-LABEL: test2_orr:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov w8, #135 // =0x87
+; CHECK-NEXT: orr w0, w0, w8
+; CHECK-NEXT: ret
+entry:
+ %orr = or i32 %a, 135
+ ret i32 %orr
+}
+
+; This constant should not be split because the split immediate is not a valid
+; bitmask immediate.
+define i32 @test3_orr(i32 %a) {
+; CHECK-LABEL: test3_orr:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov w8, #1024 // =0x400
+; CHECK-NEXT: movk w8, #33, lsl #16
+; CHECK-NEXT: orr w0, w0, w8
+; CHECK-NEXT: ret
+entry:
+ %orr = or i32 %a, 2163712
+ ret i32 %orr
+}
+
+define i64 @test4_orr(i64 %a) {
+; CHECK-LABEL: test4_orr:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov w8, #1024 // =0x400
+; CHECK-NEXT: movk w8, #32, lsl #16
+; CHECK-NEXT: orr x0, x0, x8
+; CHECK-NEXT: ret
+entry:
+ %orr = or i64 %a, 2098176
+ ret i64 %orr
+}
+
+define i64 @test5_orr(i64 %a) {
+; CHECK-LABEL: test5_orr:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov x8, #16384 // =0x4000
+; CHECK-NEXT: movk x8, #2, lsl #32
+; CHECK-NEXT: orr x0, x0, x8
+; CHECK-NEXT: ret
+entry:
+ %orr = or i64 %a, 8589950976
+ ret i64 %orr
+}
+
+; This constant should not be split because it can be handled by one mov.
+define i64 @test6_orr(i64 %a) {
+; CHECK-LABEL: test6_orr:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov w8, #135 // =0x87
+; CHECK-NEXT: orr x0, x0, x8
+; CHECK-NEXT: ret
+entry:
+ %orr = or i64 %a, 135
+ ret i64 %orr
+}
+
+; This constant should not be split because the split immediate is not a valid
+; bitmask immediate.
+define i64 @test7_orr(i64 %a) {
+; CHECK-LABEL: test7_orr:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov w8, #1024 // =0x400
+; CHECK-NEXT: movk w8, #33, lsl #16
+; CHECK-NEXT: orr x0, x0, x8
+; CHECK-NEXT: ret
+entry:
+ %orr = or i64 %a, 2163712
+ ret i64 %orr
+}
>From a2424d81f68075ce5b3aff5bbfc08fb0c3a21547 Mon Sep 17 00:00:00 2001
From: Ricardo Jesus <rjj at nvidia.com>
Date: Wed, 16 Jul 2025 03:07:27 -0700
Subject: [PATCH 2/4] [AArch64] Allow splitting bitmasks for EOR/ORR.
This patch extends #149095 for EOR and ORR.
It uses a simple partition scheme to try to find two suitable disjoint
bitmasks that can be used with EOR/ORR to reconstruct the original mask.
---
.../Target/AArch64/AArch64MIPeepholeOpt.cpp | 63 ++++++++++++++++---
.../aarch64-split-logic-bitmask-immediate.ll | 30 ++++-----
2 files changed, 68 insertions(+), 25 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
index b97d6229b1d01..a32c21617860c 100644
--- a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
+++ b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
@@ -8,8 +8,8 @@
//
// This pass performs below peephole optimizations on MIR level.
//
-// 1. MOVi32imm + ANDS?Wrr ==> ANDWri + ANDS?Wri
-// MOVi64imm + ANDS?Xrr ==> ANDXri + ANDS?Xri
+// 1. MOVi32imm + (ANDS?|EOR|ORR)Wrr ==> (AND|EOR|ORR)Wri + (ANDS?|EOR|ORR)Wri
+// MOVi64imm + (ANDS?|EOR|ORR)Xrr ==> (AND|EOR|ORR)Xri + (ANDS?|EOR|ORR)Xri
//
// 2. MOVi32imm + ADDWrr ==> ADDWRi + ADDWRi
// MOVi64imm + ADDXrr ==> ADDXri + ADDXri
@@ -128,6 +128,7 @@ struct AArch64MIPeepholeOpt : public MachineFunctionPass {
// Strategy used to split logical immediate bitmasks.
enum class SplitStrategy {
Intersect,
+ Disjoint,
};
template <typename T>
bool trySplitLogicalImm(unsigned Opc, MachineInstr &MI,
@@ -190,19 +191,48 @@ static bool splitBitmaskImm(T Imm, unsigned RegSize, T &Imm1Enc, T &Imm2Enc) {
return true;
}
+template <typename T>
+static bool splitDisjointBitmaskImm(T Imm, unsigned RegSize, T &Imm1Enc,
+ T &Imm2Enc) {
+ // Try to split a bitmask of the form 0b00000000011000000000011110000000 into
+ // two disjoint masks such as 0b00000000011000000000000000000000 and
+ // 0b00000000000000000000011110000000 where the inclusive/exclusive OR of the
+ new masks matches the original mask.
+ unsigned LowestBitSet = llvm::countr_zero(Imm);
+ unsigned LowestGapBitUnset =
+ LowestBitSet + llvm::countr_one(Imm >> LowestBitSet);
+
+ // Create a mask for the least significant group of consecutive ones.
+ T NewImm1 = (static_cast<T>(1) << LowestGapBitUnset) -
+ (static_cast<T>(1) << LowestBitSet);
+ // Create a disjoint mask for the remaining ones.
+ T NewImm2 = Imm & ~NewImm1;
+ assert(((NewImm1 & NewImm2) == 0) && "Non-disjoint immediates!");
+
+ if (AArch64_AM::isLogicalImmediate(NewImm2, RegSize)) {
+ assert(((NewImm1 | NewImm2) == Imm) && "Invalid immediates!");
+ assert(((NewImm1 ^ NewImm2) == Imm) && "Invalid immediates!");
+ Imm1Enc = AArch64_AM::encodeLogicalImmediate(NewImm1, RegSize);
+ Imm2Enc = AArch64_AM::encodeLogicalImmediate(NewImm2, RegSize);
+ return true;
+ }
+
+ return false;
+}
+
template <typename T>
bool AArch64MIPeepholeOpt::trySplitLogicalImm(unsigned Opc, MachineInstr &MI,
SplitStrategy Strategy,
unsigned OtherOpc) {
- // Try below transformation.
+ // Try below transformations.
//
- // MOVi32imm + ANDS?Wrr ==> ANDWri + ANDS?Wri
- // MOVi64imm + ANDS?Xrr ==> ANDXri + ANDS?Xri
+ // MOVi32imm + (ANDS?|EOR|ORR)Wrr ==> (AND|EOR|ORR)Wri + (ANDS?|EOR|ORR)Wri
+ // MOVi64imm + (ANDS?|EOR|ORR)Xrr ==> (AND|EOR|ORR)Xri + (ANDS?|EOR|ORR)Xri
//
// The mov pseudo instruction could be expanded to multiple mov instructions
// later. Let's try to split the constant operand of mov instruction into two
- // bitmask immediates. It makes only two AND instructions instead of multiple
- // mov + and instructions.
+ // bitmask immediates based on the given split strategy. It makes only two
+ // logical instructions instead of multiple mov + logic instructions.
return splitTwoPartImm<T>(
MI,
@@ -224,6 +254,9 @@ bool AArch64MIPeepholeOpt::trySplitLogicalImm(unsigned Opc, MachineInstr &MI,
case SplitStrategy::Intersect:
SplitSucc = splitBitmaskImm(Imm, RegSize, Imm0, Imm1);
break;
+ case SplitStrategy::Disjoint:
+ SplitSucc = splitDisjointBitmaskImm(Imm, RegSize, Imm0, Imm1);
+ break;
}
if (SplitSucc)
return std::make_pair(Opc, !OtherOpc ? Opc : OtherOpc);
@@ -889,6 +922,22 @@ bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
Changed |= trySplitLogicalImm<uint64_t>(
AArch64::ANDXri, MI, SplitStrategy::Intersect, AArch64::ANDSXri);
break;
+ case AArch64::EORWrr:
+ Changed |= trySplitLogicalImm<uint32_t>(AArch64::EORWri, MI,
+ SplitStrategy::Disjoint);
+ break;
+ case AArch64::EORXrr:
+ Changed |= trySplitLogicalImm<uint64_t>(AArch64::EORXri, MI,
+ SplitStrategy::Disjoint);
+ break;
+ case AArch64::ORRWrr:
+ Changed |= trySplitLogicalImm<uint32_t>(AArch64::ORRWri, MI,
+ SplitStrategy::Disjoint);
+ break;
+ case AArch64::ORRXrr:
+ Changed |= trySplitLogicalImm<uint64_t>(AArch64::ORRXri, MI,
+ SplitStrategy::Disjoint);
+ break;
case AArch64::ORRWrs:
Changed |= visitORR(MI);
break;
diff --git a/llvm/test/CodeGen/AArch64/aarch64-split-logic-bitmask-immediate.ll b/llvm/test/CodeGen/AArch64/aarch64-split-logic-bitmask-immediate.ll
index 3644a78e2b713..4db9db9185206 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-split-logic-bitmask-immediate.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-split-logic-bitmask-immediate.ll
@@ -375,9 +375,8 @@ entry:
define i32 @test1_eor(i32 %a) {
; CHECK-LABEL: test1_eor:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov w8, #1024 // =0x400
-; CHECK-NEXT: movk w8, #32, lsl #16
-; CHECK-NEXT: eor w0, w0, w8
+; CHECK-NEXT: eor w8, w0, #0x400
+; CHECK-NEXT: eor w0, w8, #0x200000
; CHECK-NEXT: ret
entry:
%eor = xor i32 %a, 2098176
@@ -413,9 +412,8 @@ entry:
define i64 @test4_eor(i64 %a) {
; CHECK-LABEL: test4_eor:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov w8, #1024 // =0x400
-; CHECK-NEXT: movk w8, #32, lsl #16
-; CHECK-NEXT: eor x0, x0, x8
+; CHECK-NEXT: eor x8, x0, #0x400
+; CHECK-NEXT: eor x0, x8, #0x200000
; CHECK-NEXT: ret
entry:
%eor = xor i64 %a, 2098176
@@ -425,9 +423,8 @@ entry:
define i64 @test5_eor(i64 %a) {
; CHECK-LABEL: test5_eor:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov x8, #16384 // =0x4000
-; CHECK-NEXT: movk x8, #2, lsl #32
-; CHECK-NEXT: eor x0, x0, x8
+; CHECK-NEXT: eor x8, x0, #0x4000
+; CHECK-NEXT: eor x0, x8, #0x200000000
; CHECK-NEXT: ret
entry:
%eor = xor i64 %a, 8589950976
@@ -464,9 +461,8 @@ entry:
define i32 @test1_orr(i32 %a) {
; CHECK-LABEL: test1_orr:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov w8, #1024 // =0x400
-; CHECK-NEXT: movk w8, #32, lsl #16
-; CHECK-NEXT: orr w0, w0, w8
+; CHECK-NEXT: orr w8, w0, #0x400
+; CHECK-NEXT: orr w0, w8, #0x200000
; CHECK-NEXT: ret
entry:
%orr = or i32 %a, 2098176
@@ -502,9 +498,8 @@ entry:
define i64 @test4_orr(i64 %a) {
; CHECK-LABEL: test4_orr:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov w8, #1024 // =0x400
-; CHECK-NEXT: movk w8, #32, lsl #16
-; CHECK-NEXT: orr x0, x0, x8
+; CHECK-NEXT: orr x8, x0, #0x400
+; CHECK-NEXT: orr x0, x8, #0x200000
; CHECK-NEXT: ret
entry:
%orr = or i64 %a, 2098176
@@ -514,9 +509,8 @@ entry:
define i64 @test5_orr(i64 %a) {
; CHECK-LABEL: test5_orr:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov x8, #16384 // =0x4000
-; CHECK-NEXT: movk x8, #2, lsl #32
-; CHECK-NEXT: orr x0, x0, x8
+; CHECK-NEXT: orr x8, x0, #0x4000
+; CHECK-NEXT: orr x0, x8, #0x200000000
; CHECK-NEXT: ret
entry:
%orr = or i64 %a, 8589950976
>From 96275002a38ae7c4426ac3261809c2107726e0d9 Mon Sep 17 00:00:00 2001
From: Ricardo Jesus <rjj at nvidia.com>
Date: Thu, 31 Jul 2025 10:49:42 -0700
Subject: [PATCH 3/4] Improve asserts and restructure code flow.
---
.../Target/AArch64/AArch64MIPeepholeOpt.cpp | 20 +++++++++----------
1 file changed, 10 insertions(+), 10 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
index a32c21617860c..fb7a16de20673 100644
--- a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
+++ b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
@@ -164,6 +164,7 @@ INITIALIZE_PASS(AArch64MIPeepholeOpt, "aarch64-mi-peephole-opt",
template <typename T>
static bool splitBitmaskImm(T Imm, unsigned RegSize, T &Imm1Enc, T &Imm2Enc) {
T UImm = static_cast<T>(Imm);
+ assert(UImm && (UImm != ~static_cast<T>(0)) && "Invalid immediate!");
// The bitmask immediate consists of consecutive ones. Let's say there is
// constant 0b00000000001000000000010000000000 which does not consist of
@@ -194,6 +195,8 @@ static bool splitBitmaskImm(T Imm, unsigned RegSize, T &Imm1Enc, T &Imm2Enc) {
template <typename T>
static bool splitDisjointBitmaskImm(T Imm, unsigned RegSize, T &Imm1Enc,
T &Imm2Enc) {
+ assert(Imm && (Imm != ~static_cast<T>(0)) && "Invalid immediate!");
+
// Try to split a bitmask of the form 0b00000000011000000000011110000000 into
// two disjoint masks such as 0b00000000011000000000000000000000 and
// 0b00000000000000000000011110000000 where the inclusive/exclusive OR of the
@@ -207,17 +210,14 @@ static bool splitDisjointBitmaskImm(T Imm, unsigned RegSize, T &Imm1Enc,
(static_cast<T>(1) << LowestBitSet);
// Create a disjoint mask for the remaining ones.
T NewImm2 = Imm & ~NewImm1;
- assert(((NewImm1 & NewImm2) == 0) && "Non-disjoint immediates!");
-
- if (AArch64_AM::isLogicalImmediate(NewImm2, RegSize)) {
- assert(((NewImm1 | NewImm2) == Imm) && "Invalid immediates!");
- assert(((NewImm1 ^ NewImm2) == Imm) && "Invalid immediates!");
- Imm1Enc = AArch64_AM::encodeLogicalImmediate(NewImm1, RegSize);
- Imm2Enc = AArch64_AM::encodeLogicalImmediate(NewImm2, RegSize);
- return true;
- }
- return false;
+ // Do not split if NewImm2 is not a valid bitmask immediate.
+ if (!AArch64_AM::isLogicalImmediate(NewImm2, RegSize))
+ return false;
+
+ Imm1Enc = AArch64_AM::encodeLogicalImmediate(NewImm1, RegSize);
+ Imm2Enc = AArch64_AM::encodeLogicalImmediate(NewImm2, RegSize);
+ return true;
}
template <typename T>
>From 7ca15a7ed1a1c5b3b1e271476ed3c58cfcfb4fd9 Mon Sep 17 00:00:00 2001
From: Ricardo Jesus <rjj at nvidia.com>
Date: Wed, 6 Aug 2025 02:57:17 -0700
Subject: [PATCH 4/4] Guard code against UB.
---
llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp | 1 +
1 file changed, 1 insertion(+)
diff --git a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
index fb7a16de20673..fd4ef2aa28f8a 100644
--- a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
+++ b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
@@ -206,6 +206,7 @@ static bool splitDisjointBitmaskImm(T Imm, unsigned RegSize, T &Imm1Enc,
LowestBitSet + llvm::countr_one(Imm >> LowestBitSet);
// Create a mask for the least significant group of consecutive ones.
+ assert(LowestGapBitUnset < sizeof(T) * CHAR_BIT && "Undefined behaviour!");
T NewImm1 = (static_cast<T>(1) << LowestGapBitUnset) -
(static_cast<T>(1) << LowestBitSet);
// Create a disjoint mask for the remaining ones.
More information about the llvm-commits
mailing list