[llvm] [ARM][AArch64] shouldFoldMaskToVariableShiftPair should be true (PR #156886)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 4 06:57:03 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-aarch64
Author: AZero13 (AZero13)
<details>
<summary>Changes</summary>
For ARM, only do this for legal types because this has shown to result in worse code.
For AArch64, this is fine.
---
Full diff: https://github.com/llvm/llvm-project/pull/156886.diff
4 Files Affected:
- (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.h (+5)
- (modified) llvm/lib/Target/ARM/ARMISelLowering.h (+5)
- (added) llvm/test/CodeGen/AArch64/and-mask-variable.ll (+85)
- (added) llvm/test/CodeGen/ARM/and-mask-variable.ll (+90)
``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 46738365080f9..ddb79f8a97b4a 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -300,6 +300,11 @@ class AArch64TargetLowering : public TargetLowering {
bool shouldFoldConstantShiftPairToMask(const SDNode *N,
CombineLevel Level) const override;
+ /// Return true if it is profitable to fold a pair of shifts into a mask.
+ bool shouldFoldMaskToVariableShiftPair(SDValue Y) const override {
+ return true;
+ }
+
bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT,
unsigned SelectOpcode, SDValue X,
SDValue Y) const override;
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index 196ecb1b9f678..5a3baafb57b01 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -773,6 +773,11 @@ class VectorType;
bool shouldFoldConstantShiftPairToMask(const SDNode *N,
CombineLevel Level) const override;
+ /// Return true if it is profitable to fold a pair of shifts into a mask.
+ bool shouldFoldMaskToVariableShiftPair(SDValue Y) const override {
+ return isTypeLegal(Y.getValueType());
+ }
+
bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT,
unsigned SelectOpcode, SDValue X,
SDValue Y) const override;
diff --git a/llvm/test/CodeGen/AArch64/and-mask-variable.ll b/llvm/test/CodeGen/AArch64/and-mask-variable.ll
new file mode 100644
index 0000000000000..74c0a91a1d14e
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/and-mask-variable.ll
@@ -0,0 +1,85 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-none-elf -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc -mtriple=aarch64-none-elf -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI
+
+define i32 @mask_pair(i32 %x, i32 %y) {
+; CHECK-SD-LABEL: mask_pair:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: lsr w8, w0, w1
+; CHECK-SD-NEXT: lsl w0, w8, w1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: mask_pair:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mov w8, #-1 // =0xffffffff
+; CHECK-GI-NEXT: lsl w8, w8, w1
+; CHECK-GI-NEXT: and w0, w8, w0
+; CHECK-GI-NEXT: ret
+ %shl = shl nsw i32 -1, %y
+ %and = and i32 %shl, %x
+ ret i32 %and
+}
+
+define i64 @mask_pair_64(i64 %x, i64 %y) {
+; CHECK-SD-LABEL: mask_pair_64:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: lsr x8, x0, x1
+; CHECK-SD-NEXT: lsl x0, x8, x1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: mask_pair_64:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mov x8, #-1 // =0xffffffffffffffff
+; CHECK-GI-NEXT: lsl x8, x8, x1
+; CHECK-GI-NEXT: and x0, x8, x0
+; CHECK-GI-NEXT: ret
+ %shl = shl nsw i64 -1, %y
+ %and = and i64 %shl, %x
+ ret i64 %and
+}
+
+define i128 @mask_pair_128(i128 %x, i128 %y) {
+; CHECK-SD-LABEL: mask_pair_128:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: lsl x8, x1, #1
+; CHECK-SD-NEXT: mvn w9, w2
+; CHECK-SD-NEXT: lsr x10, x0, x2
+; CHECK-SD-NEXT: lsr x11, x1, x2
+; CHECK-SD-NEXT: tst x2, #0x40
+; CHECK-SD-NEXT: lsl x8, x8, x9
+; CHECK-SD-NEXT: orr x8, x8, x10
+; CHECK-SD-NEXT: csel x8, x11, x8, ne
+; CHECK-SD-NEXT: csel x11, xzr, x11, ne
+; CHECK-SD-NEXT: lsr x10, x8, #1
+; CHECK-SD-NEXT: lsl x8, x8, x2
+; CHECK-SD-NEXT: lsr x9, x10, x9
+; CHECK-SD-NEXT: lsl x10, x11, x2
+; CHECK-SD-NEXT: csel x0, xzr, x8, ne
+; CHECK-SD-NEXT: orr x9, x10, x9
+; CHECK-SD-NEXT: csel x1, x8, x9, ne
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: mask_pair_128:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mov w8, #64 // =0x40
+; CHECK-GI-NEXT: mov x9, #-1 // =0xffffffffffffffff
+; CHECK-GI-NEXT: sub x10, x2, #64
+; CHECK-GI-NEXT: sub x8, x8, x2
+; CHECK-GI-NEXT: lsl x11, x9, x2
+; CHECK-GI-NEXT: cmp x2, #64
+; CHECK-GI-NEXT: lsr x8, x9, x8
+; CHECK-GI-NEXT: lsl x9, x9, x10
+; CHECK-GI-NEXT: csel x10, x11, xzr, lo
+; CHECK-GI-NEXT: orr x8, x8, x11
+; CHECK-GI-NEXT: and x0, x10, x0
+; CHECK-GI-NEXT: csel x8, x8, x9, lo
+; CHECK-GI-NEXT: cmp x2, #0
+; CHECK-GI-NEXT: csinv x8, x8, xzr, ne
+; CHECK-GI-NEXT: and x1, x8, x1
+; CHECK-GI-NEXT: ret
+ %shl = shl nsw i128 -1, %y
+ %and = and i128 %shl, %x
+ ret i128 %and
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK: {{.*}}
diff --git a/llvm/test/CodeGen/ARM/and-mask-variable.ll b/llvm/test/CodeGen/ARM/and-mask-variable.ll
new file mode 100644
index 0000000000000..0f84b76f97a6b
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/and-mask-variable.ll
@@ -0,0 +1,90 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=thumbv7m-eabi %s -o - | FileCheck %s --check-prefix V7M
+; RUN: llc -mtriple=armv7a-eabi %s -o - | FileCheck %s --check-prefix V7A
+; RUN: llc -mtriple=thumbv7a-eabi %s -o - | FileCheck %s --check-prefix V7A-T
+; RUN: llc -mtriple=armv6m-eabi %s -o - | FileCheck %s --check-prefix V6M
+
+define i32 @mask_pair(i32 %x, i32 %y) {
+; V7M-LABEL: mask_pair:
+; V7M: @ %bb.0:
+; V7M-NEXT: lsrs r0, r1
+; V7M-NEXT: lsls r0, r1
+; V7M-NEXT: bx lr
+;
+; V7A-LABEL: mask_pair:
+; V7A: @ %bb.0:
+; V7A-NEXT: lsr r0, r0, r1
+; V7A-NEXT: lsl r0, r0, r1
+; V7A-NEXT: bx lr
+;
+; V7A-T-LABEL: mask_pair:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: lsrs r0, r1
+; V7A-T-NEXT: lsls r0, r1
+; V7A-T-NEXT: bx lr
+;
+; V6M-LABEL: mask_pair:
+; V6M: @ %bb.0:
+; V6M-NEXT: lsrs r0, r1
+; V6M-NEXT: lsls r0, r1
+; V6M-NEXT: bx lr
+ %shl = shl nsw i32 -1, %y
+ %and = and i32 %shl, %x
+ ret i32 %and
+}
+
+define i64 @mask_pair_64(i64 %x, i64 %y) {
+; V7M-LABEL: mask_pair_64:
+; V7M: @ %bb.0:
+; V7M-NEXT: mov.w r3, #-1
+; V7M-NEXT: lsl.w r12, r3, r2
+; V7M-NEXT: subs r2, #32
+; V7M-NEXT: it pl
+; V7M-NEXT: movpl.w r12, #0
+; V7M-NEXT: it pl
+; V7M-NEXT: lslpl r3, r2
+; V7M-NEXT: and.w r0, r0, r12
+; V7M-NEXT: ands r1, r3
+; V7M-NEXT: bx lr
+;
+; V7A-LABEL: mask_pair_64:
+; V7A: @ %bb.0:
+; V7A-NEXT: subs r12, r2, #32
+; V7A-NEXT: mvn r3, #0
+; V7A-NEXT: lsl r2, r3, r2
+; V7A-NEXT: lslpl r3, r3, r12
+; V7A-NEXT: movwpl r2, #0
+; V7A-NEXT: and r1, r3, r1
+; V7A-NEXT: and r0, r2, r0
+; V7A-NEXT: bx lr
+;
+; V7A-T-LABEL: mask_pair_64:
+; V7A-T: @ %bb.0:
+; V7A-T-NEXT: mov.w r3, #-1
+; V7A-T-NEXT: lsl.w r12, r3, r2
+; V7A-T-NEXT: subs r2, #32
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: movpl.w r12, #0
+; V7A-T-NEXT: it pl
+; V7A-T-NEXT: lslpl r3, r2
+; V7A-T-NEXT: and.w r0, r0, r12
+; V7A-T-NEXT: ands r1, r3
+; V7A-T-NEXT: bx lr
+;
+; V6M-LABEL: mask_pair_64:
+; V6M: @ %bb.0:
+; V6M-NEXT: .save {r4, r5, r7, lr}
+; V6M-NEXT: push {r4, r5, r7, lr}
+; V6M-NEXT: mov r4, r1
+; V6M-NEXT: mov r5, r0
+; V6M-NEXT: movs r0, #0
+; V6M-NEXT: mvns r0, r0
+; V6M-NEXT: mov r1, r0
+; V6M-NEXT: bl __aeabi_llsl
+; V6M-NEXT: ands r0, r5
+; V6M-NEXT: ands r1, r4
+; V6M-NEXT: pop {r4, r5, r7, pc}
+ %shl = shl nsw i64 -1, %y
+ %and = and i64 %shl, %x
+ ret i64 %and
+}
``````````
</details>
https://github.com/llvm/llvm-project/pull/156886
More information about the llvm-commits
mailing list