[llvm] [AArch64] shouldFoldMaskToVariableShiftPair should be true for scalars up to the biggest legal type (PR #158069)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 11 06:18:39 PDT 2025
https://github.com/AZero13 created https://github.com/llvm/llvm-project/pull/158069
For AArch64, we want to do this fold for scalars up to 64 bits wide. For wider types, the variable shift pair results in bloated code.
From 891ba1526a02771bb60477ca495a724ec2a7937b Mon Sep 17 00:00:00 2001
From: AZero13 <gfunni234 at gmail.com>
Date: Thu, 11 Sep 2025 09:15:23 -0400
Subject: [PATCH 1/2] Pre-commit test (NFC)
---
.../test/CodeGen/AArch64/and-mask-variable.ll | 30 +++++++++++++++++++
1 file changed, 30 insertions(+)
create mode 100644 llvm/test/CodeGen/AArch64/and-mask-variable.ll
diff --git a/llvm/test/CodeGen/AArch64/and-mask-variable.ll b/llvm/test/CodeGen/AArch64/and-mask-variable.ll
new file mode 100644
index 0000000000000..a92f3cf5ec092
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/and-mask-variable.ll
@@ -0,0 +1,30 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-none-elf -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc -mtriple=aarch64-none-elf -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI
+
+define i32 @mask_pair(i32 %x, i32 %y) {
+; CHECK-LABEL: mask_pair:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #-1 // =0xffffffff
+; CHECK-NEXT: lsl w8, w8, w1
+; CHECK-NEXT: and w0, w8, w0
+; CHECK-NEXT: ret
+ %shl = shl nsw i32 -1, %y
+ %and = and i32 %shl, %x
+ ret i32 %and
+}
+
+define i64 @mask_pair_64(i64 %x, i64 %y) {
+; CHECK-LABEL: mask_pair_64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov x8, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: lsl x8, x8, x1
+; CHECK-NEXT: and x0, x8, x0
+; CHECK-NEXT: ret
+ %shl = shl nsw i64 -1, %y
+ %and = and i64 %shl, %x
+ ret i64 %and
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK-GI: {{.*}}
+; CHECK-SD: {{.*}}
From 90723996e8df05e9399e6f3b62f3be7293ff622a Mon Sep 17 00:00:00 2001
From: AZero13 <gfunni234 at gmail.com>
Date: Thu, 11 Sep 2025 09:16:29 -0400
Subject: [PATCH 2/2] [AArch64] shouldFoldMaskToVariableShiftPair should be
true for scalars up to the biggest legal type
For AArch64, we want to do this for scalars up to the biggest legal type.
---
llvm/lib/Target/AArch64/AArch64ISelLowering.h | 10 ++
.../test/CodeGen/AArch64/and-mask-variable.ll | 78 ++++++++++++---
llvm/test/CodeGen/AArch64/extract-bits.ll | 98 ++++++++-----------
llvm/test/CodeGen/AArch64/extract-lowbits.ll | 66 ++++++-------
4 files changed, 145 insertions(+), 107 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 46738365080f9..9665bc871b6b9 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -300,6 +300,16 @@ class AArch64TargetLowering : public TargetLowering {
bool shouldFoldConstantShiftPairToMask(const SDNode *N,
CombineLevel Level) const override;
+ /// Return true if it is profitable to fold a variable mask into a pair of shifts.
+ bool shouldFoldMaskToVariableShiftPair(SDValue Y) const override {
+ EVT VT = Y.getValueType();
+
+ if (VT.isVector())
+ return false;
+
+ return VT.getScalarSizeInBits() <= 64;
+ }
+
bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT,
unsigned SelectOpcode, SDValue X,
SDValue Y) const override;
diff --git a/llvm/test/CodeGen/AArch64/and-mask-variable.ll b/llvm/test/CodeGen/AArch64/and-mask-variable.ll
index a92f3cf5ec092..f41cdc6dd241b 100644
--- a/llvm/test/CodeGen/AArch64/and-mask-variable.ll
+++ b/llvm/test/CodeGen/AArch64/and-mask-variable.ll
@@ -3,28 +3,78 @@
; RUN: llc -mtriple=aarch64-none-elf -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI
define i32 @mask_pair(i32 %x, i32 %y) {
-; CHECK-LABEL: mask_pair:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #-1 // =0xffffffff
-; CHECK-NEXT: lsl w8, w8, w1
-; CHECK-NEXT: and w0, w8, w0
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: mask_pair:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: lsr w8, w0, w1
+; CHECK-SD-NEXT: lsl w0, w8, w1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: mask_pair:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mov w8, #-1 // =0xffffffff
+; CHECK-GI-NEXT: lsl w8, w8, w1
+; CHECK-GI-NEXT: and w0, w8, w0
+; CHECK-GI-NEXT: ret
%shl = shl nsw i32 -1, %y
%and = and i32 %shl, %x
ret i32 %and
}
define i64 @mask_pair_64(i64 %x, i64 %y) {
-; CHECK-LABEL: mask_pair_64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov x8, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: lsl x8, x8, x1
-; CHECK-NEXT: and x0, x8, x0
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: mask_pair_64:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: lsr x8, x0, x1
+; CHECK-SD-NEXT: lsl x0, x8, x1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: mask_pair_64:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mov x8, #-1 // =0xffffffffffffffff
+; CHECK-GI-NEXT: lsl x8, x8, x1
+; CHECK-GI-NEXT: and x0, x8, x0
+; CHECK-GI-NEXT: ret
%shl = shl nsw i64 -1, %y
%and = and i64 %shl, %x
ret i64 %and
}
+
+define i128 @mask_pair_128(i128 %x, i128 %y) {
+; CHECK-SD-LABEL: mask_pair_128:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: mov x8, #-1 // =0xffffffffffffffff
+; CHECK-SD-NEXT: mvn w9, w2
+; CHECK-SD-NEXT: mov x10, #9223372036854775807 // =0x7fffffffffffffff
+; CHECK-SD-NEXT: lsl x8, x8, x2
+; CHECK-SD-NEXT: lsr x9, x10, x9
+; CHECK-SD-NEXT: tst x2, #0x40
+; CHECK-SD-NEXT: orr x9, x8, x9
+; CHECK-SD-NEXT: csel x9, x8, x9, ne
+; CHECK-SD-NEXT: csel x8, xzr, x8, ne
+; CHECK-SD-NEXT: and x0, x8, x0
+; CHECK-SD-NEXT: and x1, x9, x1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: mask_pair_128:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mov w8, #64 // =0x40
+; CHECK-GI-NEXT: mov x9, #-1 // =0xffffffffffffffff
+; CHECK-GI-NEXT: sub x10, x2, #64
+; CHECK-GI-NEXT: sub x8, x8, x2
+; CHECK-GI-NEXT: lsl x11, x9, x2
+; CHECK-GI-NEXT: cmp x2, #64
+; CHECK-GI-NEXT: lsr x8, x9, x8
+; CHECK-GI-NEXT: lsl x9, x9, x10
+; CHECK-GI-NEXT: csel x10, x11, xzr, lo
+; CHECK-GI-NEXT: orr x8, x8, x11
+; CHECK-GI-NEXT: and x0, x10, x0
+; CHECK-GI-NEXT: csel x8, x8, x9, lo
+; CHECK-GI-NEXT: cmp x2, #0
+; CHECK-GI-NEXT: csinv x8, x8, xzr, ne
+; CHECK-GI-NEXT: and x1, x8, x1
+; CHECK-GI-NEXT: ret
+ %shl = shl nsw i128 -1, %y
+ %and = and i128 %shl, %x
+ ret i128 %and
+}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; CHECK-GI: {{.*}}
-; CHECK-SD: {{.*}}
+; CHECK: {{.*}}
diff --git a/llvm/test/CodeGen/AArch64/extract-bits.ll b/llvm/test/CodeGen/AArch64/extract-bits.ll
index 8e822d19a19b9..5a96116142b51 100644
--- a/llvm/test/CodeGen/AArch64/extract-bits.ll
+++ b/llvm/test/CodeGen/AArch64/extract-bits.ll
@@ -532,11 +532,10 @@ define i32 @bextr64_32_b2(i64 %val, i64 %numskipbits, i8 %numlowbits) nounwind {
define i32 @bextr32_c0(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind {
; CHECK-LABEL: bextr32_c0:
; CHECK: // %bb.0:
-; CHECK-NEXT: neg w8, w2
-; CHECK-NEXT: mov w9, #-1 // =0xffffffff
-; CHECK-NEXT: lsr w10, w0, w1
-; CHECK-NEXT: lsr w8, w9, w8
-; CHECK-NEXT: and w0, w8, w10
+; CHECK-NEXT: lsr w8, w0, w1
+; CHECK-NEXT: neg w9, w2
+; CHECK-NEXT: lsl w8, w8, w9
+; CHECK-NEXT: lsr w0, w8, w9
; CHECK-NEXT: ret
%shifted = lshr i32 %val, %numskipbits
%numhighbits = sub i32 32, %numlowbits
@@ -548,12 +547,11 @@ define i32 @bextr32_c0(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind {
define i32 @bextr32_c1_indexzext(i32 %val, i8 %numskipbits, i8 %numlowbits) nounwind {
; CHECK-LABEL: bextr32_c1_indexzext:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #32 // =0x20
-; CHECK-NEXT: mov w9, #-1 // =0xffffffff
-; CHECK-NEXT: lsr w10, w0, w1
-; CHECK-NEXT: sub w8, w8, w2
-; CHECK-NEXT: lsr w8, w9, w8
-; CHECK-NEXT: and w0, w8, w10
+; CHECK-NEXT: lsr w8, w0, w1
+; CHECK-NEXT: mov w9, #32 // =0x20
+; CHECK-NEXT: sub w9, w9, w2
+; CHECK-NEXT: lsl w8, w8, w9
+; CHECK-NEXT: lsr w0, w8, w9
; CHECK-NEXT: ret
%skip = zext i8 %numskipbits to i32
%shifted = lshr i32 %val, %skip
@@ -569,10 +567,9 @@ define i32 @bextr32_c2_load(ptr %w, i32 %numskipbits, i32 %numlowbits) nounwind
; CHECK: // %bb.0:
; CHECK-NEXT: ldr w8, [x0]
; CHECK-NEXT: neg w9, w2
-; CHECK-NEXT: mov w10, #-1 // =0xffffffff
-; CHECK-NEXT: lsr w9, w10, w9
; CHECK-NEXT: lsr w8, w8, w1
-; CHECK-NEXT: and w0, w9, w8
+; CHECK-NEXT: lsl w8, w8, w9
+; CHECK-NEXT: lsr w0, w8, w9
; CHECK-NEXT: ret
%val = load i32, ptr %w
%shifted = lshr i32 %val, %numskipbits
@@ -587,11 +584,10 @@ define i32 @bextr32_c3_load_indexzext(ptr %w, i8 %numskipbits, i8 %numlowbits) n
; CHECK: // %bb.0:
; CHECK-NEXT: ldr w8, [x0]
; CHECK-NEXT: mov w9, #32 // =0x20
-; CHECK-NEXT: mov w10, #-1 // =0xffffffff
; CHECK-NEXT: sub w9, w9, w2
; CHECK-NEXT: lsr w8, w8, w1
-; CHECK-NEXT: lsr w9, w10, w9
-; CHECK-NEXT: and w0, w9, w8
+; CHECK-NEXT: lsl w8, w8, w9
+; CHECK-NEXT: lsr w0, w8, w9
; CHECK-NEXT: ret
%val = load i32, ptr %w
%skip = zext i8 %numskipbits to i32
@@ -606,11 +602,10 @@ define i32 @bextr32_c3_load_indexzext(ptr %w, i8 %numskipbits, i8 %numlowbits) n
define i32 @bextr32_c4_commutative(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind {
; CHECK-LABEL: bextr32_c4_commutative:
; CHECK: // %bb.0:
-; CHECK-NEXT: neg w8, w2
-; CHECK-NEXT: mov w9, #-1 // =0xffffffff
-; CHECK-NEXT: lsr w10, w0, w1
-; CHECK-NEXT: lsr w8, w9, w8
-; CHECK-NEXT: and w0, w10, w8
+; CHECK-NEXT: lsr w8, w0, w1
+; CHECK-NEXT: neg w9, w2
+; CHECK-NEXT: lsl w8, w8, w9
+; CHECK-NEXT: lsr w0, w8, w9
; CHECK-NEXT: ret
%shifted = lshr i32 %val, %numskipbits
%numhighbits = sub i32 32, %numlowbits
@@ -624,11 +619,10 @@ define i32 @bextr32_c4_commutative(i32 %val, i32 %numskipbits, i32 %numlowbits)
define i64 @bextr64_c0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind {
; CHECK-LABEL: bextr64_c0:
; CHECK: // %bb.0:
-; CHECK-NEXT: neg x8, x2
-; CHECK-NEXT: mov x9, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: lsr x10, x0, x1
-; CHECK-NEXT: lsr x8, x9, x8
-; CHECK-NEXT: and x0, x8, x10
+; CHECK-NEXT: lsr x8, x0, x1
+; CHECK-NEXT: neg x9, x2
+; CHECK-NEXT: lsl x8, x8, x9
+; CHECK-NEXT: lsr x0, x8, x9
; CHECK-NEXT: ret
%shifted = lshr i64 %val, %numskipbits
%numhighbits = sub i64 64, %numlowbits
@@ -640,13 +634,12 @@ define i64 @bextr64_c0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind {
define i64 @bextr64_c1_indexzext(i64 %val, i8 %numskipbits, i8 %numlowbits) nounwind {
; CHECK-LABEL: bextr64_c1_indexzext:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #64 // =0x40
-; CHECK-NEXT: mov x9, #-1 // =0xffffffffffffffff
; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
-; CHECK-NEXT: lsr x10, x0, x1
-; CHECK-NEXT: sub w8, w8, w2
-; CHECK-NEXT: lsr x8, x9, x8
-; CHECK-NEXT: and x0, x8, x10
+; CHECK-NEXT: lsr x8, x0, x1
+; CHECK-NEXT: mov w9, #64 // =0x40
+; CHECK-NEXT: sub w9, w9, w2
+; CHECK-NEXT: lsl x8, x8, x9
+; CHECK-NEXT: lsr x0, x8, x9
; CHECK-NEXT: ret
%skip = zext i8 %numskipbits to i64
%shifted = lshr i64 %val, %skip
@@ -662,10 +655,9 @@ define i64 @bextr64_c2_load(ptr %w, i64 %numskipbits, i64 %numlowbits) nounwind
; CHECK: // %bb.0:
; CHECK-NEXT: ldr x8, [x0]
; CHECK-NEXT: neg x9, x2
-; CHECK-NEXT: mov x10, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: lsr x9, x10, x9
; CHECK-NEXT: lsr x8, x8, x1
-; CHECK-NEXT: and x0, x9, x8
+; CHECK-NEXT: lsl x8, x8, x9
+; CHECK-NEXT: lsr x0, x8, x9
; CHECK-NEXT: ret
%val = load i64, ptr %w
%shifted = lshr i64 %val, %numskipbits
@@ -679,13 +671,12 @@ define i64 @bextr64_c3_load_indexzext(ptr %w, i8 %numskipbits, i8 %numlowbits) n
; CHECK-LABEL: bextr64_c3_load_indexzext:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr x8, [x0]
+; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
; CHECK-NEXT: mov w9, #64 // =0x40
-; CHECK-NEXT: mov x10, #-1 // =0xffffffffffffffff
; CHECK-NEXT: sub w9, w9, w2
-; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
; CHECK-NEXT: lsr x8, x8, x1
-; CHECK-NEXT: lsr x9, x10, x9
-; CHECK-NEXT: and x0, x9, x8
+; CHECK-NEXT: lsl x8, x8, x9
+; CHECK-NEXT: lsr x0, x8, x9
; CHECK-NEXT: ret
%val = load i64, ptr %w
%skip = zext i8 %numskipbits to i64
@@ -700,11 +691,10 @@ define i64 @bextr64_c3_load_indexzext(ptr %w, i8 %numskipbits, i8 %numlowbits) n
define i64 @bextr64_c4_commutative(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind {
; CHECK-LABEL: bextr64_c4_commutative:
; CHECK: // %bb.0:
-; CHECK-NEXT: neg x8, x2
-; CHECK-NEXT: mov x9, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: lsr x10, x0, x1
-; CHECK-NEXT: lsr x8, x9, x8
-; CHECK-NEXT: and x0, x10, x8
+; CHECK-NEXT: lsr x8, x0, x1
+; CHECK-NEXT: neg x9, x2
+; CHECK-NEXT: lsl x8, x8, x9
+; CHECK-NEXT: lsr x0, x8, x9
; CHECK-NEXT: ret
%shifted = lshr i64 %val, %numskipbits
%numhighbits = sub i64 64, %numlowbits
@@ -737,11 +727,10 @@ define i32 @bextr64_32_c0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind
define i32 @bextr64_32_c1(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind {
; CHECK-LABEL: bextr64_32_c1:
; CHECK: // %bb.0:
-; CHECK-NEXT: neg w8, w2
-; CHECK-NEXT: mov w9, #-1 // =0xffffffff
-; CHECK-NEXT: lsr x10, x0, x1
-; CHECK-NEXT: lsr w8, w9, w8
-; CHECK-NEXT: and w0, w8, w10
+; CHECK-NEXT: lsr x8, x0, x1
+; CHECK-NEXT: neg w9, w2
+; CHECK-NEXT: lsl w8, w8, w9
+; CHECK-NEXT: lsr w0, w8, w9
; CHECK-NEXT: ret
%shifted = lshr i64 %val, %numskipbits
%truncshifted = trunc i64 %shifted to i32
@@ -756,11 +745,10 @@ define i32 @bextr64_32_c1(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind
define i32 @bextr64_32_c2(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind {
; CHECK-LABEL: bextr64_32_c2:
; CHECK: // %bb.0:
-; CHECK-NEXT: neg w8, w2
-; CHECK-NEXT: mov w9, #-1 // =0xffffffff
-; CHECK-NEXT: lsr x10, x0, x1
-; CHECK-NEXT: lsr w8, w9, w8
-; CHECK-NEXT: and w0, w8, w10
+; CHECK-NEXT: lsr x8, x0, x1
+; CHECK-NEXT: neg w9, w2
+; CHECK-NEXT: lsl w8, w8, w9
+; CHECK-NEXT: lsr w0, w8, w9
; CHECK-NEXT: ret
%shifted = lshr i64 %val, %numskipbits
%numhighbits = sub i32 32, %numlowbits
diff --git a/llvm/test/CodeGen/AArch64/extract-lowbits.ll b/llvm/test/CodeGen/AArch64/extract-lowbits.ll
index 4b8f3e86b5fef..368440c65df84 100644
--- a/llvm/test/CodeGen/AArch64/extract-lowbits.ll
+++ b/llvm/test/CodeGen/AArch64/extract-lowbits.ll
@@ -347,10 +347,9 @@ define i64 @bzhi64_b4_commutative(i64 %val, i64 %numlowbits) nounwind {
define i32 @bzhi32_c0(i32 %val, i32 %numlowbits) nounwind {
; CHECK-LABEL: bzhi32_c0:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #-1 // =0xffffffff
-; CHECK-NEXT: neg w9, w1
-; CHECK-NEXT: lsr w8, w8, w9
-; CHECK-NEXT: and w0, w8, w0
+; CHECK-NEXT: neg w8, w1
+; CHECK-NEXT: lsl w9, w0, w8
+; CHECK-NEXT: lsr w0, w9, w8
; CHECK-NEXT: ret
%numhighbits = sub i32 32, %numlowbits
%mask = lshr i32 -1, %numhighbits
@@ -362,10 +361,9 @@ define i32 @bzhi32_c1_indexzext(i32 %val, i8 %numlowbits) nounwind {
; CHECK-LABEL: bzhi32_c1_indexzext:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #32 // =0x20
-; CHECK-NEXT: mov w9, #-1 // =0xffffffff
; CHECK-NEXT: sub w8, w8, w1
-; CHECK-NEXT: lsr w8, w9, w8
-; CHECK-NEXT: and w0, w8, w0
+; CHECK-NEXT: lsl w9, w0, w8
+; CHECK-NEXT: lsr w0, w9, w8
; CHECK-NEXT: ret
%numhighbits = sub i8 32, %numlowbits
%sh_prom = zext i8 %numhighbits to i32
@@ -377,11 +375,10 @@ define i32 @bzhi32_c1_indexzext(i32 %val, i8 %numlowbits) nounwind {
define i32 @bzhi32_c2_load(ptr %w, i32 %numlowbits) nounwind {
; CHECK-LABEL: bzhi32_c2_load:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #-1 // =0xffffffff
+; CHECK-NEXT: ldr w8, [x0]
; CHECK-NEXT: neg w9, w1
-; CHECK-NEXT: ldr w10, [x0]
-; CHECK-NEXT: lsr w8, w8, w9
-; CHECK-NEXT: and w0, w8, w10
+; CHECK-NEXT: lsl w8, w8, w9
+; CHECK-NEXT: lsr w0, w8, w9
; CHECK-NEXT: ret
%val = load i32, ptr %w
%numhighbits = sub i32 32, %numlowbits
@@ -394,11 +391,10 @@ define i32 @bzhi32_c3_load_indexzext(ptr %w, i8 %numlowbits) nounwind {
; CHECK-LABEL: bzhi32_c3_load_indexzext:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #32 // =0x20
-; CHECK-NEXT: mov w9, #-1 // =0xffffffff
-; CHECK-NEXT: ldr w10, [x0]
+; CHECK-NEXT: ldr w9, [x0]
; CHECK-NEXT: sub w8, w8, w1
-; CHECK-NEXT: lsr w8, w9, w8
-; CHECK-NEXT: and w0, w8, w10
+; CHECK-NEXT: lsl w9, w9, w8
+; CHECK-NEXT: lsr w0, w9, w8
; CHECK-NEXT: ret
%val = load i32, ptr %w
%numhighbits = sub i8 32, %numlowbits
@@ -411,10 +407,9 @@ define i32 @bzhi32_c3_load_indexzext(ptr %w, i8 %numlowbits) nounwind {
define i32 @bzhi32_c4_commutative(i32 %val, i32 %numlowbits) nounwind {
; CHECK-LABEL: bzhi32_c4_commutative:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #-1 // =0xffffffff
-; CHECK-NEXT: neg w9, w1
-; CHECK-NEXT: lsr w8, w8, w9
-; CHECK-NEXT: and w0, w0, w8
+; CHECK-NEXT: neg w8, w1
+; CHECK-NEXT: lsl w9, w0, w8
+; CHECK-NEXT: lsr w0, w9, w8
; CHECK-NEXT: ret
%numhighbits = sub i32 32, %numlowbits
%mask = lshr i32 -1, %numhighbits
@@ -427,10 +422,9 @@ define i32 @bzhi32_c4_commutative(i32 %val, i32 %numlowbits) nounwind {
define i64 @bzhi64_c0(i64 %val, i64 %numlowbits) nounwind {
; CHECK-LABEL: bzhi64_c0:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov x8, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: neg x9, x1
-; CHECK-NEXT: lsr x8, x8, x9
-; CHECK-NEXT: and x0, x8, x0
+; CHECK-NEXT: neg x8, x1
+; CHECK-NEXT: lsl x9, x0, x8
+; CHECK-NEXT: lsr x0, x9, x8
; CHECK-NEXT: ret
%numhighbits = sub i64 64, %numlowbits
%mask = lshr i64 -1, %numhighbits
@@ -442,10 +436,9 @@ define i64 @bzhi64_c1_indexzext(i64 %val, i8 %numlowbits) nounwind {
; CHECK-LABEL: bzhi64_c1_indexzext:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #64 // =0x40
-; CHECK-NEXT: mov x9, #-1 // =0xffffffffffffffff
; CHECK-NEXT: sub w8, w8, w1
-; CHECK-NEXT: lsr x8, x9, x8
-; CHECK-NEXT: and x0, x8, x0
+; CHECK-NEXT: lsl x9, x0, x8
+; CHECK-NEXT: lsr x0, x9, x8
; CHECK-NEXT: ret
%numhighbits = sub i8 64, %numlowbits
%sh_prom = zext i8 %numhighbits to i64
@@ -457,11 +450,10 @@ define i64 @bzhi64_c1_indexzext(i64 %val, i8 %numlowbits) nounwind {
define i64 @bzhi64_c2_load(ptr %w, i64 %numlowbits) nounwind {
; CHECK-LABEL: bzhi64_c2_load:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov x8, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: ldr x8, [x0]
; CHECK-NEXT: neg x9, x1
-; CHECK-NEXT: ldr x10, [x0]
-; CHECK-NEXT: lsr x8, x8, x9
-; CHECK-NEXT: and x0, x8, x10
+; CHECK-NEXT: lsl x8, x8, x9
+; CHECK-NEXT: lsr x0, x8, x9
; CHECK-NEXT: ret
%val = load i64, ptr %w
%numhighbits = sub i64 64, %numlowbits
@@ -474,11 +466,10 @@ define i64 @bzhi64_c3_load_indexzext(ptr %w, i8 %numlowbits) nounwind {
; CHECK-LABEL: bzhi64_c3_load_indexzext:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #64 // =0x40
-; CHECK-NEXT: mov x9, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: ldr x10, [x0]
+; CHECK-NEXT: ldr x9, [x0]
; CHECK-NEXT: sub w8, w8, w1
-; CHECK-NEXT: lsr x8, x9, x8
-; CHECK-NEXT: and x0, x8, x10
+; CHECK-NEXT: lsl x9, x9, x8
+; CHECK-NEXT: lsr x0, x9, x8
; CHECK-NEXT: ret
%val = load i64, ptr %w
%numhighbits = sub i8 64, %numlowbits
@@ -491,10 +482,9 @@ define i64 @bzhi64_c3_load_indexzext(ptr %w, i8 %numlowbits) nounwind {
define i64 @bzhi64_c4_commutative(i64 %val, i64 %numlowbits) nounwind {
; CHECK-LABEL: bzhi64_c4_commutative:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov x8, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: neg x9, x1
-; CHECK-NEXT: lsr x8, x8, x9
-; CHECK-NEXT: and x0, x0, x8
+; CHECK-NEXT: neg x8, x1
+; CHECK-NEXT: lsl x9, x0, x8
+; CHECK-NEXT: lsr x0, x9, x8
; CHECK-NEXT: ret
%numhighbits = sub i64 64, %numlowbits
%mask = lshr i64 -1, %numhighbits
More information about the llvm-commits mailing list