[llvm] [AArch64] shouldFoldMaskToVariableShiftPair should be true for scalars up to the biggest legal type (PR #158069)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 11 06:19:13 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-aarch64
Author: AZero13 (AZero13)
<details>
<summary>Changes</summary>
For AArch64, we want to do this for scalars up to 64 bits. Otherwise, this results in bloated code.
---
Full diff: https://github.com/llvm/llvm-project/pull/158069.diff
4 Files Affected:
- (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.h (+10)
- (added) llvm/test/CodeGen/AArch64/and-mask-variable.ll (+80)
- (modified) llvm/test/CodeGen/AArch64/extract-bits.ll (+43-55)
- (modified) llvm/test/CodeGen/AArch64/extract-lowbits.ll (+28-38)
``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 46738365080f9..9665bc871b6b9 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -300,6 +300,16 @@ class AArch64TargetLowering : public TargetLowering {
bool shouldFoldConstantShiftPairToMask(const SDNode *N,
CombineLevel Level) const override;
+ /// Return true if it is profitable to fold a mask into a pair of shifts.
+ bool shouldFoldMaskToVariableShiftPair(SDValue Y) const override {
+ EVT VT = Y.getValueType();
+
+ if (VT.isVector())
+ return false;
+
+ return VT.getScalarSizeInBits() <= 64;
+ }
+
bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT,
unsigned SelectOpcode, SDValue X,
SDValue Y) const override;
diff --git a/llvm/test/CodeGen/AArch64/and-mask-variable.ll b/llvm/test/CodeGen/AArch64/and-mask-variable.ll
new file mode 100644
index 0000000000000..f41cdc6dd241b
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/and-mask-variable.ll
@@ -0,0 +1,80 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-none-elf -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc -mtriple=aarch64-none-elf -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI
+
+define i32 @mask_pair(i32 %x, i32 %y) {
+; CHECK-SD-LABEL: mask_pair:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: lsr w8, w0, w1
+; CHECK-SD-NEXT: lsl w0, w8, w1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: mask_pair:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mov w8, #-1 // =0xffffffff
+; CHECK-GI-NEXT: lsl w8, w8, w1
+; CHECK-GI-NEXT: and w0, w8, w0
+; CHECK-GI-NEXT: ret
+ %shl = shl nsw i32 -1, %y
+ %and = and i32 %shl, %x
+ ret i32 %and
+}
+
+define i64 @mask_pair_64(i64 %x, i64 %y) {
+; CHECK-SD-LABEL: mask_pair_64:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: lsr x8, x0, x1
+; CHECK-SD-NEXT: lsl x0, x8, x1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: mask_pair_64:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mov x8, #-1 // =0xffffffffffffffff
+; CHECK-GI-NEXT: lsl x8, x8, x1
+; CHECK-GI-NEXT: and x0, x8, x0
+; CHECK-GI-NEXT: ret
+ %shl = shl nsw i64 -1, %y
+ %and = and i64 %shl, %x
+ ret i64 %and
+}
+
+define i128 @mask_pair_128(i128 %x, i128 %y) {
+; CHECK-SD-LABEL: mask_pair_128:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: mov x8, #-1 // =0xffffffffffffffff
+; CHECK-SD-NEXT: mvn w9, w2
+; CHECK-SD-NEXT: mov x10, #9223372036854775807 // =0x7fffffffffffffff
+; CHECK-SD-NEXT: lsl x8, x8, x2
+; CHECK-SD-NEXT: lsr x9, x10, x9
+; CHECK-SD-NEXT: tst x2, #0x40
+; CHECK-SD-NEXT: orr x9, x8, x9
+; CHECK-SD-NEXT: csel x9, x8, x9, ne
+; CHECK-SD-NEXT: csel x8, xzr, x8, ne
+; CHECK-SD-NEXT: and x0, x8, x0
+; CHECK-SD-NEXT: and x1, x9, x1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: mask_pair_128:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mov w8, #64 // =0x40
+; CHECK-GI-NEXT: mov x9, #-1 // =0xffffffffffffffff
+; CHECK-GI-NEXT: sub x10, x2, #64
+; CHECK-GI-NEXT: sub x8, x8, x2
+; CHECK-GI-NEXT: lsl x11, x9, x2
+; CHECK-GI-NEXT: cmp x2, #64
+; CHECK-GI-NEXT: lsr x8, x9, x8
+; CHECK-GI-NEXT: lsl x9, x9, x10
+; CHECK-GI-NEXT: csel x10, x11, xzr, lo
+; CHECK-GI-NEXT: orr x8, x8, x11
+; CHECK-GI-NEXT: and x0, x10, x0
+; CHECK-GI-NEXT: csel x8, x8, x9, lo
+; CHECK-GI-NEXT: cmp x2, #0
+; CHECK-GI-NEXT: csinv x8, x8, xzr, ne
+; CHECK-GI-NEXT: and x1, x8, x1
+; CHECK-GI-NEXT: ret
+ %shl = shl nsw i128 -1, %y
+ %and = and i128 %shl, %x
+ ret i128 %and
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK: {{.*}}
diff --git a/llvm/test/CodeGen/AArch64/extract-bits.ll b/llvm/test/CodeGen/AArch64/extract-bits.ll
index 8e822d19a19b9..5a96116142b51 100644
--- a/llvm/test/CodeGen/AArch64/extract-bits.ll
+++ b/llvm/test/CodeGen/AArch64/extract-bits.ll
@@ -532,11 +532,10 @@ define i32 @bextr64_32_b2(i64 %val, i64 %numskipbits, i8 %numlowbits) nounwind {
define i32 @bextr32_c0(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind {
; CHECK-LABEL: bextr32_c0:
; CHECK: // %bb.0:
-; CHECK-NEXT: neg w8, w2
-; CHECK-NEXT: mov w9, #-1 // =0xffffffff
-; CHECK-NEXT: lsr w10, w0, w1
-; CHECK-NEXT: lsr w8, w9, w8
-; CHECK-NEXT: and w0, w8, w10
+; CHECK-NEXT: lsr w8, w0, w1
+; CHECK-NEXT: neg w9, w2
+; CHECK-NEXT: lsl w8, w8, w9
+; CHECK-NEXT: lsr w0, w8, w9
; CHECK-NEXT: ret
%shifted = lshr i32 %val, %numskipbits
%numhighbits = sub i32 32, %numlowbits
@@ -548,12 +547,11 @@ define i32 @bextr32_c0(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind {
define i32 @bextr32_c1_indexzext(i32 %val, i8 %numskipbits, i8 %numlowbits) nounwind {
; CHECK-LABEL: bextr32_c1_indexzext:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #32 // =0x20
-; CHECK-NEXT: mov w9, #-1 // =0xffffffff
-; CHECK-NEXT: lsr w10, w0, w1
-; CHECK-NEXT: sub w8, w8, w2
-; CHECK-NEXT: lsr w8, w9, w8
-; CHECK-NEXT: and w0, w8, w10
+; CHECK-NEXT: lsr w8, w0, w1
+; CHECK-NEXT: mov w9, #32 // =0x20
+; CHECK-NEXT: sub w9, w9, w2
+; CHECK-NEXT: lsl w8, w8, w9
+; CHECK-NEXT: lsr w0, w8, w9
; CHECK-NEXT: ret
%skip = zext i8 %numskipbits to i32
%shifted = lshr i32 %val, %skip
@@ -569,10 +567,9 @@ define i32 @bextr32_c2_load(ptr %w, i32 %numskipbits, i32 %numlowbits) nounwind
; CHECK: // %bb.0:
; CHECK-NEXT: ldr w8, [x0]
; CHECK-NEXT: neg w9, w2
-; CHECK-NEXT: mov w10, #-1 // =0xffffffff
-; CHECK-NEXT: lsr w9, w10, w9
; CHECK-NEXT: lsr w8, w8, w1
-; CHECK-NEXT: and w0, w9, w8
+; CHECK-NEXT: lsl w8, w8, w9
+; CHECK-NEXT: lsr w0, w8, w9
; CHECK-NEXT: ret
%val = load i32, ptr %w
%shifted = lshr i32 %val, %numskipbits
@@ -587,11 +584,10 @@ define i32 @bextr32_c3_load_indexzext(ptr %w, i8 %numskipbits, i8 %numlowbits) n
; CHECK: // %bb.0:
; CHECK-NEXT: ldr w8, [x0]
; CHECK-NEXT: mov w9, #32 // =0x20
-; CHECK-NEXT: mov w10, #-1 // =0xffffffff
; CHECK-NEXT: sub w9, w9, w2
; CHECK-NEXT: lsr w8, w8, w1
-; CHECK-NEXT: lsr w9, w10, w9
-; CHECK-NEXT: and w0, w9, w8
+; CHECK-NEXT: lsl w8, w8, w9
+; CHECK-NEXT: lsr w0, w8, w9
; CHECK-NEXT: ret
%val = load i32, ptr %w
%skip = zext i8 %numskipbits to i32
@@ -606,11 +602,10 @@ define i32 @bextr32_c3_load_indexzext(ptr %w, i8 %numskipbits, i8 %numlowbits) n
define i32 @bextr32_c4_commutative(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind {
; CHECK-LABEL: bextr32_c4_commutative:
; CHECK: // %bb.0:
-; CHECK-NEXT: neg w8, w2
-; CHECK-NEXT: mov w9, #-1 // =0xffffffff
-; CHECK-NEXT: lsr w10, w0, w1
-; CHECK-NEXT: lsr w8, w9, w8
-; CHECK-NEXT: and w0, w10, w8
+; CHECK-NEXT: lsr w8, w0, w1
+; CHECK-NEXT: neg w9, w2
+; CHECK-NEXT: lsl w8, w8, w9
+; CHECK-NEXT: lsr w0, w8, w9
; CHECK-NEXT: ret
%shifted = lshr i32 %val, %numskipbits
%numhighbits = sub i32 32, %numlowbits
@@ -624,11 +619,10 @@ define i32 @bextr32_c4_commutative(i32 %val, i32 %numskipbits, i32 %numlowbits)
define i64 @bextr64_c0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind {
; CHECK-LABEL: bextr64_c0:
; CHECK: // %bb.0:
-; CHECK-NEXT: neg x8, x2
-; CHECK-NEXT: mov x9, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: lsr x10, x0, x1
-; CHECK-NEXT: lsr x8, x9, x8
-; CHECK-NEXT: and x0, x8, x10
+; CHECK-NEXT: lsr x8, x0, x1
+; CHECK-NEXT: neg x9, x2
+; CHECK-NEXT: lsl x8, x8, x9
+; CHECK-NEXT: lsr x0, x8, x9
; CHECK-NEXT: ret
%shifted = lshr i64 %val, %numskipbits
%numhighbits = sub i64 64, %numlowbits
@@ -640,13 +634,12 @@ define i64 @bextr64_c0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind {
define i64 @bextr64_c1_indexzext(i64 %val, i8 %numskipbits, i8 %numlowbits) nounwind {
; CHECK-LABEL: bextr64_c1_indexzext:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #64 // =0x40
-; CHECK-NEXT: mov x9, #-1 // =0xffffffffffffffff
; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
-; CHECK-NEXT: lsr x10, x0, x1
-; CHECK-NEXT: sub w8, w8, w2
-; CHECK-NEXT: lsr x8, x9, x8
-; CHECK-NEXT: and x0, x8, x10
+; CHECK-NEXT: lsr x8, x0, x1
+; CHECK-NEXT: mov w9, #64 // =0x40
+; CHECK-NEXT: sub w9, w9, w2
+; CHECK-NEXT: lsl x8, x8, x9
+; CHECK-NEXT: lsr x0, x8, x9
; CHECK-NEXT: ret
%skip = zext i8 %numskipbits to i64
%shifted = lshr i64 %val, %skip
@@ -662,10 +655,9 @@ define i64 @bextr64_c2_load(ptr %w, i64 %numskipbits, i64 %numlowbits) nounwind
; CHECK: // %bb.0:
; CHECK-NEXT: ldr x8, [x0]
; CHECK-NEXT: neg x9, x2
-; CHECK-NEXT: mov x10, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: lsr x9, x10, x9
; CHECK-NEXT: lsr x8, x8, x1
-; CHECK-NEXT: and x0, x9, x8
+; CHECK-NEXT: lsl x8, x8, x9
+; CHECK-NEXT: lsr x0, x8, x9
; CHECK-NEXT: ret
%val = load i64, ptr %w
%shifted = lshr i64 %val, %numskipbits
@@ -679,13 +671,12 @@ define i64 @bextr64_c3_load_indexzext(ptr %w, i8 %numskipbits, i8 %numlowbits) n
; CHECK-LABEL: bextr64_c3_load_indexzext:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr x8, [x0]
+; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
; CHECK-NEXT: mov w9, #64 // =0x40
-; CHECK-NEXT: mov x10, #-1 // =0xffffffffffffffff
; CHECK-NEXT: sub w9, w9, w2
-; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
; CHECK-NEXT: lsr x8, x8, x1
-; CHECK-NEXT: lsr x9, x10, x9
-; CHECK-NEXT: and x0, x9, x8
+; CHECK-NEXT: lsl x8, x8, x9
+; CHECK-NEXT: lsr x0, x8, x9
; CHECK-NEXT: ret
%val = load i64, ptr %w
%skip = zext i8 %numskipbits to i64
@@ -700,11 +691,10 @@ define i64 @bextr64_c3_load_indexzext(ptr %w, i8 %numskipbits, i8 %numlowbits) n
define i64 @bextr64_c4_commutative(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind {
; CHECK-LABEL: bextr64_c4_commutative:
; CHECK: // %bb.0:
-; CHECK-NEXT: neg x8, x2
-; CHECK-NEXT: mov x9, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: lsr x10, x0, x1
-; CHECK-NEXT: lsr x8, x9, x8
-; CHECK-NEXT: and x0, x10, x8
+; CHECK-NEXT: lsr x8, x0, x1
+; CHECK-NEXT: neg x9, x2
+; CHECK-NEXT: lsl x8, x8, x9
+; CHECK-NEXT: lsr x0, x8, x9
; CHECK-NEXT: ret
%shifted = lshr i64 %val, %numskipbits
%numhighbits = sub i64 64, %numlowbits
@@ -737,11 +727,10 @@ define i32 @bextr64_32_c0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind
define i32 @bextr64_32_c1(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind {
; CHECK-LABEL: bextr64_32_c1:
; CHECK: // %bb.0:
-; CHECK-NEXT: neg w8, w2
-; CHECK-NEXT: mov w9, #-1 // =0xffffffff
-; CHECK-NEXT: lsr x10, x0, x1
-; CHECK-NEXT: lsr w8, w9, w8
-; CHECK-NEXT: and w0, w8, w10
+; CHECK-NEXT: lsr x8, x0, x1
+; CHECK-NEXT: neg w9, w2
+; CHECK-NEXT: lsl w8, w8, w9
+; CHECK-NEXT: lsr w0, w8, w9
; CHECK-NEXT: ret
%shifted = lshr i64 %val, %numskipbits
%truncshifted = trunc i64 %shifted to i32
@@ -756,11 +745,10 @@ define i32 @bextr64_32_c1(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind
define i32 @bextr64_32_c2(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind {
; CHECK-LABEL: bextr64_32_c2:
; CHECK: // %bb.0:
-; CHECK-NEXT: neg w8, w2
-; CHECK-NEXT: mov w9, #-1 // =0xffffffff
-; CHECK-NEXT: lsr x10, x0, x1
-; CHECK-NEXT: lsr w8, w9, w8
-; CHECK-NEXT: and w0, w8, w10
+; CHECK-NEXT: lsr x8, x0, x1
+; CHECK-NEXT: neg w9, w2
+; CHECK-NEXT: lsl w8, w8, w9
+; CHECK-NEXT: lsr w0, w8, w9
; CHECK-NEXT: ret
%shifted = lshr i64 %val, %numskipbits
%numhighbits = sub i32 32, %numlowbits
diff --git a/llvm/test/CodeGen/AArch64/extract-lowbits.ll b/llvm/test/CodeGen/AArch64/extract-lowbits.ll
index 4b8f3e86b5fef..368440c65df84 100644
--- a/llvm/test/CodeGen/AArch64/extract-lowbits.ll
+++ b/llvm/test/CodeGen/AArch64/extract-lowbits.ll
@@ -347,10 +347,9 @@ define i64 @bzhi64_b4_commutative(i64 %val, i64 %numlowbits) nounwind {
define i32 @bzhi32_c0(i32 %val, i32 %numlowbits) nounwind {
; CHECK-LABEL: bzhi32_c0:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #-1 // =0xffffffff
-; CHECK-NEXT: neg w9, w1
-; CHECK-NEXT: lsr w8, w8, w9
-; CHECK-NEXT: and w0, w8, w0
+; CHECK-NEXT: neg w8, w1
+; CHECK-NEXT: lsl w9, w0, w8
+; CHECK-NEXT: lsr w0, w9, w8
; CHECK-NEXT: ret
%numhighbits = sub i32 32, %numlowbits
%mask = lshr i32 -1, %numhighbits
@@ -362,10 +361,9 @@ define i32 @bzhi32_c1_indexzext(i32 %val, i8 %numlowbits) nounwind {
; CHECK-LABEL: bzhi32_c1_indexzext:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #32 // =0x20
-; CHECK-NEXT: mov w9, #-1 // =0xffffffff
; CHECK-NEXT: sub w8, w8, w1
-; CHECK-NEXT: lsr w8, w9, w8
-; CHECK-NEXT: and w0, w8, w0
+; CHECK-NEXT: lsl w9, w0, w8
+; CHECK-NEXT: lsr w0, w9, w8
; CHECK-NEXT: ret
%numhighbits = sub i8 32, %numlowbits
%sh_prom = zext i8 %numhighbits to i32
@@ -377,11 +375,10 @@ define i32 @bzhi32_c1_indexzext(i32 %val, i8 %numlowbits) nounwind {
define i32 @bzhi32_c2_load(ptr %w, i32 %numlowbits) nounwind {
; CHECK-LABEL: bzhi32_c2_load:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #-1 // =0xffffffff
+; CHECK-NEXT: ldr w8, [x0]
; CHECK-NEXT: neg w9, w1
-; CHECK-NEXT: ldr w10, [x0]
-; CHECK-NEXT: lsr w8, w8, w9
-; CHECK-NEXT: and w0, w8, w10
+; CHECK-NEXT: lsl w8, w8, w9
+; CHECK-NEXT: lsr w0, w8, w9
; CHECK-NEXT: ret
%val = load i32, ptr %w
%numhighbits = sub i32 32, %numlowbits
@@ -394,11 +391,10 @@ define i32 @bzhi32_c3_load_indexzext(ptr %w, i8 %numlowbits) nounwind {
; CHECK-LABEL: bzhi32_c3_load_indexzext:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #32 // =0x20
-; CHECK-NEXT: mov w9, #-1 // =0xffffffff
-; CHECK-NEXT: ldr w10, [x0]
+; CHECK-NEXT: ldr w9, [x0]
; CHECK-NEXT: sub w8, w8, w1
-; CHECK-NEXT: lsr w8, w9, w8
-; CHECK-NEXT: and w0, w8, w10
+; CHECK-NEXT: lsl w9, w9, w8
+; CHECK-NEXT: lsr w0, w9, w8
; CHECK-NEXT: ret
%val = load i32, ptr %w
%numhighbits = sub i8 32, %numlowbits
@@ -411,10 +407,9 @@ define i32 @bzhi32_c3_load_indexzext(ptr %w, i8 %numlowbits) nounwind {
define i32 @bzhi32_c4_commutative(i32 %val, i32 %numlowbits) nounwind {
; CHECK-LABEL: bzhi32_c4_commutative:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #-1 // =0xffffffff
-; CHECK-NEXT: neg w9, w1
-; CHECK-NEXT: lsr w8, w8, w9
-; CHECK-NEXT: and w0, w0, w8
+; CHECK-NEXT: neg w8, w1
+; CHECK-NEXT: lsl w9, w0, w8
+; CHECK-NEXT: lsr w0, w9, w8
; CHECK-NEXT: ret
%numhighbits = sub i32 32, %numlowbits
%mask = lshr i32 -1, %numhighbits
@@ -427,10 +422,9 @@ define i32 @bzhi32_c4_commutative(i32 %val, i32 %numlowbits) nounwind {
define i64 @bzhi64_c0(i64 %val, i64 %numlowbits) nounwind {
; CHECK-LABEL: bzhi64_c0:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov x8, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: neg x9, x1
-; CHECK-NEXT: lsr x8, x8, x9
-; CHECK-NEXT: and x0, x8, x0
+; CHECK-NEXT: neg x8, x1
+; CHECK-NEXT: lsl x9, x0, x8
+; CHECK-NEXT: lsr x0, x9, x8
; CHECK-NEXT: ret
%numhighbits = sub i64 64, %numlowbits
%mask = lshr i64 -1, %numhighbits
@@ -442,10 +436,9 @@ define i64 @bzhi64_c1_indexzext(i64 %val, i8 %numlowbits) nounwind {
; CHECK-LABEL: bzhi64_c1_indexzext:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #64 // =0x40
-; CHECK-NEXT: mov x9, #-1 // =0xffffffffffffffff
; CHECK-NEXT: sub w8, w8, w1
-; CHECK-NEXT: lsr x8, x9, x8
-; CHECK-NEXT: and x0, x8, x0
+; CHECK-NEXT: lsl x9, x0, x8
+; CHECK-NEXT: lsr x0, x9, x8
; CHECK-NEXT: ret
%numhighbits = sub i8 64, %numlowbits
%sh_prom = zext i8 %numhighbits to i64
@@ -457,11 +450,10 @@ define i64 @bzhi64_c1_indexzext(i64 %val, i8 %numlowbits) nounwind {
define i64 @bzhi64_c2_load(ptr %w, i64 %numlowbits) nounwind {
; CHECK-LABEL: bzhi64_c2_load:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov x8, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: ldr x8, [x0]
; CHECK-NEXT: neg x9, x1
-; CHECK-NEXT: ldr x10, [x0]
-; CHECK-NEXT: lsr x8, x8, x9
-; CHECK-NEXT: and x0, x8, x10
+; CHECK-NEXT: lsl x8, x8, x9
+; CHECK-NEXT: lsr x0, x8, x9
; CHECK-NEXT: ret
%val = load i64, ptr %w
%numhighbits = sub i64 64, %numlowbits
@@ -474,11 +466,10 @@ define i64 @bzhi64_c3_load_indexzext(ptr %w, i8 %numlowbits) nounwind {
; CHECK-LABEL: bzhi64_c3_load_indexzext:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #64 // =0x40
-; CHECK-NEXT: mov x9, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: ldr x10, [x0]
+; CHECK-NEXT: ldr x9, [x0]
; CHECK-NEXT: sub w8, w8, w1
-; CHECK-NEXT: lsr x8, x9, x8
-; CHECK-NEXT: and x0, x8, x10
+; CHECK-NEXT: lsl x9, x9, x8
+; CHECK-NEXT: lsr x0, x9, x8
; CHECK-NEXT: ret
%val = load i64, ptr %w
%numhighbits = sub i8 64, %numlowbits
@@ -491,10 +482,9 @@ define i64 @bzhi64_c3_load_indexzext(ptr %w, i8 %numlowbits) nounwind {
define i64 @bzhi64_c4_commutative(i64 %val, i64 %numlowbits) nounwind {
; CHECK-LABEL: bzhi64_c4_commutative:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov x8, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: neg x9, x1
-; CHECK-NEXT: lsr x8, x8, x9
-; CHECK-NEXT: and x0, x0, x8
+; CHECK-NEXT: neg x8, x1
+; CHECK-NEXT: lsl x9, x0, x8
+; CHECK-NEXT: lsr x0, x9, x8
; CHECK-NEXT: ret
%numhighbits = sub i64 64, %numlowbits
%mask = lshr i64 -1, %numhighbits
``````````
</details>
https://github.com/llvm/llvm-project/pull/158069
More information about the llvm-commits
mailing list