[llvm] [AArch64] Combine and and lsl into ubfiz (PR #118974)
Cullen Rhodes via llvm-commits
llvm-commits at lists.llvm.org
Fri Dec 13 04:35:00 PST 2024
https://github.com/c-rhodes updated https://github.com/llvm/llvm-project/pull/118974
From 4cbde56129cc14c5a776a70efcfc6a8899b68127 Mon Sep 17 00:00:00 2001
From: Cullen Rhodes <cullen.rhodes at arm.com>
Date: Fri, 6 Dec 2024 13:46:58 +0000
Subject: [PATCH 1/2] [AArch64] Combine and and lsl into ubfiz
Fixes #118132.
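
For reference, a minimal IR sketch of the pattern the new isel patterns cover,
mirroring the xbfiz.ll tests added below (the function name is illustrative and
the exact llc invocation, e.g. -mtriple=aarch64, is assumed):

define i64 @zext_shl_sketch(i16 %b) {
  %ext = zext i16 %b to i64   ; reaches isel as (and (anyext %b), 0xffff)
  %shl = shl i64 %ext, 1      ; previously and+lsl, now a single: ubfiz x0, x0, #1, #16
  ret i64 %shl
}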
---
llvm/lib/Target/AArch64/AArch64InstrInfo.td | 9 +++++++++
.../test/CodeGen/AArch64/aarch64-fold-lslfast.ll | 10 ++++------
llvm/test/CodeGen/AArch64/xbfiz.ll | 16 ++++++++++++++++
3 files changed, 29 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 7614f6215b803c..9f980615caff5a 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -8989,6 +8989,15 @@ def : Pat<(shl (i64 (zext GPR32:$Rn)), (i64 imm0_63:$imm)),
(i64 (i64shift_a imm0_63:$imm)),
(i64 (i64shift_sext_i32 imm0_63:$imm)))>;
+def : Pat<(shl (i64 (and (i64 (anyext GPR32:$Rn)), 0xff)), (i64 imm0_63:$imm)),
+ (UBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32),
+ (i64 (i64shift_a imm0_63:$imm)),
+ (i64 (i64shift_sext_i8 imm0_63:$imm)))>;
+def : Pat<(shl (i64 (and (i64 (anyext GPR32:$Rn)), 0xffff)), (i64 imm0_63:$imm)),
+ (UBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32),
+ (i64 (i64shift_a imm0_63:$imm)),
+ (i64 (i64shift_sext_i16 imm0_63:$imm)))>;
+
// sra patterns have an AddedComplexity of 10, so make sure we have a higher
// AddedComplexity for the following patterns since we want to match sext + sra
// patterns before we attempt to match a single sra node.
diff --git a/llvm/test/CodeGen/AArch64/aarch64-fold-lslfast.ll b/llvm/test/CodeGen/AArch64/aarch64-fold-lslfast.ll
index 63dcafed2320a0..abc5c0876e80b7 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-fold-lslfast.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-fold-lslfast.ll
@@ -13,11 +13,10 @@ define i16 @halfword(ptr %ctx, i32 %xor72) nounwind {
; CHECK0-SDAG-LABEL: halfword:
; CHECK0-SDAG: // %bb.0:
; CHECK0-SDAG-NEXT: stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
-; CHECK0-SDAG-NEXT: // kill: def $w1 killed $w1 def $x1
-; CHECK0-SDAG-NEXT: ubfx x8, x1, #9, #8
+; CHECK0-SDAG-NEXT: lsr w8, w1, #9
; CHECK0-SDAG-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
; CHECK0-SDAG-NEXT: mov x19, x0
-; CHECK0-SDAG-NEXT: lsl x21, x8, #1
+; CHECK0-SDAG-NEXT: ubfiz x21, x8, #1, #8
; CHECK0-SDAG-NEXT: ldrh w20, [x0, x21]
; CHECK0-SDAG-NEXT: bl foo
; CHECK0-SDAG-NEXT: mov w0, w20
@@ -231,10 +230,9 @@ define i16 @multi_use_half_word(ptr %ctx, i32 %xor72) {
; CHECK0-SDAG-NEXT: .cfi_offset w21, -24
; CHECK0-SDAG-NEXT: .cfi_offset w22, -32
; CHECK0-SDAG-NEXT: .cfi_offset w30, -48
-; CHECK0-SDAG-NEXT: // kill: def $w1 killed $w1 def $x1
-; CHECK0-SDAG-NEXT: ubfx x8, x1, #9, #8
+; CHECK0-SDAG-NEXT: lsr w8, w1, #9
; CHECK0-SDAG-NEXT: mov x19, x0
-; CHECK0-SDAG-NEXT: lsl x21, x8, #1
+; CHECK0-SDAG-NEXT: ubfiz x21, x8, #1, #8
; CHECK0-SDAG-NEXT: ldrh w20, [x0, x21]
; CHECK0-SDAG-NEXT: add w22, w20, #1
; CHECK0-SDAG-NEXT: bl foo
diff --git a/llvm/test/CodeGen/AArch64/xbfiz.ll b/llvm/test/CodeGen/AArch64/xbfiz.ll
index b777ddcb7efcc4..05567e34258402 100644
--- a/llvm/test/CodeGen/AArch64/xbfiz.ll
+++ b/llvm/test/CodeGen/AArch64/xbfiz.ll
@@ -69,3 +69,19 @@ define i64 @lsl32_not_ubfiz64(i64 %v) {
%and = and i64 %shl, 4294967295
ret i64 %and
}
+
+define i64 @lsl_zext_i8_i64(i8 %b) {
+; CHECK-LABEL: lsl_zext_i8_i64:
+; CHECK: ubfiz x0, x0, #1, #8
+ %1 = zext i8 %b to i64
+ %2 = shl i64 %1, 1
+ ret i64 %2
+}
+
+define i64 @lsl_zext_i16_i64(i16 %b) {
+; CHECK-LABEL: lsl_zext_i16_i64:
+; CHECK: ubfiz x0, x0, #1, #16
+ %1 = zext i16 %b to i64
+ %2 = shl i64 %1, 1
+ ret i64 %2
+}
From 84762e46bd10a0b3c8e6b49c250f4f8ca8f22286 Mon Sep 17 00:00:00 2001
From: Cullen Rhodes <cullen.rhodes at arm.com>
Date: Fri, 13 Dec 2024 12:30:25 +0000
Subject: [PATCH 2/2] Move to target DAG-combine
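
This moves the transform from isel patterns to a target DAG combine that
rewrites (shl (and X, C1), C2) -> (and (shl X, C2), (shl C1, C2)) when the
AND mask and shift amount are constants and the AND has a single use, unless
the result feeds an add/sub that could fold the shift. Two minimal IR sketches
of the effect, taken from the updated const-shift-of-constmasked.ll checks
(function names are illustrative; the exact llc invocation, e.g.
-mtriple=aarch64, is assumed):

define i8 @mask7_shl1(i8 %a0) {
  %t0 = and i8 %a0, 7        ; low mask: the shifted mask 0xe is still a
  %t1 = shl i8 %t0, 1        ; contiguous bitfield, so this selects as:
  ret i8 %t1                 ;   ubfiz w0, w0, #1, #3
}

define i8 @mask28_shl1(i8 %a0) {
  %t0 = and i8 %a0, 28       ; non-low mask: becomes lsl w8, w0, #1 followed by
  %t1 = shl i8 %t0, 1        ; and w0, w8, #0x38, i.e. shift first, then mask
  ret i8 %t1
}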
---
.../Target/AArch64/AArch64ISelLowering.cpp | 34 ++++++
llvm/lib/Target/AArch64/AArch64InstrInfo.td | 9 --
.../CodeGen/AArch64/aarch64-fold-lslfast.ll | 10 +-
.../AArch64/const-shift-of-constmasked.ll | 101 ++++++++----------
llvm/test/CodeGen/AArch64/extract-bits.ll | 16 +--
llvm/test/CodeGen/AArch64/fpenv.ll | 6 +-
.../CodeGen/AArch64/swap-compare-operands.ll | 42 +++++---
7 files changed, 123 insertions(+), 95 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index e1be825fcf7bf3..41cb42881cb9f2 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1136,6 +1136,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::SCALAR_TO_VECTOR);
+ setTargetDAGCombine(ISD::SHL);
+
// In case of strict alignment, avoid an excessive number of byte wide stores.
MaxStoresPerMemsetOptSize = 8;
MaxStoresPerMemset =
@@ -26057,6 +26059,36 @@ performScalarToVectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
return NVCAST;
}
+static SDValue performSHLCombine(SDNode *N, SelectionDAG &DAG) {
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::i32 && VT != MVT::i64)
+ return SDValue();
+
+ // If the operand is a bitwise AND with a constant RHS, and the shift is the
+ // only use, we can pull it out of the shift.
+ //
+ // (shl (and X, C1), C2) -> (and (shl X, C2), (shl C1, C2))
+ if (!Op0.hasOneUse() || Op0.getOpcode() != ISD::AND)
+ return SDValue();
+
+ ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
+ ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(Op1);
+ if (!C1 || !C2)
+ return SDValue();
+
+ // Might be folded into shifted add/sub, do not lower.
+ if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ADD ||
+ N->use_begin()->getOpcode() == ISD::SUB))
+ return SDValue();
+
+ SDLoc DL(N);
+ SDValue NewRHS = DAG.getNode(ISD::SHL, DL, VT, Op0.getOperand(1), Op1);
+ SDValue NewShift = DAG.getNode(ISD::SHL, DL, VT, Op0->getOperand(0), Op1);
+ return DAG.getNode(ISD::AND, DL, VT, NewShift, NewRHS);
+}
+
SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -26402,6 +26434,8 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
return performCTLZCombine(N, DAG, Subtarget);
case ISD::SCALAR_TO_VECTOR:
return performScalarToVectorCombine(N, DCI, DAG);
+ case ISD::SHL:
+ return performSHLCombine(N, DAG);
}
return SDValue();
}
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 9f980615caff5a..7614f6215b803c 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -8989,15 +8989,6 @@ def : Pat<(shl (i64 (zext GPR32:$Rn)), (i64 imm0_63:$imm)),
(i64 (i64shift_a imm0_63:$imm)),
(i64 (i64shift_sext_i32 imm0_63:$imm)))>;
-def : Pat<(shl (i64 (and (i64 (anyext GPR32:$Rn)), 0xff)), (i64 imm0_63:$imm)),
- (UBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32),
- (i64 (i64shift_a imm0_63:$imm)),
- (i64 (i64shift_sext_i8 imm0_63:$imm)))>;
-def : Pat<(shl (i64 (and (i64 (anyext GPR32:$Rn)), 0xffff)), (i64 imm0_63:$imm)),
- (UBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32),
- (i64 (i64shift_a imm0_63:$imm)),
- (i64 (i64shift_sext_i16 imm0_63:$imm)))>;
-
// sra patterns have an AddedComplexity of 10, so make sure we have a higher
// AddedComplexity for the following patterns since we want to match sext + sra
// patterns before we attempt to match a single sra node.
diff --git a/llvm/test/CodeGen/AArch64/aarch64-fold-lslfast.ll b/llvm/test/CodeGen/AArch64/aarch64-fold-lslfast.ll
index abc5c0876e80b7..63dcafed2320a0 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-fold-lslfast.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-fold-lslfast.ll
@@ -13,10 +13,11 @@ define i16 @halfword(ptr %ctx, i32 %xor72) nounwind {
; CHECK0-SDAG-LABEL: halfword:
; CHECK0-SDAG: // %bb.0:
; CHECK0-SDAG-NEXT: stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
-; CHECK0-SDAG-NEXT: lsr w8, w1, #9
+; CHECK0-SDAG-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK0-SDAG-NEXT: ubfx x8, x1, #9, #8
; CHECK0-SDAG-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
; CHECK0-SDAG-NEXT: mov x19, x0
-; CHECK0-SDAG-NEXT: ubfiz x21, x8, #1, #8
+; CHECK0-SDAG-NEXT: lsl x21, x8, #1
; CHECK0-SDAG-NEXT: ldrh w20, [x0, x21]
; CHECK0-SDAG-NEXT: bl foo
; CHECK0-SDAG-NEXT: mov w0, w20
@@ -230,9 +231,10 @@ define i16 @multi_use_half_word(ptr %ctx, i32 %xor72) {
; CHECK0-SDAG-NEXT: .cfi_offset w21, -24
; CHECK0-SDAG-NEXT: .cfi_offset w22, -32
; CHECK0-SDAG-NEXT: .cfi_offset w30, -48
-; CHECK0-SDAG-NEXT: lsr w8, w1, #9
+; CHECK0-SDAG-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK0-SDAG-NEXT: ubfx x8, x1, #9, #8
; CHECK0-SDAG-NEXT: mov x19, x0
-; CHECK0-SDAG-NEXT: ubfiz x21, x8, #1, #8
+; CHECK0-SDAG-NEXT: lsl x21, x8, #1
; CHECK0-SDAG-NEXT: ldrh w20, [x0, x21]
; CHECK0-SDAG-NEXT: add w22, w20, #1
; CHECK0-SDAG-NEXT: bl foo
diff --git a/llvm/test/CodeGen/AArch64/const-shift-of-constmasked.ll b/llvm/test/CodeGen/AArch64/const-shift-of-constmasked.ll
index 66a6745cda8f76..1fffcdda4b4167 100644
--- a/llvm/test/CodeGen/AArch64/const-shift-of-constmasked.ll
+++ b/llvm/test/CodeGen/AArch64/const-shift-of-constmasked.ll
@@ -190,8 +190,7 @@ define i8 @test_i8_224_mask_ashr_6(i8 %a0) {
define i8 @test_i8_7_mask_shl_1(i8 %a0) {
; CHECK-LABEL: test_i8_7_mask_shl_1:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0x7
-; CHECK-NEXT: lsl w0, w8, #1
+; CHECK-NEXT: ubfiz w0, w0, #1, #3
; CHECK-NEXT: ret
%t0 = and i8 %a0, 7
%t1 = shl i8 %t0, 1
@@ -200,8 +199,7 @@ define i8 @test_i8_7_mask_shl_1(i8 %a0) {
define i8 @test_i8_7_mask_shl_4(i8 %a0) {
; CHECK-LABEL: test_i8_7_mask_shl_4:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0x7
-; CHECK-NEXT: lsl w0, w8, #4
+; CHECK-NEXT: ubfiz w0, w0, #4, #3
; CHECK-NEXT: ret
%t0 = and i8 %a0, 7
%t1 = shl i8 %t0, 4
@@ -229,8 +227,8 @@ define i8 @test_i8_7_mask_shl_6(i8 %a0) {
define i8 @test_i8_28_mask_shl_1(i8 %a0) {
; CHECK-LABEL: test_i8_28_mask_shl_1:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0x1c
-; CHECK-NEXT: lsl w0, w8, #1
+; CHECK-NEXT: lsl w8, w0, #1
+; CHECK-NEXT: and w0, w8, #0x38
; CHECK-NEXT: ret
%t0 = and i8 %a0, 28
%t1 = shl i8 %t0, 1
@@ -239,8 +237,8 @@ define i8 @test_i8_28_mask_shl_1(i8 %a0) {
define i8 @test_i8_28_mask_shl_2(i8 %a0) {
; CHECK-LABEL: test_i8_28_mask_shl_2:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0x1c
-; CHECK-NEXT: lsl w0, w8, #2
+; CHECK-NEXT: lsl w8, w0, #2
+; CHECK-NEXT: and w0, w8, #0x70
; CHECK-NEXT: ret
%t0 = and i8 %a0, 28
%t1 = shl i8 %t0, 2
@@ -249,8 +247,8 @@ define i8 @test_i8_28_mask_shl_2(i8 %a0) {
define i8 @test_i8_28_mask_shl_3(i8 %a0) {
; CHECK-LABEL: test_i8_28_mask_shl_3:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0x1c
-; CHECK-NEXT: lsl w0, w8, #3
+; CHECK-NEXT: lsl w8, w0, #3
+; CHECK-NEXT: and w0, w8, #0xe0
; CHECK-NEXT: ret
%t0 = and i8 %a0, 28
%t1 = shl i8 %t0, 3
@@ -259,8 +257,8 @@ define i8 @test_i8_28_mask_shl_3(i8 %a0) {
define i8 @test_i8_28_mask_shl_4(i8 %a0) {
; CHECK-LABEL: test_i8_28_mask_shl_4:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0xc
-; CHECK-NEXT: lsl w0, w8, #4
+; CHECK-NEXT: lsl w8, w0, #4
+; CHECK-NEXT: and w0, w8, #0xc0
; CHECK-NEXT: ret
%t0 = and i8 %a0, 28
%t1 = shl i8 %t0, 4
@@ -270,8 +268,8 @@ define i8 @test_i8_28_mask_shl_4(i8 %a0) {
define i8 @test_i8_224_mask_shl_1(i8 %a0) {
; CHECK-LABEL: test_i8_224_mask_shl_1:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0x60
-; CHECK-NEXT: lsl w0, w8, #1
+; CHECK-NEXT: lsl w8, w0, #1
+; CHECK-NEXT: and w0, w8, #0xc0
; CHECK-NEXT: ret
%t0 = and i8 %a0, 224
%t1 = shl i8 %t0, 1
@@ -465,8 +463,7 @@ define i16 @test_i16_65024_mask_ashr_10(i16 %a0) {
define i16 @test_i16_127_mask_shl_1(i16 %a0) {
; CHECK-LABEL: test_i16_127_mask_shl_1:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0x7f
-; CHECK-NEXT: lsl w0, w8, #1
+; CHECK-NEXT: ubfiz w0, w0, #1, #7
; CHECK-NEXT: ret
%t0 = and i16 %a0, 127
%t1 = shl i16 %t0, 1
@@ -475,8 +472,7 @@ define i16 @test_i16_127_mask_shl_1(i16 %a0) {
define i16 @test_i16_127_mask_shl_8(i16 %a0) {
; CHECK-LABEL: test_i16_127_mask_shl_8:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0x7f
-; CHECK-NEXT: lsl w0, w8, #8
+; CHECK-NEXT: ubfiz w0, w0, #8, #7
; CHECK-NEXT: ret
%t0 = and i16 %a0, 127
%t1 = shl i16 %t0, 8
@@ -504,8 +500,8 @@ define i16 @test_i16_127_mask_shl_10(i16 %a0) {
define i16 @test_i16_2032_mask_shl_3(i16 %a0) {
; CHECK-LABEL: test_i16_2032_mask_shl_3:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0x7f0
-; CHECK-NEXT: lsl w0, w8, #3
+; CHECK-NEXT: lsl w8, w0, #3
+; CHECK-NEXT: and w0, w8, #0x3f80
; CHECK-NEXT: ret
%t0 = and i16 %a0, 2032
%t1 = shl i16 %t0, 3
@@ -514,8 +510,8 @@ define i16 @test_i16_2032_mask_shl_3(i16 %a0) {
define i16 @test_i16_2032_mask_shl_4(i16 %a0) {
; CHECK-LABEL: test_i16_2032_mask_shl_4:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0x7f0
-; CHECK-NEXT: lsl w0, w8, #4
+; CHECK-NEXT: lsl w8, w0, #4
+; CHECK-NEXT: and w0, w8, #0x7f00
; CHECK-NEXT: ret
%t0 = and i16 %a0, 2032
%t1 = shl i16 %t0, 4
@@ -524,8 +520,8 @@ define i16 @test_i16_2032_mask_shl_4(i16 %a0) {
define i16 @test_i16_2032_mask_shl_5(i16 %a0) {
; CHECK-LABEL: test_i16_2032_mask_shl_5:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0x7f0
-; CHECK-NEXT: lsl w0, w8, #5
+; CHECK-NEXT: lsl w8, w0, #5
+; CHECK-NEXT: and w0, w8, #0xfe00
; CHECK-NEXT: ret
%t0 = and i16 %a0, 2032
%t1 = shl i16 %t0, 5
@@ -534,8 +530,8 @@ define i16 @test_i16_2032_mask_shl_5(i16 %a0) {
define i16 @test_i16_2032_mask_shl_6(i16 %a0) {
; CHECK-LABEL: test_i16_2032_mask_shl_6:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0x3f0
-; CHECK-NEXT: lsl w0, w8, #6
+; CHECK-NEXT: lsl w8, w0, #6
+; CHECK-NEXT: and w0, w8, #0xfc00
; CHECK-NEXT: ret
%t0 = and i16 %a0, 2032
%t1 = shl i16 %t0, 6
@@ -545,8 +541,8 @@ define i16 @test_i16_2032_mask_shl_6(i16 %a0) {
define i16 @test_i16_65024_mask_shl_1(i16 %a0) {
; CHECK-LABEL: test_i16_65024_mask_shl_1:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0x7e00
-; CHECK-NEXT: lsl w0, w8, #1
+; CHECK-NEXT: lsl w8, w0, #1
+; CHECK-NEXT: and w0, w8, #0xfc00
; CHECK-NEXT: ret
%t0 = and i16 %a0, 65024
%t1 = shl i16 %t0, 1
@@ -740,8 +736,7 @@ define i32 @test_i32_4294836224_mask_ashr_18(i32 %a0) {
define i32 @test_i32_32767_mask_shl_1(i32 %a0) {
; CHECK-LABEL: test_i32_32767_mask_shl_1:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0x7fff
-; CHECK-NEXT: lsl w0, w8, #1
+; CHECK-NEXT: ubfiz w0, w0, #1, #15
; CHECK-NEXT: ret
%t0 = and i32 %a0, 32767
%t1 = shl i32 %t0, 1
@@ -750,8 +745,7 @@ define i32 @test_i32_32767_mask_shl_1(i32 %a0) {
define i32 @test_i32_32767_mask_shl_16(i32 %a0) {
; CHECK-LABEL: test_i32_32767_mask_shl_16:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0x7fff
-; CHECK-NEXT: lsl w0, w8, #16
+; CHECK-NEXT: ubfiz w0, w0, #16, #15
; CHECK-NEXT: ret
%t0 = and i32 %a0, 32767
%t1 = shl i32 %t0, 16
@@ -779,8 +773,8 @@ define i32 @test_i32_32767_mask_shl_18(i32 %a0) {
define i32 @test_i32_8388352_mask_shl_7(i32 %a0) {
; CHECK-LABEL: test_i32_8388352_mask_shl_7:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0x7fff00
-; CHECK-NEXT: lsl w0, w8, #7
+; CHECK-NEXT: lsl w8, w0, #7
+; CHECK-NEXT: and w0, w8, #0x3fff8000
; CHECK-NEXT: ret
%t0 = and i32 %a0, 8388352
%t1 = shl i32 %t0, 7
@@ -789,8 +783,8 @@ define i32 @test_i32_8388352_mask_shl_7(i32 %a0) {
define i32 @test_i32_8388352_mask_shl_8(i32 %a0) {
; CHECK-LABEL: test_i32_8388352_mask_shl_8:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0x7fff00
-; CHECK-NEXT: lsl w0, w8, #8
+; CHECK-NEXT: lsl w8, w0, #8
+; CHECK-NEXT: and w0, w8, #0x7fff0000
; CHECK-NEXT: ret
%t0 = and i32 %a0, 8388352
%t1 = shl i32 %t0, 8
@@ -799,8 +793,8 @@ define i32 @test_i32_8388352_mask_shl_8(i32 %a0) {
define i32 @test_i32_8388352_mask_shl_9(i32 %a0) {
; CHECK-LABEL: test_i32_8388352_mask_shl_9:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0x7fff00
-; CHECK-NEXT: lsl w0, w8, #9
+; CHECK-NEXT: lsl w8, w0, #9
+; CHECK-NEXT: and w0, w8, #0xfffe0000
; CHECK-NEXT: ret
%t0 = and i32 %a0, 8388352
%t1 = shl i32 %t0, 9
@@ -809,8 +803,8 @@ define i32 @test_i32_8388352_mask_shl_9(i32 %a0) {
define i32 @test_i32_8388352_mask_shl_10(i32 %a0) {
; CHECK-LABEL: test_i32_8388352_mask_shl_10:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0x3fff00
-; CHECK-NEXT: lsl w0, w8, #10
+; CHECK-NEXT: lsl w8, w0, #10
+; CHECK-NEXT: and w0, w8, #0xfffc0000
; CHECK-NEXT: ret
%t0 = and i32 %a0, 8388352
%t1 = shl i32 %t0, 10
@@ -820,8 +814,8 @@ define i32 @test_i32_8388352_mask_shl_10(i32 %a0) {
define i32 @test_i32_4294836224_mask_shl_1(i32 %a0) {
; CHECK-LABEL: test_i32_4294836224_mask_shl_1:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0x7ffe0000
-; CHECK-NEXT: lsl w0, w8, #1
+; CHECK-NEXT: lsl w8, w0, #1
+; CHECK-NEXT: and w0, w8, #0xfffc0000
; CHECK-NEXT: ret
%t0 = and i32 %a0, 4294836224
%t1 = shl i32 %t0, 1
@@ -1015,8 +1009,7 @@ define i64 @test_i64_18446744065119617024_mask_ashr_34(i64 %a0) {
define i64 @test_i64_2147483647_mask_shl_1(i64 %a0) {
; CHECK-LABEL: test_i64_2147483647_mask_shl_1:
; CHECK: // %bb.0:
-; CHECK-NEXT: and x8, x0, #0x7fffffff
-; CHECK-NEXT: lsl x0, x8, #1
+; CHECK-NEXT: lsl w0, w0, #1
; CHECK-NEXT: ret
%t0 = and i64 %a0, 2147483647
%t1 = shl i64 %t0, 1
@@ -1054,8 +1047,8 @@ define i64 @test_i64_2147483647_mask_shl_34(i64 %a0) {
define i64 @test_i64_140737488289792_mask_shl_15(i64 %a0) {
; CHECK-LABEL: test_i64_140737488289792_mask_shl_15:
; CHECK: // %bb.0:
-; CHECK-NEXT: and x8, x0, #0x7fffffff0000
-; CHECK-NEXT: lsl x0, x8, #15
+; CHECK-NEXT: lsl x8, x0, #15
+; CHECK-NEXT: and x0, x8, #0x3fffffff80000000
; CHECK-NEXT: ret
%t0 = and i64 %a0, 140737488289792
%t1 = shl i64 %t0, 15
@@ -1064,8 +1057,8 @@ define i64 @test_i64_140737488289792_mask_shl_15(i64 %a0) {
define i64 @test_i64_140737488289792_mask_shl_16(i64 %a0) {
; CHECK-LABEL: test_i64_140737488289792_mask_shl_16:
; CHECK: // %bb.0:
-; CHECK-NEXT: and x8, x0, #0x7fffffff0000
-; CHECK-NEXT: lsl x0, x8, #16
+; CHECK-NEXT: lsl x8, x0, #16
+; CHECK-NEXT: and x0, x8, #0x7fffffff00000000
; CHECK-NEXT: ret
%t0 = and i64 %a0, 140737488289792
%t1 = shl i64 %t0, 16
@@ -1074,8 +1067,8 @@ define i64 @test_i64_140737488289792_mask_shl_16(i64 %a0) {
define i64 @test_i64_140737488289792_mask_shl_17(i64 %a0) {
; CHECK-LABEL: test_i64_140737488289792_mask_shl_17:
; CHECK: // %bb.0:
-; CHECK-NEXT: and x8, x0, #0x7fffffff0000
-; CHECK-NEXT: lsl x0, x8, #17
+; CHECK-NEXT: lsl x8, x0, #17
+; CHECK-NEXT: and x0, x8, #0xfffffffe00000000
; CHECK-NEXT: ret
%t0 = and i64 %a0, 140737488289792
%t1 = shl i64 %t0, 17
@@ -1084,8 +1077,8 @@ define i64 @test_i64_140737488289792_mask_shl_17(i64 %a0) {
define i64 @test_i64_140737488289792_mask_shl_18(i64 %a0) {
; CHECK-LABEL: test_i64_140737488289792_mask_shl_18:
; CHECK: // %bb.0:
-; CHECK-NEXT: and x8, x0, #0x3fffffff0000
-; CHECK-NEXT: lsl x0, x8, #18
+; CHECK-NEXT: lsl x8, x0, #18
+; CHECK-NEXT: and x0, x8, #0xfffffffc00000000
; CHECK-NEXT: ret
%t0 = and i64 %a0, 140737488289792
%t1 = shl i64 %t0, 18
@@ -1095,8 +1088,8 @@ define i64 @test_i64_140737488289792_mask_shl_18(i64 %a0) {
define i64 @test_i64_18446744065119617024_mask_shl_1(i64 %a0) {
; CHECK-LABEL: test_i64_18446744065119617024_mask_shl_1:
; CHECK: // %bb.0:
-; CHECK-NEXT: and x8, x0, #0x7ffffffe00000000
-; CHECK-NEXT: lsl x0, x8, #1
+; CHECK-NEXT: lsl x8, x0, #1
+; CHECK-NEXT: and x0, x8, #0xfffffffc00000000
; CHECK-NEXT: ret
%t0 = and i64 %a0, 18446744065119617024
%t1 = shl i64 %t0, 1
diff --git a/llvm/test/CodeGen/AArch64/extract-bits.ll b/llvm/test/CodeGen/AArch64/extract-bits.ll
index b87157a183835d..aaa6c7eb4a30f4 100644
--- a/llvm/test/CodeGen/AArch64/extract-bits.ll
+++ b/llvm/test/CodeGen/AArch64/extract-bits.ll
@@ -1013,8 +1013,8 @@ define i32 @c1_i32(i32 %arg) nounwind {
define i32 @c2_i32(i32 %arg) nounwind {
; CHECK-LABEL: c2_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: ubfx w8, w0, #19, #10
-; CHECK-NEXT: lsl w0, w8, #2
+; CHECK-NEXT: lsr w8, w0, #17
+; CHECK-NEXT: and w0, w8, #0xffc
; CHECK-NEXT: ret
%tmp0 = lshr i32 %arg, 19
%tmp1 = and i32 %tmp0, 1023
@@ -1063,8 +1063,8 @@ define i64 @c1_i64(i64 %arg) nounwind {
define i64 @c2_i64(i64 %arg) nounwind {
; CHECK-LABEL: c2_i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: ubfx x8, x0, #51, #10
-; CHECK-NEXT: lsl x0, x8, #2
+; CHECK-NEXT: lsr x8, x0, #49
+; CHECK-NEXT: and x0, x8, #0xffc
; CHECK-NEXT: ret
%tmp0 = lshr i64 %arg, 51
%tmp1 = and i64 %tmp0, 1023
@@ -1120,8 +1120,8 @@ define void @c6_i32(i32 %arg, ptr %ptr) nounwind {
define void @c7_i32(i32 %arg, ptr %ptr) nounwind {
; CHECK-LABEL: c7_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: ubfx w8, w0, #19, #10
-; CHECK-NEXT: lsl w8, w8, #2
+; CHECK-NEXT: lsr w8, w0, #17
+; CHECK-NEXT: and w8, w8, #0xffc
; CHECK-NEXT: str w8, [x1]
; CHECK-NEXT: ret
%tmp0 = lshr i32 %arg, 19
@@ -1163,8 +1163,8 @@ define void @c6_i64(i64 %arg, ptr %ptr) nounwind {
define void @c7_i64(i64 %arg, ptr %ptr) nounwind {
; CHECK-LABEL: c7_i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: ubfx x8, x0, #51, #10
-; CHECK-NEXT: lsl x8, x8, #2
+; CHECK-NEXT: lsr x8, x0, #49
+; CHECK-NEXT: and x8, x8, #0xffc
; CHECK-NEXT: str x8, [x1]
; CHECK-NEXT: ret
%tmp0 = lshr i64 %arg, 51
diff --git a/llvm/test/CodeGen/AArch64/fpenv.ll b/llvm/test/CodeGen/AArch64/fpenv.ll
index 3a307f7731037a..3351565d8dd89d 100644
--- a/llvm/test/CodeGen/AArch64/fpenv.ll
+++ b/llvm/test/CodeGen/AArch64/fpenv.ll
@@ -4,11 +4,11 @@
define void @func_set_rounding_dyn(i32 %rm) {
; CHECK-LABEL: func_set_rounding_dyn:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub w9, w0, #1
+; CHECK-NEXT: lsl w9, w0, #22
; CHECK-NEXT: mrs x8, FPCR
-; CHECK-NEXT: and w9, w9, #0x3
; CHECK-NEXT: and x8, x8, #0xffffffffff3fffff
-; CHECK-NEXT: lsl w9, w9, #22
+; CHECK-NEXT: sub w9, w9, #1024, lsl #12 // =4194304
+; CHECK-NEXT: and w9, w9, #0xc00000
; CHECK-NEXT: orr x8, x8, x9
; CHECK-NEXT: msr FPCR, x8
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/swap-compare-operands.ll b/llvm/test/CodeGen/AArch64/swap-compare-operands.ll
index b106e15c23e30a..a45881f2034b21 100644
--- a/llvm/test/CodeGen/AArch64/swap-compare-operands.ll
+++ b/llvm/test/CodeGen/AArch64/swap-compare-operands.ll
@@ -133,8 +133,9 @@ entry:
define i1 @testSwapCmpWithShiftedZeroExtend16_64(i16 %a, i64 %b) {
; CHECK-LABEL: testSwapCmpWithShiftedZeroExtend16_64
-; CHECK: cmp x1, w0, uxth #2
-; CHECK-NEXT: cset w0, lo
+; CHECK: ubfiz x8, x0, #2, #16
+; CHECK: cmp x8, x1
+; CHECK-NEXT: cset w0, hi
entry:
%a64 = zext i16 %a to i64
%shl.0 = shl i64 %a64, 2
@@ -144,8 +145,9 @@ entry:
define i1 @testSwapCmpWithShiftedZeroExtend8_64(i8 %a, i64 %b) {
; CHECK-LABEL: testSwapCmpWithShiftedZeroExtend8_64
-; CHECK: cmp x1, w0, uxtb #4
-; CHECK-NEXT: cset w0, lo
+; CHECK: ubfiz x8, x0, #4, #8
+; CHECK: cmp x8, x1
+; CHECK-NEXT: cset w0, hi
entry:
%a64 = zext i8 %a to i64
%shl.2 = shl i64 %a64, 4
@@ -155,8 +157,9 @@ entry:
define i1 @testSwapCmpWithShiftedZeroExtend16_32(i16 %a, i32 %b) {
; CHECK-LABEL: testSwapCmpWithShiftedZeroExtend16_32
-; CHECK: cmp w1, w0, uxth #3
-; CHECK-NEXT: cset w0, lo
+; CHECK: ubfiz w8, w0, #3, #16
+; CHECK: cmp w8, w1
+; CHECK-NEXT: cset w0, hi
entry:
%a32 = zext i16 %a to i32
%shl = shl i32 %a32, 3
@@ -166,8 +169,9 @@ entry:
define i1 @testSwapCmpWithShiftedZeroExtend8_32(i8 %a, i32 %b) {
; CHECK-LABEL: testSwapCmpWithShiftedZeroExtend8_32
-; CHECK: cmp w1, w0, uxtb #4
-; CHECK-NEXT: cset w0, lo
+; CHECK: ubfiz w8, w0, #4, #8
+; CHECK: cmp w8, w1
+; CHECK-NEXT: cset w0, hi
entry:
%a32 = zext i8 %a to i32
%shl = shl i32 %a32, 4
@@ -177,9 +181,9 @@ entry:
define i1 @testSwapCmpWithTooLargeShiftedZeroExtend8_32(i8 %a, i32 %b) {
; CHECK-LABEL: testSwapCmpWithTooLargeShiftedZeroExtend8_32
-; CHECK: and [[REG:w[0-9]+]], w0, #0xff
-; CHECK: cmp w1, [[REG]], lsl #5
-; CHECK-NEXT: cset w0, lo
+; CHECK: ubfiz w8, w0, #5, #8
+; CHECK: cmp w8, w1
+; CHECK-NEXT: cset w0, hi
entry:
%a32 = zext i8 %a to i32
%shl = shl i32 %a32, 5
@@ -517,7 +521,8 @@ t1:
%shl1 = shl i64 %conv1, 4
%na1 = sub i64 0, %shl1
%cmp1 = icmp ne i64 %na1, %b64
-; CHECK: cmn x3, w1, uxth #4
+; CHECK: ubfiz x8, x1, #4, #16
+; CHECK: cmn x3, x8
br i1 %cmp1, label %t2, label %end
t2:
@@ -525,7 +530,8 @@ t2:
%shl2 = shl i64 %conv2, 3
%na2 = sub i64 0, %shl2
%cmp2 = icmp ne i64 %na2, %b64
-; CHECK: cmn x3, w2, uxtb #3
+; CHECK: ubfiz x8, x2, #3, #8
+; CHECK: cmn x3, x8
br i1 %cmp2, label %t3, label %end
t3:
@@ -533,7 +539,8 @@ t3:
%shl3 = shl i32 %conv3, 2
%na3 = sub i32 0, %shl3
%cmp3 = icmp ne i32 %na3, %b32
-; CHECK: cmn w4, w1, uxth #2
+; CHECK: ubfiz w8, w1, #2, #16
+; CHECK: cmn w4, w8
br i1 %cmp3, label %t4, label %end
t4:
@@ -541,7 +548,8 @@ t4:
%shl4 = shl i32 %conv4, 1
%na4 = sub i32 0, %shl4
%cmp4 = icmp ne i32 %na4, %b32
-; CHECK: cmn w4, w2, uxtb #1
+; CHECK: ubfiz w8, w2, #1, #8
+; CHECK: cmn w4, w8
br i1 %cmp4, label %t5, label %end
t5:
@@ -549,8 +557,8 @@ t5:
%shl5 = shl i32 %conv5, 5
%na5 = sub i32 0, %shl5
%cmp5 = icmp ne i32 %na5, %b32
-; CHECK: and [[REG:w[0-9]+]], w2, #0xff
-; CHECK: cmn w4, [[REG]], lsl #5
+; CHECK: ubfiz w8, w2, #5, #8
+; CHECK: cmn w4, w8
br i1 %cmp5, label %t6, label %end
t6: