[llvm] [AArch64] Combine and and lsl into ubfiz (PR #118974)
Cullen Rhodes via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 8 06:58:26 PST 2025
https://github.com/c-rhodes updated https://github.com/llvm/llvm-project/pull/118974
From c41f6655d5c3e8fcf476fbb3c9181ae72dfda84f Mon Sep 17 00:00:00 2001
From: Cullen Rhodes <cullen.rhodes at arm.com>
Date: Fri, 6 Dec 2024 13:46:58 +0000
Subject: [PATCH 1/6] [AArch64] Combine and and lsl into ubfiz
Fixes #118132.
---
llvm/lib/Target/AArch64/AArch64InstrInfo.td | 9 +++++++++
.../test/CodeGen/AArch64/aarch64-fold-lslfast.ll | 10 ++++------
llvm/test/CodeGen/AArch64/xbfiz.ll | 16 ++++++++++++++++
3 files changed, 29 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index c6f5cdcd1d5fe7..6acac914dbbba6 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -8968,6 +8968,15 @@ def : Pat<(shl (i64 (zext GPR32:$Rn)), (i64 imm0_63:$imm)),
(i64 (i64shift_a imm0_63:$imm)),
(i64 (i64shift_sext_i32 imm0_63:$imm)))>;
+def : Pat<(shl (i64 (and (i64 (anyext GPR32:$Rn)), 0xff)), (i64 imm0_63:$imm)),
+ (UBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32),
+ (i64 (i64shift_a imm0_63:$imm)),
+ (i64 (i64shift_sext_i8 imm0_63:$imm)))>;
+def : Pat<(shl (i64 (and (i64 (anyext GPR32:$Rn)), 0xffff)), (i64 imm0_63:$imm)),
+ (UBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32),
+ (i64 (i64shift_a imm0_63:$imm)),
+ (i64 (i64shift_sext_i16 imm0_63:$imm)))>;
+
// sra patterns have an AddedComplexity of 10, so make sure we have a higher
// AddedComplexity for the following patterns since we want to match sext + sra
// patterns before we attempt to match a single sra node.
diff --git a/llvm/test/CodeGen/AArch64/aarch64-fold-lslfast.ll b/llvm/test/CodeGen/AArch64/aarch64-fold-lslfast.ll
index 63dcafed2320a0..abc5c0876e80b7 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-fold-lslfast.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-fold-lslfast.ll
@@ -13,11 +13,10 @@ define i16 @halfword(ptr %ctx, i32 %xor72) nounwind {
; CHECK0-SDAG-LABEL: halfword:
; CHECK0-SDAG: // %bb.0:
; CHECK0-SDAG-NEXT: stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
-; CHECK0-SDAG-NEXT: // kill: def $w1 killed $w1 def $x1
-; CHECK0-SDAG-NEXT: ubfx x8, x1, #9, #8
+; CHECK0-SDAG-NEXT: lsr w8, w1, #9
; CHECK0-SDAG-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
; CHECK0-SDAG-NEXT: mov x19, x0
-; CHECK0-SDAG-NEXT: lsl x21, x8, #1
+; CHECK0-SDAG-NEXT: ubfiz x21, x8, #1, #8
; CHECK0-SDAG-NEXT: ldrh w20, [x0, x21]
; CHECK0-SDAG-NEXT: bl foo
; CHECK0-SDAG-NEXT: mov w0, w20
@@ -231,10 +230,9 @@ define i16 @multi_use_half_word(ptr %ctx, i32 %xor72) {
; CHECK0-SDAG-NEXT: .cfi_offset w21, -24
; CHECK0-SDAG-NEXT: .cfi_offset w22, -32
; CHECK0-SDAG-NEXT: .cfi_offset w30, -48
-; CHECK0-SDAG-NEXT: // kill: def $w1 killed $w1 def $x1
-; CHECK0-SDAG-NEXT: ubfx x8, x1, #9, #8
+; CHECK0-SDAG-NEXT: lsr w8, w1, #9
; CHECK0-SDAG-NEXT: mov x19, x0
-; CHECK0-SDAG-NEXT: lsl x21, x8, #1
+; CHECK0-SDAG-NEXT: ubfiz x21, x8, #1, #8
; CHECK0-SDAG-NEXT: ldrh w20, [x0, x21]
; CHECK0-SDAG-NEXT: add w22, w20, #1
; CHECK0-SDAG-NEXT: bl foo
diff --git a/llvm/test/CodeGen/AArch64/xbfiz.ll b/llvm/test/CodeGen/AArch64/xbfiz.ll
index b777ddcb7efcc4..05567e34258402 100644
--- a/llvm/test/CodeGen/AArch64/xbfiz.ll
+++ b/llvm/test/CodeGen/AArch64/xbfiz.ll
@@ -69,3 +69,19 @@ define i64 @lsl32_not_ubfiz64(i64 %v) {
%and = and i64 %shl, 4294967295
ret i64 %and
}
+
+define i64 @lsl_zext_i8_i64(i8 %b) {
+; CHECK-LABEL: lsl_zext_i8_i64:
+; CHECK: ubfiz x0, x0, #1, #8
+ %1 = zext i8 %b to i64
+ %2 = shl i64 %1, 1
+ ret i64 %2
+}
+
+define i64 @lsl_zext_i16_i64(i16 %b) {
+; CHECK-LABEL: lsl_zext_i16_i64:
+; CHECK: ubfiz x0, x0, #1, #16
+ %1 = zext i16 %b to i64
+ %2 = shl i64 %1, 1
+ ret i64 %2
+}
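
[Illustration, not part of the patch] A rough C-level sketch of the source that produces the zext-plus-shift IR exercised by the new xbfiz.ll tests; the function names here are made up for the example:

#include <stdint.h>

/* (uint64_t)b << 1 keeps only bits [8:1] of the result, so it can be a
   single "ubfiz x0, x0, #1, #8" (insert an 8-bit field at bit 1). */
uint64_t lsl_zext_i8(uint8_t b) {
  return (uint64_t)b << 1;
}

/* Same idea with a 16-bit source: "ubfiz x0, x0, #1, #16". */
uint64_t lsl_zext_i16(uint16_t h) {
  return (uint64_t)h << 1;
}
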
From 50993210203ac2070a3dbea27258c10629ef71e7 Mon Sep 17 00:00:00 2001
From: Cullen Rhodes <cullen.rhodes at arm.com>
Date: Fri, 13 Dec 2024 12:30:25 +0000
Subject: [PATCH 2/6] Move to target DAG-combine
---
.../Target/AArch64/AArch64ISelLowering.cpp | 34 ++++++
llvm/lib/Target/AArch64/AArch64InstrInfo.td | 9 --
.../CodeGen/AArch64/aarch64-fold-lslfast.ll | 10 +-
.../AArch64/const-shift-of-constmasked.ll | 101 ++++++++----------
llvm/test/CodeGen/AArch64/extract-bits.ll | 16 +--
llvm/test/CodeGen/AArch64/fpenv.ll | 6 +-
.../CodeGen/AArch64/swap-compare-operands.ll | 42 +++++---
7 files changed, 123 insertions(+), 95 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 3ad2905ce52076..5c6b04d637b5c4 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1140,6 +1140,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::SCALAR_TO_VECTOR);
+ setTargetDAGCombine(ISD::SHL);
+
// In case of strict alignment, avoid an excessive number of byte wide stores.
MaxStoresPerMemsetOptSize = 8;
MaxStoresPerMemset =
@@ -26365,6 +26367,36 @@ performScalarToVectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
return NVCAST;
}
+static SDValue performSHLCombine(SDNode *N, SelectionDAG &DAG) {
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::i32 && VT != MVT::i64)
+ return SDValue();
+
+ // If the operand is a bitwise AND with a constant RHS, and the shift is the
+ // only use, we can pull it out of the shift.
+ //
+ // (shl (and X, C1), C2) -> (and (shl X, C2), (shl C1, C2))
+ if (!Op0.hasOneUse() || Op0.getOpcode() != ISD::AND)
+ return SDValue();
+
+ ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
+ ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(Op1);
+ if (!C1 || !C2)
+ return SDValue();
+
+ // Might be folded into shifted add/sub, do not lower.
+ if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ADD ||
+ N->use_begin()->getOpcode() == ISD::SUB))
+ return SDValue();
+
+ SDLoc DL(N);
+ SDValue NewRHS = DAG.getNode(ISD::SHL, DL, VT, Op0.getOperand(1), Op1);
+ SDValue NewShift = DAG.getNode(ISD::SHL, DL, VT, Op0->getOperand(0), Op1);
+ return DAG.getNode(ISD::AND, DL, VT, NewShift, NewRHS);
+}
+
SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -26710,6 +26742,8 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
return performCTLZCombine(N, DAG, Subtarget);
case ISD::SCALAR_TO_VECTOR:
return performScalarToVectorCombine(N, DCI, DAG);
+ case ISD::SHL:
+ return performSHLCombine(N, DAG);
}
return SDValue();
}
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 6acac914dbbba6..c6f5cdcd1d5fe7 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -8968,15 +8968,6 @@ def : Pat<(shl (i64 (zext GPR32:$Rn)), (i64 imm0_63:$imm)),
(i64 (i64shift_a imm0_63:$imm)),
(i64 (i64shift_sext_i32 imm0_63:$imm)))>;
-def : Pat<(shl (i64 (and (i64 (anyext GPR32:$Rn)), 0xff)), (i64 imm0_63:$imm)),
- (UBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32),
- (i64 (i64shift_a imm0_63:$imm)),
- (i64 (i64shift_sext_i8 imm0_63:$imm)))>;
-def : Pat<(shl (i64 (and (i64 (anyext GPR32:$Rn)), 0xffff)), (i64 imm0_63:$imm)),
- (UBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32),
- (i64 (i64shift_a imm0_63:$imm)),
- (i64 (i64shift_sext_i16 imm0_63:$imm)))>;
-
// sra patterns have an AddedComplexity of 10, so make sure we have a higher
// AddedComplexity for the following patterns since we want to match sext + sra
// patterns before we attempt to match a single sra node.
diff --git a/llvm/test/CodeGen/AArch64/aarch64-fold-lslfast.ll b/llvm/test/CodeGen/AArch64/aarch64-fold-lslfast.ll
index abc5c0876e80b7..63dcafed2320a0 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-fold-lslfast.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-fold-lslfast.ll
@@ -13,10 +13,11 @@ define i16 @halfword(ptr %ctx, i32 %xor72) nounwind {
; CHECK0-SDAG-LABEL: halfword:
; CHECK0-SDAG: // %bb.0:
; CHECK0-SDAG-NEXT: stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
-; CHECK0-SDAG-NEXT: lsr w8, w1, #9
+; CHECK0-SDAG-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK0-SDAG-NEXT: ubfx x8, x1, #9, #8
; CHECK0-SDAG-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
; CHECK0-SDAG-NEXT: mov x19, x0
-; CHECK0-SDAG-NEXT: ubfiz x21, x8, #1, #8
+; CHECK0-SDAG-NEXT: lsl x21, x8, #1
; CHECK0-SDAG-NEXT: ldrh w20, [x0, x21]
; CHECK0-SDAG-NEXT: bl foo
; CHECK0-SDAG-NEXT: mov w0, w20
@@ -230,9 +231,10 @@ define i16 @multi_use_half_word(ptr %ctx, i32 %xor72) {
; CHECK0-SDAG-NEXT: .cfi_offset w21, -24
; CHECK0-SDAG-NEXT: .cfi_offset w22, -32
; CHECK0-SDAG-NEXT: .cfi_offset w30, -48
-; CHECK0-SDAG-NEXT: lsr w8, w1, #9
+; CHECK0-SDAG-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK0-SDAG-NEXT: ubfx x8, x1, #9, #8
; CHECK0-SDAG-NEXT: mov x19, x0
-; CHECK0-SDAG-NEXT: ubfiz x21, x8, #1, #8
+; CHECK0-SDAG-NEXT: lsl x21, x8, #1
; CHECK0-SDAG-NEXT: ldrh w20, [x0, x21]
; CHECK0-SDAG-NEXT: add w22, w20, #1
; CHECK0-SDAG-NEXT: bl foo
diff --git a/llvm/test/CodeGen/AArch64/const-shift-of-constmasked.ll b/llvm/test/CodeGen/AArch64/const-shift-of-constmasked.ll
index 66a6745cda8f76..1fffcdda4b4167 100644
--- a/llvm/test/CodeGen/AArch64/const-shift-of-constmasked.ll
+++ b/llvm/test/CodeGen/AArch64/const-shift-of-constmasked.ll
@@ -190,8 +190,7 @@ define i8 @test_i8_224_mask_ashr_6(i8 %a0) {
define i8 @test_i8_7_mask_shl_1(i8 %a0) {
; CHECK-LABEL: test_i8_7_mask_shl_1:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0x7
-; CHECK-NEXT: lsl w0, w8, #1
+; CHECK-NEXT: ubfiz w0, w0, #1, #3
; CHECK-NEXT: ret
%t0 = and i8 %a0, 7
%t1 = shl i8 %t0, 1
@@ -200,8 +199,7 @@ define i8 @test_i8_7_mask_shl_1(i8 %a0) {
define i8 @test_i8_7_mask_shl_4(i8 %a0) {
; CHECK-LABEL: test_i8_7_mask_shl_4:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0x7
-; CHECK-NEXT: lsl w0, w8, #4
+; CHECK-NEXT: ubfiz w0, w0, #4, #3
; CHECK-NEXT: ret
%t0 = and i8 %a0, 7
%t1 = shl i8 %t0, 4
@@ -229,8 +227,8 @@ define i8 @test_i8_7_mask_shl_6(i8 %a0) {
define i8 @test_i8_28_mask_shl_1(i8 %a0) {
; CHECK-LABEL: test_i8_28_mask_shl_1:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0x1c
-; CHECK-NEXT: lsl w0, w8, #1
+; CHECK-NEXT: lsl w8, w0, #1
+; CHECK-NEXT: and w0, w8, #0x38
; CHECK-NEXT: ret
%t0 = and i8 %a0, 28
%t1 = shl i8 %t0, 1
@@ -239,8 +237,8 @@ define i8 @test_i8_28_mask_shl_1(i8 %a0) {
define i8 @test_i8_28_mask_shl_2(i8 %a0) {
; CHECK-LABEL: test_i8_28_mask_shl_2:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0x1c
-; CHECK-NEXT: lsl w0, w8, #2
+; CHECK-NEXT: lsl w8, w0, #2
+; CHECK-NEXT: and w0, w8, #0x70
; CHECK-NEXT: ret
%t0 = and i8 %a0, 28
%t1 = shl i8 %t0, 2
@@ -249,8 +247,8 @@ define i8 @test_i8_28_mask_shl_2(i8 %a0) {
define i8 @test_i8_28_mask_shl_3(i8 %a0) {
; CHECK-LABEL: test_i8_28_mask_shl_3:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0x1c
-; CHECK-NEXT: lsl w0, w8, #3
+; CHECK-NEXT: lsl w8, w0, #3
+; CHECK-NEXT: and w0, w8, #0xe0
; CHECK-NEXT: ret
%t0 = and i8 %a0, 28
%t1 = shl i8 %t0, 3
@@ -259,8 +257,8 @@ define i8 @test_i8_28_mask_shl_3(i8 %a0) {
define i8 @test_i8_28_mask_shl_4(i8 %a0) {
; CHECK-LABEL: test_i8_28_mask_shl_4:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0xc
-; CHECK-NEXT: lsl w0, w8, #4
+; CHECK-NEXT: lsl w8, w0, #4
+; CHECK-NEXT: and w0, w8, #0xc0
; CHECK-NEXT: ret
%t0 = and i8 %a0, 28
%t1 = shl i8 %t0, 4
@@ -270,8 +268,8 @@ define i8 @test_i8_28_mask_shl_4(i8 %a0) {
define i8 @test_i8_224_mask_shl_1(i8 %a0) {
; CHECK-LABEL: test_i8_224_mask_shl_1:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0x60
-; CHECK-NEXT: lsl w0, w8, #1
+; CHECK-NEXT: lsl w8, w0, #1
+; CHECK-NEXT: and w0, w8, #0xc0
; CHECK-NEXT: ret
%t0 = and i8 %a0, 224
%t1 = shl i8 %t0, 1
@@ -465,8 +463,7 @@ define i16 @test_i16_65024_mask_ashr_10(i16 %a0) {
define i16 @test_i16_127_mask_shl_1(i16 %a0) {
; CHECK-LABEL: test_i16_127_mask_shl_1:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0x7f
-; CHECK-NEXT: lsl w0, w8, #1
+; CHECK-NEXT: ubfiz w0, w0, #1, #7
; CHECK-NEXT: ret
%t0 = and i16 %a0, 127
%t1 = shl i16 %t0, 1
@@ -475,8 +472,7 @@ define i16 @test_i16_127_mask_shl_1(i16 %a0) {
define i16 @test_i16_127_mask_shl_8(i16 %a0) {
; CHECK-LABEL: test_i16_127_mask_shl_8:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0x7f
-; CHECK-NEXT: lsl w0, w8, #8
+; CHECK-NEXT: ubfiz w0, w0, #8, #7
; CHECK-NEXT: ret
%t0 = and i16 %a0, 127
%t1 = shl i16 %t0, 8
@@ -504,8 +500,8 @@ define i16 @test_i16_127_mask_shl_10(i16 %a0) {
define i16 @test_i16_2032_mask_shl_3(i16 %a0) {
; CHECK-LABEL: test_i16_2032_mask_shl_3:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0x7f0
-; CHECK-NEXT: lsl w0, w8, #3
+; CHECK-NEXT: lsl w8, w0, #3
+; CHECK-NEXT: and w0, w8, #0x3f80
; CHECK-NEXT: ret
%t0 = and i16 %a0, 2032
%t1 = shl i16 %t0, 3
@@ -514,8 +510,8 @@ define i16 @test_i16_2032_mask_shl_3(i16 %a0) {
define i16 @test_i16_2032_mask_shl_4(i16 %a0) {
; CHECK-LABEL: test_i16_2032_mask_shl_4:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0x7f0
-; CHECK-NEXT: lsl w0, w8, #4
+; CHECK-NEXT: lsl w8, w0, #4
+; CHECK-NEXT: and w0, w8, #0x7f00
; CHECK-NEXT: ret
%t0 = and i16 %a0, 2032
%t1 = shl i16 %t0, 4
@@ -524,8 +520,8 @@ define i16 @test_i16_2032_mask_shl_4(i16 %a0) {
define i16 @test_i16_2032_mask_shl_5(i16 %a0) {
; CHECK-LABEL: test_i16_2032_mask_shl_5:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0x7f0
-; CHECK-NEXT: lsl w0, w8, #5
+; CHECK-NEXT: lsl w8, w0, #5
+; CHECK-NEXT: and w0, w8, #0xfe00
; CHECK-NEXT: ret
%t0 = and i16 %a0, 2032
%t1 = shl i16 %t0, 5
@@ -534,8 +530,8 @@ define i16 @test_i16_2032_mask_shl_5(i16 %a0) {
define i16 @test_i16_2032_mask_shl_6(i16 %a0) {
; CHECK-LABEL: test_i16_2032_mask_shl_6:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0x3f0
-; CHECK-NEXT: lsl w0, w8, #6
+; CHECK-NEXT: lsl w8, w0, #6
+; CHECK-NEXT: and w0, w8, #0xfc00
; CHECK-NEXT: ret
%t0 = and i16 %a0, 2032
%t1 = shl i16 %t0, 6
@@ -545,8 +541,8 @@ define i16 @test_i16_2032_mask_shl_6(i16 %a0) {
define i16 @test_i16_65024_mask_shl_1(i16 %a0) {
; CHECK-LABEL: test_i16_65024_mask_shl_1:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0x7e00
-; CHECK-NEXT: lsl w0, w8, #1
+; CHECK-NEXT: lsl w8, w0, #1
+; CHECK-NEXT: and w0, w8, #0xfc00
; CHECK-NEXT: ret
%t0 = and i16 %a0, 65024
%t1 = shl i16 %t0, 1
@@ -740,8 +736,7 @@ define i32 @test_i32_4294836224_mask_ashr_18(i32 %a0) {
define i32 @test_i32_32767_mask_shl_1(i32 %a0) {
; CHECK-LABEL: test_i32_32767_mask_shl_1:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0x7fff
-; CHECK-NEXT: lsl w0, w8, #1
+; CHECK-NEXT: ubfiz w0, w0, #1, #15
; CHECK-NEXT: ret
%t0 = and i32 %a0, 32767
%t1 = shl i32 %t0, 1
@@ -750,8 +745,7 @@ define i32 @test_i32_32767_mask_shl_1(i32 %a0) {
define i32 @test_i32_32767_mask_shl_16(i32 %a0) {
; CHECK-LABEL: test_i32_32767_mask_shl_16:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0x7fff
-; CHECK-NEXT: lsl w0, w8, #16
+; CHECK-NEXT: ubfiz w0, w0, #16, #15
; CHECK-NEXT: ret
%t0 = and i32 %a0, 32767
%t1 = shl i32 %t0, 16
@@ -779,8 +773,8 @@ define i32 @test_i32_32767_mask_shl_18(i32 %a0) {
define i32 @test_i32_8388352_mask_shl_7(i32 %a0) {
; CHECK-LABEL: test_i32_8388352_mask_shl_7:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0x7fff00
-; CHECK-NEXT: lsl w0, w8, #7
+; CHECK-NEXT: lsl w8, w0, #7
+; CHECK-NEXT: and w0, w8, #0x3fff8000
; CHECK-NEXT: ret
%t0 = and i32 %a0, 8388352
%t1 = shl i32 %t0, 7
@@ -789,8 +783,8 @@ define i32 @test_i32_8388352_mask_shl_7(i32 %a0) {
define i32 @test_i32_8388352_mask_shl_8(i32 %a0) {
; CHECK-LABEL: test_i32_8388352_mask_shl_8:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0x7fff00
-; CHECK-NEXT: lsl w0, w8, #8
+; CHECK-NEXT: lsl w8, w0, #8
+; CHECK-NEXT: and w0, w8, #0x7fff0000
; CHECK-NEXT: ret
%t0 = and i32 %a0, 8388352
%t1 = shl i32 %t0, 8
@@ -799,8 +793,8 @@ define i32 @test_i32_8388352_mask_shl_8(i32 %a0) {
define i32 @test_i32_8388352_mask_shl_9(i32 %a0) {
; CHECK-LABEL: test_i32_8388352_mask_shl_9:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0x7fff00
-; CHECK-NEXT: lsl w0, w8, #9
+; CHECK-NEXT: lsl w8, w0, #9
+; CHECK-NEXT: and w0, w8, #0xfffe0000
; CHECK-NEXT: ret
%t0 = and i32 %a0, 8388352
%t1 = shl i32 %t0, 9
@@ -809,8 +803,8 @@ define i32 @test_i32_8388352_mask_shl_9(i32 %a0) {
define i32 @test_i32_8388352_mask_shl_10(i32 %a0) {
; CHECK-LABEL: test_i32_8388352_mask_shl_10:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0x3fff00
-; CHECK-NEXT: lsl w0, w8, #10
+; CHECK-NEXT: lsl w8, w0, #10
+; CHECK-NEXT: and w0, w8, #0xfffc0000
; CHECK-NEXT: ret
%t0 = and i32 %a0, 8388352
%t1 = shl i32 %t0, 10
@@ -820,8 +814,8 @@ define i32 @test_i32_8388352_mask_shl_10(i32 %a0) {
define i32 @test_i32_4294836224_mask_shl_1(i32 %a0) {
; CHECK-LABEL: test_i32_4294836224_mask_shl_1:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0x7ffe0000
-; CHECK-NEXT: lsl w0, w8, #1
+; CHECK-NEXT: lsl w8, w0, #1
+; CHECK-NEXT: and w0, w8, #0xfffc0000
; CHECK-NEXT: ret
%t0 = and i32 %a0, 4294836224
%t1 = shl i32 %t0, 1
@@ -1015,8 +1009,7 @@ define i64 @test_i64_18446744065119617024_mask_ashr_34(i64 %a0) {
define i64 @test_i64_2147483647_mask_shl_1(i64 %a0) {
; CHECK-LABEL: test_i64_2147483647_mask_shl_1:
; CHECK: // %bb.0:
-; CHECK-NEXT: and x8, x0, #0x7fffffff
-; CHECK-NEXT: lsl x0, x8, #1
+; CHECK-NEXT: lsl w0, w0, #1
; CHECK-NEXT: ret
%t0 = and i64 %a0, 2147483647
%t1 = shl i64 %t0, 1
@@ -1054,8 +1047,8 @@ define i64 @test_i64_2147483647_mask_shl_34(i64 %a0) {
define i64 @test_i64_140737488289792_mask_shl_15(i64 %a0) {
; CHECK-LABEL: test_i64_140737488289792_mask_shl_15:
; CHECK: // %bb.0:
-; CHECK-NEXT: and x8, x0, #0x7fffffff0000
-; CHECK-NEXT: lsl x0, x8, #15
+; CHECK-NEXT: lsl x8, x0, #15
+; CHECK-NEXT: and x0, x8, #0x3fffffff80000000
; CHECK-NEXT: ret
%t0 = and i64 %a0, 140737488289792
%t1 = shl i64 %t0, 15
@@ -1064,8 +1057,8 @@ define i64 @test_i64_140737488289792_mask_shl_15(i64 %a0) {
define i64 @test_i64_140737488289792_mask_shl_16(i64 %a0) {
; CHECK-LABEL: test_i64_140737488289792_mask_shl_16:
; CHECK: // %bb.0:
-; CHECK-NEXT: and x8, x0, #0x7fffffff0000
-; CHECK-NEXT: lsl x0, x8, #16
+; CHECK-NEXT: lsl x8, x0, #16
+; CHECK-NEXT: and x0, x8, #0x7fffffff00000000
; CHECK-NEXT: ret
%t0 = and i64 %a0, 140737488289792
%t1 = shl i64 %t0, 16
@@ -1074,8 +1067,8 @@ define i64 @test_i64_140737488289792_mask_shl_16(i64 %a0) {
define i64 @test_i64_140737488289792_mask_shl_17(i64 %a0) {
; CHECK-LABEL: test_i64_140737488289792_mask_shl_17:
; CHECK: // %bb.0:
-; CHECK-NEXT: and x8, x0, #0x7fffffff0000
-; CHECK-NEXT: lsl x0, x8, #17
+; CHECK-NEXT: lsl x8, x0, #17
+; CHECK-NEXT: and x0, x8, #0xfffffffe00000000
; CHECK-NEXT: ret
%t0 = and i64 %a0, 140737488289792
%t1 = shl i64 %t0, 17
@@ -1084,8 +1077,8 @@ define i64 @test_i64_140737488289792_mask_shl_17(i64 %a0) {
define i64 @test_i64_140737488289792_mask_shl_18(i64 %a0) {
; CHECK-LABEL: test_i64_140737488289792_mask_shl_18:
; CHECK: // %bb.0:
-; CHECK-NEXT: and x8, x0, #0x3fffffff0000
-; CHECK-NEXT: lsl x0, x8, #18
+; CHECK-NEXT: lsl x8, x0, #18
+; CHECK-NEXT: and x0, x8, #0xfffffffc00000000
; CHECK-NEXT: ret
%t0 = and i64 %a0, 140737488289792
%t1 = shl i64 %t0, 18
@@ -1095,8 +1088,8 @@ define i64 @test_i64_140737488289792_mask_shl_18(i64 %a0) {
define i64 @test_i64_18446744065119617024_mask_shl_1(i64 %a0) {
; CHECK-LABEL: test_i64_18446744065119617024_mask_shl_1:
; CHECK: // %bb.0:
-; CHECK-NEXT: and x8, x0, #0x7ffffffe00000000
-; CHECK-NEXT: lsl x0, x8, #1
+; CHECK-NEXT: lsl x8, x0, #1
+; CHECK-NEXT: and x0, x8, #0xfffffffc00000000
; CHECK-NEXT: ret
%t0 = and i64 %a0, 18446744065119617024
%t1 = shl i64 %t0, 1
diff --git a/llvm/test/CodeGen/AArch64/extract-bits.ll b/llvm/test/CodeGen/AArch64/extract-bits.ll
index b87157a183835d..aaa6c7eb4a30f4 100644
--- a/llvm/test/CodeGen/AArch64/extract-bits.ll
+++ b/llvm/test/CodeGen/AArch64/extract-bits.ll
@@ -1013,8 +1013,8 @@ define i32 @c1_i32(i32 %arg) nounwind {
define i32 @c2_i32(i32 %arg) nounwind {
; CHECK-LABEL: c2_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: ubfx w8, w0, #19, #10
-; CHECK-NEXT: lsl w0, w8, #2
+; CHECK-NEXT: lsr w8, w0, #17
+; CHECK-NEXT: and w0, w8, #0xffc
; CHECK-NEXT: ret
%tmp0 = lshr i32 %arg, 19
%tmp1 = and i32 %tmp0, 1023
@@ -1063,8 +1063,8 @@ define i64 @c1_i64(i64 %arg) nounwind {
define i64 @c2_i64(i64 %arg) nounwind {
; CHECK-LABEL: c2_i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: ubfx x8, x0, #51, #10
-; CHECK-NEXT: lsl x0, x8, #2
+; CHECK-NEXT: lsr x8, x0, #49
+; CHECK-NEXT: and x0, x8, #0xffc
; CHECK-NEXT: ret
%tmp0 = lshr i64 %arg, 51
%tmp1 = and i64 %tmp0, 1023
@@ -1120,8 +1120,8 @@ define void @c6_i32(i32 %arg, ptr %ptr) nounwind {
define void @c7_i32(i32 %arg, ptr %ptr) nounwind {
; CHECK-LABEL: c7_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: ubfx w8, w0, #19, #10
-; CHECK-NEXT: lsl w8, w8, #2
+; CHECK-NEXT: lsr w8, w0, #17
+; CHECK-NEXT: and w8, w8, #0xffc
; CHECK-NEXT: str w8, [x1]
; CHECK-NEXT: ret
%tmp0 = lshr i32 %arg, 19
@@ -1163,8 +1163,8 @@ define void @c6_i64(i64 %arg, ptr %ptr) nounwind {
define void @c7_i64(i64 %arg, ptr %ptr) nounwind {
; CHECK-LABEL: c7_i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: ubfx x8, x0, #51, #10
-; CHECK-NEXT: lsl x8, x8, #2
+; CHECK-NEXT: lsr x8, x0, #49
+; CHECK-NEXT: and x8, x8, #0xffc
; CHECK-NEXT: str x8, [x1]
; CHECK-NEXT: ret
%tmp0 = lshr i64 %arg, 51
diff --git a/llvm/test/CodeGen/AArch64/fpenv.ll b/llvm/test/CodeGen/AArch64/fpenv.ll
index 3a307f7731037a..3351565d8dd89d 100644
--- a/llvm/test/CodeGen/AArch64/fpenv.ll
+++ b/llvm/test/CodeGen/AArch64/fpenv.ll
@@ -4,11 +4,11 @@
define void @func_set_rounding_dyn(i32 %rm) {
; CHECK-LABEL: func_set_rounding_dyn:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub w9, w0, #1
+; CHECK-NEXT: lsl w9, w0, #22
; CHECK-NEXT: mrs x8, FPCR
-; CHECK-NEXT: and w9, w9, #0x3
; CHECK-NEXT: and x8, x8, #0xffffffffff3fffff
-; CHECK-NEXT: lsl w9, w9, #22
+; CHECK-NEXT: sub w9, w9, #1024, lsl #12 // =4194304
+; CHECK-NEXT: and w9, w9, #0xc00000
; CHECK-NEXT: orr x8, x8, x9
; CHECK-NEXT: msr FPCR, x8
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/swap-compare-operands.ll b/llvm/test/CodeGen/AArch64/swap-compare-operands.ll
index b106e15c23e30a..a45881f2034b21 100644
--- a/llvm/test/CodeGen/AArch64/swap-compare-operands.ll
+++ b/llvm/test/CodeGen/AArch64/swap-compare-operands.ll
@@ -133,8 +133,9 @@ entry:
define i1 @testSwapCmpWithShiftedZeroExtend16_64(i16 %a, i64 %b) {
; CHECK-LABEL: testSwapCmpWithShiftedZeroExtend16_64
-; CHECK: cmp x1, w0, uxth #2
-; CHECK-NEXT: cset w0, lo
+; CHECK: ubfiz x8, x0, #2, #16
+; CHECK: cmp x8, x1
+; CHECK-NEXT: cset w0, hi
entry:
%a64 = zext i16 %a to i64
%shl.0 = shl i64 %a64, 2
@@ -144,8 +145,9 @@ entry:
define i1 @testSwapCmpWithShiftedZeroExtend8_64(i8 %a, i64 %b) {
; CHECK-LABEL: testSwapCmpWithShiftedZeroExtend8_64
-; CHECK: cmp x1, w0, uxtb #4
-; CHECK-NEXT: cset w0, lo
+; CHECK: ubfiz x8, x0, #4, #8
+; CHECK: cmp x8, x1
+; CHECK-NEXT: cset w0, hi
entry:
%a64 = zext i8 %a to i64
%shl.2 = shl i64 %a64, 4
@@ -155,8 +157,9 @@ entry:
define i1 @testSwapCmpWithShiftedZeroExtend16_32(i16 %a, i32 %b) {
; CHECK-LABEL: testSwapCmpWithShiftedZeroExtend16_32
-; CHECK: cmp w1, w0, uxth #3
-; CHECK-NEXT: cset w0, lo
+; CHECK: ubfiz w8, w0, #3, #16
+; CHECK: cmp w8, w1
+; CHECK-NEXT: cset w0, hi
entry:
%a32 = zext i16 %a to i32
%shl = shl i32 %a32, 3
@@ -166,8 +169,9 @@ entry:
define i1 @testSwapCmpWithShiftedZeroExtend8_32(i8 %a, i32 %b) {
; CHECK-LABEL: testSwapCmpWithShiftedZeroExtend8_32
-; CHECK: cmp w1, w0, uxtb #4
-; CHECK-NEXT: cset w0, lo
+; CHECK: ubfiz w8, w0, #4, #8
+; CHECK: cmp w8, w1
+; CHECK-NEXT: cset w0, hi
entry:
%a32 = zext i8 %a to i32
%shl = shl i32 %a32, 4
@@ -177,9 +181,9 @@ entry:
define i1 @testSwapCmpWithTooLargeShiftedZeroExtend8_32(i8 %a, i32 %b) {
; CHECK-LABEL: testSwapCmpWithTooLargeShiftedZeroExtend8_32
-; CHECK: and [[REG:w[0-9]+]], w0, #0xff
-; CHECK: cmp w1, [[REG]], lsl #5
-; CHECK-NEXT: cset w0, lo
+; CHECK: ubfiz w8, w0, #5, #8
+; CHECK: cmp w8, w1
+; CHECK-NEXT: cset w0, hi
entry:
%a32 = zext i8 %a to i32
%shl = shl i32 %a32, 5
@@ -517,7 +521,8 @@ t1:
%shl1 = shl i64 %conv1, 4
%na1 = sub i64 0, %shl1
%cmp1 = icmp ne i64 %na1, %b64
-; CHECK: cmn x3, w1, uxth #4
+; CHECK: ubfiz x8, x1, #4, #16
+; CHECK: cmn x3, x8
br i1 %cmp1, label %t2, label %end
t2:
@@ -525,7 +530,8 @@ t2:
%shl2 = shl i64 %conv2, 3
%na2 = sub i64 0, %shl2
%cmp2 = icmp ne i64 %na2, %b64
-; CHECK: cmn x3, w2, uxtb #3
+; CHECK: ubfiz x8, x2, #3, #8
+; CHECK: cmn x3, x8
br i1 %cmp2, label %t3, label %end
t3:
@@ -533,7 +539,8 @@ t3:
%shl3 = shl i32 %conv3, 2
%na3 = sub i32 0, %shl3
%cmp3 = icmp ne i32 %na3, %b32
-; CHECK: cmn w4, w1, uxth #2
+; CHECK: ubfiz w8, w1, #2, #16
+; CHECK: cmn w4, w8
br i1 %cmp3, label %t4, label %end
t4:
@@ -541,7 +548,8 @@ t4:
%shl4 = shl i32 %conv4, 1
%na4 = sub i32 0, %shl4
%cmp4 = icmp ne i32 %na4, %b32
-; CHECK: cmn w4, w2, uxtb #1
+; CHECK: ubfiz w8, w2, #1, #8
+; CHECK: cmn w4, w8
br i1 %cmp4, label %t5, label %end
t5:
@@ -549,8 +557,8 @@ t5:
%shl5 = shl i32 %conv5, 5
%na5 = sub i32 0, %shl5
%cmp5 = icmp ne i32 %na5, %b32
-; CHECK: and [[REG:w[0-9]+]], w2, #0xff
-; CHECK: cmn w4, [[REG]], lsl #5
+; CHECK: ubfiz w8, w2, #5, #8
+; CHECK: cmn w4, w8
br i1 %cmp5, label %t6, label %end
t6:
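
[Aside, not part of the patch] The rewrite this combine performs is just the unsigned identity (X & C1) << C2 == (X << C2) & (C1 << C2). A small C sketch, with an assumed mask of 0x7 and shift of 1 to mirror test_i8_7_mask_shl_1 above:

#include <assert.h>
#include <stdint.h>

/* Original form: (shl (and X, 0x7), 1). */
static uint32_t masked_then_shifted(uint32_t x) {
  return (x & 0x7u) << 1;
}

/* Canonical form after the combine: (and (shl X, 1), 0xe). */
static uint32_t shifted_then_masked(uint32_t x) {
  return (x << 1) & (0x7u << 1);
}

int main(void) {
  for (uint32_t x = 0; x < (1u << 16); ++x)
    assert(masked_then_shifted(x) == shifted_then_masked(x));
  return 0;
}

In the canonical form the mask is a contiguous run of bits whose lowest set bit sits at the shift amount, which is the shape the existing UBFIZ isel patterns match (hence the new "ubfiz w0, w0, #1, #3" in const-shift-of-constmasked.ll), while masks that do not start at the shift amount simply stay as lsl + and.
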
From 69be8118952f76baf2accd695aee14121ab67b59 Mon Sep 17 00:00:00 2001
From: Cullen Rhodes <cullen.rhodes at arm.com>
Date: Fri, 13 Dec 2024 14:44:37 +0000
Subject: [PATCH 3/6] Exclude more uses of SHL that might be combined
---
.../Target/AArch64/AArch64ISelLowering.cpp | 8 ++--
.../CodeGen/AArch64/swap-compare-operands.ll | 42 ++++++++-----------
2 files changed, 22 insertions(+), 28 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 5c6b04d637b5c4..0cae5a536b6f39 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -26386,9 +26386,11 @@ static SDValue performSHLCombine(SDNode *N, SelectionDAG &DAG) {
if (!C1 || !C2)
return SDValue();
- // Might be folded into shifted add/sub, do not lower.
- if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ADD ||
- N->use_begin()->getOpcode() == ISD::SUB))
+ // Might be folded into shifted op, do not lower.
+ unsigned UseOpc = N->use_begin()->getOpcode();
+ if (N->hasOneUse() &&
+ (UseOpc == ISD::ADD || UseOpc == ISD::SUB || UseOpc == ISD::SETCC ||
+ UseOpc == AArch64ISD::ADDS || UseOpc == AArch64ISD::SUBS))
return SDValue();
SDLoc DL(N);
diff --git a/llvm/test/CodeGen/AArch64/swap-compare-operands.ll b/llvm/test/CodeGen/AArch64/swap-compare-operands.ll
index a45881f2034b21..b106e15c23e30a 100644
--- a/llvm/test/CodeGen/AArch64/swap-compare-operands.ll
+++ b/llvm/test/CodeGen/AArch64/swap-compare-operands.ll
@@ -133,9 +133,8 @@ entry:
define i1 @testSwapCmpWithShiftedZeroExtend16_64(i16 %a, i64 %b) {
; CHECK-LABEL: testSwapCmpWithShiftedZeroExtend16_64
-; CHECK: ubfiz x8, x0, #2, #16
-; CHECK: cmp x8, x1
-; CHECK-NEXT: cset w0, hi
+; CHECK: cmp x1, w0, uxth #2
+; CHECK-NEXT: cset w0, lo
entry:
%a64 = zext i16 %a to i64
%shl.0 = shl i64 %a64, 2
@@ -145,9 +144,8 @@ entry:
define i1 @testSwapCmpWithShiftedZeroExtend8_64(i8 %a, i64 %b) {
; CHECK-LABEL: testSwapCmpWithShiftedZeroExtend8_64
-; CHECK: ubfiz x8, x0, #4, #8
-; CHECK: cmp x8, x1
-; CHECK-NEXT: cset w0, hi
+; CHECK: cmp x1, w0, uxtb #4
+; CHECK-NEXT: cset w0, lo
entry:
%a64 = zext i8 %a to i64
%shl.2 = shl i64 %a64, 4
@@ -157,9 +155,8 @@ entry:
define i1 @testSwapCmpWithShiftedZeroExtend16_32(i16 %a, i32 %b) {
; CHECK-LABEL: testSwapCmpWithShiftedZeroExtend16_32
-; CHECK: ubfiz w8, w0, #3, #16
-; CHECK: cmp w8, w1
-; CHECK-NEXT: cset w0, hi
+; CHECK: cmp w1, w0, uxth #3
+; CHECK-NEXT: cset w0, lo
entry:
%a32 = zext i16 %a to i32
%shl = shl i32 %a32, 3
@@ -169,9 +166,8 @@ entry:
define i1 @testSwapCmpWithShiftedZeroExtend8_32(i8 %a, i32 %b) {
; CHECK-LABEL: testSwapCmpWithShiftedZeroExtend8_32
-; CHECK: ubfiz w8, w0, #4, #8
-; CHECK: cmp w8, w1
-; CHECK-NEXT: cset w0, hi
+; CHECK: cmp w1, w0, uxtb #4
+; CHECK-NEXT: cset w0, lo
entry:
%a32 = zext i8 %a to i32
%shl = shl i32 %a32, 4
@@ -181,9 +177,9 @@ entry:
define i1 @testSwapCmpWithTooLargeShiftedZeroExtend8_32(i8 %a, i32 %b) {
; CHECK-LABEL: testSwapCmpWithTooLargeShiftedZeroExtend8_32
-; CHECK: ubfiz w8, w0, #5, #8
-; CHECK: cmp w8, w1
-; CHECK-NEXT: cset w0, hi
+; CHECK: and [[REG:w[0-9]+]], w0, #0xff
+; CHECK: cmp w1, [[REG]], lsl #5
+; CHECK-NEXT: cset w0, lo
entry:
%a32 = zext i8 %a to i32
%shl = shl i32 %a32, 5
@@ -521,8 +517,7 @@ t1:
%shl1 = shl i64 %conv1, 4
%na1 = sub i64 0, %shl1
%cmp1 = icmp ne i64 %na1, %b64
-; CHECK: ubfiz x8, x1, #4, #16
-; CHECK: cmn x3, x8
+; CHECK: cmn x3, w1, uxth #4
br i1 %cmp1, label %t2, label %end
t2:
@@ -530,8 +525,7 @@ t2:
%shl2 = shl i64 %conv2, 3
%na2 = sub i64 0, %shl2
%cmp2 = icmp ne i64 %na2, %b64
-; CHECK: ubfiz x8, x2, #3, #8
-; CHECK: cmn x3, x8
+; CHECK: cmn x3, w2, uxtb #3
br i1 %cmp2, label %t3, label %end
t3:
@@ -539,8 +533,7 @@ t3:
%shl3 = shl i32 %conv3, 2
%na3 = sub i32 0, %shl3
%cmp3 = icmp ne i32 %na3, %b32
-; CHECK: ubfiz w8, w1, #2, #16
-; CHECK: cmn w4, w8
+; CHECK: cmn w4, w1, uxth #2
br i1 %cmp3, label %t4, label %end
t4:
@@ -548,8 +541,7 @@ t4:
%shl4 = shl i32 %conv4, 1
%na4 = sub i32 0, %shl4
%cmp4 = icmp ne i32 %na4, %b32
-; CHECK: ubfiz w8, w2, #1, #8
-; CHECK: cmn w4, w8
+; CHECK: cmn w4, w2, uxtb #1
br i1 %cmp4, label %t5, label %end
t5:
@@ -557,8 +549,8 @@ t5:
%shl5 = shl i32 %conv5, 5
%na5 = sub i32 0, %shl5
%cmp5 = icmp ne i32 %na5, %b32
-; CHECK: ubfiz w8, w2, #5, #8
-; CHECK: cmn w4, w8
+; CHECK: and [[REG:w[0-9]+]], w2, #0xff
+; CHECK: cmn w4, [[REG]], lsl #5
br i1 %cmp5, label %t6, label %end
t6:
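
[Illustration, not part of the patch] The kind of case this extra bail-out protects; the function name is made up. When the masked shift only feeds a compare, AArch64 folds the zero-extend and shift straight into the compare operand, so forming a separate ubfiz first would cost an extra instruction:

#include <stdint.h>

/* zext i16 -> i64, shl by 2, then an unsigned compare: the whole left-hand
   side folds into the compare as an extended, shifted register operand
   ("cmp x1, w0, uxth #2" in swap-compare-operands.ll), which is why the
   combine skips a shl whose only user is an add/sub/compare-style node. */
int cmp_shifted_zext(uint16_t a, uint64_t b) {
  return b < ((uint64_t)a << 2);
}
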
From fdbe823e14a1cf12a2358bcb141de5c469bc3b01 Mon Sep 17 00:00:00 2001
From: Cullen Rhodes <cullen.rhodes at arm.com>
Date: Fri, 13 Dec 2024 16:50:25 +0000
Subject: [PATCH 4/6] address comments
---
.../Target/AArch64/AArch64ISelLowering.cpp | 35 ++++++++++---------
1 file changed, 19 insertions(+), 16 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 0cae5a536b6f39..13871b149c2b91 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -26367,35 +26367,38 @@ performScalarToVectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
return NVCAST;
}
+/// If the operand is a bitwise AND with a constant RHS, and the shift has a
+/// constant RHS and is the only use, we can pull it out of the shift, i.e.
+///
+/// (shl (and X, C1), C2) -> (and (shl X, C2), (shl C1, C2))
+///
+/// We prefer this canonical form to match existing isel patterns.
static SDValue performSHLCombine(SDNode *N, SelectionDAG &DAG) {
- SDValue Op0 = N->getOperand(0);
- SDValue Op1 = N->getOperand(1);
EVT VT = N->getValueType(0);
if (VT != MVT::i32 && VT != MVT::i64)
return SDValue();
- // If the operand is a bitwise AND with a constant RHS, and the shift is the
- // only use, we can pull it out of the shift.
- //
- // (shl (and X, C1), C2) -> (and (shl X, C2), (shl C1, C2))
+ SDValue Op0 = N->getOperand(0);
if (!Op0.hasOneUse() || Op0.getOpcode() != ISD::AND)
return SDValue();
- ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
- ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(Op1);
- if (!C1 || !C2)
+ SDValue C1 = Op0->getOperand(1);
+ SDValue C2 = N->getOperand(1);
+ if (!isa<ConstantSDNode>(C1) || !isa<ConstantSDNode>(C2))
return SDValue();
// Might be folded into shifted op, do not lower.
- unsigned UseOpc = N->use_begin()->getOpcode();
- if (N->hasOneUse() &&
- (UseOpc == ISD::ADD || UseOpc == ISD::SUB || UseOpc == ISD::SETCC ||
- UseOpc == AArch64ISD::ADDS || UseOpc == AArch64ISD::SUBS))
- return SDValue();
+ if (N->hasOneUse()) {
+ unsigned UseOpc = N->use_begin()->getOpcode();
+ if (UseOpc == ISD::ADD || UseOpc == ISD::SUB || UseOpc == ISD::SETCC ||
+ UseOpc == AArch64ISD::ADDS || UseOpc == AArch64ISD::SUBS)
+ return SDValue();
+ }
SDLoc DL(N);
- SDValue NewRHS = DAG.getNode(ISD::SHL, DL, VT, Op0.getOperand(1), Op1);
- SDValue NewShift = DAG.getNode(ISD::SHL, DL, VT, Op0->getOperand(0), Op1);
+ SDValue X = Op0->getOperand(0);
+ SDValue NewRHS = DAG.getNode(ISD::SHL, DL, VT, C1, C2);
+ SDValue NewShift = DAG.getNode(ISD::SHL, DL, VT, X, C2);
return DAG.getNode(ISD::AND, DL, VT, NewShift, NewRHS);
}
From f0cbfdda1a8a0c09904969767078fcf7c5c9a9e5 Mon Sep 17 00:00:00 2001
From: Cullen Rhodes <cullen.rhodes at arm.com>
Date: Wed, 8 Jan 2025 14:17:43 +0000
Subject: [PATCH 5/6] canonicalize after legalization
---
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 12 +++++++-----
1 file changed, 7 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 13871b149c2b91..fa13f8c6d513e6 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -26373,9 +26373,10 @@ performScalarToVectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
/// (shl (and X, C1), C2) -> (and (shl X, C2), (shl C1, C2))
///
/// We prefer this canonical form to match existing isel patterns.
-static SDValue performSHLCombine(SDNode *N, SelectionDAG &DAG) {
- EVT VT = N->getValueType(0);
- if (VT != MVT::i32 && VT != MVT::i64)
+static SDValue performSHLCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ SelectionDAG &DAG) {
+ if (DCI.isBeforeLegalizeOps())
return SDValue();
SDValue Op0 = N->getOperand(0);
@@ -26389,13 +26390,14 @@ static SDValue performSHLCombine(SDNode *N, SelectionDAG &DAG) {
// Might be folded into shifted op, do not lower.
if (N->hasOneUse()) {
- unsigned UseOpc = N->use_begin()->getOpcode();
+ unsigned UseOpc = N->user_begin()->getOpcode();
if (UseOpc == ISD::ADD || UseOpc == ISD::SUB || UseOpc == ISD::SETCC ||
UseOpc == AArch64ISD::ADDS || UseOpc == AArch64ISD::SUBS)
return SDValue();
}
SDLoc DL(N);
+ EVT VT = N->getValueType(0);
SDValue X = Op0->getOperand(0);
SDValue NewRHS = DAG.getNode(ISD::SHL, DL, VT, C1, C2);
SDValue NewShift = DAG.getNode(ISD::SHL, DL, VT, X, C2);
@@ -26748,7 +26750,7 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::SCALAR_TO_VECTOR:
return performScalarToVectorCombine(N, DCI, DAG);
case ISD::SHL:
- return performSHLCombine(N, DAG);
+ return performSHLCombine(N, DCI, DAG);
}
return SDValue();
}
From ed9338a48b28a7e9cd8040354473be0c3ec7d675 Mon Sep 17 00:00:00 2001
From: Cullen Rhodes <cullen.rhodes at arm.com>
Date: Wed, 8 Jan 2025 14:56:37 +0000
Subject: [PATCH 6/6] address comments
---
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index fa13f8c6d513e6..23671c9ffcf199 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -26380,7 +26380,7 @@ static SDValue performSHLCombine(SDNode *N,
return SDValue();
SDValue Op0 = N->getOperand(0);
- if (!Op0.hasOneUse() || Op0.getOpcode() != ISD::AND)
+ if (Op0.getOpcode() != ISD::AND || !Op0.hasOneUse())
return SDValue();
SDValue C1 = Op0->getOperand(1);