[llvm] Reapply "[AArch64] Combine and and lsl into ubfiz" (#123356) (PR #124576)
Cullen Rhodes via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 27 08:12:28 PST 2025
https://github.com/c-rhodes updated https://github.com/llvm/llvm-project/pull/124576
>From 9f292d73cc11993499347eefce99bf2e02741faf Mon Sep 17 00:00:00 2001
From: Cullen Rhodes <cullen.rhodes at arm.com>
Date: Mon, 20 Jan 2025 11:54:32 +0000
Subject: [PATCH 1/3] Reapply "[AArch64] Combine and and lsl into ubfiz"
(#123356)
The patch was reverted because the test case (added here) exposed an
infinite loop in the combiner, where the (shl C1, C2) created by
performSHLCombine isn't constant-folded:
Combining: t14: i64 = shl t12, Constant:i64<1>
Creating new node: t36: i64 = shl OpaqueConstant:i64<-2401053089408754003>, Constant:i64<1>
Creating new node: t37: i64 = shl t6, Constant:i64<1>
Creating new node: t38: i64 = and t37, t36
... into: t38: i64 = and t37, t36
...
Combining: t38: i64 = and t37, t36
Creating new node: t39: i64 = and t6, OpaqueConstant:i64<-2401053089408754003>
Creating new node: t40: i64 = shl t39, Constant:i64<1>
... into: t40: i64 = shl t39, Constant:i64<1>
The resulting AND subsequently gets simplified by DAGCombiner::visitAND:
// Simplify: (and (op x...), (op y...)) -> (op (and x, y))
if (N0.getOpcode() == N1.getOpcode())
if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
return V;
before being folded by performSHLCombine once again, and so on.
The combine in performSHLCombine should only be done if (shl C1, C2) can
be constant-folded; otherwise it may be unsafe and generally produces a
worse end result. Thanks to Dave Sherwood for his insight on this one.
This reverts commit f719771f251d7c30eca448133fe85730f19a6bd1.
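As a standalone sanity check (not LLVM code; C1 = 7 and C2 = 1 are chosen to
mirror test_i8_7_mask_shl_1 in the diff below), the canonicalization relies on
the identity ((X & C1) << C2) == ((X << C2) & (C1 << C2)) for fixed-width
unsigned integers:

#include <cassert>
#include <cstdint>

int main() {
  // Identity behind the (shl (and X, C1), C2) canonicalization, checked
  // over a few sample values. Shifting left distributes over AND because
  // both operands are shifted by the same amount.
  const uint64_t C1 = 7, C2 = 1;
  for (uint64_t X : {0x0ULL, 0x5ULL, 0xFFULL, 0xDEADBEEFULL, ~0ULL})
    assert(((X & C1) << C2) == ((X << C2) & (C1 << C2)));
  return 0;
}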
---
.../Target/AArch64/AArch64ISelLowering.cpp | 41 +++++++
.../AArch64/const-shift-of-constmasked.ll | 101 ++++++++----------
llvm/test/CodeGen/AArch64/extract-bits.ll | 16 +--
llvm/test/CodeGen/AArch64/fpenv.ll | 6 +-
llvm/test/CodeGen/AArch64/xbfiz.ll | 16 +++
5 files changed, 115 insertions(+), 65 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 4ede1fb93fe5f2..f209c186c05425 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1140,6 +1140,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::SCALAR_TO_VECTOR);
+ setTargetDAGCombine(ISD::SHL);
+
// In case of strict alignment, avoid an excessive number of byte wide stores.
MaxStoresPerMemsetOptSize = 8;
MaxStoresPerMemset =
@@ -26339,6 +26341,43 @@ performScalarToVectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
return NVCAST;
}
+/// If the operand is a bitwise AND with a constant RHS, and the shift has a
+/// constant RHS and is the only use, we can pull it out of the shift, i.e.
+///
+/// (shl (and X, C1), C2) -> (and (shl X, C2), (shl C1, C2))
+///
+/// We prefer this canonical form to match existing isel patterns.
+static SDValue performSHLCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ SelectionDAG &DAG) {
+ if (DCI.isBeforeLegalizeOps())
+ return SDValue();
+
+ SDValue Op0 = N->getOperand(0);
+ if (Op0.getOpcode() != ISD::AND || !Op0.hasOneUse())
+ return SDValue();
+
+ SDValue C1 = Op0->getOperand(1);
+ SDValue C2 = N->getOperand(1);
+ if (!isa<ConstantSDNode>(C1) || !isa<ConstantSDNode>(C2))
+ return SDValue();
+
+ // Might be folded into shifted op, do not lower.
+ if (N->hasOneUse()) {
+ unsigned UseOpc = N->user_begin()->getOpcode();
+ if (UseOpc == ISD::ADD || UseOpc == ISD::SUB || UseOpc == ISD::SETCC ||
+ UseOpc == AArch64ISD::ADDS || UseOpc == AArch64ISD::SUBS)
+ return SDValue();
+ }
+
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+ SDValue X = Op0->getOperand(0);
+ SDValue NewRHS = DAG.getNode(ISD::SHL, DL, VT, C1, C2);
+ SDValue NewShift = DAG.getNode(ISD::SHL, DL, VT, X, C2);
+ return DAG.getNode(ISD::AND, DL, VT, NewShift, NewRHS);
+}
+
SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -26684,6 +26723,8 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
return performCTLZCombine(N, DAG, Subtarget);
case ISD::SCALAR_TO_VECTOR:
return performScalarToVectorCombine(N, DCI, DAG);
+ case ISD::SHL:
+ return performSHLCombine(N, DCI, DAG);
}
return SDValue();
}
diff --git a/llvm/test/CodeGen/AArch64/const-shift-of-constmasked.ll b/llvm/test/CodeGen/AArch64/const-shift-of-constmasked.ll
index 66a6745cda8f76..1fffcdda4b4167 100644
--- a/llvm/test/CodeGen/AArch64/const-shift-of-constmasked.ll
+++ b/llvm/test/CodeGen/AArch64/const-shift-of-constmasked.ll
@@ -190,8 +190,7 @@ define i8 @test_i8_224_mask_ashr_6(i8 %a0) {
define i8 @test_i8_7_mask_shl_1(i8 %a0) {
; CHECK-LABEL: test_i8_7_mask_shl_1:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0x7
-; CHECK-NEXT: lsl w0, w8, #1
+; CHECK-NEXT: ubfiz w0, w0, #1, #3
; CHECK-NEXT: ret
%t0 = and i8 %a0, 7
%t1 = shl i8 %t0, 1
@@ -200,8 +199,7 @@ define i8 @test_i8_7_mask_shl_1(i8 %a0) {
define i8 @test_i8_7_mask_shl_4(i8 %a0) {
; CHECK-LABEL: test_i8_7_mask_shl_4:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0x7
-; CHECK-NEXT: lsl w0, w8, #4
+; CHECK-NEXT: ubfiz w0, w0, #4, #3
; CHECK-NEXT: ret
%t0 = and i8 %a0, 7
%t1 = shl i8 %t0, 4
@@ -229,8 +227,8 @@ define i8 @test_i8_7_mask_shl_6(i8 %a0) {
define i8 @test_i8_28_mask_shl_1(i8 %a0) {
; CHECK-LABEL: test_i8_28_mask_shl_1:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0x1c
-; CHECK-NEXT: lsl w0, w8, #1
+; CHECK-NEXT: lsl w8, w0, #1
+; CHECK-NEXT: and w0, w8, #0x38
; CHECK-NEXT: ret
%t0 = and i8 %a0, 28
%t1 = shl i8 %t0, 1
@@ -239,8 +237,8 @@ define i8 @test_i8_28_mask_shl_1(i8 %a0) {
define i8 @test_i8_28_mask_shl_2(i8 %a0) {
; CHECK-LABEL: test_i8_28_mask_shl_2:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0x1c
-; CHECK-NEXT: lsl w0, w8, #2
+; CHECK-NEXT: lsl w8, w0, #2
+; CHECK-NEXT: and w0, w8, #0x70
; CHECK-NEXT: ret
%t0 = and i8 %a0, 28
%t1 = shl i8 %t0, 2
@@ -249,8 +247,8 @@ define i8 @test_i8_28_mask_shl_2(i8 %a0) {
define i8 @test_i8_28_mask_shl_3(i8 %a0) {
; CHECK-LABEL: test_i8_28_mask_shl_3:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0x1c
-; CHECK-NEXT: lsl w0, w8, #3
+; CHECK-NEXT: lsl w8, w0, #3
+; CHECK-NEXT: and w0, w8, #0xe0
; CHECK-NEXT: ret
%t0 = and i8 %a0, 28
%t1 = shl i8 %t0, 3
@@ -259,8 +257,8 @@ define i8 @test_i8_28_mask_shl_3(i8 %a0) {
define i8 @test_i8_28_mask_shl_4(i8 %a0) {
; CHECK-LABEL: test_i8_28_mask_shl_4:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0xc
-; CHECK-NEXT: lsl w0, w8, #4
+; CHECK-NEXT: lsl w8, w0, #4
+; CHECK-NEXT: and w0, w8, #0xc0
; CHECK-NEXT: ret
%t0 = and i8 %a0, 28
%t1 = shl i8 %t0, 4
@@ -270,8 +268,8 @@ define i8 @test_i8_28_mask_shl_4(i8 %a0) {
define i8 @test_i8_224_mask_shl_1(i8 %a0) {
; CHECK-LABEL: test_i8_224_mask_shl_1:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0x60
-; CHECK-NEXT: lsl w0, w8, #1
+; CHECK-NEXT: lsl w8, w0, #1
+; CHECK-NEXT: and w0, w8, #0xc0
; CHECK-NEXT: ret
%t0 = and i8 %a0, 224
%t1 = shl i8 %t0, 1
@@ -465,8 +463,7 @@ define i16 @test_i16_65024_mask_ashr_10(i16 %a0) {
define i16 @test_i16_127_mask_shl_1(i16 %a0) {
; CHECK-LABEL: test_i16_127_mask_shl_1:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0x7f
-; CHECK-NEXT: lsl w0, w8, #1
+; CHECK-NEXT: ubfiz w0, w0, #1, #7
; CHECK-NEXT: ret
%t0 = and i16 %a0, 127
%t1 = shl i16 %t0, 1
@@ -475,8 +472,7 @@ define i16 @test_i16_127_mask_shl_1(i16 %a0) {
define i16 @test_i16_127_mask_shl_8(i16 %a0) {
; CHECK-LABEL: test_i16_127_mask_shl_8:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0x7f
-; CHECK-NEXT: lsl w0, w8, #8
+; CHECK-NEXT: ubfiz w0, w0, #8, #7
; CHECK-NEXT: ret
%t0 = and i16 %a0, 127
%t1 = shl i16 %t0, 8
@@ -504,8 +500,8 @@ define i16 @test_i16_127_mask_shl_10(i16 %a0) {
define i16 @test_i16_2032_mask_shl_3(i16 %a0) {
; CHECK-LABEL: test_i16_2032_mask_shl_3:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0x7f0
-; CHECK-NEXT: lsl w0, w8, #3
+; CHECK-NEXT: lsl w8, w0, #3
+; CHECK-NEXT: and w0, w8, #0x3f80
; CHECK-NEXT: ret
%t0 = and i16 %a0, 2032
%t1 = shl i16 %t0, 3
@@ -514,8 +510,8 @@ define i16 @test_i16_2032_mask_shl_3(i16 %a0) {
define i16 @test_i16_2032_mask_shl_4(i16 %a0) {
; CHECK-LABEL: test_i16_2032_mask_shl_4:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0x7f0
-; CHECK-NEXT: lsl w0, w8, #4
+; CHECK-NEXT: lsl w8, w0, #4
+; CHECK-NEXT: and w0, w8, #0x7f00
; CHECK-NEXT: ret
%t0 = and i16 %a0, 2032
%t1 = shl i16 %t0, 4
@@ -524,8 +520,8 @@ define i16 @test_i16_2032_mask_shl_4(i16 %a0) {
define i16 @test_i16_2032_mask_shl_5(i16 %a0) {
; CHECK-LABEL: test_i16_2032_mask_shl_5:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0x7f0
-; CHECK-NEXT: lsl w0, w8, #5
+; CHECK-NEXT: lsl w8, w0, #5
+; CHECK-NEXT: and w0, w8, #0xfe00
; CHECK-NEXT: ret
%t0 = and i16 %a0, 2032
%t1 = shl i16 %t0, 5
@@ -534,8 +530,8 @@ define i16 @test_i16_2032_mask_shl_5(i16 %a0) {
define i16 @test_i16_2032_mask_shl_6(i16 %a0) {
; CHECK-LABEL: test_i16_2032_mask_shl_6:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0x3f0
-; CHECK-NEXT: lsl w0, w8, #6
+; CHECK-NEXT: lsl w8, w0, #6
+; CHECK-NEXT: and w0, w8, #0xfc00
; CHECK-NEXT: ret
%t0 = and i16 %a0, 2032
%t1 = shl i16 %t0, 6
@@ -545,8 +541,8 @@ define i16 @test_i16_2032_mask_shl_6(i16 %a0) {
define i16 @test_i16_65024_mask_shl_1(i16 %a0) {
; CHECK-LABEL: test_i16_65024_mask_shl_1:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0x7e00
-; CHECK-NEXT: lsl w0, w8, #1
+; CHECK-NEXT: lsl w8, w0, #1
+; CHECK-NEXT: and w0, w8, #0xfc00
; CHECK-NEXT: ret
%t0 = and i16 %a0, 65024
%t1 = shl i16 %t0, 1
@@ -740,8 +736,7 @@ define i32 @test_i32_4294836224_mask_ashr_18(i32 %a0) {
define i32 @test_i32_32767_mask_shl_1(i32 %a0) {
; CHECK-LABEL: test_i32_32767_mask_shl_1:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0x7fff
-; CHECK-NEXT: lsl w0, w8, #1
+; CHECK-NEXT: ubfiz w0, w0, #1, #15
; CHECK-NEXT: ret
%t0 = and i32 %a0, 32767
%t1 = shl i32 %t0, 1
@@ -750,8 +745,7 @@ define i32 @test_i32_32767_mask_shl_1(i32 %a0) {
define i32 @test_i32_32767_mask_shl_16(i32 %a0) {
; CHECK-LABEL: test_i32_32767_mask_shl_16:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0x7fff
-; CHECK-NEXT: lsl w0, w8, #16
+; CHECK-NEXT: ubfiz w0, w0, #16, #15
; CHECK-NEXT: ret
%t0 = and i32 %a0, 32767
%t1 = shl i32 %t0, 16
@@ -779,8 +773,8 @@ define i32 @test_i32_32767_mask_shl_18(i32 %a0) {
define i32 @test_i32_8388352_mask_shl_7(i32 %a0) {
; CHECK-LABEL: test_i32_8388352_mask_shl_7:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0x7fff00
-; CHECK-NEXT: lsl w0, w8, #7
+; CHECK-NEXT: lsl w8, w0, #7
+; CHECK-NEXT: and w0, w8, #0x3fff8000
; CHECK-NEXT: ret
%t0 = and i32 %a0, 8388352
%t1 = shl i32 %t0, 7
@@ -789,8 +783,8 @@ define i32 @test_i32_8388352_mask_shl_7(i32 %a0) {
define i32 @test_i32_8388352_mask_shl_8(i32 %a0) {
; CHECK-LABEL: test_i32_8388352_mask_shl_8:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0x7fff00
-; CHECK-NEXT: lsl w0, w8, #8
+; CHECK-NEXT: lsl w8, w0, #8
+; CHECK-NEXT: and w0, w8, #0x7fff0000
; CHECK-NEXT: ret
%t0 = and i32 %a0, 8388352
%t1 = shl i32 %t0, 8
@@ -799,8 +793,8 @@ define i32 @test_i32_8388352_mask_shl_8(i32 %a0) {
define i32 @test_i32_8388352_mask_shl_9(i32 %a0) {
; CHECK-LABEL: test_i32_8388352_mask_shl_9:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0x7fff00
-; CHECK-NEXT: lsl w0, w8, #9
+; CHECK-NEXT: lsl w8, w0, #9
+; CHECK-NEXT: and w0, w8, #0xfffe0000
; CHECK-NEXT: ret
%t0 = and i32 %a0, 8388352
%t1 = shl i32 %t0, 9
@@ -809,8 +803,8 @@ define i32 @test_i32_8388352_mask_shl_9(i32 %a0) {
define i32 @test_i32_8388352_mask_shl_10(i32 %a0) {
; CHECK-LABEL: test_i32_8388352_mask_shl_10:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0x3fff00
-; CHECK-NEXT: lsl w0, w8, #10
+; CHECK-NEXT: lsl w8, w0, #10
+; CHECK-NEXT: and w0, w8, #0xfffc0000
; CHECK-NEXT: ret
%t0 = and i32 %a0, 8388352
%t1 = shl i32 %t0, 10
@@ -820,8 +814,8 @@ define i32 @test_i32_8388352_mask_shl_10(i32 %a0) {
define i32 @test_i32_4294836224_mask_shl_1(i32 %a0) {
; CHECK-LABEL: test_i32_4294836224_mask_shl_1:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0x7ffe0000
-; CHECK-NEXT: lsl w0, w8, #1
+; CHECK-NEXT: lsl w8, w0, #1
+; CHECK-NEXT: and w0, w8, #0xfffc0000
; CHECK-NEXT: ret
%t0 = and i32 %a0, 4294836224
%t1 = shl i32 %t0, 1
@@ -1015,8 +1009,7 @@ define i64 @test_i64_18446744065119617024_mask_ashr_34(i64 %a0) {
define i64 @test_i64_2147483647_mask_shl_1(i64 %a0) {
; CHECK-LABEL: test_i64_2147483647_mask_shl_1:
; CHECK: // %bb.0:
-; CHECK-NEXT: and x8, x0, #0x7fffffff
-; CHECK-NEXT: lsl x0, x8, #1
+; CHECK-NEXT: lsl w0, w0, #1
; CHECK-NEXT: ret
%t0 = and i64 %a0, 2147483647
%t1 = shl i64 %t0, 1
@@ -1054,8 +1047,8 @@ define i64 @test_i64_2147483647_mask_shl_34(i64 %a0) {
define i64 @test_i64_140737488289792_mask_shl_15(i64 %a0) {
; CHECK-LABEL: test_i64_140737488289792_mask_shl_15:
; CHECK: // %bb.0:
-; CHECK-NEXT: and x8, x0, #0x7fffffff0000
-; CHECK-NEXT: lsl x0, x8, #15
+; CHECK-NEXT: lsl x8, x0, #15
+; CHECK-NEXT: and x0, x8, #0x3fffffff80000000
; CHECK-NEXT: ret
%t0 = and i64 %a0, 140737488289792
%t1 = shl i64 %t0, 15
@@ -1064,8 +1057,8 @@ define i64 @test_i64_140737488289792_mask_shl_15(i64 %a0) {
define i64 @test_i64_140737488289792_mask_shl_16(i64 %a0) {
; CHECK-LABEL: test_i64_140737488289792_mask_shl_16:
; CHECK: // %bb.0:
-; CHECK-NEXT: and x8, x0, #0x7fffffff0000
-; CHECK-NEXT: lsl x0, x8, #16
+; CHECK-NEXT: lsl x8, x0, #16
+; CHECK-NEXT: and x0, x8, #0x7fffffff00000000
; CHECK-NEXT: ret
%t0 = and i64 %a0, 140737488289792
%t1 = shl i64 %t0, 16
@@ -1074,8 +1067,8 @@ define i64 @test_i64_140737488289792_mask_shl_16(i64 %a0) {
define i64 @test_i64_140737488289792_mask_shl_17(i64 %a0) {
; CHECK-LABEL: test_i64_140737488289792_mask_shl_17:
; CHECK: // %bb.0:
-; CHECK-NEXT: and x8, x0, #0x7fffffff0000
-; CHECK-NEXT: lsl x0, x8, #17
+; CHECK-NEXT: lsl x8, x0, #17
+; CHECK-NEXT: and x0, x8, #0xfffffffe00000000
; CHECK-NEXT: ret
%t0 = and i64 %a0, 140737488289792
%t1 = shl i64 %t0, 17
@@ -1084,8 +1077,8 @@ define i64 @test_i64_140737488289792_mask_shl_17(i64 %a0) {
define i64 @test_i64_140737488289792_mask_shl_18(i64 %a0) {
; CHECK-LABEL: test_i64_140737488289792_mask_shl_18:
; CHECK: // %bb.0:
-; CHECK-NEXT: and x8, x0, #0x3fffffff0000
-; CHECK-NEXT: lsl x0, x8, #18
+; CHECK-NEXT: lsl x8, x0, #18
+; CHECK-NEXT: and x0, x8, #0xfffffffc00000000
; CHECK-NEXT: ret
%t0 = and i64 %a0, 140737488289792
%t1 = shl i64 %t0, 18
@@ -1095,8 +1088,8 @@ define i64 @test_i64_140737488289792_mask_shl_18(i64 %a0) {
define i64 @test_i64_18446744065119617024_mask_shl_1(i64 %a0) {
; CHECK-LABEL: test_i64_18446744065119617024_mask_shl_1:
; CHECK: // %bb.0:
-; CHECK-NEXT: and x8, x0, #0x7ffffffe00000000
-; CHECK-NEXT: lsl x0, x8, #1
+; CHECK-NEXT: lsl x8, x0, #1
+; CHECK-NEXT: and x0, x8, #0xfffffffc00000000
; CHECK-NEXT: ret
%t0 = and i64 %a0, 18446744065119617024
%t1 = shl i64 %t0, 1
diff --git a/llvm/test/CodeGen/AArch64/extract-bits.ll b/llvm/test/CodeGen/AArch64/extract-bits.ll
index b87157a183835d..aaa6c7eb4a30f4 100644
--- a/llvm/test/CodeGen/AArch64/extract-bits.ll
+++ b/llvm/test/CodeGen/AArch64/extract-bits.ll
@@ -1013,8 +1013,8 @@ define i32 @c1_i32(i32 %arg) nounwind {
define i32 @c2_i32(i32 %arg) nounwind {
; CHECK-LABEL: c2_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: ubfx w8, w0, #19, #10
-; CHECK-NEXT: lsl w0, w8, #2
+; CHECK-NEXT: lsr w8, w0, #17
+; CHECK-NEXT: and w0, w8, #0xffc
; CHECK-NEXT: ret
%tmp0 = lshr i32 %arg, 19
%tmp1 = and i32 %tmp0, 1023
@@ -1063,8 +1063,8 @@ define i64 @c1_i64(i64 %arg) nounwind {
define i64 @c2_i64(i64 %arg) nounwind {
; CHECK-LABEL: c2_i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: ubfx x8, x0, #51, #10
-; CHECK-NEXT: lsl x0, x8, #2
+; CHECK-NEXT: lsr x8, x0, #49
+; CHECK-NEXT: and x0, x8, #0xffc
; CHECK-NEXT: ret
%tmp0 = lshr i64 %arg, 51
%tmp1 = and i64 %tmp0, 1023
@@ -1120,8 +1120,8 @@ define void @c6_i32(i32 %arg, ptr %ptr) nounwind {
define void @c7_i32(i32 %arg, ptr %ptr) nounwind {
; CHECK-LABEL: c7_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: ubfx w8, w0, #19, #10
-; CHECK-NEXT: lsl w8, w8, #2
+; CHECK-NEXT: lsr w8, w0, #17
+; CHECK-NEXT: and w8, w8, #0xffc
; CHECK-NEXT: str w8, [x1]
; CHECK-NEXT: ret
%tmp0 = lshr i32 %arg, 19
@@ -1163,8 +1163,8 @@ define void @c6_i64(i64 %arg, ptr %ptr) nounwind {
define void @c7_i64(i64 %arg, ptr %ptr) nounwind {
; CHECK-LABEL: c7_i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: ubfx x8, x0, #51, #10
-; CHECK-NEXT: lsl x8, x8, #2
+; CHECK-NEXT: lsr x8, x0, #49
+; CHECK-NEXT: and x8, x8, #0xffc
; CHECK-NEXT: str x8, [x1]
; CHECK-NEXT: ret
%tmp0 = lshr i64 %arg, 51
diff --git a/llvm/test/CodeGen/AArch64/fpenv.ll b/llvm/test/CodeGen/AArch64/fpenv.ll
index 3a307f7731037a..3351565d8dd89d 100644
--- a/llvm/test/CodeGen/AArch64/fpenv.ll
+++ b/llvm/test/CodeGen/AArch64/fpenv.ll
@@ -4,11 +4,11 @@
define void @func_set_rounding_dyn(i32 %rm) {
; CHECK-LABEL: func_set_rounding_dyn:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub w9, w0, #1
+; CHECK-NEXT: lsl w9, w0, #22
; CHECK-NEXT: mrs x8, FPCR
-; CHECK-NEXT: and w9, w9, #0x3
; CHECK-NEXT: and x8, x8, #0xffffffffff3fffff
-; CHECK-NEXT: lsl w9, w9, #22
+; CHECK-NEXT: sub w9, w9, #1024, lsl #12 // =4194304
+; CHECK-NEXT: and w9, w9, #0xc00000
; CHECK-NEXT: orr x8, x8, x9
; CHECK-NEXT: msr FPCR, x8
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/xbfiz.ll b/llvm/test/CodeGen/AArch64/xbfiz.ll
index b777ddcb7efcc4..05567e34258402 100644
--- a/llvm/test/CodeGen/AArch64/xbfiz.ll
+++ b/llvm/test/CodeGen/AArch64/xbfiz.ll
@@ -69,3 +69,19 @@ define i64 @lsl32_not_ubfiz64(i64 %v) {
%and = and i64 %shl, 4294967295
ret i64 %and
}
+
+define i64 @lsl_zext_i8_i64(i8 %b) {
+; CHECK-LABEL: lsl_zext_i8_i64:
+; CHECK: ubfiz x0, x0, #1, #8
+ %1 = zext i8 %b to i64
+ %2 = shl i64 %1, 1
+ ret i64 %2
+}
+
+define i64 @lsl_zext_i16_i64(i16 %b) {
+; CHECK-LABEL: lsl_zext_i16_i64:
+; CHECK: ubfiz x0, x0, #1, #16
+ %1 = zext i16 %b to i64
+ %2 = shl i64 %1, 1
+ ret i64 %2
+}
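For reference, a minimal C++ model of the UBFIZ semantics the new tests rely
on (ubfiz64 is a hypothetical helper for illustration, not part of the patch):

#include <cassert>
#include <cstdint>

// Models AArch64 UBFIZ (unsigned bitfield insert in zeros): take the low
// <width> bits of x, place them at bit <lsb>, and zero everything else.
// The real instruction additionally requires lsb + width <= 64.
static uint64_t ubfiz64(uint64_t x, unsigned lsb, unsigned width) {
  const uint64_t mask = (width == 64) ? ~0ULL : ((1ULL << width) - 1);
  return (x & mask) << lsb;
}

int main() {
  // Mirrors lsl_zext_i8_i64 above: (zext i8 %b to i64) << 1 is
  // ubfiz x0, x0, #1, #8.
  uint8_t b = 0xAB;
  assert((static_cast<uint64_t>(b) << 1) == ubfiz64(b, /*lsb=*/1, /*width=*/8));
  return 0;
}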
>From 48359416355cfb78cf57f3ca5475a6b77784b8b5 Mon Sep 17 00:00:00 2001
From: Cullen Rhodes <cullen.rhodes at arm.com>
Date: Mon, 27 Jan 2025 15:12:51 +0000
Subject: [PATCH 2/3] fix
---
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 9 ++++++++-
llvm/test/CodeGen/AArch64/xbfiz.ll | 17 +++++++++++++++++
2 files changed, 25 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index f209c186c05425..bd9994bcb669ca 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -26372,8 +26372,15 @@ static SDValue performSHLCombine(SDNode *N,
SDLoc DL(N);
EVT VT = N->getValueType(0);
- SDValue X = Op0->getOperand(0);
+
+ // Don't combine unless (shl C1, C2) can be constant folded. Otherwise,
+ // DAGCombiner will simplify (and (op x...), (op y...)) -> (op (and x, y))
+ // causing an infinite loop. The result may also be worse.
SDValue NewRHS = DAG.getNode(ISD::SHL, DL, VT, C1, C2);
+ if (!isa<ConstantSDNode>(NewRHS))
+ return SDValue();
+
+ SDValue X = Op0->getOperand(0);
SDValue NewShift = DAG.getNode(ISD::SHL, DL, VT, X, C2);
return DAG.getNode(ISD::AND, DL, VT, NewShift, NewRHS);
}
diff --git a/llvm/test/CodeGen/AArch64/xbfiz.ll b/llvm/test/CodeGen/AArch64/xbfiz.ll
index 05567e34258402..bb71f5f1f7f944 100644
--- a/llvm/test/CodeGen/AArch64/xbfiz.ll
+++ b/llvm/test/CodeGen/AArch64/xbfiz.ll
@@ -85,3 +85,20 @@ define i64 @lsl_zext_i16_i64(i16 %b) {
%2 = shl i64 %1, 1
ret i64 %2
}
+
+; Regression test for:
+; https://github.com/llvm/llvm-project/pull/118974#issuecomment-2598521878
+; that exposed an infinite loop in DAGCombiner.
+define void @_f(ptr %0, ptr %1, i64 %2) {
+; CHECK-LABEL: @_f
+ store i64 -2401053089408754003, ptr %1, align 8
+ %4 = and i64 %2, -2401053089408754003
+ %5 = shl i64 %4, 1
+ store i64 %5, ptr %0, align 1
+ %6 = lshr i64 %4, 54
+ %7 = shl i64 %2, 10
+ %8 = and i64 %7, 131072
+ %9 = or i64 %8, %6
+ store i64 %9, ptr %1, align 1
+ ret void
+}
>From 3df21b63c7b05de718a6141960a927ecbd118cc0 Mon Sep 17 00:00:00 2001
From: Cullen Rhodes <cullen.rhodes at arm.com>
Date: Mon, 27 Jan 2025 16:11:29 +0000
Subject: [PATCH 3/3] address comments
---
llvm/test/CodeGen/AArch64/xbfiz.ll | 1 +
1 file changed, 1 insertion(+)
diff --git a/llvm/test/CodeGen/AArch64/xbfiz.ll b/llvm/test/CodeGen/AArch64/xbfiz.ll
index bb71f5f1f7f944..d3bebf7c6637c5 100644
--- a/llvm/test/CodeGen/AArch64/xbfiz.ll
+++ b/llvm/test/CodeGen/AArch64/xbfiz.ll
@@ -91,6 +91,7 @@ define i64 @lsl_zext_i16_i64(i16 %b) {
; that exposed infinite loop in DAGCombiner.
define void @_f(ptr %0, ptr %1, i64 %2) {
; CHECK-LABEL: @_f
+; CHECK-NOT: ubfiz
store i64 -2401053089408754003, ptr %1, align 8
%4 = and i64 %2, -2401053089408754003
%5 = shl i64 %4, 1