[llvm] [SDAG] Don't treat ISD::SHL as a uniform binary operator in `ShrinkDemandedOp` (PR #92753)
Yingwei Zheng via llvm-commits
llvm-commits at lists.llvm.org
Wed May 22 04:31:27 PDT 2024
https://github.com/dtcxzyw updated https://github.com/llvm/llvm-project/pull/92753
>From a9dcfd813ebe24d5c958ac9e73ee943a8463246b Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Mon, 20 May 2024 21:37:41 +0800
Subject: [PATCH 1/2] [SDAG] Don't treat ISD::SHL as a uniform binary operator
in `ShrinkDemandedOp`
---
.../CodeGen/SelectionDAG/TargetLowering.cpp | 31 ++++-
llvm/test/CodeGen/AArch64/bitfield-insert.ll | 11 +-
llvm/test/CodeGen/AArch64/trunc-to-tbl.ll | 118 +++++++++---------
llvm/test/CodeGen/X86/pr92720.ll | 15 +++
4 files changed, 104 insertions(+), 71 deletions(-)
create mode 100644 llvm/test/CodeGen/X86/pr92720.ll
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 3ec6b9b795079..83dee6db3787e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -587,6 +587,10 @@ bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
if (VT.isVector())
return false;
+ assert(Op.getOperand(0).getValueType().getScalarSizeInBits() == BitWidth &&
+ Op.getOperand(1).getValueType().getScalarSizeInBits() == BitWidth &&
+ "ShrinkDemandedOp only supports operands that have the same size!");
+
// Don't do this if the node has another user, which may require the
// full value.
if (!Op.getNode()->hasOneUse())
@@ -1834,9 +1838,30 @@ bool TargetLowering::SimplifyDemandedBits(
// Try shrinking the operation as long as the shift amount will still be
// in range.
- if ((ShAmt < DemandedBits.getActiveBits()) &&
- ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
- return true;
+ if (ShAmt < DemandedBits.getActiveBits() && !VT.isVector() &&
+ Op.getNode()->hasOneUse()) {
+ // Search for the smallest integer type with free casts to and from
+ // Op's type. For expedience, just check power-of-2 integer types.
+ unsigned DemandedSize = DemandedBits.getActiveBits();
+ for (unsigned SmallVTBits = llvm::bit_ceil(DemandedSize);
+ SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
+ EVT SmallVT = EVT::getIntegerVT(*TLO.DAG.getContext(), SmallVTBits);
+ if (isNarrowingProfitable(VT, SmallVT) &&
+ isTypeDesirableForOp(ISD::SHL, SmallVT) &&
+ isTruncateFree(VT, SmallVT) && isZExtFree(SmallVT, VT) &&
+ (!TLO.LegalOperations() || isOperationLegal(ISD::SHL, SmallVT))) {
+ assert(DemandedSize <= SmallVTBits &&
+ "Narrowed below demanded bits?");
+ // We found a type with free casts.
+ SDValue NarrowShl = TLO.DAG.getNode(
+ ISD::SHL, dl, SmallVT,
+ TLO.DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
+ TLO.DAG.getShiftAmountConstant(ShAmt, SmallVT, dl));
+ return TLO.CombineTo(
+ Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
+ }
+ }
+ }
// Narrow shift to lower half - similar to ShrinkDemandedOp.
// (shl i64:x, K) -> (i64 zero_extend (shl (i32 (trunc i64:x)), K))
diff --git a/llvm/test/CodeGen/AArch64/bitfield-insert.ll b/llvm/test/CodeGen/AArch64/bitfield-insert.ll
index 30b5e86c1e6dc..14a594e8028d5 100644
--- a/llvm/test/CodeGen/AArch64/bitfield-insert.ll
+++ b/llvm/test/CodeGen/AArch64/bitfield-insert.ll
@@ -193,11 +193,10 @@ define void @test_64bit_badmask(ptr %existing, ptr %new) {
; CHECK: // %bb.0:
; CHECK-NEXT: ldr x8, [x0]
; CHECK-NEXT: ldr x9, [x1]
-; CHECK-NEXT: mov w10, #135 // =0x87
-; CHECK-NEXT: mov w11, #664 // =0x298
-; CHECK-NEXT: lsl w9, w9, #3
-; CHECK-NEXT: and x8, x8, x10
-; CHECK-NEXT: and x9, x9, x11
+; CHECK-NEXT: mov w10, #664 // =0x298
+; CHECK-NEXT: mov w11, #135 // =0x87
+; CHECK-NEXT: and x9, x10, x9, lsl #3
+; CHECK-NEXT: and x8, x8, x11
; CHECK-NEXT: orr x8, x8, x9
; CHECK-NEXT: str x8, [x0]
; CHECK-NEXT: ret
@@ -579,7 +578,6 @@ define <2 x i32> @test_complex_type(ptr %addr, i64 %in, ptr %bf ) {
define i64 @test_truncated_shift(i64 %x, i64 %y) {
; CHECK-LABEL: test_truncated_shift:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $w1 killed $w1 killed $x1 def $x1
; CHECK-NEXT: bfi x0, x1, #25, #5
; CHECK-NEXT: ret
entry:
@@ -593,7 +591,6 @@ entry:
define i64 @test_and_extended_shift_with_imm(i64 %0) {
; CHECK-LABEL: test_and_extended_shift_with_imm:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 def $x0
; CHECK-NEXT: ubfiz x0, x0, #7, #8
; CHECK-NEXT: ret
%2 = shl i64 %0, 7
diff --git a/llvm/test/CodeGen/AArch64/trunc-to-tbl.ll b/llvm/test/CodeGen/AArch64/trunc-to-tbl.ll
index 18cd4cc2111a4..c4a58ba12dc6b 100644
--- a/llvm/test/CodeGen/AArch64/trunc-to-tbl.ll
+++ b/llvm/test/CodeGen/AArch64/trunc-to-tbl.ll
@@ -571,29 +571,27 @@ define void @trunc_v8i19_to_v8i8_in_loop(ptr %A, ptr %dst) {
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: LBB5_1: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldp x10, x9, [x0]
-; CHECK-NEXT: ldrb w13, [x0, #18]
-; CHECK-NEXT: ldrh w14, [x0, #16]
+; CHECK-NEXT: ldp x9, x10, [x0]
+; CHECK-NEXT: ldrb w14, [x0, #18]
+; CHECK-NEXT: ldrh w15, [x0, #16]
; CHECK-NEXT: add x0, x0, #32
-; CHECK-NEXT: ubfx x12, x9, #12, #20
-; CHECK-NEXT: fmov s0, w10
-; CHECK-NEXT: lsr x11, x10, #19
-; CHECK-NEXT: lsr x15, x9, #31
-; CHECK-NEXT: fmov s1, w12
-; CHECK-NEXT: lsr x12, x9, #50
-; CHECK-NEXT: mov.s v0[1], w11
-; CHECK-NEXT: orr w11, w14, w13, lsl #16
-; CHECK-NEXT: lsr x13, x10, #38
-; CHECK-NEXT: lsr x10, x10, #57
-; CHECK-NEXT: mov.s v1[1], w15
-; CHECK-NEXT: orr w12, w12, w11, lsl #14
-; CHECK-NEXT: orr w9, w10, w9, lsl #7
-; CHECK-NEXT: lsr w10, w11, #5
-; CHECK-NEXT: mov.s v0[2], w13
+; CHECK-NEXT: ubfx x12, x10, #12, #20
+; CHECK-NEXT: fmov s1, w9
+; CHECK-NEXT: lsr x11, x9, #19
+; CHECK-NEXT: lsr x13, x10, #31
+; CHECK-NEXT: fmov s0, w12
+; CHECK-NEXT: lsr x12, x9, #38
+; CHECK-NEXT: extr x9, x10, x9, #57
+; CHECK-NEXT: mov.s v1[1], w11
+; CHECK-NEXT: orr x11, x15, x14, lsl #16
+; CHECK-NEXT: mov.s v0[1], w13
+; CHECK-NEXT: extr x13, x11, x10, #50
+; CHECK-NEXT: ubfx x10, x11, #5, #27
; CHECK-NEXT: mov.s v1[2], w12
-; CHECK-NEXT: mov.s v0[3], w9
-; CHECK-NEXT: mov.s v1[3], w10
-; CHECK-NEXT: uzp1.8h v0, v0, v1
+; CHECK-NEXT: mov.s v0[2], w13
+; CHECK-NEXT: mov.s v1[3], w9
+; CHECK-NEXT: mov.s v0[3], w10
+; CHECK-NEXT: uzp1.8h v0, v1, v0
; CHECK-NEXT: xtn.8b v0, v0
; CHECK-NEXT: str d0, [x1, x8, lsl #3]
; CHECK-NEXT: add x8, x8, #1
@@ -608,35 +606,34 @@ define void @trunc_v8i19_to_v8i8_in_loop(ptr %A, ptr %dst) {
; CHECK-BE-NEXT: .LBB5_1: // %loop
; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-BE-NEXT: ldp x10, x9, [x0]
-; CHECK-BE-NEXT: ldrb w16, [x0, #18]
-; CHECK-BE-NEXT: lsr x11, x9, #40
-; CHECK-BE-NEXT: ubfx x12, x9, #33, #7
-; CHECK-BE-NEXT: lsr x15, x10, #45
-; CHECK-BE-NEXT: lsr x13, x10, #40
-; CHECK-BE-NEXT: ubfx x14, x10, #26, #14
-; CHECK-BE-NEXT: orr w11, w12, w11, lsl #7
-; CHECK-BE-NEXT: ldrh w12, [x0, #16]
-; CHECK-BE-NEXT: fmov s0, w15
-; CHECK-BE-NEXT: orr w13, w14, w13, lsl #14
-; CHECK-BE-NEXT: ubfx x14, x9, #14, #18
+; CHECK-BE-NEXT: ldrh w16, [x0, #16]
+; CHECK-BE-NEXT: ldrb w17, [x0, #18]
; CHECK-BE-NEXT: add x0, x0, #32
-; CHECK-BE-NEXT: fmov s1, w11
-; CHECK-BE-NEXT: orr w11, w16, w12, lsl #8
-; CHECK-BE-NEXT: lsl x12, x9, #24
-; CHECK-BE-NEXT: mov v0.s[1], w13
+; CHECK-BE-NEXT: lsl x11, x9, #24
+; CHECK-BE-NEXT: lsr x12, x9, #40
+; CHECK-BE-NEXT: lsr x13, x10, #45
+; CHECK-BE-NEXT: lsl x14, x10, #24
+; CHECK-BE-NEXT: lsr x15, x10, #40
+; CHECK-BE-NEXT: extr x12, x12, x11, #57
+; CHECK-BE-NEXT: fmov s0, w13
; CHECK-BE-NEXT: ubfx x13, x10, #7, #25
+; CHECK-BE-NEXT: extr x14, x15, x14, #50
+; CHECK-BE-NEXT: ubfx x15, x9, #14, #18
; CHECK-BE-NEXT: extr x9, x10, x9, #40
-; CHECK-BE-NEXT: orr w12, w11, w12
-; CHECK-BE-NEXT: mov v1.s[1], w14
-; CHECK-BE-NEXT: lsr w12, w12, #19
+; CHECK-BE-NEXT: fmov s1, w12
+; CHECK-BE-NEXT: orr w12, w17, w16, lsl #8
+; CHECK-BE-NEXT: mov v0.s[1], w14
; CHECK-BE-NEXT: ubfx x9, x9, #12, #20
+; CHECK-BE-NEXT: orr w11, w12, w11
+; CHECK-BE-NEXT: mov v1.s[1], w15
+; CHECK-BE-NEXT: lsr w11, w11, #19
; CHECK-BE-NEXT: mov v0.s[2], w13
-; CHECK-BE-NEXT: mov v1.s[2], w12
+; CHECK-BE-NEXT: mov v1.s[2], w11
; CHECK-BE-NEXT: mov v0.s[3], w9
; CHECK-BE-NEXT: add x9, x1, x8, lsl #3
; CHECK-BE-NEXT: add x8, x8, #1
; CHECK-BE-NEXT: cmp x8, #1000
-; CHECK-BE-NEXT: mov v1.s[3], w11
+; CHECK-BE-NEXT: mov v1.s[3], w12
; CHECK-BE-NEXT: uzp1 v0.8h, v0.8h, v1.8h
; CHECK-BE-NEXT: xtn v0.8b, v0.8h
; CHECK-BE-NEXT: st1 { v0.8b }, [x9]
@@ -650,35 +647,34 @@ define void @trunc_v8i19_to_v8i8_in_loop(ptr %A, ptr %dst) {
; CHECK-DISABLE-NEXT: .LBB5_1: // %loop
; CHECK-DISABLE-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-DISABLE-NEXT: ldp x10, x9, [x0]
-; CHECK-DISABLE-NEXT: ldrb w16, [x0, #18]
-; CHECK-DISABLE-NEXT: lsr x11, x9, #40
-; CHECK-DISABLE-NEXT: ubfx x12, x9, #33, #7
-; CHECK-DISABLE-NEXT: lsr x15, x10, #45
-; CHECK-DISABLE-NEXT: lsr x13, x10, #40
-; CHECK-DISABLE-NEXT: ubfx x14, x10, #26, #14
-; CHECK-DISABLE-NEXT: orr w11, w12, w11, lsl #7
-; CHECK-DISABLE-NEXT: ldrh w12, [x0, #16]
-; CHECK-DISABLE-NEXT: fmov s0, w15
-; CHECK-DISABLE-NEXT: orr w13, w14, w13, lsl #14
-; CHECK-DISABLE-NEXT: ubfx x14, x9, #14, #18
+; CHECK-DISABLE-NEXT: ldrh w16, [x0, #16]
+; CHECK-DISABLE-NEXT: ldrb w17, [x0, #18]
; CHECK-DISABLE-NEXT: add x0, x0, #32
-; CHECK-DISABLE-NEXT: fmov s1, w11
-; CHECK-DISABLE-NEXT: orr w11, w16, w12, lsl #8
-; CHECK-DISABLE-NEXT: lsl x12, x9, #24
-; CHECK-DISABLE-NEXT: mov v0.s[1], w13
+; CHECK-DISABLE-NEXT: lsl x11, x9, #24
+; CHECK-DISABLE-NEXT: lsr x12, x9, #40
+; CHECK-DISABLE-NEXT: lsr x13, x10, #45
+; CHECK-DISABLE-NEXT: lsl x14, x10, #24
+; CHECK-DISABLE-NEXT: lsr x15, x10, #40
+; CHECK-DISABLE-NEXT: extr x12, x12, x11, #57
+; CHECK-DISABLE-NEXT: fmov s0, w13
; CHECK-DISABLE-NEXT: ubfx x13, x10, #7, #25
+; CHECK-DISABLE-NEXT: extr x14, x15, x14, #50
+; CHECK-DISABLE-NEXT: ubfx x15, x9, #14, #18
; CHECK-DISABLE-NEXT: extr x9, x10, x9, #40
-; CHECK-DISABLE-NEXT: orr w12, w11, w12
-; CHECK-DISABLE-NEXT: mov v1.s[1], w14
-; CHECK-DISABLE-NEXT: lsr w12, w12, #19
+; CHECK-DISABLE-NEXT: fmov s1, w12
+; CHECK-DISABLE-NEXT: orr w12, w17, w16, lsl #8
+; CHECK-DISABLE-NEXT: mov v0.s[1], w14
; CHECK-DISABLE-NEXT: ubfx x9, x9, #12, #20
+; CHECK-DISABLE-NEXT: orr w11, w12, w11
+; CHECK-DISABLE-NEXT: mov v1.s[1], w15
+; CHECK-DISABLE-NEXT: lsr w11, w11, #19
; CHECK-DISABLE-NEXT: mov v0.s[2], w13
-; CHECK-DISABLE-NEXT: mov v1.s[2], w12
+; CHECK-DISABLE-NEXT: mov v1.s[2], w11
; CHECK-DISABLE-NEXT: mov v0.s[3], w9
; CHECK-DISABLE-NEXT: add x9, x1, x8, lsl #3
; CHECK-DISABLE-NEXT: add x8, x8, #1
; CHECK-DISABLE-NEXT: cmp x8, #1000
-; CHECK-DISABLE-NEXT: mov v1.s[3], w11
+; CHECK-DISABLE-NEXT: mov v1.s[3], w12
; CHECK-DISABLE-NEXT: uzp1 v0.8h, v0.8h, v1.8h
; CHECK-DISABLE-NEXT: xtn v0.8b, v0.8h
; CHECK-DISABLE-NEXT: st1 { v0.8b }, [x9]
diff --git a/llvm/test/CodeGen/X86/pr92720.ll b/llvm/test/CodeGen/X86/pr92720.ll
new file mode 100644
index 0000000000000..b2543c08328c7
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pr92720.ll
@@ -0,0 +1,15 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=x86_64-linux-gnu | FileCheck %s
+
+; Make sure we don't crash when shrinking the shift amount before legalization.
+define i64 @pr92720(i64 %x) {
+; CHECK-LABEL: pr92720:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movabsq $8589934592, %rax # imm = 0x200000000
+; CHECK-NEXT: retq
+ %or = or i64 %x, 255
+ %sub = sub i64 0, %or
+ %shl = shl i64 1, %sub
+ %sext = shl i64 %shl, 32
+ ret i64 %sext
+}
>From 5ba8a4fd138b16b4c671b058ce4cdb7c65d7b7a0 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Wed, 22 May 2024 19:31:03 +0800
Subject: [PATCH 2/2] [SDAG] Add todo. NFC.
---
llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 1 +
1 file changed, 1 insertion(+)
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 83dee6db3787e..5064527045262 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -1836,6 +1836,7 @@ bool TargetLowering::SimplifyDemandedBits(
}
}
+ // TODO: Can we merge this fold with the one below?
// Try shrinking the operation as long as the shift amount will still be
// in range.
if (ShAmt < DemandedBits.getActiveBits() && !VT.isVector() &&
More information about the llvm-commits
mailing list