[llvm] cecf11c - [DAGCombiner] Fold SSHLSAT/USHLSAT to SHL when no saturation will occur
Bjorn Pettersson via llvm-commits
llvm-commits at lists.llvm.org
Sun Feb 6 10:00:30 PST 2022
Author: Bjorn Pettersson
Date: 2022-02-06T18:59:06+01:00
New Revision: cecf11c31544ec17b16843297246e93618cd56cd
URL: https://github.com/llvm/llvm-project/commit/cecf11c31544ec17b16843297246e93618cd56cd
DIFF: https://github.com/llvm/llvm-project/commit/cecf11c31544ec17b16843297246e93618cd56cd.diff
LOG: [DAGCombiner] Fold SSHLSAT/USHLSAT to SHL when no saturation will occur
When the shift amount is a constant (or constant splat) and analysis of
the shifted operand (known sign bits for SSHLSAT, known leading zeros for
USHLSAT) shows that no saturation can occur, SSHLSAT/USHLSAT is replaced by SHL.
Differential Revision: https://reviews.llvm.org/D118765
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/test/CodeGen/AArch64/sshl_sat.ll
llvm/test/CodeGen/AArch64/ushl_sat.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index b69524a99f6c2..08450f77986b8 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -9345,6 +9345,21 @@ SDValue DAGCombiner::visitSHLSAT(SDNode *N) {
DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, {N0, N1}))
return C;
+ ConstantSDNode *N1C = isConstOrConstSplat(N1);
+
+ if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) {
+ // fold (sshlsat x, c) -> (shl x, c)
+ if (N->getOpcode() == ISD::SSHLSAT && N1C &&
+ N1C->getAPIntValue().ult(DAG.ComputeNumSignBits(N0)))
+ return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N1);
+
+ // fold (ushlsat x, c) -> (shl x, c)
+ if (N->getOpcode() == ISD::USHLSAT && N1C &&
+ N1C->getAPIntValue().ule(
+ DAG.computeKnownBits(N0).countMinLeadingZeros()))
+ return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N1);
+ }
+
return SDValue();
}
diff --git a/llvm/test/CodeGen/AArch64/sshl_sat.ll b/llvm/test/CodeGen/AArch64/sshl_sat.ll
index 9cebbed06c68f..4572633ea1535 100644
--- a/llvm/test/CodeGen/AArch64/sshl_sat.ll
+++ b/llvm/test/CodeGen/AArch64/sshl_sat.ll
@@ -131,15 +131,7 @@ define void @combine_shlsat_vector() nounwind {
define i16 @combine_shlsat_to_shl(i16 %x) nounwind {
; CHECK-LABEL: combine_shlsat_to_shl:
; CHECK: // %bb.0:
-; CHECK-NEXT: sxth w8, w0
-; CHECK-NEXT: mov w9, #-65536
-; CHECK-NEXT: mov w10, #-2147483648
-; CHECK-NEXT: ands w8, w9, w8, lsl #14
-; CHECK-NEXT: lsl w9, w8, #2
-; CHECK-NEXT: cinv w10, w10, ge
-; CHECK-NEXT: cmp w8, w9, asr #2
-; CHECK-NEXT: csel w8, w10, w9, ne
-; CHECK-NEXT: asr w0, w8, #16
+; CHECK-NEXT: and w0, w0, #0xfffffffc
; CHECK-NEXT: ret
%x2 = ashr i16 %x, 2
%tmp = call i16 @llvm.sshl.sat.i16(i16 %x2, i16 2)
@@ -170,45 +162,8 @@ define <4 x i16> @combine_shlsat_to_shl_vec(<4 x i8> %a) nounwind {
; CHECK-LABEL: combine_shlsat_to_shl_vec:
; CHECK: // %bb.0:
; CHECK-NEXT: shl v0.4h, v0.4h, #8
-; CHECK-NEXT: mov w8, #-2147483648
-; CHECK-NEXT: sshr v1.4h, v0.4h, #8
-; CHECK-NEXT: umov w9, v1.h[0]
-; CHECK-NEXT: umov w10, v1.h[1]
-; CHECK-NEXT: umov w12, v1.h[2]
-; CHECK-NEXT: lsl w11, w9, #16
-; CHECK-NEXT: lsl w9, w9, #23
-; CHECK-NEXT: cmp w11, #0
-; CHECK-NEXT: cinv w13, w8, ge
-; CHECK-NEXT: cmp w11, w9, asr #7
-; CHECK-NEXT: lsl w11, w10, #16
-; CHECK-NEXT: lsl w10, w10, #23
-; CHECK-NEXT: csel w9, w13, w9, ne
-; CHECK-NEXT: cmp w11, #0
-; CHECK-NEXT: asr w9, w9, #16
-; CHECK-NEXT: cinv w13, w8, ge
-; CHECK-NEXT: cmp w11, w10, asr #7
-; CHECK-NEXT: lsl w11, w12, #16
-; CHECK-NEXT: csel w10, w13, w10, ne
-; CHECK-NEXT: lsl w12, w12, #23
-; CHECK-NEXT: asr w10, w10, #16
-; CHECK-NEXT: fmov s0, w9
-; CHECK-NEXT: umov w9, v1.h[3]
-; CHECK-NEXT: cmp w11, #0
-; CHECK-NEXT: cinv w13, w8, ge
-; CHECK-NEXT: cmp w11, w12, asr #7
-; CHECK-NEXT: csel w11, w13, w12, ne
-; CHECK-NEXT: mov v0.h[1], w10
-; CHECK-NEXT: asr w10, w11, #16
-; CHECK-NEXT: lsl w11, w9, #16
-; CHECK-NEXT: lsl w9, w9, #23
-; CHECK-NEXT: cmp w11, #0
-; CHECK-NEXT: cinv w8, w8, ge
-; CHECK-NEXT: cmp w11, w9, asr #7
-; CHECK-NEXT: mov v0.h[2], w10
-; CHECK-NEXT: csel w8, w8, w9, ne
-; CHECK-NEXT: asr w8, w8, #16
-; CHECK-NEXT: mov v0.h[3], w8
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: sshr v0.4h, v0.4h, #8
+; CHECK-NEXT: shl v0.4h, v0.4h, #7
; CHECK-NEXT: ret
%sext = sext <4 x i8> %a to <4 x i16>
%tmp = call <4 x i16> @llvm.sshl.sat.v4i16(
diff --git a/llvm/test/CodeGen/AArch64/ushl_sat.ll b/llvm/test/CodeGen/AArch64/ushl_sat.ll
index 2701494b8e394..1be1ef4af4bf9 100644
--- a/llvm/test/CodeGen/AArch64/ushl_sat.ll
+++ b/llvm/test/CodeGen/AArch64/ushl_sat.ll
@@ -117,12 +117,7 @@ define void @combine_shlsat_vector() nounwind {
define i16 @combine_shlsat_to_shl(i16 %x) nounwind {
; CHECK-LABEL: combine_shlsat_to_shl:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0xfffc
-; CHECK-NEXT: lsl w9, w8, #14
-; CHECK-NEXT: lsl w8, w8, #16
-; CHECK-NEXT: cmp w9, w9
-; CHECK-NEXT: csinv w8, w8, wzr, eq
-; CHECK-NEXT: lsr w0, w8, #16
+; CHECK-NEXT: and w0, w0, #0xfffffffc
; CHECK-NEXT: ret
%x2 = lshr i16 %x, 2
%tmp = call i16 @llvm.ushl.sat.i16(i16 %x2, i16 2)
@@ -150,22 +145,7 @@ define i16 @combine_shlsat_to_shl_no_fold(i16 %x) nounwind {
define <2 x i16> @combine_shlsat_to_shl_vec(<2 x i8> %a) nounwind {
; CHECK-LABEL: combine_shlsat_to_shl_vec:
; CHECK: // %bb.0:
-; CHECK-NEXT: movi d1, #0x0000ff000000ff
-; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
-; CHECK-NEXT: shl v0.2s, v0.2s, #16
-; CHECK-NEXT: mov w8, v0.s[1]
-; CHECK-NEXT: fmov w9, s0
-; CHECK-NEXT: and w10, w8, #0xffffff
-; CHECK-NEXT: lsl w11, w8, #8
-; CHECK-NEXT: cmp w8, w10
-; CHECK-NEXT: and w8, w9, #0xffffff
-; CHECK-NEXT: csinv w10, w11, wzr, eq
-; CHECK-NEXT: lsl w11, w9, #8
-; CHECK-NEXT: cmp w9, w8
-; CHECK-NEXT: csinv w8, w11, wzr, eq
-; CHECK-NEXT: fmov s0, w8
-; CHECK-NEXT: mov v0.s[1], w10
-; CHECK-NEXT: ushr v0.2s, v0.2s, #16
+; CHECK-NEXT: shl v0.2s, v0.2s, #8
; CHECK-NEXT: ret
%ext = zext <2 x i8> %a to <2 x i16>
%tmp = call <2 x i16> @llvm.ushl.sat.v2i16(
More information about the llvm-commits
mailing list