[llvm] 0718ac7 - [SDAG] allow cast folding for vector sext-of-setcc with signed compare
Sanjay Patel via llvm-commits
llvm-commits at lists.llvm.org
Wed Jun 2 12:05:11 PDT 2021
Author: Sanjay Patel
Date: 2021-06-02T15:05:02-04:00
New Revision: 0718ac706d4df719a0f019d1c0c3050c96ac2b8b
URL: https://github.com/llvm/llvm-project/commit/0718ac706d4df719a0f019d1c0c3050c96ac2b8b
DIFF: https://github.com/llvm/llvm-project/commit/0718ac706d4df719a0f019d1c0c3050c96ac2b8b.diff
LOG: [SDAG] allow cast folding for vector sext-of-setcc with signed compare
This extends 434c8e013a2c and ede3982792df to handle signed
predicates by sign-extending the setcc operands.
This is not shown directly in https://llvm.org/PR50055, but the pattern
becomes visible by changing the unsigned convert in the source code to a
signed convert.
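
For illustration, here is a hypothetical reduced source sketch (the function
name and signature are invented for this note, not taken from the PR) that
produces the sext-of-setcc pattern with a signed predicate once the loop is
vectorized and the compare is narrowed:

// Hypothetical reproducer sketch: reading through a signed char pointer
// (signed convert) instead of unsigned char yields a signed vector compare
// whose i1 result is sign-extended to form the -1/0 mask.
void store_sign_masks(const signed char *src, int *dst) {
  for (int i = 0; i < 8; ++i)
    dst[i] = src[i] > 0 ? -1 : 0;
}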
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/test/CodeGen/X86/sext-vsetcc.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index a75580a5c4c6..52de05117b12 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -10813,28 +10813,29 @@ SDValue DAGCombiner::foldSextSetcc(SDNode *N) {
   }
 
   // Try to eliminate the sext of a setcc by zexting the compare operands.
-  // TODO: Handle signed compare by sexting the ops.
-  if (!ISD::isSignedIntSetCC(CC) && N0.hasOneUse() &&
-      TLI.isOperationLegalOrCustom(ISD::SETCC, VT) &&
+  if (N0.hasOneUse() && TLI.isOperationLegalOrCustom(ISD::SETCC, VT) &&
       !TLI.isOperationLegalOrCustom(ISD::SETCC, SVT)) {
+    bool IsSignedCmp = ISD::isSignedIntSetCC(CC);
+    unsigned LoadOpcode = IsSignedCmp ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
+    unsigned ExtOpcode = IsSignedCmp ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+
     // We have an unsupported narrow vector compare op that would be legal
     // if extended to the destination type. See if the compare operands
     // can be freely extended to the destination type.
-    auto IsFreeToZext = [&](SDValue V) {
+    auto IsFreeToExtend = [&](SDValue V) {
       if (isConstantOrConstantVector(V, /*NoOpaques*/ true))
         return true;
-
       // Match a simple, non-extended load that can be converted to a
-      // legal zext-load.
-      // TODO: Allow widening of an existing zext-load?
+      // legal {z/s}ext-load.
+      // TODO: Allow widening of an existing {z/s}ext-load?
       if (!(ISD::isNON_EXTLoad(V.getNode()) &&
             ISD::isUNINDEXEDLoad(V.getNode()) &&
             cast<LoadSDNode>(V)->isSimple() &&
-            TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, V.getValueType())))
+            TLI.isLoadExtLegal(LoadOpcode, VT, V.getValueType())))
         return false;
 
       // Non-chain users of this value must either be the setcc in this
-      // sequence or zexts that can be folded into the new zext-load.
+      // sequence or extends that can be folded into the new {z/s}ext-load.
       for (SDNode::use_iterator UI = V->use_begin(), UE = V->use_end();
            UI != UE; ++UI) {
         // Skip uses of the chain and the setcc.
@@ -10844,16 +10845,15 @@ SDValue DAGCombiner::foldSextSetcc(SDNode *N) {
         // Extra users must have exactly the same cast we are about to create.
         // TODO: This restriction could be eased if ExtendUsesToFormExtLoad()
         // is enhanced similarly.
-        if (User->getOpcode() != ISD::ZERO_EXTEND ||
-            User->getValueType(0) != VT)
+        if (User->getOpcode() != ExtOpcode || User->getValueType(0) != VT)
           return false;
       }
       return true;
     };
 
-    if (IsFreeToZext(N00) && IsFreeToZext(N01)) {
-      SDValue Ext0 = DAG.getZExtOrTrunc(N00, DL, VT);
-      SDValue Ext1 = DAG.getZExtOrTrunc(N01, DL, VT);
+    if (IsFreeToExtend(N00) && IsFreeToExtend(N01)) {
+      SDValue Ext0 = DAG.getNode(ExtOpcode, DL, VT, N00);
+      SDValue Ext1 = DAG.getNode(ExtOpcode, DL, VT, N01);
       return DAG.getSetCC(DL, VT, Ext0, Ext1, CC);
     }
   }
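
As a side note on why this fold is sound: a signed compare is invariant under
sign-extension of both operands, just as an unsigned compare is invariant
under zero-extension. A standalone sanity check (an editorial sketch, not
part of the patch) that exhausts all i8 operand pairs:

#include <cassert>
#include <cstdint>

int main() {
  for (int a = -128; a <= 127; ++a) {
    for (int b = -128; b <= 127; ++b) {
      auto X = static_cast<int8_t>(a), Y = static_cast<int8_t>(b);
      // Signed predicate: i8 slt agrees with slt on the i32 sign-extensions.
      assert((X < Y) == (static_cast<int32_t>(X) < static_cast<int32_t>(Y)));
      // Unsigned predicate (the previously handled case): i8 ult agrees
      // with ult on the i32 zero-extensions.
      auto UX = static_cast<uint8_t>(X), UY = static_cast<uint8_t>(Y);
      assert((UX < UY) ==
             (static_cast<uint32_t>(UX) < static_cast<uint32_t>(UY)));
    }
  }
  return 0;
}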
diff --git a/llvm/test/CodeGen/X86/sext-vsetcc.ll b/llvm/test/CodeGen/X86/sext-vsetcc.ll
index e78d7356d89d..579939a35684 100644
--- a/llvm/test/CodeGen/X86/sext-vsetcc.ll
+++ b/llvm/test/CodeGen/X86/sext-vsetcc.ll
@@ -283,7 +283,7 @@ define <3 x i32> @cmp_ult_load_const_bad_type(<3 x i8>* %x) nounwind {
   ret <3 x i32> %sext
 }
 
-; negative test - signed cmp (TODO)
+; Signed compare needs signed extend.
 
 define <4 x i32> @cmp_slt_load_const(<4 x i8>* %x) nounwind {
 ; SSE-LABEL: cmp_slt_load_const:
@@ -298,10 +298,9 @@ define <4 x i32> @cmp_slt_load_const(<4 x i8>* %x) nounwind {
 ;
 ; AVX-LABEL: cmp_slt_load_const:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; AVX-NEXT:    vmovdqa {{.*#+}} xmm1 = <42,214,0,255,u,u,u,u,u,u,u,u,u,u,u,u>
-; AVX-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0
-; AVX-NEXT:    vpmovsxbd %xmm0, %xmm0
+; AVX-NEXT:    vpmovsxbd (%rdi), %xmm0
+; AVX-NEXT:    vmovdqa {{.*#+}} xmm1 = [42,4294967254,0,4294967295]
+; AVX-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
 ; AVX-NEXT:    retq
   %loadx = load <4 x i8>, <4 x i8>* %x
   %icmp = icmp slt <4 x i8> %loadx, <i8 42, i8 -42, i8 0, i8 -1>
@@ -373,7 +372,7 @@ define <8 x i16> @cmp_ugt_zextload(<8 x i8>* %x, <8 x i8>* %y) nounwind {
   ret <8 x i16> %sext
 }
 
-; negative test - signed cmp (TODO)
+; Signed compare needs signed extends.
 
 define <8 x i16> @cmp_sgt_zextload(<8 x i8>* %x, <8 x i8>* %y) nounwind {
 ; SSE-LABEL: cmp_sgt_zextload:
@@ -387,10 +386,9 @@ define <8 x i16> @cmp_sgt_zextload(<8 x i8>* %x, <8 x i8>* %y) nounwind {
 ;
 ; AVX-LABEL: cmp_sgt_zextload:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
-; AVX-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
-; AVX-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vpmovsxbw %xmm0, %xmm0
+; AVX-NEXT:    vpmovsxbw (%rdi), %xmm0
+; AVX-NEXT:    vpmovsxbw (%rsi), %xmm1
+; AVX-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
   %loadx = load <8 x i8>, <8 x i8>* %x
   %loady = load <8 x i8>, <8 x i8>* %y
@@ -571,11 +569,9 @@ define <4 x i64> @PR50055_signed(<2 x i64>* %src, <4 x i64>* %dst) {
 ;
 ; AVX-LABEL: PR50055_signed:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
-; AVX-NEXT:    vpmovsxbd %xmm1, %ymm0
-; AVX-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX-NEXT:    vpcmpgtb %xmm2, %xmm1, %xmm1
-; AVX-NEXT:    vpmovsxbd %xmm1, %ymm1
+; AVX-NEXT:    vpmovsxbd (%rdi), %ymm0
+; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm1
 ; AVX-NEXT:    vmovdqa %ymm1, (%rsi)
 ; AVX-NEXT:    retq
   %t0 = bitcast <2 x i64>* %src to <8 x i8>*