[llvm] r295451 - [DAGCombine] Recognise any_extend_vector_inreg and truncation style shuffle masks

Mon Feb 20 00:33:49 PST 2017

Hi Simon,

The attached ll-file doesn't compile with this commit.

E.g.:

llc -march=x86-64 -mcpu=corei7 stress.ll

gives:

"ScalarizeVectorResult #0: t100: v1i256 = any_extend_vector_inreg t98

LLVM ERROR: Do not know how to scalarize the result of this operator!"

Originally generated with llvm-stress and then bugpoint reduced.

Regards,
Mikael

On 02/17/2017 04:14 PM, Simon Pilgrim via llvm-commits wrote:
> Author: rksimon
> Date: Fri Feb 17 09:14:48 2017
> New Revision: 295451
>
> URL: http://llvm.org/viewvc/llvm-project?rev=295451&view=rev
> Log:
> [DAGCombine] Recognise any_extend_vector_inreg and truncation style shuffle masks
>
> During legalization we are often creating shuffles (via a build_vector scalarization stage) that are "any_extend_vector_inreg" style masks, and also other masks that are the equivalent of "truncate_vector_inreg" (if we had such a thing).
>
> This patch is an attempt to match these cases to help undo the effects of just leaving shuffle lowering to handle it - which typically means we lose track of the undefined elements of the shuffles resulting in an unnecessary extension+truncation stage for widened illegal types.
>
> The 2011-10-21-widen-cmp.ll regression will be fixed by making SIGN_EXTEND_VECTOR_IN_REG legal in SSE instead of lowering them to X86ISD::VSEXT (PR31712).
>
> Differential Revision: https://reviews.llvm.org/D29454
>
> Modified:
>     llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
>     llvm/trunk/test/CodeGen/X86/2011-10-21-widen-cmp.ll
>     llvm/trunk/test/CodeGen/X86/avx512-cvt.ll
>     llvm/trunk/test/CodeGen/X86/avx512-vec-cmp.ll
>     llvm/trunk/test/CodeGen/X86/sse-minmax.ll
>
> Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=295451&r1=295450&r2=295451&view=diff
> ==============================================================================
> --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
> +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Fri Feb 17 09:14:48 2017
> @@ -7530,6 +7530,16 @@ SDValue DAGCombiner::visitSIGN_EXTEND_IN
>        return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
>    }
>
> +  // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_in_reg x)
> +  if ((N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
> +       N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
> +       N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) &&
> +      N0.getOperand(0).getScalarValueSizeInBits() == EVTBits) {
> +    if (!LegalOperations ||
> +        TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT))
> +      return DAG.getSignExtendVectorInReg(N0.getOperand(0), SDLoc(N), VT);
> +  }
> +
>    // fold (sext_in_reg (zext x)) -> (sext x)
>    // iff we are extending the source sign bit.
>    if (N0.getOpcode() == ISD::ZERO_EXTEND) {
> @@ -14194,6 +14204,113 @@ static SDValue combineShuffleOfScalars(S
>    return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
>  }
>
> +// Match shuffles that can be converted to any_vector_extend_in_reg.
> +// This is often generated during legalization.
> +// e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
> +// TODO Add support for ZERO_EXTEND_VECTOR_INREG when we have a test case.
> +SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN,
> +                                     SelectionDAG &DAG,
> +                                     const TargetLowering &TLI,
> +                                     bool LegalOperations) {
> +  EVT VT = SVN->getValueType(0);
> +  bool IsBigEndian = DAG.getDataLayout().isBigEndian();
> +
> +  // TODO Add support for big-endian when we have a test case.
> +  if (!VT.isInteger() || IsBigEndian)
> +    return SDValue();
> +
> +  unsigned NumElts = VT.getVectorNumElements();
> +  unsigned EltSizeInBits = VT.getScalarSizeInBits();
> +  ArrayRef<int> Mask = SVN->getMask();
> +  SDValue N0 = SVN->getOperand(0);
> +
> +  // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
> +  auto isAnyExtend = [&Mask, &NumElts](unsigned Scale) {
> +    for (unsigned i = 0; i != NumElts; ++i) {
> +      if (Mask[i] < 0)
> +        continue;
> +      if ((i % Scale) == 0 && Mask[i] == (i / Scale))
> +        continue;
> +      return false;
> +    }
> +    return true;
> +  };
> +
> +  // Attempt to match a '*_extend_vector_inreg' shuffle, we just search for
> +  // power-of-2 extensions as they are the most likely.
> +  for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
> +    if (!isAnyExtend(Scale))
> +      continue;
> +
> +    EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
> +    EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
> +    if (!LegalOperations ||
> +        TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND_VECTOR_INREG, OutVT))
> +      return DAG.getBitcast(VT,
> +                            DAG.getAnyExtendVectorInReg(N0, SDLoc(SVN), OutVT));
> +  }
> +
> +  return SDValue();
> +}
> +
> +// Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
> +// each source element of a large type into the lowest elements of a smaller
> +// destination type. This is often generated during legalization.
> +// If the source node itself was a '*_extend_vector_inreg' node then we should
> +// then be able to remove it.
> +SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN, SelectionDAG &DAG) {
> +  EVT VT = SVN->getValueType(0);
> +  bool IsBigEndian = DAG.getDataLayout().isBigEndian();
> +
> +  // TODO Add support for big-endian when we have a test case.
> +  if (!VT.isInteger() || IsBigEndian)
> +    return SDValue();
> +
> +  SDValue N0 = SVN->getOperand(0);
> +  while (N0.getOpcode() == ISD::BITCAST)
> +    N0 = N0.getOperand(0);
> +
> +  unsigned Opcode = N0.getOpcode();
> +  if (Opcode != ISD::ANY_EXTEND_VECTOR_INREG &&
> +      Opcode != ISD::SIGN_EXTEND_VECTOR_INREG &&
> +      Opcode != ISD::ZERO_EXTEND_VECTOR_INREG)
> +    return SDValue();
> +
> +  SDValue N00 = N0.getOperand(0);
> +  ArrayRef<int> Mask = SVN->getMask();
> +  unsigned NumElts = VT.getVectorNumElements();
> +  unsigned EltSizeInBits = VT.getScalarSizeInBits();
> +  unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
> +
> +  // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2,-1,-1>
> +  // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
> +  // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
> +  auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
> +    for (unsigned i = 0; i != NumElts; ++i) {
> +      if (Mask[i] < 0)
> +        continue;
> +      if ((i * Scale) < NumElts && Mask[i] == (i * Scale))
> +        continue;
> +      return false;
> +    }
> +    return true;
> +  };
> +
> +  // At the moment we just handle the case where we've truncated back to the
> +  // same size as before the extension.
> +  // TODO: handle more extension/truncation cases as cases arise.
> +  if (EltSizeInBits != ExtSrcSizeInBits)
> +    return SDValue();
> +
> +  // Attempt to match a 'truncate_vector_inreg' shuffle, we just search for
> +  // power-of-2 truncations as they are the most likely.
> +  for (unsigned Scale = 2; Scale < NumElts; Scale *= 2)
> +    if (isTruncate(Scale))
> +      return DAG.getBitcast(VT, N00);
> +
> +  return SDValue();
> +}
> +
>  SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
>    EVT VT = N->getValueType(0);
>    unsigned NumElts = VT.getVectorNumElements();
> @@ -14298,6 +14415,14 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE
>    if (SDValue S = simplifyShuffleOperands(SVN, N0, N1, DAG))
>      return S;
>
> +  // Match shuffles that can be converted to any_vector_extend_in_reg.
> +  if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations))
> +    return V;
> +
> +  // Combine "truncate_vector_in_reg" style shuffles.
> +  if (SDValue V = combineTruncationShuffle(SVN, DAG))
> +    return V;
> +
>    if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
>        Level < AfterLegalizeVectorOps &&
>        (N1.isUndef() ||
>
> Modified: llvm/trunk/test/CodeGen/X86/2011-10-21-widen-cmp.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/2011-10-21-widen-cmp.ll?rev=295451&r1=295450&r2=295451&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/2011-10-21-widen-cmp.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/2011-10-21-widen-cmp.ll Fri Feb 17 09:14:48 2017
> @@ -9,7 +9,8 @@ define void @cmp_2_floats(<2 x float> %a
>  ; CHECK:       # BB#0: # %entry
>  ; CHECK-NEXT:    movaps %xmm0, %xmm2
>  ; CHECK-NEXT:    cmpordps %xmm0, %xmm0
> -; CHECK-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1],zero,xmm0[1]
> +; CHECK-NEXT:    pmovsxdq %xmm0, %xmm0
> +; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
>  ; CHECK-NEXT:    pslld $31, %xmm0
>  ; CHECK-NEXT:    blendvps %xmm0, %xmm2, %xmm1
>  ; CHECK-NEXT:    movlps %xmm1, (%rax)
>
> Modified: llvm/trunk/test/CodeGen/X86/avx512-cvt.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-cvt.ll?rev=295451&r1=295450&r2=295451&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/avx512-cvt.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/avx512-cvt.ll Fri Feb 17 09:14:48 2017
> @@ -1075,7 +1075,6 @@ define <2 x float> @sitofp_2i1_float(<2
>  ; NOVL:       ## BB#0:
>  ; NOVL-NEXT:    vxorps %xmm1, %xmm1, %xmm1
>  ; NOVL-NEXT:    vcmpltps %xmm0, %xmm1, %xmm0
> -; NOVL-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],zero,xmm0[1]
>  ; NOVL-NEXT:    vcvtdq2ps %xmm0, %xmm0
>  ; NOVL-NEXT:    retq
>  ;
>
> Modified: llvm/trunk/test/CodeGen/X86/avx512-vec-cmp.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-vec-cmp.ll?rev=295451&r1=295450&r2=295451&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/avx512-vec-cmp.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/avx512-vec-cmp.ll Fri Feb 17 09:14:48 2017
> @@ -1226,11 +1226,7 @@ define <2 x i64> @test46(<2 x float> %x,
>  ; KNL-LABEL: test46:
>  ; KNL:       ## BB#0:
>  ; KNL-NEXT:    vcmpeqps %xmm1, %xmm0, %xmm0
> -; KNL-NEXT:    vxorps %xmm1, %xmm1, %xmm1
> -; KNL-NEXT:    vunpcklps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
> -; KNL-NEXT:    vpsrad $31, %xmm0, %xmm1
> -; KNL-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
> -; KNL-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
> +; KNL-NEXT:    vpmovsxdq %xmm0, %xmm0
>  ; KNL-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
>  ; KNL-NEXT:    retq
>  ;
>
> Modified: llvm/trunk/test/CodeGen/X86/sse-minmax.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse-minmax.ll?rev=295451&r1=295450&r2=295451&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/sse-minmax.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/sse-minmax.ll Fri Feb 17 09:14:48 2017
> @@ -1277,8 +1277,6 @@ define <2 x float> @test_maxps_illegal_v
>  ; STRICT-NEXT:    movaps %xmm0, %xmm2
>  ; STRICT-NEXT:    movaps %xmm1, %xmm0
>  ; STRICT-NEXT:    cmpleps %xmm2, %xmm0
> -; STRICT-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1],zero,xmm0[1]
> -; STRICT-NEXT:    pslld $31, %xmm0
>  ; STRICT-NEXT:    blendvps %xmm0, %xmm2, %xmm1
>  ; STRICT-NEXT:    movaps %xmm1, %xmm0
>  ; STRICT-NEXT:    retq
> @@ -1297,8 +1295,6 @@ define <2 x float> @test_minps_illegal_v
>  ; STRICT:       # BB#0:
>  ; STRICT-NEXT:    movaps %xmm0, %xmm2
>  ; STRICT-NEXT:    cmpleps %xmm1, %xmm0
> -; STRICT-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1],zero,xmm0[1]
> -; STRICT-NEXT:    pslld $31, %xmm0
>  ; STRICT-NEXT:    blendvps %xmm0, %xmm2, %xmm1
>  ; STRICT-NEXT:    movaps %xmm1, %xmm0
>  ; STRICT-NEXT:    retq
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>
-------------- next part --------------
; ModuleID = 'bugpoint-reduced-simplified.bc'
source_filename = "bugpoint-output-f145f15.bc"
target triple = "x86_64-unknown-linux-gnu"

; Reproducer generated by llvm-stress and reduced with bugpoint.
; Per the report accompanying this file, running
;   llc -march=x86-64 -mcpu=corei7
; on it aborts with
;   "LLVM ERROR: Do not know how to scalarize the result of this operator!"
; while scalarizing a `v1i256 = any_extend_vector_inreg` node.
; The function takes no arguments and contains no `ret`; control ends in the
; self-looping block %CF246.
define void @autogen_SD9778() {
BB:
  ; <16 x i64> shuffle of a zero vector with undef. Every defined mask index
  ; is even; indices >= 16 select lanes of the second (undef) operand, and the
  ; last three lanes (13..15) select elements 0, 2 and 4 of the zero vector.
  ; NOTE(review): presumably this is the shuffle the new DAGCombine shuffle
  ; matching acts on - not confirmed from this file alone.
  %Shuff7 = shufflevector <16 x i64> zeroinitializer, <16 x i64> undef, <16 x i32> <i32 undef, i32 8, i32 undef, i32 undef, i32 undef, i32 16, i32 undef, i32 20, i32 22, i32 24, i32 26, i32 undef, i32 30, i32 0, i32 2, i32 4>
  br label %CF

CF:                                               ; preds = %CF, %BB
  ; Lane 15 of the shuffle (mask index 4) is extracted and inserted into
  ; lane 4 of a fresh zero vector.
  %E56 = extractelement <16 x i64> %Shuff7, i32 15
  %I58 = insertelement <16 x i64> zeroinitializer, i64 %E56, i32 4
  ; Both select arms are undef, so the branch condition below is undef.
  %Sl90 = select i1 true, i1 undef, i1 undef
  br i1 %Sl90, label %CF, label %CF246

CF246:                                            ; preds = %CF246, %CF
  ; Reads lane 0 of the inserted-into vector; the result is unused within
  ; this function. The block then branches back to itself unconditionally.
  %E106 = extractelement <16 x i64> %I58, i32 0
  br label %CF246
}