[llvm] r294856 - [X86][SSE] Convert getTargetShuffleMaskIndices to use getTargetConstantBitsFromNode.

Sun Feb 12 11:35:49 PST 2017

Thanks for the test case; I'm looking at this now. Simon.

On 12/02/2017 19:10, Andrew Adams wrote:
> Hi Simon,
>
> A commit in between 294848 and 294862 has created a problem when 
> no-nans-fp-math is on. I would open a bug, but buganizer is down. This 
> commit looks at least related and in the right range. Here's a repro:
>
> test.ll:
>
> ; Computes b = select(a < 0, -1, 1) * b
> define void @fn(<8 x float>* %a_ptr, <8 x float>* %b_ptr) {
>        %a = load <8 x float>, <8 x float>* %a_ptr
>        %b = load <8 x float>, <8 x float>* %b_ptr
>        %cmp = fcmp olt <8 x float> %a, zeroinitializer
>        %sel = select <8 x i1> %cmp, <8 x float> <float -1.000000e+00, 
> float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float 
> -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float 
> -1.000000e+00>, <8 x float> <float 1.000000e+00, float 1.000000e+00, 
> float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 
> 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
>        %c = fmul <8 x float> %sel, %b
>        store <8 x float> %c, <8 x float>* %b_ptr
>        ret void
> }
>
> $ llc test.ll -mcpu=haswell -enable-no-nans-fp-math -O3
> .section __TEXT,__text,regular,pure_instructions
> .macosx_version_min 10, 12
> LLVM ERROR: Cannot select: t43: v8f32 = vselect t7, t35, t32
>   t7: v8f32,ch = load<LD32[%a_ptr]> t0, t2, undef:i64
>     t2: i64,ch = CopyFromReg t0, Register:i64 %vreg0
>       t1: i64 = Register %vreg0
>     t6: i64 = undef
>   t35: v8f32 = X86ISD::VBROADCAST t34
>     t34: f32,ch = load<LD4[ConstantPool]> t0, t37, undef:i64
>       t37: i64 = X86ISD::WrapperRIP TargetConstantPool:i64<float 
> -1.000000e+00> 0
>         t36: i64 = TargetConstantPool<float -1.000000e+00> 0
>       t6: i64 = undef
>   t32: v8f32 = X86ISD::VBROADCAST t31
>     t31: f32,ch = load<LD4[ConstantPool]> t0, t39, undef:i64
>       t39: i64 = X86ISD::WrapperRIP TargetConstantPool:i64<float 
> 1.000000e+00> 0
>         t38: i64 = TargetConstantPool<float 1.000000e+00> 0
>       t6: i64 = undef
> In function: fn
>
> On Sat, Feb 11, 2017 at 11:27 AM, Simon Pilgrim via llvm-commits 
> <llvm-commits at lists.llvm.org <mailto:llvm-commits at lists.llvm.org>> wrote:
>
>     Author: rksimon
>     Date: Sat Feb 11 11:27:21 2017
>     New Revision: 294856
>
>     URL: http://llvm.org/viewvc/llvm-project?rev=294856&view=rev
>     <http://llvm.org/viewvc/llvm-project?rev=294856&view=rev>
>     Log:
>     [X86][SSE] Convert getTargetShuffleMaskIndices to use
>     getTargetConstantBitsFromNode.
>
>     Removes duplicate constant extraction code in
>     getTargetShuffleMaskIndices.
>
>     getTargetConstantBitsFromNode - adds support for
>     VZEXT_MOVL(SCALAR_TO_VECTOR) and fail if the caller doesn't
>     support undef bits.
>
>     Modified:
>         llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
>
>     Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
>     URL:
>     http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=294856&r1=294855&r2=294856&view=diff
>     <http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=294856&r1=294855&r2=294856&view=diff>
>     ==============================================================================
>     --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
>     +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sat Feb 11
>     11:27:21 2017
>     @@ -5151,7 +5151,8 @@ static const Constant *getTargetConstant
>      // Extract raw constant bits from constant pools.
>      static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
>                                                SmallBitVector &UndefElts,
>     -                                          SmallVectorImpl<APInt> &EltBits) {
>     +                                          SmallVectorImpl<APInt> &EltBits,
>     +                                          bool AllowUndefs = true) {
>        assert(UndefElts.empty() && "Expected an empty UndefElts vector");
>        assert(EltBits.empty() && "Expected an empty EltBits vector");
>
>     @@ -5171,6 +5172,10 @@ static bool getTargetConstantBitsFromNod
>
>        // Split the undef/constant single bitset data into the target
>     elements.
>        auto SplitBitData = [&]() {
>     +    // Don't split if we don't allow undef bits.
>     +    if (UndefBits.getBoolValue() && !AllowUndefs)
>     +      return false;
>     +
>          UndefElts = SmallBitVector(NumElts, false);
>          EltBits.resize(NumElts, APInt(EltSizeInBits, 0));
>
>     @@ -5264,89 +5269,34 @@ static bool getTargetConstantBitsFromNod
>          }
>        }
>
>     +  // Extract a rematerialized scalar constant insertion.
>     +  if (Op.getOpcode() == X86ISD::VZEXT_MOVL &&
>     +      Op.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR &&
>     +      isa<ConstantSDNode>(Op.getOperand(0).getOperand(0))) {
>     +    auto *CN = cast<ConstantSDNode>(Op.getOperand(0).getOperand(0));
>     +    MaskBits = CN->getAPIntValue().zextOrTrunc(SrcEltSizeInBits);
>     +    MaskBits = MaskBits.zext(SizeInBits);
>     +    return SplitBitData();
>     +  }
>     +
>        return false;
>      }
>
>     -// TODO: Merge more of this with getTargetConstantBitsFromNode.
>      static bool getTargetShuffleMaskIndices(SDValue MaskNode,
>                                              unsigned MaskEltSizeInBits,
>      SmallVectorImpl<uint64_t> &RawMask) {
>     -  MaskNode = peekThroughBitcasts(MaskNode);
>     -
>     -  MVT VT = MaskNode.getSimpleValueType();
>     -  assert(VT.isVector() && "Can't produce a non-vector with a
>     build_vector!");
>     -  unsigned NumMaskElts = VT.getSizeInBits() / MaskEltSizeInBits;
>     -
>     -  // Split an APInt element into MaskEltSizeInBits sized pieces and
>     -  // insert into the shuffle mask.
>     -  auto SplitElementToMask = [&](APInt Element) {
>     -    // Note that this is x86 and so always little endian: the low
>     byte is
>     -    // the first byte of the mask.
>     -    int Split = VT.getScalarSizeInBits() / MaskEltSizeInBits;
>     -    for (int i = 0; i < Split; ++i) {
>     -      APInt RawElt = Element.getLoBits(MaskEltSizeInBits);
>     -      Element = Element.lshr(MaskEltSizeInBits);
>     -      RawMask.push_back(RawElt.getZExtValue());
>     -    }
>     -  };
>     -
>     -  if (MaskNode.getOpcode() == X86ISD::VBROADCAST) {
>     -    // TODO: Handle (MaskEltSizeInBits %
>     VT.getScalarSizeInBits()) == 0
>     -    // TODO: Handle (VT.getScalarSizeInBits() %
>     MaskEltSizeInBits) == 0
>     -    if (VT.getScalarSizeInBits() != MaskEltSizeInBits)
>     -      return false;
>     -    if (auto *CN =
>     dyn_cast<ConstantSDNode>(MaskNode.getOperand(0))) {
>     -      const APInt &MaskElement = CN->getAPIntValue();
>     -      for (unsigned i = 0, e = VT.getVectorNumElements(); i != e;
>     ++i) {
>     -        APInt RawElt = MaskElement.getLoBits(MaskEltSizeInBits);
>     -        RawMask.push_back(RawElt.getZExtValue());
>     -      }
>     -    }
>     -    return false;
>     -  }
>     +  SmallBitVector UndefElts;
>     +  SmallVector<APInt, 64> EltBits;
>
>     -  if (MaskNode.getOpcode() == X86ISD::VZEXT_MOVL &&
>     -      MaskNode.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR) {
>     -    SDValue MaskOp = MaskNode.getOperand(0).getOperand(0);
>     -    if (auto *CN = dyn_cast<ConstantSDNode>(MaskOp)) {
>     -      if ((MaskEltSizeInBits % VT.getScalarSizeInBits()) == 0) {
>     -        RawMask.push_back(CN->getZExtValue());
>     -        RawMask.append(NumMaskElts - 1, 0);
>     -        return true;
>     -      }
>     -
>     -      if ((VT.getScalarSizeInBits() % MaskEltSizeInBits) == 0) {
>     -        unsigned ElementSplit = VT.getScalarSizeInBits() /
>     MaskEltSizeInBits;
>     -        SplitElementToMask(CN->getAPIntValue());
>     -        RawMask.append((VT.getVectorNumElements() - 1) *
>     ElementSplit, 0);
>     -        return true;
>     -      }
>     -    }
>     -    return false;
>     -  }
>     -
>     -  if (MaskNode.getOpcode() != ISD::BUILD_VECTOR)
>     +  // Extract the raw target constant bits.
>     +  // FIXME: We currently don't support UNDEF bits or mask entries.
>     +  if (!getTargetConstantBitsFromNode(MaskNode, MaskEltSizeInBits, UndefElts,
>     +                                     EltBits, /* AllowUndefs */ false))
>          return false;
>
>     -  // We can always decode if the buildvector is all zero constants,
>     -  // but can't use isBuildVectorAllZeros as it might contain UNDEFs.
>     -  if (all_of(MaskNode->ops(), X86::isZeroNode)) {
>     -    RawMask.append(NumMaskElts, 0);
>     -    return true;
>     -  }
>     -
>     -  // TODO: Handle (MaskEltSizeInBits % VT.getScalarSizeInBits()) == 0
>     -  if ((VT.getScalarSizeInBits() % MaskEltSizeInBits) != 0)
>     -    return false;
>     -
>     -  for (SDValue Op : MaskNode->ops()) {
>     -    if (auto *CN = dyn_cast<ConstantSDNode>(Op.getNode()))
>     -      SplitElementToMask(CN->getAPIntValue());
>     -    else if (auto *CFN = dyn_cast<ConstantFPSDNode>(Op.getNode()))
>     -      SplitElementToMask(CFN->getValueAPF().bitcastToAPInt());
>     -    else
>     -      return false;
>     -  }
>     +  // Insert the extracted elements into the mask.
>     +  for (APInt Elt : EltBits)
>     +    RawMask.push_back(Elt.getZExtValue());
>
>        return true;
>      }
>
>
>     _______________________________________________
>     llvm-commits mailing list
>     llvm-commits at lists.llvm.org <mailto:llvm-commits at lists.llvm.org>
>     http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>     <http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits>
>
>

-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20170212/3d5b5d75/attachment-0001.html>