[llvm] r294856 - [X86][SSE] Convert getTargetShuffleMaskIndices to use getTargetConstantBitsFromNode.

Sanjay Patel via llvm-commits llvm-commits at lists.llvm.org
Sun Feb 12 15:28:18 PST 2017


Fix submitted as:
https://reviews.llvm.org/rL294924

Please let me know if you see any other problems. Thanks and sorry about
the bug!

On Sun, Feb 12, 2017 at 3:29 PM, Sanjay Patel <spatel at rotateright.com>
wrote:

> Yes - the SimplifyDemandedBits enhancement:
> https://reviews.llvm.org/rL294863
>
> ...exposes a hole in the x86 lowering. We didn't expect that an FP vector
> might be the replacement for the condition operand in a VSELECT. That's why
> we get the "cannot select" error.
>
> We should be able to handle that case with a bitcast in the x86 code...
>
> But there is also a bug in the SimplifyDemandedBits logic. Unless we don't
> care about signed-zero, we need to check that the operand is an integer
> because "SETLT" can be used with FP operands including signed-zero.
>
>
>
>
> On Sun, Feb 12, 2017 at 2:54 PM, Simon Pilgrim <llvm-dev at redking.me.uk>
> wrote:
>
>> Handing off to @spatel as it appears to be due to rL294863, not rL294856.
>> On 12/02/2017 19:35, Simon Pilgrim wrote:
>>
>> Thanks for the test case, looking at this now. Simon.
>> On 12/02/2017 19:10, Andrew Adams wrote:
>>
>> Hi Simon,
>>
>> A commit in between 294848 and 294862 has created a problem when
>> no-nans-fp-math is on. I would open a bug, but buganizer is down. This
>> commit looks at least related and in the right range. Here's a repro:
>>
>> test.ll:
>>
>> % Computes b = select(a < 0, -1, 1) * b
>> define void @fn(<8 x float>* %a_ptr, <8 x float>* %b_ptr) {
>>        %a = load <8 x float>, <8 x float>* %a_ptr
>>        %b = load <8 x float>, <8 x float>* %b_ptr
>>        %cmp = fcmp olt <8 x float> %a, zeroinitializer
>>        %sel = select <8 x i1> %cmp, <8 x float> <float -1.000000e+00,
>> float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float
>> -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float
>> -1.000000e+00>, <8 x float> <float 1.000000e+00, float 1.000000e+00, float
>> 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00,
>> float 1.000000e+00, float 1.000000e+00>
>>        %c = fmul <8 x float> %sel, %b
>>        store <8 x float> %c, <8 x float>* %b_ptr
>>        ret void
>> }
>>
>> $ llc test.ll -mcpu=haswell -enable-no-nans-fp-math -O3
>> .section __TEXT,__text,regular,pure_instructions
>> .macosx_version_min 10, 12
>> LLVM ERROR: Cannot select: t43: v8f32 = vselect t7, t35, t32
>>   t7: v8f32,ch = load<LD32[%a_ptr]> t0, t2, undef:i64
>>     t2: i64,ch = CopyFromReg t0, Register:i64 %vreg0
>>       t1: i64 = Register %vreg0
>>     t6: i64 = undef
>>   t35: v8f32 = X86ISD::VBROADCAST t34
>>     t34: f32,ch = load<LD4[ConstantPool]> t0, t37, undef:i64
>>       t37: i64 = X86ISD::WrapperRIP TargetConstantPool:i64<float
>> -1.000000e+00> 0
>>         t36: i64 = TargetConstantPool<float -1.000000e+00> 0
>>       t6: i64 = undef
>>   t32: v8f32 = X86ISD::VBROADCAST t31
>>     t31: f32,ch = load<LD4[ConstantPool]> t0, t39, undef:i64
>>       t39: i64 = X86ISD::WrapperRIP TargetConstantPool:i64<float
>> 1.000000e+00> 0
>>         t38: i64 = TargetConstantPool<float 1.000000e+00> 0
>>       t6: i64 = undef
>> In function: fn
>>
>> On Sat, Feb 11, 2017 at 11:27 AM, Simon Pilgrim via llvm-commits <
>> llvm-commits at lists.llvm.org> wrote:
>>
>>> Author: rksimon
>>> Date: Sat Feb 11 11:27:21 2017
>>> New Revision: 294856
>>>
>>> URL: http://llvm.org/viewvc/llvm-project?rev=294856&view=rev
>>> Log:
>>> [X86][SSE] Convert getTargetShuffleMaskIndices to use
>>> getTargetConstantBitsFromNode.
>>>
>>> Removes duplicate constant extraction code in
>>> getTargetShuffleMaskIndices.
>>>
>>> getTargetConstantBitsFromNode - adds support for
>>> VZEXT_MOVL(SCALAR_TO_VECTOR) and fail if the caller doesn't support undef
>>> bits.
>>>
>>> Modified:
>>>     llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
>>>
>>> Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X8
>>> 6/X86ISelLowering.cpp?rev=294856&r1=294855&r2=294856&view=diff
>>> ============================================================
>>> ==================
>>> --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
>>> +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sat Feb 11 11:27:21
>>> 2017
>>> @@ -5151,7 +5151,8 @@ static const Constant *getTargetConstant
>>>  // Extract raw constant bits from constant pools.
>>>  static bool getTargetConstantBitsFromNode(SDValue Op, unsigned
>>> EltSizeInBits,
>>>                                            SmallBitVector &UndefElts,
>>> -                                          SmallVectorImpl<APInt>
>>> &EltBits) {
>>> +                                          SmallVectorImpl<APInt>
>>> &EltBits,
>>> +                                          bool AllowUndefs = true) {
>>>    assert(UndefElts.empty() && "Expected an empty UndefElts vector");
>>>    assert(EltBits.empty() && "Expected an empty EltBits vector");
>>>
>>> @@ -5171,6 +5172,10 @@ static bool getTargetConstantBitsFromNod
>>>
>>>    // Split the undef/constant single bitset data into the target
>>> elements.
>>>    auto SplitBitData = [&]() {
>>> +    // Don't split if we don't allow undef bits.
>>> +    if (UndefBits.getBoolValue() && !AllowUndefs)
>>> +      return false;
>>> +
>>>      UndefElts = SmallBitVector(NumElts, false);
>>>      EltBits.resize(NumElts, APInt(EltSizeInBits, 0));
>>>
>>> @@ -5264,89 +5269,34 @@ static bool getTargetConstantBitsFromNod
>>>      }
>>>    }
>>>
>>> +  // Extract a rematerialized scalar constant insertion.
>>> +  if (Op.getOpcode() == X86ISD::VZEXT_MOVL &&
>>> +      Op.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR &&
>>> +      isa<ConstantSDNode>(Op.getOperand(0).getOperand(0))) {
>>> +    auto *CN = cast<ConstantSDNode>(Op.getOperand(0).getOperand(0));
>>> +    MaskBits = CN->getAPIntValue().zextOrTrunc(SrcEltSizeInBits);
>>> +    MaskBits = MaskBits.zext(SizeInBits);
>>> +    return SplitBitData();
>>> +  }
>>> +
>>>    return false;
>>>  }
>>>
>>> -// TODO: Merge more of this with getTargetConstantBitsFromNode.
>>>  static bool getTargetShuffleMaskIndices(SDValue MaskNode,
>>>                                          unsigned MaskEltSizeInBits,
>>>                                          SmallVectorImpl<uint64_t>
>>> &RawMask) {
>>> -  MaskNode = peekThroughBitcasts(MaskNode);
>>> -
>>> -  MVT VT = MaskNode.getSimpleValueType();
>>> -  assert(VT.isVector() && "Can't produce a non-vector with a
>>> build_vector!");
>>> -  unsigned NumMaskElts = VT.getSizeInBits() / MaskEltSizeInBits;
>>> -
>>> -  // Split an APInt element into MaskEltSizeInBits sized pieces and
>>> -  // insert into the shuffle mask.
>>> -  auto SplitElementToMask = [&](APInt Element) {
>>> -    // Note that this is x86 and so always little endian: the low byte
>>> is
>>> -    // the first byte of the mask.
>>> -    int Split = VT.getScalarSizeInBits() / MaskEltSizeInBits;
>>> -    for (int i = 0; i < Split; ++i) {
>>> -      APInt RawElt = Element.getLoBits(MaskEltSizeInBits);
>>> -      Element = Element.lshr(MaskEltSizeInBits);
>>> -      RawMask.push_back(RawElt.getZExtValue());
>>> -    }
>>> -  };
>>> -
>>> -  if (MaskNode.getOpcode() == X86ISD::VBROADCAST) {
>>> -    // TODO: Handle (MaskEltSizeInBits % VT.getScalarSizeInBits()) == 0
>>> -    // TODO: Handle (VT.getScalarSizeInBits() % MaskEltSizeInBits) == 0
>>> -    if (VT.getScalarSizeInBits() != MaskEltSizeInBits)
>>> -      return false;
>>> -    if (auto *CN = dyn_cast<ConstantSDNode>(MaskNode.getOperand(0))) {
>>> -      const APInt &MaskElement = CN->getAPIntValue();
>>> -      for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
>>> -        APInt RawElt = MaskElement.getLoBits(MaskEltSizeInBits);
>>> -        RawMask.push_back(RawElt.getZExtValue());
>>> -      }
>>> -    }
>>> -    return false;
>>> -  }
>>> +  SmallBitVector UndefElts;
>>> +  SmallVector<APInt, 64> EltBits;
>>>
>>> -  if (MaskNode.getOpcode() == X86ISD::VZEXT_MOVL &&
>>> -      MaskNode.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR) {
>>> -    SDValue MaskOp = MaskNode.getOperand(0).getOperand(0);
>>> -    if (auto *CN = dyn_cast<ConstantSDNode>(MaskOp)) {
>>> -      if ((MaskEltSizeInBits % VT.getScalarSizeInBits()) == 0) {
>>> -        RawMask.push_back(CN->getZExtValue());
>>> -        RawMask.append(NumMaskElts - 1, 0);
>>> -        return true;
>>> -      }
>>> -
>>> -      if ((VT.getScalarSizeInBits() % MaskEltSizeInBits) == 0) {
>>> -        unsigned ElementSplit = VT.getScalarSizeInBits() /
>>> MaskEltSizeInBits;
>>> -        SplitElementToMask(CN->getAPIntValue());
>>> -        RawMask.append((VT.getVectorNumElements() - 1) * ElementSplit,
>>> 0);
>>> -        return true;
>>> -      }
>>> -    }
>>> -    return false;
>>> -  }
>>> -
>>> -  if (MaskNode.getOpcode() != ISD::BUILD_VECTOR)
>>> +  // Extract the raw target constant bits.
>>> +  // FIXME: We currently don't support UNDEF bits or mask entries.
>>> +  if (!getTargetConstantBitsFromNode(MaskNode, MaskEltSizeInBits,
>>> UndefElts,
>>> +                                     EltBits, /* AllowUndefs */ false))
>>>      return false;
>>>
>>> -  // We can always decode if the buildvector is all zero constants,
>>> -  // but can't use isBuildVectorAllZeros as it might contain UNDEFs.
>>> -  if (all_of(MaskNode->ops(), X86::isZeroNode)) {
>>> -    RawMask.append(NumMaskElts, 0);
>>> -    return true;
>>> -  }
>>> -
>>> -  // TODO: Handle (MaskEltSizeInBits % VT.getScalarSizeInBits()) == 0
>>> -  if ((VT.getScalarSizeInBits() % MaskEltSizeInBits) != 0)
>>> -    return false;
>>> -
>>> -  for (SDValue Op : MaskNode->ops()) {
>>> -    if (auto *CN = dyn_cast<ConstantSDNode>(Op.getNode()))
>>> -      SplitElementToMask(CN->getAPIntValue());
>>> -    else if (auto *CFN = dyn_cast<ConstantFPSDNode>(Op.getNode()))
>>> -      SplitElementToMask(CFN->getValueAPF().bitcastToAPInt());
>>> -    else
>>> -      return false;
>>> -  }
>>> +  // Insert the extracted elements into the mask.
>>> +  for (APInt Elt : EltBits)
>>> +    RawMask.push_back(Elt.getZExtValue());
>>>
>>>    return true;
>>>  }
>>>
>>>
>>> _______________________________________________
>>> llvm-commits mailing list
>>> llvm-commits at lists.llvm.org
>>> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>>>
>>
>>
>>
>>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20170212/c6bd4fa1/attachment.html>


More information about the llvm-commits mailing list