[llvm] r294856 - [X86][SSE] Convert getTargetShuffleMaskIndices to use getTargetConstantBitsFromNode.
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Sun Feb 12 13:54:20 PST 2017
Handing off to @spatel as it appears to be due to rL294863, not rL294856.
On 12/02/2017 19:35, Simon Pilgrim wrote:
>
> Thanks for the test case, looking at this now. Simon.
>
> On 12/02/2017 19:10, Andrew Adams wrote:
>> Hi Simon,
>>
>> A commit in between 294848 and 294862 has created a problem when
>> no-nans-fp-math is on. I would open a bug, but buganizer is down.
>> This commit looks at least related and in the right range. Here's a
>> repro:
>>
>> test.ll:
>>
>> % Computes b = select(a < 0, -1, 1) * b
>> define void @fn(<8 x float>* %a_ptr, <8 x float>* %b_ptr) {
>> %a = load <8 x float>, <8 x float>* %a_ptr
>> %b = load <8 x float>, <8 x float>* %b_ptr
>> %cmp = fcmp olt <8 x float> %a, zeroinitializer
>> %sel = select <8 x i1> %cmp, <8 x float> <float -1.000000e+00,
>> float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float
>> -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float
>> -1.000000e+00>, <8 x float> <float 1.000000e+00, float 1.000000e+00,
>> float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float
>> 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
>> %c = fmul <8 x float> %sel, %b
>> store <8 x float> %c, <8 x float>* %b_ptr
>> ret void
>> }
>>
>> $ llc test.ll -mcpu=haswell -enable-no-nans-fp-math -O3
>> .section__TEXT,__text,regular,pure_instructions
>> .macosx_version_min 10, 12
>> LLVM ERROR: Cannot select: t43: v8f32 = vselect t7, t35, t32
>> t7: v8f32,ch = load<LD32[%a_ptr]> t0, t2, undef:i64
>> t2: i64,ch = CopyFromReg t0, Register:i64 %vreg0
>> t1: i64 = Register %vreg0
>> t6: i64 = undef
>> t35: v8f32 = X86ISD::VBROADCAST t34
>> t34: f32,ch = load<LD4[ConstantPool]> t0, t37, undef:i64
>> t37: i64 = X86ISD::WrapperRIP TargetConstantPool:i64<float
>> -1.000000e+00> 0
>> t36: i64 = TargetConstantPool<float -1.000000e+00> 0
>> t6: i64 = undef
>> t32: v8f32 = X86ISD::VBROADCAST t31
>> t31: f32,ch = load<LD4[ConstantPool]> t0, t39, undef:i64
>> t39: i64 = X86ISD::WrapperRIP TargetConstantPool:i64<float
>> 1.000000e+00> 0
>> t38: i64 = TargetConstantPool<float 1.000000e+00> 0
>> t6: i64 = undef
>> In function: fn
>>
>> On Sat, Feb 11, 2017 at 11:27 AM, Simon Pilgrim via llvm-commits
>> <llvm-commits at lists.llvm.org <mailto:llvm-commits at lists.llvm.org>> wrote:
>>
>> Author: rksimon
>> Date: Sat Feb 11 11:27:21 2017
>> New Revision: 294856
>>
>> URL: http://llvm.org/viewvc/llvm-project?rev=294856&view=rev
>> <http://llvm.org/viewvc/llvm-project?rev=294856&view=rev>
>> Log:
>> [X86][SSE] Convert getTargetShuffleMaskIndices to use
>> getTargetConstantBitsFromNode.
>>
>> Removes duplicate constant extraction code in
>> getTargetShuffleMaskIndices.
>>
>> getTargetConstantBitsFromNode - adds support for
>> VZEXT_MOVL(SCALAR_TO_VECTOR) and fail if the caller doesn't
>> support undef bits.
>>
>> Modified:
>> llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
>>
>> Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=294856&r1=294855&r2=294856&view=diff
>> <http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=294856&r1=294855&r2=294856&view=diff>
>> ==============================================================================
>> --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
>> +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sat Feb 11
>> 11:27:21 2017
>> @@ -5151,7 +5151,8 @@ static const Constant *getTargetConstant
>> // Extract raw constant bits from constant pools.
>> static bool getTargetConstantBitsFromNode(SDValue Op, unsigned
>> EltSizeInBits,
>> SmallBitVector &UndefElts,
>> - SmallVectorImpl<APInt> &EltBits) {
>> + SmallVectorImpl<APInt> &EltBits,
>> + bool AllowUndefs = true) {
>> assert(UndefElts.empty() && "Expected an empty UndefElts vector");
>> assert(EltBits.empty() && "Expected an empty EltBits vector");
>>
>> @@ -5171,6 +5172,10 @@ static bool getTargetConstantBitsFromNod
>>
>> // Split the undef/constant single bitset data into the target
>> elements.
>> auto SplitBitData = [&]() {
>> + // Don't split if we don't allow undef bits.
>> + if (UndefBits.getBoolValue() && !AllowUndefs)
>> + return false;
>> +
>> UndefElts = SmallBitVector(NumElts, false);
>> EltBits.resize(NumElts, APInt(EltSizeInBits, 0));
>>
>> @@ -5264,89 +5269,34 @@ static bool getTargetConstantBitsFromNod
>> }
>> }
>>
>> + // Extract a rematerialized scalar constant insertion.
>> + if (Op.getOpcode() == X86ISD::VZEXT_MOVL &&
>> + Op.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR &&
>> + isa<ConstantSDNode>(Op.getOperand(0).getOperand(0))) {
>> + auto *CN = cast<ConstantSDNode>(Op.getOperand(0).getOperand(0));
>> + MaskBits = CN->getAPIntValue().zextOrTrunc(SrcEltSizeInBits);
>> + MaskBits = MaskBits.zext(SizeInBits);
>> + return SplitBitData();
>> + }
>> +
>> return false;
>> }
>>
>> -// TODO: Merge more of this with getTargetConstantBitsFromNode.
>> static bool getTargetShuffleMaskIndices(SDValue MaskNode,
>> unsigned MaskEltSizeInBits,
>> SmallVectorImpl<uint64_t> &RawMask) {
>> - MaskNode = peekThroughBitcasts(MaskNode);
>> -
>> - MVT VT = MaskNode.getSimpleValueType();
>> - assert(VT.isVector() && "Can't produce a non-vector with a
>> build_vector!");
>> - unsigned NumMaskElts = VT.getSizeInBits() / MaskEltSizeInBits;
>> -
>> - // Split an APInt element into MaskEltSizeInBits sized pieces and
>> - // insert into the shuffle mask.
>> - auto SplitElementToMask = [&](APInt Element) {
>> - // Note that this is x86 and so always little endian: the
>> low byte is
>> - // the first byte of the mask.
>> - int Split = VT.getScalarSizeInBits() / MaskEltSizeInBits;
>> - for (int i = 0; i < Split; ++i) {
>> - APInt RawElt = Element.getLoBits(MaskEltSizeInBits);
>> - Element = Element.lshr(MaskEltSizeInBits);
>> - RawMask.push_back(RawElt.getZExtValue());
>> - }
>> - };
>> -
>> - if (MaskNode.getOpcode() == X86ISD::VBROADCAST) {
>> - // TODO: Handle (MaskEltSizeInBits %
>> VT.getScalarSizeInBits()) == 0
>> - // TODO: Handle (VT.getScalarSizeInBits() %
>> MaskEltSizeInBits) == 0
>> - if (VT.getScalarSizeInBits() != MaskEltSizeInBits)
>> - return false;
>> - if (auto *CN =
>> dyn_cast<ConstantSDNode>(MaskNode.getOperand(0))) {
>> - const APInt &MaskElement = CN->getAPIntValue();
>> - for (unsigned i = 0, e = VT.getVectorNumElements(); i !=
>> e; ++i) {
>> - APInt RawElt = MaskElement.getLoBits(MaskEltSizeInBits);
>> - RawMask.push_back(RawElt.getZExtValue());
>> - }
>> - }
>> - return false;
>> - }
>> + SmallBitVector UndefElts;
>> + SmallVector<APInt, 64> EltBits;
>>
>> - if (MaskNode.getOpcode() == X86ISD::VZEXT_MOVL &&
>> - MaskNode.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR) {
>> - SDValue MaskOp = MaskNode.getOperand(0).getOperand(0);
>> - if (auto *CN = dyn_cast<ConstantSDNode>(MaskOp)) {
>> - if ((MaskEltSizeInBits % VT.getScalarSizeInBits()) == 0) {
>> - RawMask.push_back(CN->getZExtValue());
>> - RawMask.append(NumMaskElts - 1, 0);
>> - return true;
>> - }
>> -
>> - if ((VT.getScalarSizeInBits() % MaskEltSizeInBits) == 0) {
>> - unsigned ElementSplit = VT.getScalarSizeInBits() /
>> MaskEltSizeInBits;
>> - SplitElementToMask(CN->getAPIntValue());
>> - RawMask.append((VT.getVectorNumElements() - 1) *
>> ElementSplit, 0);
>> - return true;
>> - }
>> - }
>> - return false;
>> - }
>> -
>> - if (MaskNode.getOpcode() != ISD::BUILD_VECTOR)
>> + // Extract the raw target constant bits.
>> + // FIXME: We currently don't support UNDEF bits or mask entries.
>> + if (!getTargetConstantBitsFromNode(MaskNode,
>> MaskEltSizeInBits, UndefElts,
>> + EltBits, /* AllowUndefs */
>> false))
>> return false;
>>
>> - // We can always decode if the buildvector is all zero constants,
>> - // but can't use isBuildVectorAllZeros as it might contain UNDEFs.
>> - if (all_of(MaskNode->ops(), X86::isZeroNode)) {
>> - RawMask.append(NumMaskElts, 0);
>> - return true;
>> - }
>> -
>> - // TODO: Handle (MaskEltSizeInBits % VT.getScalarSizeInBits())
>> == 0
>> - if ((VT.getScalarSizeInBits() % MaskEltSizeInBits) != 0)
>> - return false;
>> -
>> - for (SDValue Op : MaskNode->ops()) {
>> - if (auto *CN = dyn_cast<ConstantSDNode>(Op.getNode()))
>> - SplitElementToMask(CN->getAPIntValue());
>> - else if (auto *CFN = dyn_cast<ConstantFPSDNode>(Op.getNode()))
>> - SplitElementToMask(CFN->getValueAPF().bitcastToAPInt());
>> - else
>> - return false;
>> - }
>> + // Insert the extracted elements into the mask.
>> + for (APInt Elt : EltBits)
>> + RawMask.push_back(Elt.getZExtValue());
>>
>> return true;
>> }
>>
>>
>> _______________________________________________
>> llvm-commits mailing list
>> llvm-commits at lists.llvm.org <mailto:llvm-commits at lists.llvm.org>
>> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>> <http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits>
>>
>>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20170212/f2bc6f8c/attachment.html>
More information about the llvm-commits
mailing list