[llvm] r294856 - [X86][SSE] Convert getTargetShuffleMaskIndices to use getTargetConstantBitsFromNode.
Andrew Adams via llvm-commits
llvm-commits at lists.llvm.org
Sun Feb 12 11:10:24 PST 2017
Hi Simon,
A commit between r294848 and r294862 has introduced an instruction-selection
failure when -enable-no-nans-fp-math is on. I would open a bug, but buganizer
is down. This commit looks at least related and is in the right range. Here's
a repro:
test.ll:
; Computes b = select(a < 0, -1, 1) * b
; Loads the <8 x float> vectors at %a_ptr and %b_ptr, picks -1.0 or 1.0 per
; lane depending on whether the matching lane of %a is below zero, multiplies
; that factor by %b, and stores the product back through %b_ptr.
define void @fn(<8 x float>* %a_ptr, <8 x float>* %b_ptr) {
%a = load <8 x float>, <8 x float>* %a_ptr
%b = load <8 x float>, <8 x float>* %b_ptr
; Per-lane ordered less-than compare of %a against an all-zero vector.
%cmp = fcmp olt <8 x float> %a, zeroinitializer
; Lanes where %cmp is true take -1.0, the rest take 1.0; both operands are
; constant vectors with the same value in every lane.
%sel = select <8 x i1> %cmp, <8 x float> <float -1.000000e+00, float
-1.000000e+00, float -1.000000e+00, float -1.000000e+00, float
-1.000000e+00, float -1.000000e+00, float -1.000000e+00, float
-1.000000e+00>, <8 x float> <float 1.000000e+00, float 1.000000e+00, float
1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00,
float 1.000000e+00, float 1.000000e+00>
; Apply the selected factor to %b and write the result over the original %b.
%c = fmul <8 x float> %sel, %b
store <8 x float> %c, <8 x float>* %b_ptr
ret void
}
$ llc test.ll -mcpu=haswell -enable-no-nans-fp-math -O3
.section __TEXT,__text,regular,pure_instructions
.macosx_version_min 10, 12
LLVM ERROR: Cannot select: t43: v8f32 = vselect t7, t35, t32
t7: v8f32,ch = load<LD32[%a_ptr]> t0, t2, undef:i64
t2: i64,ch = CopyFromReg t0, Register:i64 %vreg0
t1: i64 = Register %vreg0
t6: i64 = undef
t35: v8f32 = X86ISD::VBROADCAST t34
t34: f32,ch = load<LD4[ConstantPool]> t0, t37, undef:i64
t37: i64 = X86ISD::WrapperRIP TargetConstantPool:i64<float -1.000000e+00> 0
t36: i64 = TargetConstantPool<float -1.000000e+00> 0
t6: i64 = undef
t32: v8f32 = X86ISD::VBROADCAST t31
t31: f32,ch = load<LD4[ConstantPool]> t0, t39, undef:i64
t39: i64 = X86ISD::WrapperRIP TargetConstantPool:i64<float 1.000000e+00> 0
t38: i64 = TargetConstantPool<float 1.000000e+00> 0
t6: i64 = undef
In function: fn
On Sat, Feb 11, 2017 at 11:27 AM, Simon Pilgrim via llvm-commits <
llvm-commits at lists.llvm.org> wrote:
> Author: rksimon
> Date: Sat Feb 11 11:27:21 2017
> New Revision: 294856
>
> URL: http://llvm.org/viewvc/llvm-project?rev=294856&view=rev
> Log:
> [X86][SSE] Convert getTargetShuffleMaskIndices to use
> getTargetConstantBitsFromNode.
>
> Removes duplicate constant extraction code in getTargetShuffleMaskIndices.
>
> getTargetConstantBitsFromNode - adds support for
> VZEXT_MOVL(SCALAR_TO_VECTOR) and fail if the caller doesn't support undef
> bits.
>
> Modified:
> llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
>
> Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=294856&r1=294855&r2=294856&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
> +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sat Feb 11 11:27:21 2017
> @@ -5151,7 +5151,8 @@ static const Constant *getTargetConstant
> // Extract raw constant bits from constant pools.
> static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
> SmallBitVector &UndefElts,
> - SmallVectorImpl<APInt> &EltBits) {
> + SmallVectorImpl<APInt> &EltBits,
> + bool AllowUndefs = true) {
> assert(UndefElts.empty() && "Expected an empty UndefElts vector");
> assert(EltBits.empty() && "Expected an empty EltBits vector");
>
> @@ -5171,6 +5172,10 @@ static bool getTargetConstantBitsFromNod
>
> // Split the undef/constant single bitset data into the target elements.
> auto SplitBitData = [&]() {
> + // Don't split if we don't allow undef bits.
> + if (UndefBits.getBoolValue() && !AllowUndefs)
> + return false;
> +
> UndefElts = SmallBitVector(NumElts, false);
> EltBits.resize(NumElts, APInt(EltSizeInBits, 0));
>
> @@ -5264,89 +5269,34 @@ static bool getTargetConstantBitsFromNod
> }
> }
>
> + // Extract a rematerialized scalar constant insertion.
> + if (Op.getOpcode() == X86ISD::VZEXT_MOVL &&
> + Op.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR &&
> + isa<ConstantSDNode>(Op.getOperand(0).getOperand(0))) {
> + auto *CN = cast<ConstantSDNode>(Op.getOperand(0).getOperand(0));
> + MaskBits = CN->getAPIntValue().zextOrTrunc(SrcEltSizeInBits);
> + MaskBits = MaskBits.zext(SizeInBits);
> + return SplitBitData();
> + }
> +
> return false;
> }
>
> -// TODO: Merge more of this with getTargetConstantBitsFromNode.
> static bool getTargetShuffleMaskIndices(SDValue MaskNode,
> unsigned MaskEltSizeInBits,
> SmallVectorImpl<uint64_t> &RawMask) {
> - MaskNode = peekThroughBitcasts(MaskNode);
> -
> - MVT VT = MaskNode.getSimpleValueType();
> - assert(VT.isVector() && "Can't produce a non-vector with a build_vector!");
> - unsigned NumMaskElts = VT.getSizeInBits() / MaskEltSizeInBits;
> -
> - // Split an APInt element into MaskEltSizeInBits sized pieces and
> - // insert into the shuffle mask.
> - auto SplitElementToMask = [&](APInt Element) {
> - // Note that this is x86 and so always little endian: the low byte is
> - // the first byte of the mask.
> - int Split = VT.getScalarSizeInBits() / MaskEltSizeInBits;
> - for (int i = 0; i < Split; ++i) {
> - APInt RawElt = Element.getLoBits(MaskEltSizeInBits);
> - Element = Element.lshr(MaskEltSizeInBits);
> - RawMask.push_back(RawElt.getZExtValue());
> - }
> - };
> -
> - if (MaskNode.getOpcode() == X86ISD::VBROADCAST) {
> - // TODO: Handle (MaskEltSizeInBits % VT.getScalarSizeInBits()) == 0
> - // TODO: Handle (VT.getScalarSizeInBits() % MaskEltSizeInBits) == 0
> - if (VT.getScalarSizeInBits() != MaskEltSizeInBits)
> - return false;
> - if (auto *CN = dyn_cast<ConstantSDNode>(MaskNode.getOperand(0))) {
> - const APInt &MaskElement = CN->getAPIntValue();
> - for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
> - APInt RawElt = MaskElement.getLoBits(MaskEltSizeInBits);
> - RawMask.push_back(RawElt.getZExtValue());
> - }
> - }
> - return false;
> - }
> + SmallBitVector UndefElts;
> + SmallVector<APInt, 64> EltBits;
>
> - if (MaskNode.getOpcode() == X86ISD::VZEXT_MOVL &&
> - MaskNode.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR) {
> - SDValue MaskOp = MaskNode.getOperand(0).getOperand(0);
> - if (auto *CN = dyn_cast<ConstantSDNode>(MaskOp)) {
> - if ((MaskEltSizeInBits % VT.getScalarSizeInBits()) == 0) {
> - RawMask.push_back(CN->getZExtValue());
> - RawMask.append(NumMaskElts - 1, 0);
> - return true;
> - }
> -
> - if ((VT.getScalarSizeInBits() % MaskEltSizeInBits) == 0) {
> - unsigned ElementSplit = VT.getScalarSizeInBits() / MaskEltSizeInBits;
> - SplitElementToMask(CN->getAPIntValue());
> - RawMask.append((VT.getVectorNumElements() - 1) * ElementSplit, 0);
> - return true;
> - }
> - }
> - return false;
> - }
> -
> - if (MaskNode.getOpcode() != ISD::BUILD_VECTOR)
> + // Extract the raw target constant bits.
> + // FIXME: We currently don't support UNDEF bits or mask entries.
> + if (!getTargetConstantBitsFromNode(MaskNode, MaskEltSizeInBits, UndefElts,
> + EltBits, /* AllowUndefs */ false))
> return false;
>
> - // We can always decode if the buildvector is all zero constants,
> - // but can't use isBuildVectorAllZeros as it might contain UNDEFs.
> - if (all_of(MaskNode->ops(), X86::isZeroNode)) {
> - RawMask.append(NumMaskElts, 0);
> - return true;
> - }
> -
> - // TODO: Handle (MaskEltSizeInBits % VT.getScalarSizeInBits()) == 0
> - if ((VT.getScalarSizeInBits() % MaskEltSizeInBits) != 0)
> - return false;
> -
> - for (SDValue Op : MaskNode->ops()) {
> - if (auto *CN = dyn_cast<ConstantSDNode>(Op.getNode()))
> - SplitElementToMask(CN->getAPIntValue());
> - else if (auto *CFN = dyn_cast<ConstantFPSDNode>(Op.getNode()))
> - SplitElementToMask(CFN->getValueAPF().bitcastToAPInt());
> - else
> - return false;
> - }
> + // Insert the extracted elements into the mask.
> + for (APInt Elt : EltBits)
> + RawMask.push_back(Elt.getZExtValue());
>
> return true;
> }
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20170212/e401ecb3/attachment.html>
More information about the llvm-commits
mailing list