<div dir="ltr">Hi Simon, <div><br></div><div><div>A commit in between 294848 and 294862 has created a problem when no-nans-fp-math is on. I would open a bug, but buganizer is down. This commit looks at least related and in the right range. Here's a repro:</div><div><br></div><div>test.ll:</div><div><br></div><div>% Computes b = select(a < 0, -1, 1) * b</div><div>define void @fn(<8 x float>* %a_ptr, <8 x float>* %b_ptr) {</div><div> %a = load <8 x float>, <8 x float>* %a_ptr</div><div> %b = load <8 x float>, <8 x float>* %b_ptr</div><div> %cmp = fcmp olt <8 x float> %a, zeroinitializer</div><div> %sel = select <8 x i1> %cmp, <8 x float> <float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00>, <8 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00></div><div> %c = fmul <8 x float> %sel, %b</div><div> store <8 x float> %c, <8 x float>* %b_ptr</div><div> ret void</div><div>}</div><div><br></div><div>$ llc test.ll -mcpu=haswell -enable-no-nans-fp-math -O3</div><div><span class="gmail-Apple-tab-span" style="white-space:pre"> </span>.section<span class="gmail-Apple-tab-span" style="white-space:pre"> </span>__TEXT,__text,regular,pure_instructions</div><div><span class="gmail-Apple-tab-span" style="white-space:pre"> </span>.macosx_version_min 10, 12</div><div>LLVM ERROR: Cannot select: t43: v8f32 = vselect t7, t35, t32</div><div> t7: v8f32,ch = load<LD32[%a_ptr]> t0, t2, undef:i64</div><div> t2: i64,ch = CopyFromReg t0, Register:i64 %vreg0</div><div> t1: i64 = Register %vreg0</div><div> t6: i64 = undef</div><div> t35: v8f32 = X86ISD::VBROADCAST t34</div><div> t34: f32,ch = load<LD4[ConstantPool]> t0, t37, undef:i64</div><div> t37: i64 = X86ISD::WrapperRIP TargetConstantPool:i64<float -1.000000e+00> 0</div><div> t36: i64 = TargetConstantPool<float 
-1.000000e+00> 0</div><div> t6: i64 = undef</div><div> t32: v8f32 = X86ISD::VBROADCAST t31</div><div> t31: f32,ch = load<LD4[ConstantPool]> t0, t39, undef:i64</div><div> t39: i64 = X86ISD::WrapperRIP TargetConstantPool:i64<float 1.000000e+00> 0</div><div> t38: i64 = TargetConstantPool<float 1.000000e+00> 0</div><div> t6: i64 = undef</div><div>In function: fn</div></div></div><div class="gmail_extra"><br><div class="gmail_quote">On Sat, Feb 11, 2017 at 11:27 AM, Simon Pilgrim via llvm-commits <span dir="ltr"><<a href="mailto:llvm-commits@lists.llvm.org" target="_blank">llvm-commits@lists.llvm.org</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">Author: rksimon<br>
Date: Sat Feb 11 11:27:21 2017<br>
New Revision: 294856<br>
<br>
URL: <a href="http://llvm.org/viewvc/llvm-project?rev=294856&view=rev" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-<wbr>project?rev=294856&view=rev</a><br>
Log:<br>
[X86][SSE] Convert getTargetShuffleMaskIndices to use getTargetConstantBitsFromNode.<br>
<br>
Removes duplicate constant extraction code in getTargetShuffleMaskIndices.<br>
<br>
getTargetConstantBitsFromNode - adds support for VZEXT_MOVL(SCALAR_TO_VECTOR) and fails if the caller doesn't support undef bits.<br>
<br>
Modified:<br>
llvm/trunk/lib/Target/X86/<wbr>X86ISelLowering.cpp<br>
<br>
Modified: llvm/trunk/lib/Target/X86/<wbr>X86ISelLowering.cpp<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=294856&r1=294855&r2=294856&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-<wbr>project/llvm/trunk/lib/Target/<wbr>X86/X86ISelLowering.cpp?rev=<wbr>294856&r1=294855&r2=294856&<wbr>view=diff</a><br>
==============================<wbr>==============================<wbr>==================<br>
--- llvm/trunk/lib/Target/X86/<wbr>X86ISelLowering.cpp (original)<br>
+++ llvm/trunk/lib/Target/X86/<wbr>X86ISelLowering.cpp Sat Feb 11 11:27:21 2017<br>
@@ -5151,7 +5151,8 @@ static const Constant *getTargetConstant<br>
// Extract raw constant bits from constant pools.<br>
static bool getTargetConstantBitsFromNode(<wbr>SDValue Op, unsigned EltSizeInBits,<br>
SmallBitVector &UndefElts,<br>
- SmallVectorImpl<APInt> &EltBits) {<br>
+ SmallVectorImpl<APInt> &EltBits,<br>
+ bool AllowUndefs = true) {<br>
assert(UndefElts.empty() && "Expected an empty UndefElts vector");<br>
assert(EltBits.empty() && "Expected an empty EltBits vector");<br>
<br>
@@ -5171,6 +5172,10 @@ static bool getTargetConstantBitsFromNod<br>
<br>
// Split the undef/constant single bitset data into the target elements.<br>
auto SplitBitData = [&]() {<br>
+ // Don't split if we don't allow undef bits.<br>
+ if (UndefBits.getBoolValue() && !AllowUndefs)<br>
+ return false;<br>
+<br>
UndefElts = SmallBitVector(NumElts, false);<br>
EltBits.resize(NumElts, APInt(EltSizeInBits, 0));<br>
<br>
@@ -5264,89 +5269,34 @@ static bool getTargetConstantBitsFromNod<br>
}<br>
}<br>
<br>
+ // Extract a rematerialized scalar constant insertion.<br>
+ if (Op.getOpcode() == X86ISD::VZEXT_MOVL &&<br>
+ Op.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR &&<br>
+ isa<ConstantSDNode>(Op.<wbr>getOperand(0).getOperand(0))) {<br>
+ auto *CN = cast<ConstantSDNode>(Op.<wbr>getOperand(0).getOperand(0));<br>
+ MaskBits = CN->getAPIntValue().<wbr>zextOrTrunc(SrcEltSizeInBits);<br>
+ MaskBits = MaskBits.zext(SizeInBits);<br>
+ return SplitBitData();<br>
+ }<br>
+<br>
return false;<br>
}<br>
<br>
-// TODO: Merge more of this with getTargetConstantBitsFromNode.<br>
static bool getTargetShuffleMaskIndices(<wbr>SDValue MaskNode,<br>
unsigned MaskEltSizeInBits,<br>
SmallVectorImpl<uint64_t> &RawMask) {<br>
- MaskNode = peekThroughBitcasts(MaskNode);<br>
-<br>
- MVT VT = MaskNode.getSimpleValueType();<br>
- assert(VT.isVector() && "Can't produce a non-vector with a build_vector!");<br>
- unsigned NumMaskElts = VT.getSizeInBits() / MaskEltSizeInBits;<br>
-<br>
- // Split an APInt element into MaskEltSizeInBits sized pieces and<br>
- // insert into the shuffle mask.<br>
- auto SplitElementToMask = [&](APInt Element) {<br>
- // Note that this is x86 and so always little endian: the low byte is<br>
- // the first byte of the mask.<br>
- int Split = VT.getScalarSizeInBits() / MaskEltSizeInBits;<br>
- for (int i = 0; i < Split; ++i) {<br>
- APInt RawElt = Element.getLoBits(<wbr>MaskEltSizeInBits);<br>
- Element = Element.lshr(<wbr>MaskEltSizeInBits);<br>
- RawMask.push_back(RawElt.<wbr>getZExtValue());<br>
- }<br>
- };<br>
-<br>
- if (MaskNode.getOpcode() == X86ISD::VBROADCAST) {<br>
- // TODO: Handle (MaskEltSizeInBits % VT.getScalarSizeInBits()) == 0<br>
- // TODO: Handle (VT.getScalarSizeInBits() % MaskEltSizeInBits) == 0<br>
- if (VT.getScalarSizeInBits() != MaskEltSizeInBits)<br>
- return false;<br>
- if (auto *CN = dyn_cast<ConstantSDNode>(<wbr>MaskNode.getOperand(0))) {<br>
- const APInt &MaskElement = CN->getAPIntValue();<br>
- for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {<br>
- APInt RawElt = MaskElement.getLoBits(<wbr>MaskEltSizeInBits);<br>
- RawMask.push_back(RawElt.<wbr>getZExtValue());<br>
- }<br>
- }<br>
- return false;<br>
- }<br>
+ SmallBitVector UndefElts;<br>
+ SmallVector<APInt, 64> EltBits;<br>
<br>
- if (MaskNode.getOpcode() == X86ISD::VZEXT_MOVL &&<br>
- MaskNode.getOperand(0).<wbr>getOpcode() == ISD::SCALAR_TO_VECTOR) {<br>
- SDValue MaskOp = MaskNode.getOperand(0).<wbr>getOperand(0);<br>
- if (auto *CN = dyn_cast<ConstantSDNode>(<wbr>MaskOp)) {<br>
- if ((MaskEltSizeInBits % VT.getScalarSizeInBits()) == 0) {<br>
- RawMask.push_back(CN-><wbr>getZExtValue());<br>
- RawMask.append(NumMaskElts - 1, 0);<br>
- return true;<br>
- }<br>
-<br>
- if ((VT.getScalarSizeInBits() % MaskEltSizeInBits) == 0) {<br>
- unsigned ElementSplit = VT.getScalarSizeInBits() / MaskEltSizeInBits;<br>
- SplitElementToMask(CN-><wbr>getAPIntValue());<br>
- RawMask.append((VT.<wbr>getVectorNumElements() - 1) * ElementSplit, 0);<br>
- return true;<br>
- }<br>
- }<br>
- return false;<br>
- }<br>
-<br>
- if (MaskNode.getOpcode() != ISD::BUILD_VECTOR)<br>
+ // Extract the raw target constant bits.<br>
+ // FIXME: We currently don't support UNDEF bits or mask entries.<br>
+ if (!<wbr>getTargetConstantBitsFromNode(<wbr>MaskNode, MaskEltSizeInBits, UndefElts,<br>
+ EltBits, /* AllowUndefs */ false))<br>
return false;<br>
<br>
- // We can always decode if the buildvector is all zero constants,<br>
- // but can't use isBuildVectorAllZeros as it might contain UNDEFs.<br>
- if (all_of(MaskNode->ops(), X86::isZeroNode)) {<br>
- RawMask.append(NumMaskElts, 0);<br>
- return true;<br>
- }<br>
-<br>
- // TODO: Handle (MaskEltSizeInBits % VT.getScalarSizeInBits()) == 0<br>
- if ((VT.getScalarSizeInBits() % MaskEltSizeInBits) != 0)<br>
- return false;<br>
-<br>
- for (SDValue Op : MaskNode->ops()) {<br>
- if (auto *CN = dyn_cast<ConstantSDNode>(Op.<wbr>getNode()))<br>
- SplitElementToMask(CN-><wbr>getAPIntValue());<br>
- else if (auto *CFN = dyn_cast<ConstantFPSDNode>(Op.<wbr>getNode()))<br>
- SplitElementToMask(CFN-><wbr>getValueAPF().bitcastToAPInt()<wbr>);<br>
- else<br>
- return false;<br>
- }<br>
+ // Insert the extracted elements into the mask.<br>
+ for (APInt Elt : EltBits)<br>
+ RawMask.push_back(Elt.<wbr>getZExtValue());<br>
<br>
return true;<br>
}<br>
<br>
<br>
______________________________<wbr>_________________<br>
llvm-commits mailing list<br>
<a href="mailto:llvm-commits@lists.llvm.org">llvm-commits@lists.llvm.org</a><br>
<a href="http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits" rel="noreferrer" target="_blank">http://lists.llvm.org/cgi-bin/<wbr>mailman/listinfo/llvm-commits</a><br>
</blockquote></div><br></div>