[llvm-commits] [llvm] r148108 - in /llvm/trunk/lib/Target/X86: X86ISelLowering.cpp X86InstrInfo.cpp X86InstrSSE.td X86MCInstLower.cpp
Craig Topper
craig.topper at gmail.com
Fri Jan 13 00:12:35 PST 2012
Author: ctopper
Date: Fri Jan 13 02:12:35 2012
New Revision: 148108
URL: http://llvm.org/viewvc/llvm-project?rev=148108&view=rev
Log:
Make X86 instruction selection use 256-bit VPXOR for build_vector of all ones if AVX2 is enabled. This gives the ExeDepsFix pass a chance to choose FP vs int as appropriate. Also use v8i32 as the type for getZeroVector if AVX2 is enabled. This is consistent with SSE2 using prefering v4i32.
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
llvm/trunk/lib/Target/X86/X86InstrSSE.td
llvm/trunk/lib/Target/X86/X86MCInstLower.cpp
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=148108&r1=148107&r2=148108&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Fri Jan 13 02:12:35 2012
@@ -4234,8 +4234,8 @@
/// getZeroVector - Returns a vector of specified type with all zero elements.
///
-static SDValue getZeroVector(EVT VT, bool HasSSE2, SelectionDAG &DAG,
- DebugLoc dl) {
+static SDValue getZeroVector(EVT VT, bool HasSSE2, bool HasAVX2,
+ SelectionDAG &DAG, DebugLoc dl) {
assert(VT.isVector() && "Expected a vector type");
// Always build SSE zero vectors as <4 x i32> bitcasted
@@ -4250,12 +4250,17 @@
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f32, Cst, Cst, Cst, Cst);
}
} else if (VT.getSizeInBits() == 256) { // AVX
- // 256-bit logic and arithmetic instructions in AVX are
- // all floating-point, no support for integer ops. Default
- // to emitting fp zeroed vectors then.
- SDValue Cst = DAG.getTargetConstantFP(+0.0, MVT::f32);
- SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
- Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8f32, Ops, 8);
+ if (HasAVX2) { // AVX2
+ SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
+ SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops, 8);
+ } else {
+ // 256-bit logic and arithmetic instructions in AVX are all
+ // floating-point, no support for integer ops. Emit fp zeroed vectors.
+ SDValue Cst = DAG.getTargetConstantFP(+0.0, MVT::f32);
+ SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8f32, Ops, 8);
+ }
}
return DAG.getNode(ISD::BITCAST, dl, VT, Vec);
}
@@ -4445,11 +4450,13 @@
/// element of V2 is swizzled into the zero/undef vector, landing at element
/// Idx. This produces a shuffle mask like 4,1,2,3 (idx=0) or 0,1,2,4 (idx=3).
static SDValue getShuffleVectorZeroOrUndef(SDValue V2, unsigned Idx,
- bool isZero, bool HasSSE2,
+ bool IsZero,
+ const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
EVT VT = V2.getValueType();
- SDValue V1 = isZero
- ? getZeroVector(VT, HasSSE2, DAG, V2.getDebugLoc()) : DAG.getUNDEF(VT);
+ SDValue V1 = IsZero
+ ? getZeroVector(VT, Subtarget->hasSSE2(), Subtarget->hasAVX2(), DAG,
+ V2.getDebugLoc()) : DAG.getUNDEF(VT);
unsigned NumElems = VT.getVectorNumElements();
SmallVector<int, 16> MaskVec;
for (unsigned i = 0; i != NumElems; ++i)
@@ -4729,7 +4736,8 @@
bool ThisIsNonZero = (NonZeros & (1 << i)) != 0;
if (ThisIsNonZero && First) {
if (NumZero)
- V = getZeroVector(MVT::v8i16, true, DAG, dl);
+ V = getZeroVector(MVT::v8i16, /*HasSSE2*/ true, /*HasAVX2*/ false,
+ DAG, dl);
else
V = DAG.getUNDEF(MVT::v8i16);
First = false;
@@ -4777,7 +4785,8 @@
if (isNonZero) {
if (First) {
if (NumZero)
- V = getZeroVector(MVT::v8i16, true, DAG, dl);
+ V = getZeroVector(MVT::v8i16, /*HasSSE2*/ true, /*HasAVX2*/ false,
+ DAG, dl);
else
V = DAG.getUNDEF(MVT::v8i16);
First = false;
@@ -5065,7 +5074,8 @@
Op.getValueType() == MVT::v8i32)
return Op;
- return getZeroVector(Op.getValueType(), Subtarget->hasSSE2(), DAG, dl);
+ return getZeroVector(Op.getValueType(), Subtarget->hasSSE2(),
+ Subtarget->hasAVX2(), DAG, dl);
}
// Vectors containing all ones can be matched by pcmpeqd on 128-bit width
@@ -5132,8 +5142,7 @@
// convert it to a vector with movd (S2V+shuffle to zero extend).
Item = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Item);
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT, Item);
- Item = getShuffleVectorZeroOrUndef(Item, 0, true,
- Subtarget->hasSSE2(), DAG);
+ Item = getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG);
// Now we have our 32-bit value zero extended in the low element of
// a vector. If Idx != 0, swizzle it into place.
@@ -5161,28 +5170,28 @@
if (ExtVT == MVT::i32 || ExtVT == MVT::f32 || ExtVT == MVT::f64 ||
(ExtVT == MVT::i64 && Subtarget->is64Bit())) {
if (VT.getSizeInBits() == 256) {
- SDValue ZeroVec = getZeroVector(VT, true, DAG, dl);
+ SDValue ZeroVec = getZeroVector(VT, Subtarget->hasSSE2(),
+ Subtarget->hasAVX2(), DAG, dl);
return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, ZeroVec,
Item, DAG.getIntPtrConstant(0));
}
assert(VT.getSizeInBits() == 128 && "Expected an SSE value type!");
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
// Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector.
- return getShuffleVectorZeroOrUndef(Item, 0, true,
- Subtarget->hasSSE2(), DAG);
+ return getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG);
}
if (ExtVT == MVT::i16 || ExtVT == MVT::i8) {
Item = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Item);
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, Item);
if (VT.getSizeInBits() == 256) {
- SDValue ZeroVec = getZeroVector(MVT::v8i32, true, DAG, dl);
+ SDValue ZeroVec = getZeroVector(MVT::v8i32, Subtarget->hasSSE2(),
+ Subtarget->hasAVX2(), DAG, dl);
Item = Insert128BitVector(ZeroVec, Item, DAG.getConstant(0, MVT::i32),
DAG, dl);
} else {
assert(VT.getSizeInBits() == 128 && "Expected an SSE value type!");
- Item = getShuffleVectorZeroOrUndef(Item, 0, true,
- Subtarget->hasSSE2(), DAG);
+ Item = getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG);
}
return DAG.getNode(ISD::BITCAST, dl, VT, Item);
}
@@ -5211,8 +5220,7 @@
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
// Turn it into a shuffle of zero and zero-extended scalar to vector.
- Item = getShuffleVectorZeroOrUndef(Item, 0, NumZero > 0,
- Subtarget->hasSSE2(), DAG);
+ Item = getShuffleVectorZeroOrUndef(Item, 0, NumZero > 0, Subtarget, DAG);
SmallVector<int, 8> MaskVec;
for (unsigned i = 0; i < NumElems; i++)
MaskVec.push_back(i == Idx ? 0 : 1);
@@ -5268,8 +5276,7 @@
unsigned Idx = CountTrailingZeros_32(NonZeros);
SDValue V2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT,
Op.getOperand(Idx));
- return getShuffleVectorZeroOrUndef(V2, Idx, true,
- Subtarget->hasSSE2(), DAG);
+ return getShuffleVectorZeroOrUndef(V2, Idx, true, Subtarget, DAG);
}
return SDValue();
}
@@ -5294,7 +5301,8 @@
for (unsigned i = 0; i < 4; ++i) {
bool isZero = !(NonZeros & (1 << i));
if (isZero)
- V[i] = getZeroVector(VT, Subtarget->hasSSE2(), DAG, dl);
+ V[i] = getZeroVector(VT, Subtarget->hasSSE2(), Subtarget->hasAVX2(),
+ DAG, dl);
else
V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(i));
}
@@ -6402,7 +6410,8 @@
SDValue V2 = Op.getOperand(1);
if (isZeroShuffle(SVOp))
- return getZeroVector(VT, Subtarget->hasSSE2(), DAG, dl);
+ return getZeroVector(VT, Subtarget->hasSSE2(), Subtarget->hasAVX2(),
+ DAG, dl);
// Handle splat operations
if (SVOp->isSplat()) {
@@ -7663,8 +7672,7 @@
Op.getOperand(0));
// Zero out the upper parts of the register.
- Load = getShuffleVectorZeroOrUndef(Load, 0, true, Subtarget->hasSSE2(),
- DAG);
+ Load = getShuffleVectorZeroOrUndef(Load, 0, true, Subtarget, DAG);
Load = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Load),
@@ -10104,7 +10112,8 @@
if (VT == MVT::v16i8 && Op.getOpcode() == ISD::SRA) {
if (ShiftAmt == 7) {
// R s>> 7 === R s< 0
- SDValue Zeros = getZeroVector(VT, true /* HasSSE2 */, DAG, dl);
+ SDValue Zeros = getZeroVector(VT, /* HasSSE2 */true,
+ /* HasAVX2 */false, DAG, dl);
return DAG.getNode(X86ISD::PCMPGTB, dl, VT, Zeros, R);
}
@@ -10146,7 +10155,8 @@
if (Op.getOpcode() == ISD::SRA) {
if (ShiftAmt == 7) {
// R s>> 7 === R s< 0
- SDValue Zeros = getZeroVector(VT, true /* HasSSE2 */, DAG, dl);
+ SDValue Zeros = getZeroVector(VT, true /* HasSSE2 */,
+ true /* HasAVX2 */, DAG, dl);
return DAG.getNode(X86ISD::PCMPGTB, dl, VT, Zeros, R);
}
@@ -12685,7 +12695,8 @@
/// PerformShuffleCombine256 - Performs shuffle combines for 256-bit vectors.
static SDValue PerformShuffleCombine256(SDNode *N, SelectionDAG &DAG,
- TargetLowering::DAGCombinerInfo &DCI) {
+ TargetLowering::DAGCombinerInfo &DCI,
+ bool HasAVX2) {
DebugLoc dl = N->getDebugLoc();
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
SDValue V1 = SVOp->getOperand(0);
@@ -12737,7 +12748,7 @@
// Emit a zeroed vector and insert the desired subvector on its
// first half.
- SDValue Zeros = getZeroVector(VT, true /* HasSSE2 */, DAG, dl);
+ SDValue Zeros = getZeroVector(VT, true /* HasSSE2 */, HasAVX2, DAG, dl);
SDValue InsV = Insert128BitVector(Zeros, V1.getOperand(0),
DAG.getConstant(0, MVT::i32), DAG, dl);
return DCI.CombineTo(N, InsV);
@@ -12782,7 +12793,7 @@
// Combine 256-bit vector shuffles. This is only profitable when in AVX mode
if (Subtarget->hasAVX() && VT.getSizeInBits() == 256 &&
N->getOpcode() == ISD::VECTOR_SHUFFLE)
- return PerformShuffleCombine256(N, DAG, DCI);
+ return PerformShuffleCombine256(N, DAG, DCI, Subtarget->hasAVX2());
// Only handle 128 wide vector from here on.
if (VT.getSizeInBits() != 128)
Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=148108&r1=148107&r2=148108&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Fri Jan 13 02:12:35 2012
@@ -2908,6 +2908,7 @@
case X86::AVX_SET0PSY:
case X86::AVX_SET0PDY:
case X86::AVX2_SETALLONES:
+ case X86::AVX2_SET0:
Alignment = 32;
break;
case X86::V_SET0:
@@ -2952,6 +2953,7 @@
case X86::AVX_SET0PDY:
case X86::AVX_SETALLONES:
case X86::AVX2_SETALLONES:
+ case X86::AVX2_SET0:
case X86::FsFLD0SD:
case X86::FsFLD0SS: {
// Folding a V_SET0 or V_SETALLONES as a load, to ease register pressure.
@@ -2985,7 +2987,7 @@
Ty = Type::getDoubleTy(MF.getFunction()->getContext());
else if (Opc == X86::AVX_SET0PSY || Opc == X86::AVX_SET0PDY)
Ty = VectorType::get(Type::getFloatTy(MF.getFunction()->getContext()), 8);
- else if (Opc == X86::AVX2_SETALLONES)
+ else if (Opc == X86::AVX2_SETALLONES || Opc == X86::AVX2_SET0)
Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()), 8);
else
Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()), 4);
Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=148108&r1=148107&r2=148108&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Fri Jan 13 02:12:35 2012
@@ -279,13 +279,24 @@
// JIT implementatioan, it does not expand the instructions below like
// X86MCInstLower does.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
- isCodeGenOnly = 1, Predicates = [HasAVX] in {
+ isCodeGenOnly = 1 in {
+let Predicates = [HasAVX] in {
def AVX_SET0PSY : PSI<0x57, MRMInitReg, (outs VR256:$dst), (ins), "",
[(set VR256:$dst, (v8f32 immAllZerosV))]>, VEX_4V;
def AVX_SET0PDY : PDI<0x57, MRMInitReg, (outs VR256:$dst), (ins), "",
[(set VR256:$dst, (v4f64 immAllZerosV))]>, VEX_4V;
}
+let Predicates = [HasAVX2], neverHasSideEffects = 1 in
+def AVX2_SET0 : PDI<0xef, MRMInitReg, (outs VR256:$dst), (ins), "",
+ []>, VEX_4V;
+}
+let Predicates = [HasAVX2], AddedComplexity = 5 in {
+ def : Pat<(v4i64 immAllZerosV), (AVX2_SET0)>;
+ def : Pat<(v8i32 immAllZerosV), (AVX2_SET0)>;
+ def : Pat<(v16i16 immAllZerosV), (AVX2_SET0)>;
+ def : Pat<(v32i8 immAllZerosV), (AVX2_SET0)>;
+}
// AVX has no support for 256-bit integer instructions, but since the 128-bit
// VPXOR instruction writes zero to its upper part, it's safe build zeros.
Modified: llvm/trunk/lib/Target/X86/X86MCInstLower.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86MCInstLower.cpp?rev=148108&r1=148107&r2=148108&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86MCInstLower.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86MCInstLower.cpp Fri Jan 13 02:12:35 2012
@@ -373,6 +373,7 @@
case X86::AVX_SET0PDY: LowerUnaryToTwoAddr(OutMI, X86::VXORPDYrr); break;
case X86::AVX_SETALLONES: LowerUnaryToTwoAddr(OutMI, X86::VPCMPEQDrr); break;
case X86::AVX2_SETALLONES: LowerUnaryToTwoAddr(OutMI, X86::VPCMPEQDYrr);break;
+ case X86::AVX2_SET0: LowerUnaryToTwoAddr(OutMI, X86::VPXORYrr); break;
case X86::MOV16r0:
LowerSubReg32_Op0(OutMI, X86::MOV32r0); // MOV16r0 -> MOV32r0
More information about the llvm-commits
mailing list