[llvm-commits] [llvm] r112377 - /llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
Chris Lattner
sabre at nondot.org
Sat Aug 28 10:15:43 PDT 2010
Author: lattner
Date: Sat Aug 28 12:15:43 2010
New Revision: 112377
URL: http://llvm.org/viewvc/llvm-project?rev=112377&view=rev
Log:
Improve comments in the unpcklps-generating logic, and introduce
a new EltStride variable instead of reusing the NumElems variable
for a non-obvious purpose. No functionality change.
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=112377&r1=112376&r2=112377&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sat Aug 28 12:15:43 2010
@@ -4040,8 +4040,8 @@
SDValue
X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
- // All zero's are handled with pxor in SSE2 and above, xorps in SSE1 and
- // all one's are handled with pcmpeqd. In AVX, zero's are handled with
+ // All zero's are handled with pxor in SSE2 and above, xorps in SSE1.
+ // All one's are handled with pcmpeqd. In AVX, zero's are handled with
// vpxor in 128-bit and xor{pd,ps} in 256-bit, but no 256 version of pcmpeqd
// is present, so AllOnes is ignored.
if (ISD::isBuildVectorAllZeros(Op.getNode()) ||
@@ -4288,18 +4288,25 @@
return V[0];
}
- // Otherwise, expand into a number of unpckl*
- // e.g. for v4f32
+ // Otherwise, expand into a number of unpckl*. Start by extending each of
+ // our (non-undef) elements to the full vector width with the element in the
+ // bottom slot of the vector (which generates no code for SSE).
+ for (unsigned i = 0; i < NumElems; ++i) {
+ if (Op.getOperand(i).getOpcode() != ISD::UNDEF)
+ V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(i));
+ else
+ V[i] = DAG.getUNDEF(VT);
+ }
+
+ // Next, we iteratively mix elements, e.g. for v4f32:
// Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0>
// : unpcklps 1, 3 ==> Y: <?, ?, 3, 1>
// Step 2: unpcklps X, Y ==> <3, 2, 1, 0>
- for (unsigned i = 0; i < NumElems; ++i)
- V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(i));
- NumElems >>= 1;
- while (NumElems != 0) {
- for (unsigned i = 0; i < NumElems; ++i)
- V[i] = getUnpackl(DAG, dl, VT, V[i], V[i + NumElems]);
- NumElems >>= 1;
+ unsigned EltStride = NumElems >> 1;
+ while (EltStride != 0) {
+ for (unsigned i = 0; i < EltStride; ++i)
+ V[i] = getUnpackl(DAG, dl, VT, V[i], V[i + EltStride]);
+ EltStride >>= 1;
}
return V[0];
}
More information about the llvm-commits
mailing list