[llvm-commits] CVS: llvm/lib/Target/X86/X86ISelLowering.cpp X86InstrInfo.cpp X86InstrInfo.td X86InstrSSE.td
Evan Cheng
evan.cheng at apple.com
Mon Apr 3 13:53:41 PDT 2006
Changes in directory llvm/lib/Target/X86:
X86ISelLowering.cpp updated: 1.151 -> 1.152
X86InstrInfo.cpp updated: 1.47 -> 1.48
X86InstrInfo.td updated: 1.259 -> 1.260
X86InstrSSE.td updated: 1.53 -> 1.54
---
Log message:
- More efficient extract_vector_elt with shuffle and movss, movsd, movd, etc.
- Some bug fixes and naming inconsistency fixes.
---
Diffs of the changes: (+151 -70)
X86ISelLowering.cpp | 62 +++++++++++++++++++--
X86InstrInfo.cpp | 6 +-
X86InstrInfo.td | 1
X86InstrSSE.td | 152 ++++++++++++++++++++++++++++++----------------------
4 files changed, 151 insertions(+), 70 deletions(-)
Index: llvm/lib/Target/X86/X86ISelLowering.cpp
diff -u llvm/lib/Target/X86/X86ISelLowering.cpp:1.151 llvm/lib/Target/X86/X86ISelLowering.cpp:1.152
--- llvm/lib/Target/X86/X86ISelLowering.cpp:1.151 Fri Mar 31 15:55:24 2006
+++ llvm/lib/Target/X86/X86ISelLowering.cpp Mon Apr 3 15:53:28 2006
@@ -280,6 +280,7 @@
setOperationAction(ISD::LOAD, MVT::v4f32, Legal);
setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
}
if (Subtarget->hasSSE2()) {
@@ -316,7 +317,9 @@
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i16, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i32, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
}
@@ -1484,11 +1487,20 @@
// Dest { 2, 1 } <= shuffle( Dest { 1, 0 }, Src { 3, 2 }
// Expect bit 0 == 1, bit1 == 2
SDOperand Bit0 = N->getOperand(0);
+ if (Bit0.getOpcode() != ISD::UNDEF) {
+ assert(isa<ConstantSDNode>(Bit0) && "Invalid VECTOR_SHUFFLE mask!");
+ if (cast<ConstantSDNode>(Bit0)->getValue() != 1)
+ return false;
+ }
+
SDOperand Bit1 = N->getOperand(1);
- assert(isa<ConstantSDNode>(Bit0) && isa<ConstantSDNode>(Bit1) &&
- "Invalid VECTOR_SHUFFLE mask!");
- return (cast<ConstantSDNode>(Bit0)->getValue() == 1 &&
- cast<ConstantSDNode>(Bit1)->getValue() == 2);
+ if (Bit1.getOpcode() != ISD::UNDEF) {
+ assert(isa<ConstantSDNode>(Bit1) && "Invalid VECTOR_SHUFFLE mask!");
+ if (cast<ConstantSDNode>(Bit1)->getValue() != 2)
+ return false;
+ }
+
+ return true;
}
if (NumElems != 4) return false;
@@ -2660,15 +2672,55 @@
return SDOperand();
}
case ISD::EXTRACT_VECTOR_ELT: {
- // Transform it so it match pextrw which produces a 32-bit result.
+ if (!isa<ConstantSDNode>(Op.getOperand(1)))
+ return SDOperand();
+
MVT::ValueType VT = Op.getValueType();
if (MVT::getSizeInBits(VT) == 16) {
+ // Transform it so it match pextrw which produces a 32-bit result.
MVT::ValueType EVT = (MVT::ValueType)(VT+1);
SDOperand Extract = DAG.getNode(X86ISD::PEXTRW, EVT,
Op.getOperand(0), Op.getOperand(1));
SDOperand Assert = DAG.getNode(ISD::AssertZext, EVT, Extract,
DAG.getValueType(VT));
return DAG.getNode(ISD::TRUNCATE, VT, Assert);
+ } else if (MVT::getSizeInBits(VT) == 32) {
+ SDOperand Vec = Op.getOperand(0);
+ unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
+ if (Idx == 0)
+ return Op;
+
+ // TODO: if Idex == 2, we can use unpckhps
+ // SHUFPS the element to the lowest double word, then movss.
+ MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
+ SDOperand IdxNode = DAG.getConstant((Idx < 2) ? Idx : Idx+4,
+ MVT::getVectorBaseType(MaskVT));
+ std::vector<SDOperand> IdxVec;
+ IdxVec.push_back(DAG.getConstant(Idx, MVT::getVectorBaseType(MaskVT)));
+ IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT)));
+ IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT)));
+ IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT)));
+ SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, IdxVec);
+ Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
+ Vec, Vec, Mask);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
+ DAG.getConstant(0, MVT::i32));
+ } else if (MVT::getSizeInBits(VT) == 64) {
+ SDOperand Vec = Op.getOperand(0);
+ unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
+ if (Idx == 0)
+ return Op;
+
+ // UNPCKHPD the element to the lowest double word, then movsd.
+ MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
+ std::vector<SDOperand> IdxVec;
+ IdxVec.push_back(DAG.getConstant(1, MVT::getVectorBaseType(MaskVT)));
+ IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT)));
+ SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, IdxVec);
+ Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
+ Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
+ DAG.getConstant(0, MVT::i32));
}
return SDOperand();
Index: llvm/lib/Target/X86/X86InstrInfo.cpp
diff -u llvm/lib/Target/X86/X86InstrInfo.cpp:1.47 llvm/lib/Target/X86/X86InstrInfo.cpp:1.48
--- llvm/lib/Target/X86/X86InstrInfo.cpp:1.47 Fri Mar 24 17:15:12 2006
+++ llvm/lib/Target/X86/X86InstrInfo.cpp Mon Apr 3 15:53:28 2006
@@ -31,8 +31,10 @@
oc == X86::FpMOV || oc == X86::MOVSSrr || oc == X86::MOVSDrr ||
oc == X86::FsMOVAPSrr || oc == X86::FsMOVAPDrr ||
oc == X86::MOVAPSrr || oc == X86::MOVAPDrr ||
- oc == X86::MOVSS128rr || oc == X86::MOVSD128rr ||
- oc == X86::MOVD128rr || oc == X86::MOVQ128rr) {
+ oc == X86::MOVSS2PSrr || oc == X86::MOVSD2PDrr ||
+ oc == X86::MOVPS2SSrr || oc == X86::MOVPD2SDrr ||
+ oc == X86::MOVDI2PDIrr || oc == X86::MOVQI2PQIrr ||
+ oc == X86::MOVPDI2DIrr) {
assert(MI.getNumOperands() == 2 &&
MI.getOperand(0).isRegister() &&
MI.getOperand(1).isRegister() &&
Index: llvm/lib/Target/X86/X86InstrInfo.td
diff -u llvm/lib/Target/X86/X86InstrInfo.td:1.259 llvm/lib/Target/X86/X86InstrInfo.td:1.260
--- llvm/lib/Target/X86/X86InstrInfo.td:1.259 Sat Mar 25 03:45:48 2006
+++ llvm/lib/Target/X86/X86InstrInfo.td Mon Apr 3 15:53:28 2006
@@ -284,6 +284,7 @@
def loadi8 : PatFrag<(ops node:$ptr), (i8 (load node:$ptr))>;
def loadi16 : PatFrag<(ops node:$ptr), (i16 (load node:$ptr))>;
def loadi32 : PatFrag<(ops node:$ptr), (i32 (load node:$ptr))>;
+def loadi64 : PatFrag<(ops node:$ptr), (i64 (load node:$ptr))>;
def loadf32 : PatFrag<(ops node:$ptr), (f32 (load node:$ptr))>;
def loadf64 : PatFrag<(ops node:$ptr), (f64 (load node:$ptr))>;
Index: llvm/lib/Target/X86/X86InstrSSE.td
diff -u llvm/lib/Target/X86/X86InstrSSE.td:1.53 llvm/lib/Target/X86/X86InstrSSE.td:1.54
--- llvm/lib/Target/X86/X86InstrSSE.td:1.53 Fri Mar 31 15:55:24 2006
+++ llvm/lib/Target/X86/X86InstrSSE.td Mon Apr 3 15:53:28 2006
@@ -226,24 +226,6 @@
"movsd {$src, $dst|$dst, $src}",
[(store FR64:$src, addr:$dst)]>;
-// FR32 / FR64 to 128-bit vector conversion.
-def MOVSS128rr : SSI<0x10, MRMSrcReg, (ops VR128:$dst, FR32:$src),
- "movss {$src, $dst|$dst, $src}",
- [(set VR128:$dst,
- (v4f32 (scalar_to_vector FR32:$src)))]>;
-def MOVSS128rm : SSI<0x10, MRMSrcMem, (ops VR128:$dst, f32mem:$src),
- "movss {$src, $dst|$dst, $src}",
- [(set VR128:$dst,
- (v4f32 (scalar_to_vector (loadf32 addr:$src))))]>;
-def MOVSD128rr : SDI<0x10, MRMSrcReg, (ops VR128:$dst, FR64:$src),
- "movsd {$src, $dst|$dst, $src}",
- [(set VR128:$dst,
- (v2f64 (scalar_to_vector FR64:$src)))]>;
-def MOVSD128rm : SDI<0x10, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
- "movsd {$src, $dst|$dst, $src}",
- [(set VR128:$dst,
- (v2f64 (scalar_to_vector (loadf64 addr:$src))))]>;
-
// Arithmetic instructions
let isTwoAddress = 1 in {
let isCommutable = 1 in {
@@ -1122,18 +1104,6 @@
//===----------------------------------------------------------------------===//
// Move Instructions
-def MOVD128rr : PDI<0x6E, MRMSrcReg, (ops VR128:$dst, R32:$src),
- "movd {$src, $dst|$dst, $src}",
- [(set VR128:$dst,
- (v4i32 (scalar_to_vector R32:$src)))]>;
-def MOVD128rm : PDI<0x6E, MRMSrcMem, (ops VR128:$dst, i32mem:$src),
- "movd {$src, $dst|$dst, $src}",
- [(set VR128:$dst,
- (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>;
-
-def MOVD128mr : PDI<0x7E, MRMDestMem, (ops i32mem:$dst, VR128:$src),
- "movd {$src, $dst|$dst, $src}", []>;
-
def MOVDQArr : PDI<0x6F, MRMSrcReg, (ops VR128:$dst, VR128:$src),
"movdqa {$src, $dst|$dst, $src}", []>;
def MOVDQArm : PDI<0x6F, MRMSrcMem, (ops VR128:$dst, i128mem:$src),
@@ -1143,18 +1113,6 @@
"movdqa {$src, $dst|$dst, $src}",
[(store (v4i32 VR128:$src), addr:$dst)]>;
-// SSE2 instructions with XS prefix
-def MOVQ128rr : I<0x7E, MRMSrcReg, (ops VR128:$dst, VR64:$src),
- "movq {$src, $dst|$dst, $src}",
- [(set VR128:$dst,
- (v2i64 (scalar_to_vector VR64:$src)))]>, XS,
- Requires<[HasSSE2]>;
-def MOVQ128rm : I<0x7E, MRMSrcMem, (ops VR128:$dst, i64mem:$src),
- "movq {$src, $dst|$dst, $src}", []>, XS,
- Requires<[HasSSE2]>;
-def MOVQ128mr : PDI<0xD6, MRMSrcMem, (ops i64mem:$dst, VR128:$src),
- "movq {$src, $dst|$dst, $src}", []>;
-
// 128-bit Integer Arithmetic
let isTwoAddress = 1 in {
let isCommutable = 1 in {
@@ -1549,32 +1507,102 @@
"pcmpeqd $dst, $dst",
[(set VR128:$dst, (v2f64 immAllOnesV))]>;
-// Scalar to 128-bit vector with zero extension.
+// FR32 / FR64 to 128-bit vector conversion.
+def MOVSS2PSrr : SSI<0x10, MRMSrcReg, (ops VR128:$dst, FR32:$src),
+ "movss {$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v4f32 (scalar_to_vector FR32:$src)))]>;
+def MOVSS2PSrm : SSI<0x10, MRMSrcMem, (ops VR128:$dst, f32mem:$src),
+ "movss {$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v4f32 (scalar_to_vector (loadf32 addr:$src))))]>;
+def MOVSD2PDrr : SDI<0x10, MRMSrcReg, (ops VR128:$dst, FR64:$src),
+ "movsd {$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v2f64 (scalar_to_vector FR64:$src)))]>;
+def MOVSD2PDrm : SDI<0x10, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
+ "movsd {$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v2f64 (scalar_to_vector (loadf64 addr:$src))))]>;
+
+def MOVDI2PDIrr : PDI<0x6E, MRMSrcReg, (ops VR128:$dst, R32:$src),
+ "movd {$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v4i32 (scalar_to_vector R32:$src)))]>;
+def MOVDI2PDIrm : PDI<0x6E, MRMSrcMem, (ops VR128:$dst, i32mem:$src),
+ "movd {$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>;
+// SSE2 instructions with XS prefix
+def MOVQI2PQIrr : I<0x7E, MRMSrcReg, (ops VR128:$dst, VR64:$src),
+ "movq {$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v2i64 (scalar_to_vector VR64:$src)))]>, XS,
+ Requires<[HasSSE2]>;
+def MOVQI2PQIrm : I<0x7E, MRMSrcMem, (ops VR128:$dst, i64mem:$src),
+ "movq {$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>, XS,
+ Requires<[HasSSE2]>;
+// FIXME: may not be able to eliminate this movss with coalescing the src and
+// dest register classes are different. We really want to write this pattern
+// like this:
+// def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (i32 0))),
+// (f32 FR32:$src)>;
+def MOVPS2SSrr : SSI<0x10, MRMSrcReg, (ops FR32:$dst, VR128:$src),
+ "movss {$src, $dst|$dst, $src}",
+ [(set FR32:$dst, (vector_extract (v4f32 VR128:$src),
+ (i32 0)))]>;
+def MOVPS2SSmr : SSI<0x10, MRMDestMem, (ops f32mem:$dst, VR128:$src),
+ "movss {$src, $dst|$dst, $src}",
+ [(store (f32 (vector_extract (v4f32 VR128:$src),
+ (i32 0))), addr:$dst)]>;
+def MOVPD2SDrr : SDI<0x10, MRMSrcReg, (ops FR64:$dst, VR128:$src),
+ "movsd {$src, $dst|$dst, $src}",
+ [(set FR64:$dst, (vector_extract (v2f64 VR128:$src),
+ (i32 0)))]>;
+def MOVPD2SDmr : SDI<0x10, MRMDestMem, (ops f64mem:$dst, VR128:$src),
+ "movsd {$src, $dst|$dst, $src}",
+ [(store (f64 (vector_extract (v2f64 VR128:$src),
+ (i32 0))), addr:$dst)]>;
+def MOVPDI2DIrr : PDI<0x7E, MRMSrcReg, (ops R32:$dst, VR128:$src),
+ "movd {$src, $dst|$dst, $src}",
+ [(set R32:$dst, (vector_extract (v4i32 VR128:$src),
+ (i32 0)))]>;
+def MOVPDI2DImr : PDI<0x7E, MRMDestMem, (ops i32mem:$dst, VR128:$src),
+ "movd {$src, $dst|$dst, $src}",
+ [(store (i32 (vector_extract (v4i32 VR128:$src),
+ (i32 0))), addr:$dst)]>;
+
+// Move to lower bits of a VR128, leaving upper bits alone.
// Three operand (but two address) aliases.
let isTwoAddress = 1 in {
-def MOVZSS128rr : SSI<0x10, MRMSrcReg, (ops VR128:$dst, VR128:$src1, FR32:$src2),
+def MOVLSS2PSrr : SSI<0x10, MRMSrcReg, (ops VR128:$dst, VR128:$src1, FR32:$src2),
"movss {$src2, $dst|$dst, $src2}", []>;
-def MOVZSD128rr : SDI<0x10, MRMSrcReg, (ops VR128:$dst, VR128:$src1, FR64:$src2),
+def MOVLSD2PDrr : SDI<0x10, MRMSrcReg, (ops VR128:$dst, VR128:$src1, FR64:$src2),
"movsd {$src2, $dst|$dst, $src2}", []>;
-def MOVZD128rr : PDI<0x6E, MRMSrcReg, (ops VR128:$dst, VR128:$src1, R32:$src2),
+def MOVLDI2PDIrr : PDI<0x6E, MRMSrcReg, (ops VR128:$dst, VR128:$src1, R32:$src2),
"movd {$src2, $dst|$dst, $src2}", []>;
-def MOVZQ128rr : I<0x7E, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR64:$src2),
- "movq {$src2, $dst|$dst, $src2}", []>;
}
+// Move to lower bits of a VR128 and zeroing upper bits.
// Loading from memory automatically zeroing upper bits.
-def MOVZSS128rm : SSI<0x10, MRMSrcMem, (ops VR128:$dst, f32mem:$src),
+def MOVZSS2PSrm : SSI<0x10, MRMSrcMem, (ops VR128:$dst, f32mem:$src),
"movss {$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v4f32 (X86zexts2vec (loadf32 addr:$src))))]>;
-def MOVZSD128rm : SDI<0x10, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
+def MOVZSD2PDrm : SDI<0x10, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
"movsd {$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v2f64 (X86zexts2vec (loadf64 addr:$src))))]>;
-def MOVZD128rm : PDI<0x6E, MRMSrcMem, (ops VR128:$dst, i32mem:$src),
- "movd {$src, $dst|$dst, $src}",
- [(set VR128:$dst,
- (v4i32 (X86zexts2vec (loadi32 addr:$src))))]>;
+def MOVZDI2PDIrm : PDI<0x6E, MRMSrcMem, (ops VR128:$dst, i32mem:$src),
+ "movd {$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v4i32 (X86zexts2vec (loadi32 addr:$src))))]>;
+def MOVZQI2PQIrm : PDI<0x7E, MRMSrcMem, (ops VR128:$dst, i64mem:$src),
+ "movd {$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v2i64 (X86zexts2vec (loadi64 addr:$src))))]>;
//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
@@ -1621,9 +1649,9 @@
// Scalar to v8i16 / v16i8. The source may be a R32, but only the lower 8 or
// 16-bits matter.
-def : Pat<(v8i16 (X86s2vec R32:$src)), (MOVD128rr R32:$src)>,
+def : Pat<(v8i16 (X86s2vec R32:$src)), (MOVDI2PDIrr R32:$src)>,
Requires<[HasSSE2]>;
-def : Pat<(v16i8 (X86s2vec R32:$src)), (MOVD128rr R32:$src)>,
+def : Pat<(v16i8 (X86s2vec R32:$src)), (MOVDI2PDIrr R32:$src)>,
Requires<[HasSSE2]>;
// bit_convert
@@ -1659,17 +1687,15 @@
// Zeroing a VR128 then do a MOVS* to the lower bits.
def : Pat<(v2f64 (X86zexts2vec FR64:$src)),
- (MOVZSD128rr (V_SET0_PD), FR64:$src)>, Requires<[HasSSE2]>;
+ (MOVLSD2PDrr (V_SET0_PD), FR64:$src)>, Requires<[HasSSE2]>;
def : Pat<(v4f32 (X86zexts2vec FR32:$src)),
- (MOVZSS128rr (V_SET0_PS), FR32:$src)>, Requires<[HasSSE2]>;
-def : Pat<(v2i64 (X86zexts2vec VR64:$src)),
- (MOVZQ128rr (V_SET0_PI), VR64:$src)>, Requires<[HasSSE2]>;
+ (MOVLSS2PSrr (V_SET0_PS), FR32:$src)>, Requires<[HasSSE2]>;
def : Pat<(v4i32 (X86zexts2vec R32:$src)),
- (MOVZD128rr (V_SET0_PI), R32:$src)>, Requires<[HasSSE2]>;
+ (MOVLDI2PDIrr (V_SET0_PI), R32:$src)>, Requires<[HasSSE2]>;
def : Pat<(v8i16 (X86zexts2vec R16:$src)),
- (MOVZD128rr (V_SET0_PI), (MOVZX32rr16 R16:$src))>, Requires<[HasSSE2]>;
+ (MOVLDI2PDIrr (V_SET0_PI), (MOVZX32rr16 R16:$src))>, Requires<[HasSSE2]>;
def : Pat<(v16i8 (X86zexts2vec R8:$src)),
- (MOVZD128rr (V_SET0_PI), (MOVZX32rr8 R8:$src))>, Requires<[HasSSE2]>;
+ (MOVLDI2PDIrr (V_SET0_PI), (MOVZX32rr8 R8:$src))>, Requires<[HasSSE2]>;
// Splat v2f64 / v2i64
def : Pat<(vector_shuffle (v2f64 VR128:$src), (undef), SSE_splat_mask:$sm),
More information about the llvm-commits
mailing list