[llvm-commits] CVS: llvm/lib/Target/X86/X86ISelLowering.cpp X86ISelLowering.h X86InstrSSE.td
Evan Cheng
evan.cheng at apple.com
Mon Mar 27 16:40:10 PST 2006
Changes in directory llvm/lib/Target/X86:
X86ISelLowering.cpp updated: 1.135 -> 1.136
X86ISelLowering.h updated: 1.44 -> 1.45
X86InstrSSE.td updated: 1.34 -> 1.35
---
Log message:
Model unpack low and interleave as vector_shuffle so we can lower the
corresponding intrinsics as vector_shuffles.
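
For reference, the "unpack low and interleave" mask has the shape
<0, N, 1, N+1, ...> for an N-element vector: even mask slots walk the low
half of the first operand, odd slots the low half of the second. A minimal
standalone sketch of the check in plain C++ (isUnpackLowMask is a made-up
name for illustration; the real predicate is X86::isUNPCKLMask below):

#include <cstddef>
#include <vector>

// True iff Mask is <0, N, 1, N+1, ..., N/2-1, N+N/2-1>, i.e. the
// unpack-low-and-interleave pattern checked by X86::isUNPCKLMask.
bool isUnpackLowMask(const std::vector<unsigned> &Mask) {
  std::size_t N = Mask.size();
  if (N != 2 && N != 4 && N != 8 && N != 16)
    return false;
  for (std::size_t i = 0, j = 0; i != N; i += 2, ++j)
    if (Mask[i] != j || Mask[i + 1] != j + N)
      return false;
  return true;
}

// e.g. isUnpackLowMask({0, 4, 1, 5}) == true; for v4f32 that is
// exactly the shuffle UNPCKLPS computes.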
---
Diffs of the changes: (+88 -33)
X86ISelLowering.cpp |   53 +++++++++++++++++++++++++++++++++++++++------
X86ISelLowering.h   |    8 +++---
X86InstrSSE.td      |   60 ++++++++++++++++++++++++++++++++--------------------
3 files changed, 88 insertions(+), 33 deletions(-)
Index: llvm/lib/Target/X86/X86ISelLowering.cpp
diff -u llvm/lib/Target/X86/X86ISelLowering.cpp:1.135 llvm/lib/Target/X86/X86ISelLowering.cpp:1.136
--- llvm/lib/Target/X86/X86ISelLowering.cpp:1.135 Mon Mar 27 01:00:16 2006
+++ llvm/lib/Target/X86/X86ISelLowering.cpp Mon Mar 27 18:39:58 2006
@@ -303,14 +303,18 @@
setOperationAction(ISD::LOAD, MVT::v8i16, Legal);
setOperationAction(ISD::LOAD, MVT::v4i32, Legal);
setOperationAction(ISD::LOAD, MVT::v2i64, Legal);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Custom);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
- setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Custom);
- setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i16, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i32, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Custom);
}
computeRegisterProperties();
@@ -1499,6 +1503,29 @@
cast<ConstantSDNode>(Bit1)->getValue() == 3);
}
+/// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to UNPCKL.
+bool X86::isUNPCKLMask(SDNode *N) {
+ assert(N->getOpcode() == ISD::BUILD_VECTOR);
+
+ unsigned NumElems = N->getNumOperands();
+ if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
+ return false;
+
+ for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) {
+ SDOperand BitI = N->getOperand(i);
+ SDOperand BitI1 = N->getOperand(i+1);
+ assert(isa<ConstantSDNode>(BitI) && isa<ConstantSDNode>(BitI1) &&
+ "Invalid VECTOR_SHUFFLE mask!");
+ if (cast<ConstantSDNode>(BitI)->getValue() != j)
+ return false;
+ if (cast<ConstantSDNode>(BitI1)->getValue() != j + NumElems)
+ return false;
+ }
+
+ return true;
+}
+
/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies
/// a splat of a single element.
bool X86::isSplatMask(SDNode *N) {
@@ -2321,6 +2348,9 @@
MVT::ValueType VT = Op.getValueType();
unsigned NumElems = PermMask.getNumOperands();
+ // All v2f64 cases are handled.
+ if (NumElems == 2) return SDOperand();
+
// Handle splat cases.
if (X86::isSplatMask(PermMask.Val)) {
if (V2.getOpcode() == ISD::UNDEF)
@@ -2332,8 +2362,8 @@
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
DAG.getNode(ISD::UNDEF, V1.getValueType()),
PermMask);
- } else if (NumElems == 2) {
- // All v2f64 cases are handled.
+ } else if (X86::isUNPCKLMask(PermMask.Val)) {
+ // Leave the VECTOR_SHUFFLE alone. It matches {P}UNPCKL*.
return SDOperand();
} else if (X86::isPSHUFDMask(PermMask.Val)) {
if (V2.getOpcode() == ISD::UNDEF)
@@ -2404,13 +2434,22 @@
// : unpcklps 1, 3 ==> Y: <?, ?, 3, 1>
// Step 2: unpcklps X, Y ==> <3, 2, 1, 0>
MVT::ValueType VT = Op.getValueType();
+ MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
+ MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT);
+ std::vector<SDOperand> MaskVec;
+ for (unsigned i = 0, e = NumElems/2; i != e; ++i) {
+ MaskVec.push_back(DAG.getConstant(i, BaseVT));
+ MaskVec.push_back(DAG.getConstant(i + NumElems, BaseVT));
+ }
+ SDOperand PermMask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec);
std::vector<SDOperand> V(NumElems);
for (unsigned i = 0; i < NumElems; ++i)
V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i));
NumElems >>= 1;
while (NumElems != 0) {
for (unsigned i = 0; i < NumElems; ++i)
- V[i] = DAG.getNode(X86ISD::UNPCKL, VT, V[i], V[i + NumElems]);
+ V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i], V[i + NumElems],
+ PermMask);
NumElems >>= 1;
}
return V[0];
@@ -2453,7 +2492,6 @@
case X86ISD::Wrapper: return "X86ISD::Wrapper";
case X86ISD::S2VEC: return "X86ISD::S2VEC";
case X86ISD::ZEXT_S2VEC: return "X86ISD::ZEXT_S2VEC";
- case X86ISD::UNPCKL: return "X86ISD::UNPCKL";
}
}
@@ -2543,5 +2581,6 @@
return (Mask.Val->getNumOperands() == 2 ||
X86::isSplatMask(Mask.Val) ||
X86::isPSHUFDMask(Mask.Val) ||
- X86::isSHUFPMask(Mask.Val));
+ X86::isSHUFPMask(Mask.Val) ||
+ X86::isUNPCKLMask(Mask.Val));
}
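
The BUILD_VECTOR expansion above fills every lane in log2(NumElems) rounds
of unpack-low shuffles, starting from vectors that only have lane 0
defined. A toy simulation of the NumElems == 4 case (plain C++, not LLVM
code; Vec and unpackLow are invented for the sketch, with -1 standing in
for an undef lane):

#include <cstddef>
#include <cstdio>
#include <vector>

using Vec = std::vector<int>; // -1 models an undef lane

// Unpack low: interleave the low halves, <a0, b0, a1, b1, ...>.
static Vec unpackLow(const Vec &A, const Vec &B) {
  Vec R(A.size());
  for (std::size_t j = 0; j != A.size() / 2; ++j) {
    R[2 * j] = A[j];
    R[2 * j + 1] = B[j];
  }
  return R;
}

int main() {
  const unsigned NumElems = 4;
  const int Scalars[] = {10, 11, 12, 13};
  // SCALAR_TO_VECTOR: each scalar lands in lane 0, the rest is undef.
  std::vector<Vec> V(NumElems);
  for (unsigned i = 0; i != NumElems; ++i)
    V[i] = {Scalars[i], -1, -1, -1};
  // Round 1: unpackLow(V0, V2) = <10, 12, ?, ?>
  //          unpackLow(V1, V3) = <11, 13, ?, ?>
  // Round 2: unpackLow of those two = <10, 11, 12, 13>
  for (unsigned n = NumElems >> 1; n != 0; n >>= 1)
    for (unsigned i = 0; i != n; ++i)
      V[i] = unpackLow(V[i], V[i + n]);
  for (int Lane : V[0])
    std::printf("%d ", Lane); // prints: 10 11 12 13
  return 0;
}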
Index: llvm/lib/Target/X86/X86ISelLowering.h
diff -u llvm/lib/Target/X86/X86ISelLowering.h:1.44 llvm/lib/Target/X86/X86ISelLowering.h:1.45
--- llvm/lib/Target/X86/X86ISelLowering.h:1.44 Sun Mar 26 03:53:12 2006
+++ llvm/lib/Target/X86/X86ISelLowering.h Mon Mar 27 18:39:58 2006
@@ -153,10 +153,6 @@
/// ZEXT_S2VEC - SCALAR_TO_VECTOR with zero extension. The destination base
/// does not have to match the operand type.
ZEXT_S2VEC,
-
- /// UNPCKL - Unpack and interleave low. This corresponds to X86::UNPCKLPS,
- /// X86::PUNPCKL*.
- UNPCKL,
};
// X86 specific condition code. These correspond to X86_*_COND in
@@ -205,6 +201,10 @@
/// specifies a shuffle of elements that is suitable for input to UNPCKHPD.
bool isUNPCKHPDMask(SDNode *N);
+ /// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand
+ /// specifies a shuffle of elements that is suitable for input to UNPCKL.
+ bool isUNPCKLMask(SDNode *N);
+
/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a splat of a single element.
bool isSplatMask(SDNode *N);
Index: llvm/lib/Target/X86/X86InstrSSE.td
diff -u llvm/lib/Target/X86/X86InstrSSE.td:1.34 llvm/lib/Target/X86/X86InstrSSE.td:1.35
--- llvm/lib/Target/X86/X86InstrSSE.td:1.34 Mon Mar 27 10:52:45 2006
+++ llvm/lib/Target/X86/X86InstrSSE.td Mon Mar 27 18:39:58 2006
@@ -30,8 +30,6 @@
def SDTUnpckl : SDTypeProfile<1, 2,
[SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>]>;
-def X86unpckl : SDNode<"X86ISD::UNPCKL", SDTUnpckl,
- []>;
//===----------------------------------------------------------------------===//
// SSE pattern fragments
@@ -77,6 +75,10 @@
return X86::isUNPCKHPDMask(N);
}], SHUFFLE_get_shuf_imm>;
+def UNPCKL_shuffle_mask : PatLeaf<(build_vector), [{
+ return X86::isUNPCKLMask(N);
+}]>;
+
// Only use PSHUF if it is not a splat.
def PSHUFD_shuffle_mask : PatLeaf<(build_vector), [{
return !X86::isSplatMask(N) && X86::isPSHUFDMask(N);
@@ -756,14 +758,17 @@
let isTwoAddress = 1 in {
def SHUFPSrr : PSIi8<0xC6, MRMSrcReg,
- (ops VR128:$dst, VR128:$src1, VR128:$src2, i8imm:$src3),
+ (ops VR128:$dst, VR128:$src1, VR128:$src2, i32i8imm:$src3),
"shufps {$src3, $src2, $dst|$dst, $src2, $src3}",
[(set VR128:$dst, (vector_shuffle
(v4f32 VR128:$src1), (v4f32 VR128:$src2),
SHUFP_shuffle_mask:$src3))]>;
def SHUFPSrm : PSIi8<0xC6, MRMSrcMem,
- (ops VR128:$dst, VR128:$src1, f128mem:$src2, i8imm:$src3),
- "shufps {$src3, $src2, $dst|$dst, $src2, $src3}", []>;
+ (ops VR128:$dst, VR128:$src1, f128mem:$src2, i32i8imm:$src3),
+ "shufps {$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set VR128:$dst, (vector_shuffle
+ (v4f32 VR128:$src1), (load addr:$src2),
+ SHUFP_shuffle_mask:$src3))]>;
def SHUFPDrr : PDIi8<0xC6, MRMSrcReg,
(ops VR128:$dst, VR128:$src1, VR128:$src2, i8imm:$src3),
"shufpd {$src3, $src2, $dst|$dst, $src2, $src3}",
@@ -772,7 +777,10 @@
SHUFP_shuffle_mask:$src3))]>;
def SHUFPDrm : PDIi8<0xC6, MRMSrcMem,
(ops VR128:$dst, VR128:$src1, f128mem:$src2, i8imm:$src3),
- "shufpd {$src3, $src2, $dst|$dst, $src2, $src3}", []>;
+ "shufpd {$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set VR128:$dst, (vector_shuffle
+ (v2f64 VR128:$src1), (load addr:$src2),
+ SHUFP_shuffle_mask:$src3))]>;
def UNPCKHPSrr : PSI<0x15, MRMSrcReg,
(ops VR128:$dst, VR128:$src1, VR128:$src2),
@@ -789,13 +797,15 @@
def UNPCKLPSrr : PSI<0x14, MRMSrcReg,
(ops VR128:$dst, VR128:$src1, VR128:$src2),
"unpcklps {$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (v4f32 (X86unpckl VR128:$src1,
- VR128:$src2)))]>;
+ [(set VR128:$dst,
+ (v4f32 (vector_shuffle VR128:$src1, VR128:$src2,
+ UNPCKL_shuffle_mask)))]>;
def UNPCKLPSrm : PSI<0x14, MRMSrcMem,
(ops VR128:$dst, VR128:$src1, f128mem:$src2),
"unpcklps {$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (v4f32 (X86unpckl VR128:$src1,
- (load addr:$src2))))]>;
+ [(set VR128:$dst,
+ (v4f32 (vector_shuffle VR128:$src1, (load addr:$src2),
+ UNPCKL_shuffle_mask)))]>;
def UNPCKLPDrr : PDI<0x14, MRMSrcReg,
(ops VR128:$dst, VR128:$src1, VR128:$src2),
"unpcklpd {$src2, $dst|$dst, $src2}", []>;
@@ -895,33 +905,39 @@
def PUNPCKLBWrr : PDI<0x60, MRMSrcReg,
(ops VR128:$dst, VR128:$src1, VR128:$src2),
"punpcklbw {$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (v16i8 (X86unpckl VR128:$src1,
- VR128:$src2)))]>;
+ [(set VR128:$dst,
+ (v16i8 (vector_shuffle VR128:$src1, VR128:$src2,
+ UNPCKL_shuffle_mask)))]>;
def PUNPCKLBWrm : PDI<0x60, MRMSrcMem,
(ops VR128:$dst, VR128:$src1, i128mem:$src2),
"punpcklbw {$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (v16i8 (X86unpckl VR128:$src1,
- (load addr:$src2))))]>;
+ [(set VR128:$dst,
+ (v16i8 (vector_shuffle VR128:$src1, (load addr:$src2),
+ UNPCKL_shuffle_mask)))]>;
def PUNPCKLWDrr : PDI<0x61, MRMSrcReg,
(ops VR128:$dst, VR128:$src1, VR128:$src2),
"punpcklwd {$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (v8i16 (X86unpckl VR128:$src1,
- VR128:$src2)))]>;
+ [(set VR128:$dst,
+ (v8i16 (vector_shuffle VR128:$src1, VR128:$src2,
+ UNPCKL_shuffle_mask)))]>;
def PUNPCKLWDrm : PDI<0x61, MRMSrcMem,
(ops VR128:$dst, VR128:$src1, i128mem:$src2),
"punpcklwd {$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (v8i16 (X86unpckl VR128:$src1,
- (load addr:$src2))))]>;
+ [(set VR128:$dst,
+ (v8i16 (vector_shuffle VR128:$src1, (load addr:$src2),
+ UNPCKL_shuffle_mask)))]>;
def PUNPCKLDQrr : PDI<0x62, MRMSrcReg,
(ops VR128:$dst, VR128:$src1, VR128:$src2),
"punpckldq {$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (v4i32 (X86unpckl VR128:$src1,
- VR128:$src2)))]>;
+ [(set VR128:$dst,
+ (v4i32 (vector_shuffle VR128:$src1, VR128:$src2,
+ UNPCKL_shuffle_mask)))]>;
def PUNPCKLDQrm : PDI<0x62, MRMSrcMem,
(ops VR128:$dst, VR128:$src1, i128mem:$src2),
"punpckldq {$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (v4i32 (X86unpckl VR128:$src1,
- (load addr:$src2))))]>;
+ [(set VR128:$dst,
+ (v4i32 (vector_shuffle VR128:$src1, (load addr:$src2),
+ UNPCKL_shuffle_mask)))]>;
def PUNPCKLQDQrr : PDI<0x6C, MRMSrcReg,
(ops VR128:$dst, VR128:$src1, VR128:$src2),
"punpcklqdq {$src2, $dst|$dst, $src2}", []>;