[llvm-commits] CVS: llvm/lib/Target/X86/X86ISelLowering.cpp X86InstrSSE.td
Evan Cheng
evan.cheng at apple.com
Tue Mar 28 17:31:03 PST 2006
Changes in directory llvm/lib/Target/X86:
X86ISelLowering.cpp updated: 1.141 -> 1.142
X86InstrSSE.td updated: 1.40 -> 1.41
---
Log message:
- Only use pshufd for v4i32 vector shuffles.
- Other shuffle-related fixes.
---
Diffs of the changes: (+83 -61)
X86ISelLowering.cpp | 51 +++++++++++++++++++++-------
X86InstrSSE.td | 93 +++++++++++++++++++++++++---------------------------
2 files changed, 83 insertions(+), 61 deletions(-)
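Since the headline change restricts pshufd to v4i32 shuffles, a short refresher on the instruction may help before the diff. This is a minimal standalone sketch in plain C++ (not LLVM code; the pshufd helper name is just for illustration) of PSHUFD's lane semantics. Note that every result lane is drawn from the single source register, which is why the selection pattern below requires the shuffle's second operand to be undef.

  #include <array>
  #include <cstdint>
  #include <cstdio>

  // dst[i] = src[imm[2i+1 : 2i]]: two immediate bits select each result lane.
  static std::array<uint32_t, 4> pshufd(const std::array<uint32_t, 4> &Src,
                                        uint8_t Imm) {
    std::array<uint32_t, 4> Dst;
    for (int i = 0; i != 4; ++i)
      Dst[i] = Src[(Imm >> (2 * i)) & 3];
    return Dst;
  }

  int main() {
    std::array<uint32_t, 4> V = {10, 11, 12, 13};
    std::array<uint32_t, 4> R = pshufd(V, 0x1B); // 0x1B reverses the lanes
    printf("%u %u %u %u\n", R[0], R[1], R[2], R[3]); // 13 12 11 10
    return 0;
  }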
Index: llvm/lib/Target/X86/X86ISelLowering.cpp
diff -u llvm/lib/Target/X86/X86ISelLowering.cpp:1.141 llvm/lib/Target/X86/X86ISelLowering.cpp:1.142
--- llvm/lib/Target/X86/X86ISelLowering.cpp:1.141 Tue Mar 28 17:41:33 2006
+++ llvm/lib/Target/X86/X86ISelLowering.cpp Tue Mar 28 19:30:51 2006
@@ -1583,15 +1583,21 @@
return Mask;
}
-/// CommuteVectorShuffleIfNeeded - Swap vector_shuffle operands (as well as
-/// values in ther permute mask if needed. Return an empty SDOperand is it is
-/// already well formed.
-static SDOperand CommuteVectorShuffleIfNeeded(SDOperand V1, SDOperand V2,
- SDOperand Mask, MVT::ValueType VT,
- SelectionDAG &DAG) {
+/// NormalizeVectorShuffle - Swap vector_shuffle operands (as well as the
+/// values in the permute mask) if needed. Use V1 as the second vector if it
+/// is undef. Return an empty SDOperand if it is already well formed.
+static SDOperand NormalizeVectorShuffle(SDOperand V1, SDOperand V2,
+ SDOperand Mask, MVT::ValueType VT,
+ SelectionDAG &DAG) {
unsigned NumElems = Mask.getNumOperands();
SDOperand Half1 = Mask.getOperand(0);
SDOperand Half2 = Mask.getOperand(NumElems/2);
+ bool V2Undef = false;
+ if (V2.getOpcode() == ISD::UNDEF) {
+ V2Undef = true;
+ V2 = V1;
+ }
+
if (cast<ConstantSDNode>(Half1)->getValue() >= NumElems &&
cast<ConstantSDNode>(Half2)->getValue() < NumElems) {
// Swap the operands and change mask.
@@ -1604,6 +1610,10 @@
DAG.getNode(ISD::BUILD_VECTOR, Mask.getValueType(), MaskVec);
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V2, V1, Mask);
}
+
+ if (V2Undef)
+ return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1, Mask);
+
return SDOperand();
}
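The hunk above elides the MaskVec construction, so for readers following NormalizeVectorShuffle: swapping the operands only stays correct if every mask index is flipped across the two-vector boundary at the same time. Here is a standalone sketch of that commutation, using plain integer indices as a stand-in for the ConstantSDNode mask operands (the helper name is hypothetical, not the LLVM function):

  #include <cstdio>
  #include <vector>

  // Indices < NumElems pick from the first operand, indices >= NumElems from
  // the second; after swapping operands, each index crosses that boundary.
  static void commuteShuffleMask(std::vector<unsigned> &Mask,
                                 unsigned NumElems) {
    for (unsigned &Idx : Mask)
      Idx = Idx < NumElems ? Idx + NumElems : Idx - NumElems;
  }

  int main() {
    std::vector<unsigned> Mask = {4, 5, 0, 1}; // first half from V2: commute
    commuteShuffleMask(Mask, 4);
    for (unsigned Idx : Mask)
      printf("%u ", Idx); // 0 1 4 5 -- now well formed
    printf("\n");
    return 0;
  }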
@@ -2387,8 +2397,26 @@
MVT::ValueType VT = Op.getValueType();
unsigned NumElems = PermMask.getNumOperands();
- if (NumElems == 2)
- return CommuteVectorShuffleIfNeeded(V1, V2, PermMask, VT, DAG);
+ if (X86::isUNPCKLMask(PermMask.Val) ||
+ X86::isUNPCKHMask(PermMask.Val))
+ // Leave the VECTOR_SHUFFLE alone. It matches {P}UNPCKL* / {P}UNPCKH*.
+ return SDOperand();
+
+ // PSHUFD's 2nd vector must be undef.
+ if (MVT::isInteger(VT) && X86::isPSHUFDMask(PermMask.Val))
+ if (V2.getOpcode() == ISD::UNDEF)
+ return SDOperand();
+ else
+ return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
+ DAG.getNode(ISD::UNDEF, V1.getValueType()),
+ PermMask);
+
+ if (NumElems == 2 ||
+ X86::isSplatMask(PermMask.Val) ||
+ X86::isSHUFPMask(PermMask.Val)) {
+ return NormalizeVectorShuffle(V1, V2, PermMask, VT, DAG);
+ }
+#if 0
else if (X86::isSplatMask(PermMask.Val)) {
// Handle splat cases.
if (V2.getOpcode() == ISD::UNDEF)
@@ -2400,10 +2428,6 @@
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
DAG.getNode(ISD::UNDEF, V1.getValueType()),
PermMask);
- } else if (X86::isUNPCKLMask(PermMask.Val) ||
- X86::isUNPCKHMask(PermMask.Val)) {
- // Leave the VECTOR_SHUFFLE alone. It matches {P}UNPCKL*.
- return SDOperand();
} else if (X86::isPSHUFDMask(PermMask.Val)) {
if (V2.getOpcode() == ISD::UNDEF)
// Leave the VECTOR_SHUFFLE alone. It matches PSHUFD.
@@ -2414,7 +2438,8 @@
DAG.getNode(ISD::UNDEF, V1.getValueType()),
PermMask);
} else if (X86::isSHUFPMask(PermMask.Val))
- return CommuteVectorShuffleIfNeeded(V1, V2, PermMask, VT, DAG);
+ return NormalizeVectorShuffle(V1, V2, PermMask, VT, DAG);
+#endif
assert(0 && "Unexpected VECTOR_SHUFFLE to lower");
abort();
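To summarize the new dispatch order in LowerVECTOR_SHUFFLE: unpack masks are left for the {P}UNPCK patterns, PSHUFD-shaped masks on integer types get an undef second operand forced in, and everything else (2-element, splat, SHUFP) is normalized first. A rough standalone sketch follows; the is* helpers are simplified 4-element stand-ins for the real X86::is*Mask predicates, and the value-type checks are omitted:

  #include <cstdio>
  #include <vector>

  using ShuffleMask = std::vector<unsigned>;

  static bool isUnpckl(const ShuffleMask &M) { return M == ShuffleMask{0, 4, 1, 5}; }
  static bool isUnpckh(const ShuffleMask &M) { return M == ShuffleMask{2, 6, 3, 7}; }
  static bool isPshufd(const ShuffleMask &M) {
    for (unsigned I : M)        // every lane must come from the first vector
      if (I >= M.size()) return false;
    return true;
  }

  static const char *classify(const ShuffleMask &M) {
    if (isUnpckl(M) || isUnpckh(M)) return "leave alone: {p}unpckl/h";
    if (isPshufd(M)) return "pshufd, force undef second operand";
    return "normalize, then splat/shufps handling";
  }

  int main() {
    printf("%s\n", classify({0, 4, 1, 5})); // leave alone: {p}unpckl/h
    printf("%s\n", classify({2, 3, 0, 1})); // pshufd, force undef second operand
    printf("%s\n", classify({1, 0, 5, 4})); // normalize, then splat/shufps handling
    return 0;
  }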
Index: llvm/lib/Target/X86/X86InstrSSE.td
diff -u llvm/lib/Target/X86/X86InstrSSE.td:1.40 llvm/lib/Target/X86/X86InstrSSE.td:1.41
--- llvm/lib/Target/X86/X86InstrSSE.td:1.40 Tue Mar 28 17:51:43 2006
+++ llvm/lib/Target/X86/X86InstrSSE.td Tue Mar 28 19:30:51 2006
@@ -79,9 +79,8 @@
return X86::isUNPCKHMask(N);
}]>;
-// Only use PSHUF if it is not a splat.
def PSHUFD_shuffle_mask : PatLeaf<(build_vector), [{
- return !X86::isSplatMask(N) && X86::isPSHUFDMask(N);
+ return X86::isPSHUFDMask(N);
}], SHUFFLE_get_shuf_imm>;
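Dropping the !X86::isSplatMask(N) guard means PSHUFD_shuffle_mask now accepts splat-shaped masks as well, which lines up with the lowering change above: integer splats reach isel through the same pshufd path instead of a dedicated splat pattern. For illustration, a simplified sketch of what a splat-mask check does (plain indices here; the real predicate inspects ConstantSDNode operands):

  #include <cstdio>
  #include <vector>

  // A splat repeats one source lane across every element of the mask.
  static bool isSplatMask(const std::vector<unsigned> &M) {
    for (unsigned I : M)
      if (I != M[0]) return false;
    return true;
  }

  int main() {
    printf("%d\n", isSplatMask({0, 0, 0, 0})); // 1: splat (pshufd $0x00)
    printf("%d\n", isSplatMask({2, 3, 0, 1})); // 0: ordinary pshufd shuffle
    return 0;
  }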
def SHUFP_shuffle_mask : PatLeaf<(build_vector), [{
@@ -918,86 +917,92 @@
"pshufw {$src2, $src1, $dst|$dst, $src1, $src2}", []>;
def PSHUFDrr : PDIi8<0x70, MRMDestReg,
(ops VR128:$dst, VR128:$src1, i8imm:$src2),
- "pshufd {$src2, $src1, $dst|$dst, $src1, $src2}", []>;
+ "pshufd {$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst, (v4i32 (vector_shuffle
+ VR128:$src1, (undef),
+ PSHUFD_shuffle_mask:$src2)))]>;
def PSHUFDrm : PDIi8<0x70, MRMSrcMem,
(ops VR128:$dst, i128mem:$src1, i8imm:$src2),
- "pshufd {$src2, $src1, $dst|$dst, $src1, $src2}", []>;
+ "pshufd {$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst, (v4i32 (vector_shuffle
+ (load addr:$src1), (undef),
+ PSHUFD_shuffle_mask:$src2)))]>;
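The new PSHUFD patterns hand the build_vector mask to SHUFFLE_get_shuf_imm to produce the $src2 immediate. A standalone sketch of that packing, assuming the usual two-bits-per-destination-lane encoding (the helper is a hypothetical stand-in, not the TableGen xform itself):

  #include <cstdint>
  #include <cstdio>

  // Pack four 2-bit lane selectors into one pshufd/shufps-style immediate.
  static uint8_t getShufImm(const unsigned Mask[4]) {
    uint8_t Imm = 0;
    for (int i = 3; i >= 0; --i)
      Imm = (Imm << 2) | (Mask[i] & 3);
    return Imm;
  }

  int main() {
    unsigned Mask[4] = {3, 2, 1, 0};
    printf("0x%02X\n", getShufImm(Mask)); // 0x1B: the reverse used earlier
    return 0;
  }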
let isTwoAddress = 1 in {
def SHUFPSrr : PSIi8<0xC6, MRMSrcReg,
(ops VR128:$dst, VR128:$src1, VR128:$src2, i32i8imm:$src3),
"shufps {$src3, $src2, $dst|$dst, $src2, $src3}",
- [(set VR128:$dst, (vector_shuffle
- (v4f32 VR128:$src1), (v4f32 VR128:$src2),
- SHUFP_shuffle_mask:$src3))]>;
+ [(set VR128:$dst, (v4f32 (vector_shuffle
+ VR128:$src1, VR128:$src2,
+ SHUFP_shuffle_mask:$src3)))]>;
def SHUFPSrm : PSIi8<0xC6, MRMSrcMem,
(ops VR128:$dst, VR128:$src1, f128mem:$src2, i32i8imm:$src3),
"shufps {$src3, $src2, $dst|$dst, $src2, $src3}",
- [(set VR128:$dst, (vector_shuffle
- (v4f32 VR128:$src1), (load addr:$src2),
- SHUFP_shuffle_mask:$src3))]>;
+ [(set VR128:$dst, (v4f32 (vector_shuffle
+ VR128:$src1, (load addr:$src2),
+ SHUFP_shuffle_mask:$src3)))]>;
def SHUFPDrr : PDIi8<0xC6, MRMSrcReg,
(ops VR128:$dst, VR128:$src1, VR128:$src2, i8imm:$src3),
"shufpd {$src3, $src2, $dst|$dst, $src2, $src3}",
- [(set VR128:$dst, (vector_shuffle
- (v2f64 VR128:$src1), (v2f64 VR128:$src2),
- SHUFP_shuffle_mask:$src3))]>;
+ [(set VR128:$dst, (v2f64 (vector_shuffle
+ VR128:$src1, VR128:$src2,
+ SHUFP_shuffle_mask:$src3)))]>;
def SHUFPDrm : PDIi8<0xC6, MRMSrcMem,
(ops VR128:$dst, VR128:$src1, f128mem:$src2, i8imm:$src3),
"shufpd {$src3, $src2, $dst|$dst, $src2, $src3}",
- [(set VR128:$dst, (vector_shuffle
- (v2f64 VR128:$src1), (load addr:$src2),
- SHUFP_shuffle_mask:$src3))]>;
+ [(set VR128:$dst, (v2f64 (vector_shuffle
+ VR128:$src1, (load addr:$src2),
+ SHUFP_shuffle_mask:$src3)))]>;
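The SHUFPS/SHUFPD patterns depend on the operand order that NormalizeVectorShuffle establishes: the low half of the result selects from $src1 and the high half from $src2. A standalone sketch of SHUFPS's selection rule (plain C++, illustrative names only):

  #include <array>
  #include <cstdint>
  #include <cstdio>

  // Low two result lanes from A, high two from B, two immediate bits each.
  static std::array<float, 4> shufps(const std::array<float, 4> &A,
                                     const std::array<float, 4> &B,
                                     uint8_t Imm) {
    return {A[Imm & 3], A[(Imm >> 2) & 3],
            B[(Imm >> 4) & 3], B[(Imm >> 6) & 3]};
  }

  int main() {
    std::array<float, 4> A = {0, 1, 2, 3}, B = {4, 5, 6, 7};
    std::array<float, 4> R = shufps(A, B, 0xE4); // 0xE4 keeps lanes in place
    printf("%g %g %g %g\n", R[0], R[1], R[2], R[3]); // 0 1 6 7
    return 0;
  }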
def UNPCKHPSrr : PSI<0x15, MRMSrcReg,
(ops VR128:$dst, VR128:$src1, VR128:$src2),
"unpckhps {$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (v4f32 (vector_shuffle VR128:$src1, VR128:$src2,
- UNPCKH_shuffle_mask)))]>;
+ [(set VR128:$dst, (v4f32 (vector_shuffle
+ VR128:$src1, VR128:$src2,
+ UNPCKH_shuffle_mask)))]>;
def UNPCKHPSrm : PSI<0x15, MRMSrcMem,
(ops VR128:$dst, VR128:$src1, f128mem:$src2),
"unpckhps {$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (v4f32 (vector_shuffle VR128:$src1, (load addr:$src2),
- UNPCKH_shuffle_mask)))]>;
+ [(set VR128:$dst, (v4f32 (vector_shuffle
+ VR128:$src1, (load addr:$src2),
+ UNPCKH_shuffle_mask)))]>;
def UNPCKHPDrr : PDI<0x15, MRMSrcReg,
(ops VR128:$dst, VR128:$src1, VR128:$src2),
"unpckhpd {$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (v2f64 (vector_shuffle VR128:$src1, VR128:$src2,
- UNPCKH_shuffle_mask)))]>;
+ [(set VR128:$dst, (v2f64 (vector_shuffle
+ VR128:$src1, VR128:$src2,
+ UNPCKH_shuffle_mask)))]>;
def UNPCKHPDrm : PDI<0x15, MRMSrcMem,
(ops VR128:$dst, VR128:$src1, f128mem:$src2),
"unpckhpd {$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (v2f64 (vector_shuffle VR128:$src1, (load addr:$src2),
- UNPCKH_shuffle_mask)))]>;
+ [(set VR128:$dst, (v2f64 (vector_shuffle
+ VR128:$src1, (load addr:$src2),
+ UNPCKH_shuffle_mask)))]>;
def UNPCKLPSrr : PSI<0x14, MRMSrcReg,
(ops VR128:$dst, VR128:$src1, VR128:$src2),
"unpcklps {$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (v4f32 (vector_shuffle VR128:$src1, VR128:$src2,
- UNPCKL_shuffle_mask)))]>;
+ [(set VR128:$dst, (v4f32 (vector_shuffle
+ VR128:$src1, VR128:$src2,
+ UNPCKL_shuffle_mask)))]>;
def UNPCKLPSrm : PSI<0x14, MRMSrcMem,
(ops VR128:$dst, VR128:$src1, f128mem:$src2),
"unpcklps {$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (v4f32 (vector_shuffle VR128:$src1, (load addr:$src2),
- UNPCKL_shuffle_mask)))]>;
+ [(set VR128:$dst, (v4f32 (vector_shuffle
+ VR128:$src1, (load addr:$src2),
+ UNPCKL_shuffle_mask)))]>;
def UNPCKLPDrr : PDI<0x14, MRMSrcReg,
(ops VR128:$dst, VR128:$src1, VR128:$src2),
"unpcklpd {$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (v2f64 (vector_shuffle VR128:$src1, VR128:$src2,
- UNPCKL_shuffle_mask)))]>;
+ [(set VR128:$dst, (v2f64 (vector_shuffle
+ VR128:$src1, VR128:$src2,
+ UNPCKL_shuffle_mask)))]>;
def UNPCKLPDrm : PDI<0x14, MRMSrcMem,
(ops VR128:$dst, VR128:$src1, f128mem:$src2),
"unpcklpd {$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (v2f64 (vector_shuffle VR128:$src1, (load addr:$src2),
- UNPCKL_shuffle_mask)))]>;
+ [(set VR128:$dst, (v2f64 (vector_shuffle
+ VR128:$src1, (load addr:$src2),
+ UNPCKL_shuffle_mask)))]>;
}
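Finally, the UNPCK patterns the lowering now leaves untouched correspond to fixed interleaves of the two operands: unpcklps interleaves the low halves, unpckhps the high halves, matching the <0,4,1,5> and <2,6,3,7> masks the UNPCKL/UNPCKH predicates accept. A standalone sketch:

  #include <array>
  #include <cstdio>

  using V4 = std::array<float, 4>;

  // unpcklps: interleave low halves; unpckhps: interleave high halves.
  static V4 unpcklps(const V4 &A, const V4 &B) { return {A[0], B[0], A[1], B[1]}; }
  static V4 unpckhps(const V4 &A, const V4 &B) { return {A[2], B[2], A[3], B[3]}; }

  int main() {
    V4 A = {0, 1, 2, 3}, B = {4, 5, 6, 7};
    V4 L = unpcklps(A, B), H = unpckhps(A, B);
    printf("%g %g %g %g\n", L[0], L[1], L[2], L[3]); // 0 4 1 5
    printf("%g %g %g %g\n", H[0], H[1], H[2], H[3]); // 2 6 3 7
    return 0;
  }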
//===----------------------------------------------------------------------===//
@@ -1354,11 +1359,3 @@
(v2f64 (MOVLHPSrr VR128:$src, VR128:$src))>, Requires<[HasSSE2]>;
def : Pat<(vector_shuffle (v2i64 VR128:$src), (undef), MOVLHPS_splat_mask:$sm),
(v2i64 (MOVLHPSrr VR128:$src, VR128:$src))>, Requires<[HasSSE2]>;
-
-// Shuffle v4f32 / v4i32, undef. These should only match if splat cases do not.
-def : Pat<(vector_shuffle (v4f32 VR128:$src), (undef), PSHUFD_shuffle_mask:$sm),
- (v4f32 (PSHUFDrr VR128:$src, PSHUFD_shuffle_mask:$sm))>,
- Requires<[HasSSE2]>;
-def : Pat<(vector_shuffle (v4i32 VR128:$src), (undef), PSHUFD_shuffle_mask:$sm),
- (v4i32 (PSHUFDrr VR128:$src, PSHUFD_shuffle_mask:$sm))>,
- Requires<[HasSSE2]>;