[llvm-commits] CVS: llvm/lib/Target/X86/X86ISelLowering.cpp X86ISelLowering.h X86InstrSSE.td
Evan Cheng
evan.cheng at apple.com
Wed Mar 29 15:07:26 PST 2006
Changes in directory llvm/lib/Target/X86:
X86ISelLowering.cpp updated: 1.144 -> 1.145
X86ISelLowering.h updated: 1.47 -> 1.48
X86InstrSSE.td updated: 1.44 -> 1.45
---
Log message:
- Added some SSE2 128-bit packed integer ops.
- Added SSE2 128-bit integer pack with signed saturation ops.
- Added pshufhw and pshuflw ops.
---
Diffs of the changes: (+314 -25)
X86ISelLowering.cpp | 106 ++++++++++++++++++++++++-
X86ISelLowering.h | 18 ++++
X86InstrSSE.td | 215 +++++++++++++++++++++++++++++++++++++++++++++++-----
3 files changed, 314 insertions(+), 25 deletions(-)
Index: llvm/lib/Target/X86/X86ISelLowering.cpp
diff -u llvm/lib/Target/X86/X86ISelLowering.cpp:1.144 llvm/lib/Target/X86/X86ISelLowering.cpp:1.145
--- llvm/lib/Target/X86/X86ISelLowering.cpp:1.144 Wed Mar 29 13:02:40 2006
+++ llvm/lib/Target/X86/X86ISelLowering.cpp Wed Mar 29 17:07:14 2006
@@ -1399,10 +1399,67 @@
return false;
// Check if the value doesn't reference the second vector.
- for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i) {
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
assert(isa<ConstantSDNode>(N->getOperand(i)) &&
"Invalid VECTOR_SHUFFLE mask!");
- if (cast<ConstantSDNode>(N->getOperand(i))->getValue() >= 4) return false;
+ if (cast<ConstantSDNode>(N->getOperand(i))->getValue() >= 4)
+ return false;
+ }
+
+ return true;
+}
+
+/// isPSHUFHWMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to PSHUFHW.
+bool X86::isPSHUFHWMask(SDNode *N) {
+ assert(N->getOpcode() == ISD::BUILD_VECTOR);
+
+ if (N->getNumOperands() != 8)
+ return false;
+
+ // Lower quadword copied in order.
+ for (unsigned i = 0; i != 4; ++i) {
+ assert(isa<ConstantSDNode>(N->getOperand(i)) &&
+ "Invalid VECTOR_SHUFFLE mask!");
+ if (cast<ConstantSDNode>(N->getOperand(i))->getValue() != i)
+ return false;
+ }
+
+ // Upper quadword shuffled.
+ for (unsigned i = 4; i != 8; ++i) {
+ assert(isa<ConstantSDNode>(N->getOperand(i)) &&
+ "Invalid VECTOR_SHUFFLE mask!");
+ unsigned Val = cast<ConstantSDNode>(N->getOperand(i))->getValue();
+ if (Val < 4 || Val > 7)
+ return false;
+ }
+
+ return true;
+}
+
+/// isPSHUFLWMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to PSHUFLW.
+bool X86::isPSHUFLWMask(SDNode *N) {
+ assert(N->getOpcode() == ISD::BUILD_VECTOR);
+
+ if (N->getNumOperands() != 8)
+ return false;
+
+ // Upper quadword copied in order.
+ for (unsigned i = 4; i != 8; ++i) {
+ assert(isa<ConstantSDNode>(N->getOperand(i)) &&
+ "Invalid VECTOR_SHUFFLE mask!");
+ if (cast<ConstantSDNode>(N->getOperand(i))->getValue() != i)
+ return false;
+ }
+
+ // Lower quadword shuffled.
+ for (unsigned i = 0; i != 4; ++i) {
+ assert(isa<ConstantSDNode>(N->getOperand(i)) &&
+ "Invalid VECTOR_SHUFFLE mask!");
+ unsigned Val = cast<ConstantSDNode>(N->getOperand(i))->getValue();
+ if (Val > 4)
+ return false;
}
return true;
@@ -1431,7 +1488,7 @@
// Each half must refer to only one of the vector.
SDOperand Elt = N->getOperand(0);
assert(isa<ConstantSDNode>(Elt) && "Invalid VECTOR_SHUFFLE mask!");
- for (unsigned i = 1; i != NumElems / 2; ++i) {
+ for (unsigned i = 1; i < NumElems / 2; ++i) {
assert(isa<ConstantSDNode>(N->getOperand(i)) &&
"Invalid VECTOR_SHUFFLE mask!");
if (cast<ConstantSDNode>(N->getOperand(i))->getValue() !=
@@ -1440,7 +1497,7 @@
}
Elt = N->getOperand(NumElems / 2);
assert(isa<ConstantSDNode>(Elt) && "Invalid VECTOR_SHUFFLE mask!");
- for (unsigned i = NumElems / 2; i != NumElems; ++i) {
+ for (unsigned i = NumElems / 2 + 1; i < NumElems; ++i) {
assert(isa<ConstantSDNode>(N->getOperand(i)) &&
"Invalid VECTOR_SHUFFLE mask!");
if (cast<ConstantSDNode>(N->getOperand(i))->getValue() !=
@@ -1583,6 +1640,40 @@
return Mask;
}
+/// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle
+/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFHW
+/// instructions.
+unsigned X86::getShufflePSHUFHWImmediate(SDNode *N) {
+ unsigned Mask = 0;
+ // 8 nodes, but we only care about the last 4.
+ for (unsigned i = 7; i >= 4; --i) {
+ unsigned Val
+ = cast<ConstantSDNode>(N->getOperand(i))->getValue();
+ Mask |= (Val - 4);
+ if (i != 4)
+ Mask <<= 2;
+ }
+
+ return Mask;
+}
+
+/// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle
+/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFLW
+/// instructions.
+unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) {
+ unsigned Mask = 0;
+ // 8 nodes, but we only care about the first 4.
+ for (int i = 3; i >= 0; --i) {
+ unsigned Val
+ = cast<ConstantSDNode>(N->getOperand(i))->getValue();
+ Mask |= Val;
+ if (i != 0)
+ Mask <<= 2;
+ }
+
+ return Mask;
+}
+
/// NormalizeVectorShuffle - Swap vector_shuffle operands (as well as
/// values in the permute mask) if needed. Use V1 as second vector if it is
/// undef. Return an empty SDOperand if it is already well formed.
@@ -2399,7 +2490,10 @@
// Splat && PSHUFD's 2nd vector must be undef.
if (X86::isSplatMask(PermMask.Val) ||
- ((MVT::isInteger(VT) && X86::isPSHUFDMask(PermMask.Val)))) {
+ ((MVT::isInteger(VT) &&
+ (X86::isPSHUFDMask(PermMask.Val) ||
+ X86::isPSHUFHWMask(PermMask.Val) ||
+ X86::isPSHUFLWMask(PermMask.Val))))) {
if (V2.getOpcode() != ISD::UNDEF)
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask);
@@ -2607,6 +2701,8 @@
return (Mask.Val->getNumOperands() == 2 ||
X86::isSplatMask(Mask.Val) ||
X86::isPSHUFDMask(Mask.Val) ||
+ X86::isPSHUFHWMask(Mask.Val) ||
+ X86::isPSHUFLWMask(Mask.Val) ||
X86::isSHUFPMask(Mask.Val) ||
X86::isUNPCKLMask(Mask.Val) ||
X86::isUNPCKHMask(Mask.Val));
Index: llvm/lib/Target/X86/X86ISelLowering.h
diff -u llvm/lib/Target/X86/X86ISelLowering.h:1.47 llvm/lib/Target/X86/X86ISelLowering.h:1.48
--- llvm/lib/Target/X86/X86ISelLowering.h:1.47 Tue Mar 28 00:50:32 2006
+++ llvm/lib/Target/X86/X86ISelLowering.h Wed Mar 29 17:07:14 2006
@@ -184,6 +184,14 @@
/// specifies a shuffle of elements that is suitable for input to PSHUFD.
bool isPSHUFDMask(SDNode *N);
+ /// isPSHUFHWMask - Return true if the specified VECTOR_SHUFFLE operand
+ /// specifies a shuffle of elements that is suitable for input to PSHUFHW.
+ bool isPSHUFHWMask(SDNode *N);
+
+ /// isPSHUFLWMask - Return true if the specified VECTOR_SHUFFLE operand
+ /// specifies a shuffle of elements that is suitable for input to PSHUFLW.
+ bool isPSHUFLWMask(SDNode *N);
+
/// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to SHUFP*.
bool isSHUFPMask(SDNode *N);
@@ -212,6 +220,16 @@
/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP*
/// instructions.
unsigned getShuffleSHUFImmediate(SDNode *N);
+
+ /// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle
+ /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFHW
+ /// instructions.
+ unsigned getShufflePSHUFHWImmediate(SDNode *N);
+
+ /// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle
+ /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFLW
+ /// instructions.
+ unsigned getShufflePSHUFLWImmediate(SDNode *N);
}
//===----------------------------------------------------------------------===//
Index: llvm/lib/Target/X86/X86InstrSSE.td
diff -u llvm/lib/Target/X86/X86InstrSSE.td:1.44 llvm/lib/Target/X86/X86InstrSSE.td:1.45
--- llvm/lib/Target/X86/X86InstrSSE.td:1.44 Wed Mar 29 13:02:40 2006
+++ llvm/lib/Target/X86/X86InstrSSE.td Wed Mar 29 17:07:14 2006
@@ -45,6 +45,8 @@
def loadv4i32 : PatFrag<(ops node:$ptr), (v4i32 (load node:$ptr))>;
def loadv2i64 : PatFrag<(ops node:$ptr), (v2i64 (load node:$ptr))>;
+def bc_v16i8 : PatFrag<(ops node:$in), (v16i8 (bitconvert node:$in))>;
+def bc_v8i16 : PatFrag<(ops node:$in), (v8i16 (bitconvert node:$in))>;
def bc_v4i32 : PatFrag<(ops node:$in), (v4i32 (bitconvert node:$in))>;
def bc_v2i64 : PatFrag<(ops node:$in), (v2i64 (bitconvert node:$in))>;
@@ -58,6 +60,18 @@
return getI8Imm(X86::getShuffleSHUFImmediate(N));
}]>;
+// SHUFFLE_get_pshufhw_imm xform function: convert vector_shuffle mask to
+// PSHUFHW imm.
+def SHUFFLE_get_pshufhw_imm : SDNodeXForm<build_vector, [{
+ return getI8Imm(X86::getShufflePSHUFHWImmediate(N));
+}]>;
+
+// SHUFFLE_get_pshuflw_imm xform function: convert vector_shuffle mask to
+// PSHUFLW imm.
+def SHUFFLE_get_pshuflw_imm : SDNodeXForm<build_vector, [{
+ return getI8Imm(X86::getShufflePSHUFLWImmediate(N));
+}]>;
+
def SSE_splat_mask : PatLeaf<(build_vector), [{
return X86::isSplatMask(N);
}], SHUFFLE_get_shuf_imm>;
@@ -82,6 +96,14 @@
return X86::isPSHUFDMask(N);
}], SHUFFLE_get_shuf_imm>;
+def PSHUFHW_shuffle_mask : PatLeaf<(build_vector), [{
+ return X86::isPSHUFHWMask(N);
+}], SHUFFLE_get_pshufhw_imm>;
+
+def PSHUFLW_shuffle_mask : PatLeaf<(build_vector), [{
+ return X86::isPSHUFLWMask(N);
+}], SHUFFLE_get_pshuflw_imm>;
+
def SHUFP_shuffle_mask : PatLeaf<(build_vector), [{
return X86::isSHUFPMask(N);
}], SHUFFLE_get_shuf_imm>;
@@ -935,25 +957,6 @@
}
// Shuffle and unpack instructions
-def PSHUFWrr : PSIi8<0x70, MRMDestReg,
- (ops VR64:$dst, VR64:$src1, i8imm:$src2),
- "pshufw {$src2, $src1, $dst|$dst, $src1, $src2}", []>;
-def PSHUFWrm : PSIi8<0x70, MRMSrcMem,
- (ops VR64:$dst, i64mem:$src1, i8imm:$src2),
- "pshufw {$src2, $src1, $dst|$dst, $src1, $src2}", []>;
-def PSHUFDrr : PDIi8<0x70, MRMDestReg,
- (ops VR128:$dst, VR128:$src1, i8imm:$src2),
- "pshufd {$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR128:$dst, (v4i32 (vector_shuffle
- VR128:$src1, (undef),
- PSHUFD_shuffle_mask:$src2)))]>;
-def PSHUFDrm : PDIi8<0x70, MRMSrcMem,
- (ops VR128:$dst, i128mem:$src1, i8imm:$src2),
- "pshufd {$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR128:$dst, (v4i32 (vector_shuffle
- (load addr:$src1), (undef),
- PSHUFD_shuffle_mask:$src2)))]>;
-
let isTwoAddress = 1 in {
def SHUFPSrr : PSIi8<0xC6, MRMSrcReg,
(ops VR128:$dst, VR128:$src1, VR128:$src2, i32i8imm:$src3),
@@ -1081,6 +1084,10 @@
def PADDDrr : PDI<0xFE, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
"paddd {$src2, $dst|$dst, $src2}",
[(set VR128:$dst, (v4i32 (add VR128:$src1, VR128:$src2)))]>;
+
+def PADDQrr : PDI<0xD4, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
+ "paddq {$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (v2i64 (add VR128:$src1, VR128:$src2)))]>;
}
def PADDBrm : PDI<0xFC, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
"paddb {$src2, $dst|$dst, $src2}",
@@ -1094,6 +1101,10 @@
"paddd {$src2, $dst|$dst, $src2}",
[(set VR128:$dst, (v4i32 (add VR128:$src1,
(load addr:$src2))))]>;
+def PADDQrm : PDI<0xD4, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
+ "paddd {$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (v2i64 (add VR128:$src1,
+ (load addr:$src2))))]>;
def PSUBBrr : PDI<0xF8, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
"psubb {$src2, $dst|$dst, $src2}",
@@ -1104,6 +1115,9 @@
def PSUBDrr : PDI<0xFA, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
"psubd {$src2, $dst|$dst, $src2}",
[(set VR128:$dst, (v4i32 (sub VR128:$src1, VR128:$src2)))]>;
+def PSUBQrr : PDI<0xFB, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
+ "psubq {$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (v2i64 (sub VR128:$src1, VR128:$src2)))]>;
def PSUBBrm : PDI<0xF8, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
"psubb {$src2, $dst|$dst, $src2}",
@@ -1117,8 +1131,146 @@
"psubd {$src2, $dst|$dst, $src2}",
[(set VR128:$dst, (v4i32 (sub VR128:$src1,
(load addr:$src2))))]>;
+def PSUBQrm : PDI<0xFB, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
+ "psubd {$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (v2i64 (sub VR128:$src1,
+ (load addr:$src2))))]>;
+}
-// Unpack and interleave
+// Logical
+let isTwoAddress = 1 in {
+let isCommutable = 1 in {
+def PANDrr : PDI<0xDB, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
+ "pand {$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (v2i64 (and VR128:$src1, VR128:$src2)))]>;
+
+def PANDrm : PDI<0xDB, MRMSrcMem, (ops VR128:$dst, VR128:$src1, i128mem:$src2),
+ "pand {$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (v2i64 (and VR128:$src1,
+ (load addr:$src2))))]>;
+def PORrr : PDI<0xDB, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
+ "por {$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (v2i64 (or VR128:$src1, VR128:$src2)))]>;
+
+def PORrm : PDI<0xDB, MRMSrcMem, (ops VR128:$dst, VR128:$src1, i128mem:$src2),
+ "por {$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (v2i64 (or VR128:$src1,
+ (load addr:$src2))))]>;
+def PXORrr : PDI<0xEF, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
+ "pxor {$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (v2i64 (xor VR128:$src1, VR128:$src2)))]>;
+
+def PXORrm : PDI<0xEF, MRMSrcMem, (ops VR128:$dst, VR128:$src1, i128mem:$src2),
+ "pxor {$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (v2i64 (xor VR128:$src1,
+ (load addr:$src2))))]>;
+}
+
+def PANDNrr : PDI<0xDF, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
+ "pandn {$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (v2i64 (and (vnot VR128:$src1),
+ VR128:$src2)))]>;
+
+def PANDNrm : PDI<0xDF, MRMSrcMem, (ops VR128:$dst, VR128:$src1, i128mem:$src2),
+ "pandn {$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (v2i64 (and (vnot VR128:$src1),
+ (load addr:$src2))))]>;
+}
+
+// Pack instructions
+let isTwoAddress = 1 in {
+def PACKSSWBrr : PDI<0x63, MRMSrcReg, (ops VR128:$dst, VR128:$src1,
+ VR128:$src2),
+ "packsswb {$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (v16i8 (int_x86_sse2_packsswb_128
+ (v8i16 VR128:$src1),
+ (v8i16 VR128:$src2))))]>;
+def PACKSSWBrm : PDI<0x63, MRMSrcMem, (ops VR128:$dst, VR128:$src1,
+ i128mem:$src2),
+ "packsswb {$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (v16i8 (int_x86_sse2_packsswb_128
+ (v8i16 VR128:$src1),
+ (loadv8i16 addr:$src2))))]>;
+def PACKSSDWrr : PDI<0x6B, MRMSrcReg, (ops VR128:$dst, VR128:$src1,
+ VR128:$src2),
+ "packsswb {$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (v8i16 (int_x86_sse2_packssdw_128
+ (v4i32 VR128:$src1),
+ (v4i32 VR128:$src2))))]>;
+def PACKSSDWrm : PDI<0x6B, MRMSrcReg, (ops VR128:$dst, VR128:$src1,
+ i128mem:$src2),
+ "packsswb {$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (v8i16 (int_x86_sse2_packssdw_128
+ (v4i32 VR128:$src1),
+ (loadv4i32 addr:$src2))))]>;
+def PACKUSWBrr : PDI<0x67, MRMSrcReg, (ops VR128:$dst, VR128:$src1,
+ VR128:$src2),
+ "packuswb {$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (v16i8 (int_x86_sse2_packuswb_128
+ (v8i16 VR128:$src1),
+ (v8i16 VR128:$src2))))]>;
+def PACKUSWBrm : PDI<0x67, MRMSrcReg, (ops VR128:$dst, VR128:$src1,
+ i128mem:$src2),
+ "packuswb {$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (v16i8 (int_x86_sse2_packuswb_128
+ (v8i16 VR128:$src1),
+ (loadv8i16 addr:$src2))))]>;
+}
+
+// Shuffle and unpack instructions
+def PSHUFWrr : PSIi8<0x70, MRMDestReg,
+ (ops VR64:$dst, VR64:$src1, i8imm:$src2),
+ "pshufw {$src2, $src1, $dst|$dst, $src1, $src2}", []>;
+def PSHUFWrm : PSIi8<0x70, MRMSrcMem,
+ (ops VR64:$dst, i64mem:$src1, i8imm:$src2),
+ "pshufw {$src2, $src1, $dst|$dst, $src1, $src2}", []>;
+
+def PSHUFDrr : PDIi8<0x70, MRMDestReg,
+ (ops VR128:$dst, VR128:$src1, i8imm:$src2),
+ "pshufd {$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst, (v4i32 (vector_shuffle
+ VR128:$src1, (undef),
+ PSHUFD_shuffle_mask:$src2)))]>;
+def PSHUFDrm : PDIi8<0x70, MRMSrcMem,
+ (ops VR128:$dst, i128mem:$src1, i8imm:$src2),
+ "pshufd {$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst, (v4i32 (vector_shuffle
+ (load addr:$src1), (undef),
+ PSHUFD_shuffle_mask:$src2)))]>;
+
+// SSE2 with ImmT == Imm8 and XS prefix.
+def PSHUFHWrr : Ii8<0x70, MRMDestReg,
+ (ops VR128:$dst, VR128:$src1, i8imm:$src2),
+ "pshufhw {$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst, (v8i16 (vector_shuffle
+ VR128:$src1, (undef),
+ PSHUFHW_shuffle_mask:$src2)))]>,
+ XS, Requires<[HasSSE2]>;
+def PSHUFHWrm : Ii8<0x70, MRMDestMem,
+ (ops VR128:$dst, i128mem:$src1, i8imm:$src2),
+ "pshufhw {$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst, (v8i16 (vector_shuffle
+ (bc_v8i16 (loadv2i64 addr:$src1)), (undef),
+ PSHUFHW_shuffle_mask:$src2)))]>,
+ XS, Requires<[HasSSE2]>;
+
+// SSE2 with ImmT == Imm8 and XD prefix.
+def PSHUFLWrr : Ii8<0x70, MRMDestReg,
+ (ops VR128:$dst, VR128:$src1, i32i8imm:$src2),
+ "pshufLw {$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst, (v8i16 (vector_shuffle
+ VR128:$src1, (undef),
+ PSHUFLW_shuffle_mask:$src2)))]>,
+ XD, Requires<[HasSSE2]>;
+def PSHUFLWrm : Ii8<0x70, MRMDestMem,
+ (ops VR128:$dst, i128mem:$src1, i32i8imm:$src2),
+ "pshufLw {$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst, (v8i16 (vector_shuffle
+ (bc_v8i16 (loadv2i64 addr:$src1)), (undef),
+ PSHUFLW_shuffle_mask:$src2)))]>,
+ XD, Requires<[HasSSE2]>;
+
+let isTwoAddress = 1 in {
def PUNPCKLBWrr : PDI<0x60, MRMSrcReg,
(ops VR128:$dst, VR128:$src1, VR128:$src2),
"punpcklbw {$src2, $dst|$dst, $src2}",
@@ -1355,6 +1507,29 @@
// bit_convert
def : Pat<(v2i64 (bitconvert (v4i32 VR128:$src))), (v2i64 VR128:$src)>,
Requires<[HasSSE2]>;
+def : Pat<(v2i64 (bitconvert (v8i16 VR128:$src))), (v2i64 VR128:$src)>,
+ Requires<[HasSSE2]>;
+def : Pat<(v2i64 (bitconvert (v16i8 VR128:$src))), (v2i64 VR128:$src)>,
+ Requires<[HasSSE2]>;
+def : Pat<(v4i32 (bitconvert (v2i64 VR128:$src))), (v4i32 VR128:$src)>,
+ Requires<[HasSSE2]>;
+def : Pat<(v4i32 (bitconvert (v8i16 VR128:$src))), (v4i32 VR128:$src)>,
+ Requires<[HasSSE2]>;
+def : Pat<(v4i32 (bitconvert (v16i8 VR128:$src))), (v4i32 VR128:$src)>,
+ Requires<[HasSSE2]>;
+def : Pat<(v8i16 (bitconvert (v2i64 VR128:$src))), (v4i32 VR128:$src)>,
+ Requires<[HasSSE2]>;
+def : Pat<(v8i16 (bitconvert (v4i32 VR128:$src))), (v4i32 VR128:$src)>,
+ Requires<[HasSSE2]>;
+def : Pat<(v8i16 (bitconvert (v16i8 VR128:$src))), (v4i32 VR128:$src)>,
+ Requires<[HasSSE2]>;
+def : Pat<(v16i8 (bitconvert (v2i64 VR128:$src))), (v4i32 VR128:$src)>,
+ Requires<[HasSSE2]>;
+def : Pat<(v16i8 (bitconvert (v4i32 VR128:$src))), (v4i32 VR128:$src)>,
+ Requires<[HasSSE2]>;
+def : Pat<(v16i8 (bitconvert (v8i16 VR128:$src))), (v4i32 VR128:$src)>,
+ Requires<[HasSSE2]>;
+
def : Pat<(v4i32 (bitconvert (v4f32 VR128:$src))), (v4i32 VR128:$src)>,
Requires<[HasSSE2]>;
def : Pat<(v4f32 (bitconvert (v4i32 VR128:$src))), (v4f32 VR128:$src)>,
More information about the llvm-commits
mailing list