[llvm] r189189 - AVX-512: added UNPACK instructions and tests for all-zero/all-ones vectors
Elena Demikhovsky
elena.demikhovsky at intel.com
Sun Aug 25 05:54:30 PDT 2013
Author: delena
Date: Sun Aug 25 07:54:30 2013
New Revision: 189189
URL: http://llvm.org/viewvc/llvm-project?rev=189189&view=rev
Log:
AVX-512: added UNPACK instructions and tests for all-zero/all-ones vectors
Added:
llvm/trunk/test/CodeGen/X86/avx512-build-vector.ll
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/lib/Target/X86/X86InstrAVX512.td
llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
llvm/trunk/test/CodeGen/X86/avx512-shuffle.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=189189&r1=189188&r2=189189&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sun Aug 25 07:54:30 2013
@@ -3866,37 +3866,46 @@ SDValue Compact8x32ShuffleNode(ShuffleVe
static bool isUNPCKLMask(ArrayRef<int> Mask, MVT VT,
bool HasInt256, bool V2IsSplat = false) {
- if (VT.is512BitVector())
- return false;
- assert((VT.is128BitVector() || VT.is256BitVector()) &&
- "Unsupported vector type for unpckh");
+ assert(VT.getSizeInBits() >= 128 &&
+ "Unsupported vector type for unpckl");
+ // AVX defines UNPCK* to operate independently on 128-bit lanes.
+ unsigned NumLanes;
+ unsigned NumOf256BitLanes;
unsigned NumElts = VT.getVectorNumElements();
- if (VT.is256BitVector() && NumElts != 4 && NumElts != 8 &&
- (!HasInt256 || (NumElts != 16 && NumElts != 32)))
+ if (VT.is256BitVector()) {
+ if (NumElts != 4 && NumElts != 8 &&
+ (!HasInt256 || (NumElts != 16 && NumElts != 32)))
return false;
+ NumLanes = 2;
+ NumOf256BitLanes = 1;
+ } else if (VT.is512BitVector()) {
+ assert(VT.getScalarType().getSizeInBits() >= 32 &&
+ "Unsupported vector type for unpckh");
+ NumLanes = 2;
+ NumOf256BitLanes = 2;
+ } else {
+ NumLanes = 1;
+ NumOf256BitLanes = 1;
+ }
- // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
- // independently on 128-bit lanes.
- unsigned NumLanes = VT.getSizeInBits()/128;
- unsigned NumLaneElts = NumElts/NumLanes;
+ unsigned NumEltsInStride = NumElts/NumOf256BitLanes;
+ unsigned NumLaneElts = NumEltsInStride/NumLanes;
- for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
- for (unsigned i = 0, j = l; i != NumLaneElts; i += 2, ++j) {
- int BitI = Mask[l+i];
- int BitI1 = Mask[l+i+1];
- if (!isUndefOrEqual(BitI, j))
- return false;
- if (V2IsSplat) {
- if (!isUndefOrEqual(BitI1, NumElts))
+ for (unsigned l256 = 0; l256 < NumOf256BitLanes; l256 += 1) {
+ for (unsigned l = 0; l != NumEltsInStride; l += NumLaneElts) {
+ for (unsigned i = 0, j = l; i != NumLaneElts; i += 2, ++j) {
+ int BitI = Mask[l256*NumEltsInStride+l+i];
+ int BitI1 = Mask[l256*NumEltsInStride+l+i+1];
+ if (!isUndefOrEqual(BitI, j+l256*NumElts))
return false;
- } else {
- if (!isUndefOrEqual(BitI1, j + NumElts))
+ if (V2IsSplat && !isUndefOrEqual(BitI1, NumElts))
+ return false;
+ if (!isUndefOrEqual(BitI1, j+l256*NumElts+NumEltsInStride))
return false;
}
}
}
-
return true;
}
@@ -3904,33 +3913,42 @@ static bool isUNPCKLMask(ArrayRef<int> M
/// specifies a shuffle of elements that is suitable for input to UNPCKH.
static bool isUNPCKHMask(ArrayRef<int> Mask, MVT VT,
bool HasInt256, bool V2IsSplat = false) {
- unsigned NumElts = VT.getVectorNumElements();
-
- if (VT.is512BitVector())
- return false;
- assert((VT.is128BitVector() || VT.is256BitVector()) &&
+ assert(VT.getSizeInBits() >= 128 &&
"Unsupported vector type for unpckh");
- if (VT.is256BitVector() && NumElts != 4 && NumElts != 8 &&
- (!HasInt256 || (NumElts != 16 && NumElts != 32)))
+ // AVX defines UNPCK* to operate independently on 128-bit lanes.
+ unsigned NumLanes;
+ unsigned NumOf256BitLanes;
+ unsigned NumElts = VT.getVectorNumElements();
+ if (VT.is256BitVector()) {
+ if (NumElts != 4 && NumElts != 8 &&
+ (!HasInt256 || (NumElts != 16 && NumElts != 32)))
return false;
+ NumLanes = 2;
+ NumOf256BitLanes = 1;
+ } else if (VT.is512BitVector()) {
+ assert(VT.getScalarType().getSizeInBits() >= 32 &&
+ "Unsupported vector type for unpckh");
+ NumLanes = 2;
+ NumOf256BitLanes = 2;
+ } else {
+ NumLanes = 1;
+ NumOf256BitLanes = 1;
+ }
- // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
- // independently on 128-bit lanes.
- unsigned NumLanes = VT.getSizeInBits()/128;
- unsigned NumLaneElts = NumElts/NumLanes;
+ unsigned NumEltsInStride = NumElts/NumOf256BitLanes;
+ unsigned NumLaneElts = NumEltsInStride/NumLanes;
- for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
- for (unsigned i = 0, j = l+NumLaneElts/2; i != NumLaneElts; i += 2, ++j) {
- int BitI = Mask[l+i];
- int BitI1 = Mask[l+i+1];
- if (!isUndefOrEqual(BitI, j))
- return false;
- if (V2IsSplat) {
- if (isUndefOrEqual(BitI1, NumElts))
+ for (unsigned l256 = 0; l256 < NumOf256BitLanes; l256 += 1) {
+ for (unsigned l = 0; l != NumEltsInStride; l += NumLaneElts) {
+ for (unsigned i = 0, j = l+NumLaneElts/2; i != NumLaneElts; i += 2, ++j) {
+ int BitI = Mask[l256*NumEltsInStride+l+i];
+ int BitI1 = Mask[l256*NumEltsInStride+l+i+1];
+ if (!isUndefOrEqual(BitI, j+l256*NumElts))
return false;
- } else {
- if (!isUndefOrEqual(BitI1, j+NumElts))
+ if (V2IsSplat && !isUndefOrEqual(BitI1, NumElts))
+ return false;
+ if (!isUndefOrEqual(BitI1, j+l256*NumElts+NumEltsInStride))
return false;
}
}
@@ -4336,7 +4354,7 @@ bool X86::isVEXTRACT256Index(SDNode *N)
static unsigned getShuffleSHUFImmediate(ShuffleVectorSDNode *N) {
MVT VT = N->getSimpleValueType(0);
- assert((VT.is128BitVector() || VT.is256BitVector()) &&
+ assert((VT.getSizeInBits() >= 128) &&
"Unsupported vector type for PSHUF/SHUFP");
// Handle 128 and 256-bit vector lengths. AVX defines PSHUF/SHUFP to operate
@@ -4345,10 +4363,10 @@ static unsigned getShuffleSHUFImmediate(
unsigned NumLanes = VT.getSizeInBits()/128;
unsigned NumLaneElts = NumElts/NumLanes;
- assert((NumLaneElts == 2 || NumLaneElts == 4) &&
- "Only supports 2 or 4 elements per lane");
+ assert((NumLaneElts == 2 || NumLaneElts == 4 || NumLaneElts == 8) &&
+ "Only supports 2, 4 or 8 elements per lane");
- unsigned Shift = (NumLaneElts == 4) ? 1 : 0;
+ unsigned Shift = (NumLaneElts >= 4) ? 1 : 0;
unsigned Mask = 0;
for (unsigned i = 0; i != NumElts; ++i) {
int Elt = N->getMaskElt(i);
@@ -4680,6 +4698,11 @@ static SDValue getZeroVector(EVT VT, con
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8f32, Ops,
array_lengthof(Ops));
}
+ } else if (VT.is512BitVector()) { // AVX-512
+ SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
+ SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst,
+ Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i32, Ops, 16);
} else
llvm_unreachable("Unexpected vector type");
@@ -5674,10 +5697,13 @@ X86TargetLowering::LowerBUILD_VECTORvXi1
DAG.getIntPtrConstant(0));
}
- if (!isSplatVector(Op.getNode()))
- llvm_unreachable("Unsupported predicate operation");
-
+ // Splat vector (with undefs)
SDValue In = Op.getOperand(0);
+ for (unsigned i = 1, e = Op.getNumOperands(); i != e; ++i) {
+ if (Op.getOperand(i) != In && Op.getOperand(i).getOpcode() != ISD::UNDEF)
+ llvm_unreachable("Unsupported predicate operation");
+ }
+
SDValue EFLAGS, X86CC;
if (In.getOpcode() == ISD::SETCC) {
SDValue Op0 = In.getOperand(0);
@@ -5759,7 +5785,8 @@ X86TargetLowering::LowerBUILD_VECTOR(SDV
if (VT == MVT::v4i32 || (VT == MVT::v8i32 && Subtarget->hasInt256()))
return Op;
- return getOnesVector(VT, Subtarget->hasInt256(), DAG, dl);
+ if (!VT.is512BitVector())
+ return getOnesVector(VT, Subtarget->hasInt256(), DAG, dl);
}
SDValue Broadcast = LowerVectorBroadcast(Op, Subtarget, DAG);
@@ -5841,7 +5868,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDV
if (ExtVT == MVT::i32 || ExtVT == MVT::f32 || ExtVT == MVT::f64 ||
(ExtVT == MVT::i64 && Subtarget->is64Bit())) {
- if (VT.is256BitVector()) {
+ if (VT.is256BitVector() || VT.is512BitVector()) {
SDValue ZeroVec = getZeroVector(VT, Subtarget, DAG, dl);
return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, ZeroVec,
Item, DAG.getIntPtrConstant(0));
Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=189189&r1=189188&r2=189189&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Sun Aug 25 07:54:30 2013
@@ -1561,6 +1561,68 @@ def : Pat<(v8i64 (X86pmuludq (v16i32 VR5
(VPMULUDQZrr VR512:$src1, VR512:$src2)>;
//===----------------------------------------------------------------------===//
+// AVX-512 - Unpack Instructions
+//===----------------------------------------------------------------------===//
+
+multiclass avx512_unpack_fp<bits<8> opc, SDNode OpNode, ValueType vt,
+ PatFrag mem_frag, RegisterClass RC,
+ X86MemOperand x86memop, string asm,
+ Domain d> {
+ def rr : AVX512PI<opc, MRMSrcReg,
+ (outs RC:$dst), (ins RC:$src1, RC:$src2),
+ asm, [(set RC:$dst,
+ (vt (OpNode RC:$src1, RC:$src2)))],
+ d>, EVEX_4V, TB;
+ def rm : AVX512PI<opc, MRMSrcMem,
+ (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
+ asm, [(set RC:$dst,
+ (vt (OpNode RC:$src1,
+ (bitconvert (mem_frag addr:$src2)))))],
+ d>, EVEX_4V, TB;
+}
+
+defm VUNPCKHPSZ: avx512_unpack_fp<0x15, X86Unpckh, v16f32, memopv8f64,
+ VR512, f512mem, "vunpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedSingle>, EVEX_V512, EVEX_CD8<32, CD8VF>;
+defm VUNPCKHPDZ: avx512_unpack_fp<0x15, X86Unpckh, v8f64, memopv8f64,
+ VR512, f512mem, "vunpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedDouble>, OpSize, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+defm VUNPCKLPSZ: avx512_unpack_fp<0x14, X86Unpckl, v16f32, memopv8f64,
+ VR512, f512mem, "vunpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedSingle>, EVEX_V512, EVEX_CD8<32, CD8VF>;
+defm VUNPCKLPDZ: avx512_unpack_fp<0x14, X86Unpckl, v8f64, memopv8f64,
+ VR512, f512mem, "vunpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedDouble>, OpSize, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+
+multiclass avx512_unpack_int<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
+ X86MemOperand x86memop> {
+ def rr : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, (OpVT (OpNode (OpVT RC:$src1), (OpVT RC:$src2))))],
+ IIC_SSE_UNPCK>, EVEX_4V;
+ def rm : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, x86memop:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, (OpVT (OpNode (OpVT RC:$src1),
+ (bitconvert (memop_frag addr:$src2)))))],
+ IIC_SSE_UNPCK>, EVEX_4V;
+}
+defm VPUNPCKLDQZ : avx512_unpack_int<0x62, "vpunpckldq", X86Unpckl, v16i32,
+ VR512, memopv16i32, i512mem>, EVEX_V512,
+ EVEX_CD8<32, CD8VF>;
+defm VPUNPCKLQDQZ : avx512_unpack_int<0x6C, "vpunpcklqdq", X86Unpckl, v8i64,
+ VR512, memopv8i64, i512mem>, EVEX_V512,
+ VEX_W, EVEX_CD8<64, CD8VF>;
+defm VPUNPCKHDQZ : avx512_unpack_int<0x6A, "vpunpckhdq", X86Unpckh, v16i32,
+ VR512, memopv16i32, i512mem>, EVEX_V512,
+ EVEX_CD8<32, CD8VF>;
+defm VPUNPCKHQDQZ : avx512_unpack_int<0x6D, "vpunpckhqdq", X86Unpckh, v8i64,
+ VR512, memopv8i64, i512mem>, EVEX_V512,
+ VEX_W, EVEX_CD8<64, CD8VF>;
+
+//===----------------------------------------------------------------------===//
// AVX-512 Logical Instructions
//===----------------------------------------------------------------------===//
Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=189189&r1=189188&r2=189189&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Sun Aug 25 07:54:30 2013
@@ -2939,7 +2939,6 @@ static unsigned CopyToFromAsymmetricReg(
if (X86::FR32XRegClass.contains(DestReg) && X86::GR32RegClass.contains(SrcReg))
// Copy from a GR32 register to a FR32 register.
return HasAVX512 ? X86::VMOVDI2SSZrr : (HasAVX ? X86::VMOVDI2SSrr : X86::MOVDI2SSrr);
-
return 0;
}
@@ -3781,6 +3780,8 @@ bool X86InstrInfo::expandPostRAPseudo(Ma
case X86::AVX_SET0:
assert(HasAVX && "AVX not supported");
return Expand2AddrUndef(MIB, get(X86::VXORPSYrr));
+ case X86::AVX512_512_SET0:
+ return Expand2AddrUndef(MIB, get(X86::VPXORDZrr));
case X86::V_SETALLONES:
return Expand2AddrUndef(MIB, get(HasAVX ? X86::VPCMPEQDrr : X86::PCMPEQDrr));
case X86::AVX2_SETALLONES:
@@ -3788,6 +3789,9 @@ bool X86InstrInfo::expandPostRAPseudo(Ma
case X86::TEST8ri_NOREX:
MI->setDesc(get(X86::TEST8ri));
return true;
+ case X86::KSET0W: return Expand2AddrUndef(MIB, get(X86::KXORWrr));
+ case X86::KSET1B:
+ case X86::KSET1W: return Expand2AddrUndef(MIB, get(X86::KXNORWrr));
}
return false;
}
Added: llvm/trunk/test/CodeGen/X86/avx512-build-vector.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-build-vector.ll?rev=189189&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-build-vector.ll (added)
+++ llvm/trunk/test/CodeGen/X86/avx512-build-vector.ll Sun Aug 25 07:54:30 2013
@@ -0,0 +1,18 @@
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
+
+; CHECK-LABEL: test1
+; CHECK: vpxord
+; CHECK: ret
+define <16 x i32> @test1(i32* %x) {
+ %y = load i32* %x, align 4
+ %res = insertelement <16 x i32>zeroinitializer, i32 %y, i32 4
+ ret <16 x i32>%res
+}
+
+; CHECK-LABEL: test2
+; CHECK: vpaddd LCP{{.*}}(%rip){1to16}
+; CHECK: ret
+define <16 x i32> @test2(<16 x i32> %x) {
+ %res = add <16 x i32><i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, %x
+ ret <16 x i32>%res
+}
\ No newline at end of file
Modified: llvm/trunk/test/CodeGen/X86/avx512-shuffle.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-shuffle.ll?rev=189189&r1=189188&r2=189189&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-shuffle.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-shuffle.ll Sun Aug 25 07:54:30 2013
@@ -16,7 +16,7 @@
; CHECK: .long 0
; CHECK: .long 10
; CHECK: .long 1
-; CHECK: test1:
+; CHECK-LABEL: test1:
; CHECK: vpermps
; CHECK: ret
define <16 x float> @test1(<16 x float> %a) nounwind {
@@ -24,7 +24,7 @@ define <16 x float> @test1(<16 x float>
ret <16 x float> %c
}
-; CHECK: test2:
+; CHECK-LABEL: test2:
; CHECK: vpermd
; CHECK: ret
define <16 x i32> @test2(<16 x i32> %a) nounwind {
@@ -32,7 +32,7 @@ define <16 x i32> @test2(<16 x i32> %a)
ret <16 x i32> %c
}
-; CHECK: test3:
+; CHECK-LABEL: test3:
; CHECK: vpermq
; CHECK: ret
define <8 x i64> @test3(<8 x i64> %a) nounwind {
@@ -40,7 +40,7 @@ define <8 x i64> @test3(<8 x i64> %a) no
ret <8 x i64> %c
}
-; CHECK: test4:
+; CHECK-LABEL: test4:
; CHECK: vpermpd
; CHECK: ret
define <8 x double> @test4(<8 x double> %a) nounwind {
@@ -48,7 +48,7 @@ define <8 x double> @test4(<8 x double>
ret <8 x double> %c
}
-; CHECK: test5:
+; CHECK-LABEL: test5:
; CHECK: vpermi2pd
; CHECK: ret
define <8 x double> @test5(<8 x double> %a, <8 x double> %b) nounwind {
@@ -56,7 +56,7 @@ define <8 x double> @test5(<8 x double>
ret <8 x double> %c
}
-; CHECK: test6:
+; CHECK-LABEL: test6:
; CHECK: vpermq $30
; CHECK: ret
define <8 x i64> @test6(<8 x i64> %a) nounwind {
@@ -64,7 +64,7 @@ define <8 x i64> @test6(<8 x i64> %a) no
ret <8 x i64> %c
}
-; CHECK: test7:
+; CHECK-LABEL: test7:
; CHECK: vpermi2q
; CHECK: ret
define <8 x i64> @test7(<8 x i64> %a, <8 x i64> %b) nounwind {
@@ -72,7 +72,7 @@ define <8 x i64> @test7(<8 x i64> %a, <8
ret <8 x i64> %c
}
-; CHECK: test8:
+; CHECK-LABEL: test8:
; CHECK: vpermi2d
; CHECK: ret
define <16 x i32> @test8(<16 x i32> %a, <16 x i32> %b) nounwind {
@@ -80,7 +80,7 @@ define <16 x i32> @test8(<16 x i32> %a,
ret <16 x i32> %c
}
-; CHECK: test9:
+; CHECK-LABEL: test9:
; CHECK: vpermi2ps
; CHECK: ret
define <16 x float> @test9(<16 x float> %a, <16 x float> %b) nounwind {
@@ -88,7 +88,7 @@ define <16 x float> @test9(<16 x float>
ret <16 x float> %c
}
-; CHECK: test10:
+; CHECK-LABEL: test10:
; CHECK: vpermi2ps (
; CHECK: ret
define <16 x float> @test10(<16 x float> %a, <16 x float>* %b) nounwind {
@@ -97,7 +97,7 @@ define <16 x float> @test10(<16 x float>
ret <16 x float> %d
}
-; CHECK: test11:
+; CHECK-LABEL: test11:
; CHECK: vpermi2d (
; CHECK: ret
define <16 x i32> @test11(<16 x i32> %a, <16 x i32>* %b) nounwind {
@@ -105,3 +105,36 @@ define <16 x i32> @test11(<16 x i32> %a,
%d = shufflevector <16 x i32> %a, <16 x i32> %c, <16 x i32> <i32 15, i32 31, i32 14, i32 22, i32 13, i32 29, i32 4, i32 28, i32 11, i32 27, i32 10, i32 26, i32 9, i32 25, i32 8, i32 24>
ret <16 x i32> %d
}
+
+; CHECK-LABEL: test18
+; CHECK: vpunpckhdq %zmm
+; CHECK: ret
+define <16 x i32> @test18(<16 x i32> %a, <16 x i32> %c) {
+ %b = shufflevector <16 x i32> %a, <16 x i32> %c, <16 x i32><i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15, i32 18, i32 26, i32 19, i32 27, i32 22, i32 30, i32 23, i32 31>
+ ret <16 x i32> %b
+}
+
+; CHECK-LABEL: test19
+; CHECK: vpunpckldq %zmm
+; CHECK: ret
+define <16 x i32> @test19(<16 x i32> %a, <16 x i32> %c) {
+ %b = shufflevector <16 x i32> %a, <16 x i32> %c, <16 x i32><i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13, i32 16, i32 24, i32 17, i32 25, i32 20, i32 28, i32 21, i32 29>
+ ret <16 x i32> %b
+}
+
+; CHECK-LABEL: test20
+; CHECK: vpunpckhqdq %zmm
+; CHECK: ret
+define <8 x i64> @test20(<8 x i64> %a, <8 x i64> %c) {
+ %b = shufflevector <8 x i64> %a, <8 x i64> %c, <8 x i32><i32 1, i32 5, i32 3, i32 7, i32 9, i32 13, i32 11, i32 15>
+ ret <8 x i64> %b
+}
+
+; CHECK-LABEL: test21
+; CHECK: vunpcklps %zmm
+; CHECK: ret
+define <16 x float> @test21(<16 x float> %a, <16 x float> %c) {
+ %b = shufflevector <16 x float> %a, <16 x float> %c, <16 x i32><i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13, i32 16, i32 24, i32 17, i32 25, i32 20, i32 28, i32 21, i32 29>
+ ret <16 x float> %b
+}
+
More information about the llvm-commits
mailing list