[llvm] r287621 - [AVX-512] Add support for commuting VPERMT2(B/W/D/Q/PS/PD) to/from VPERMI2(B/W/D/Q/PS/PD).
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 21 20:57:35 PST 2016
Author: ctopper
Date: Mon Nov 21 22:57:34 2016
New Revision: 287621
URL: http://llvm.org/viewvc/llvm-project?rev=287621&view=rev
Log:
[AVX-512] Add support for commuting VPERMT2(B/W/D/Q/PS/PD) to/from VPERMI2(B/W/D/Q/PS/PD).
Summary:
The index and one of the table operands can be swapped by changing the opcode to the other version. Neither of these operands is the one that can load from memory, so this can't be used to increase memory folding opportunities.
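For example (AT&T syntax, registers chosen purely for illustration), these two forms compute the same shuffle and differ only in which input register the result overwrites:

  vpermt2d %zmm2, %zmm1, %zmm0   # zmm0 = first table (tied to dst), zmm1 = index, zmm2 = second table
  vpermi2d %zmm2, %zmm0, %zmm1   # zmm1 = index (tied to dst), zmm0 = first table, zmm2 = second table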
We need to handle the unmasked forms and the kz forms; the merge-masked forms can't be commuted this way because the destination also supplies the pass-through elements. Since the load operand isn't being commuted, we can commute the load and broadcast forms too.
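As a sketch of the payoff, mirroring the zero-masked test changes below, the commute lets the result land directly in the ABI return register instead of needing a trailing copy:

  before:  vpermt2d %xmm2, %xmm0, %xmm1 {%k1} {z}
           vmovdqa64 %xmm1, %xmm0
  after:   vpermi2d %xmm2, %xmm1, %xmm0 {%k1} {z}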
Reviewers: igorb, delena, Ayal, Farhana, RKSimon
Subscribers: llvm-commits
Differential Revision: https://reviews.llvm.org/D25652
Modified:
llvm/trunk/lib/Target/X86/X86InstrAVX512.td
llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll
llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll
llvm/trunk/test/CodeGen/X86/avx512-vpermv3-commute.ll
llvm/trunk/test/CodeGen/X86/avx512vbmi-intrinsics.ll
llvm/trunk/test/CodeGen/X86/avx512vbmivl-intrinsics.ll
llvm/trunk/test/CodeGen/X86/merge-consecutive-loads-512.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v16.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v8.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v8.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512bwvl.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512vbmi.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-v1.ll
Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=287621&r1=287620&r2=287621&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Mon Nov 21 22:57:34 2016
@@ -1352,14 +1352,14 @@ let Constraints = "$src1 = $dst", ExeDom
defm rr: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
- (_.VT (X86VPermi2X _.RC:$src1, _.RC:$src2, _.RC:$src3))>, EVEX_4V,
+ (_.VT (X86VPermi2X _.RC:$src1, _.RC:$src2, _.RC:$src3)), 1>, EVEX_4V,
AVX5128IBase;
defm rm: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.MemOp:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (X86VPermi2X _.RC:$src1, _.RC:$src2,
- (_.VT (bitconvert (_.LdFrag addr:$src3)))))>,
+ (_.VT (bitconvert (_.LdFrag addr:$src3))))), 1>,
EVEX_4V, AVX5128IBase;
}
}
@@ -1371,8 +1371,8 @@ multiclass avx512_perm_i_mb<bits<8> opc,
OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
!strconcat("$src2, ${src3}", _.BroadcastStr ),
(_.VT (X86VPermi2X _.RC:$src1,
- _.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3)))))>,
- AVX5128IBase, EVEX_4V, EVEX_B;
+ _.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))),
+ 1>, AVX5128IBase, EVEX_4V, EVEX_B;
}
multiclass avx512_perm_i_sizes<bits<8> opc, string OpcodeStr,
@@ -1420,14 +1420,14 @@ let Constraints = "$src1 = $dst", ExeDom
defm rr: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins IdxVT.RC:$src2, _.RC:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
- (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2, _.RC:$src3))>, EVEX_4V,
- AVX5128IBase;
+ (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2, _.RC:$src3)), 1>,
+ EVEX_4V, AVX5128IBase;
defm rm: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins IdxVT.RC:$src2, _.MemOp:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2,
- (bitconvert (_.LdFrag addr:$src3))))>,
+ (bitconvert (_.LdFrag addr:$src3)))), 1>,
EVEX_4V, AVX5128IBase;
}
}
@@ -1439,8 +1439,8 @@ multiclass avx512_perm_t_mb<bits<8> opc,
OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
!strconcat("$src2, ${src3}", _.BroadcastStr ),
(_.VT (X86VPermt2 _.RC:$src1,
- IdxVT.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3)))))>,
- AVX5128IBase, EVEX_4V, EVEX_B;
+ IdxVT.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))),
+ 1>, AVX5128IBase, EVEX_4V, EVEX_B;
}
multiclass avx512_perm_t_sizes<bits<8> opc, string OpcodeStr,
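(Note on the TableGen hunks above: each pattern now passes an extra trailing 1 to AVX512_maskable_3src, presumably its IsCommutable bit, so the generated instructions are flagged as commutable and the commuting hooks below get a chance to run.)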
Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=287621&r1=287620&r2=287621&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Mon Nov 21 22:57:34 2016
@@ -3533,6 +3533,92 @@ static bool commuteVPTERNLOG(MachineInst
return true;
}
+// Returns true if this is a VPERMI2 or VPERMT2 instruction that can be
+// commuted.
+static bool isCommutableVPERMV3Instruction(unsigned Opcode) {
+#define VPERM_CASES(Suffix) \
+ case X86::VPERMI2##Suffix##128rr: case X86::VPERMT2##Suffix##128rr: \
+ case X86::VPERMI2##Suffix##256rr: case X86::VPERMT2##Suffix##256rr: \
+ case X86::VPERMI2##Suffix##rr: case X86::VPERMT2##Suffix##rr: \
+ case X86::VPERMI2##Suffix##128rm: case X86::VPERMT2##Suffix##128rm: \
+ case X86::VPERMI2##Suffix##256rm: case X86::VPERMT2##Suffix##256rm: \
+ case X86::VPERMI2##Suffix##rm: case X86::VPERMT2##Suffix##rm: \
+ case X86::VPERMI2##Suffix##128rrkz: case X86::VPERMT2##Suffix##128rrkz: \
+ case X86::VPERMI2##Suffix##256rrkz: case X86::VPERMT2##Suffix##256rrkz: \
+ case X86::VPERMI2##Suffix##rrkz: case X86::VPERMT2##Suffix##rrkz: \
+ case X86::VPERMI2##Suffix##128rmkz: case X86::VPERMT2##Suffix##128rmkz: \
+ case X86::VPERMI2##Suffix##256rmkz: case X86::VPERMT2##Suffix##256rmkz: \
+ case X86::VPERMI2##Suffix##rmkz: case X86::VPERMT2##Suffix##rmkz:
+
+#define VPERM_CASES_BROADCAST(Suffix) \
+ VPERM_CASES(Suffix) \
+ case X86::VPERMI2##Suffix##128rmb: case X86::VPERMT2##Suffix##128rmb: \
+ case X86::VPERMI2##Suffix##256rmb: case X86::VPERMT2##Suffix##256rmb: \
+ case X86::VPERMI2##Suffix##rmb: case X86::VPERMT2##Suffix##rmb: \
+ case X86::VPERMI2##Suffix##128rmbkz: case X86::VPERMT2##Suffix##128rmbkz: \
+ case X86::VPERMI2##Suffix##256rmbkz: case X86::VPERMT2##Suffix##256rmbkz: \
+ case X86::VPERMI2##Suffix##rmbkz: case X86::VPERMT2##Suffix##rmbkz:
+
+ switch (Opcode) {
+ default: return false;
+ VPERM_CASES(B)
+ VPERM_CASES_BROADCAST(D)
+ VPERM_CASES_BROADCAST(PD)
+ VPERM_CASES_BROADCAST(PS)
+ VPERM_CASES_BROADCAST(Q)
+ VPERM_CASES(W)
+ return true;
+ }
+#undef VPERM_CASES_BROADCAST
+#undef VPERM_CASES
+}
+
+// Returns the commuted opcode for VPERMI2 and VPERMT2 instructions by switching
+// from the I opcode to the T opcode and vice versa.
+static unsigned getCommutedVPERMV3Opcode(unsigned Opcode) {
+#define VPERM_CASES(Orig, New) \
+ case X86::Orig##128rr: return X86::New##128rr; \
+ case X86::Orig##128rrkz: return X86::New##128rrkz; \
+ case X86::Orig##128rm: return X86::New##128rm; \
+ case X86::Orig##128rmkz: return X86::New##128rmkz; \
+ case X86::Orig##256rr: return X86::New##256rr; \
+ case X86::Orig##256rrkz: return X86::New##256rrkz; \
+ case X86::Orig##256rm: return X86::New##256rm; \
+ case X86::Orig##256rmkz: return X86::New##256rmkz; \
+ case X86::Orig##rr: return X86::New##rr; \
+ case X86::Orig##rrkz: return X86::New##rrkz; \
+ case X86::Orig##rm: return X86::New##rm; \
+ case X86::Orig##rmkz: return X86::New##rmkz;
+
+#define VPERM_CASES_BROADCAST(Orig, New) \
+ VPERM_CASES(Orig, New) \
+ case X86::Orig##128rmb: return X86::New##128rmb; \
+ case X86::Orig##128rmbkz: return X86::New##128rmbkz; \
+ case X86::Orig##256rmb: return X86::New##256rmb; \
+ case X86::Orig##256rmbkz: return X86::New##256rmbkz; \
+ case X86::Orig##rmb: return X86::New##rmb; \
+ case X86::Orig##rmbkz: return X86::New##rmbkz;
+
+ switch (Opcode) {
+ VPERM_CASES(VPERMI2B, VPERMT2B)
+ VPERM_CASES_BROADCAST(VPERMI2D, VPERMT2D)
+ VPERM_CASES_BROADCAST(VPERMI2PD, VPERMT2PD)
+ VPERM_CASES_BROADCAST(VPERMI2PS, VPERMT2PS)
+ VPERM_CASES_BROADCAST(VPERMI2Q, VPERMT2Q)
+ VPERM_CASES(VPERMI2W, VPERMT2W)
+ VPERM_CASES(VPERMT2B, VPERMI2B)
+ VPERM_CASES_BROADCAST(VPERMT2D, VPERMI2D)
+ VPERM_CASES_BROADCAST(VPERMT2PD, VPERMI2PD)
+ VPERM_CASES_BROADCAST(VPERMT2PS, VPERMI2PS)
+ VPERM_CASES_BROADCAST(VPERMT2Q, VPERMI2Q)
+ VPERM_CASES(VPERMT2W, VPERMI2W)
+ }
+
+ llvm_unreachable("Unreachable!");
+#undef VPERM_CASES_BROADCAST
+#undef VPERM_CASES
+}
+
MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
unsigned OpIdx1,
unsigned OpIdx2) const {
@@ -3854,7 +3940,15 @@ MachineInstr *X86InstrInfo::commuteInstr
return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
OpIdx1, OpIdx2);
}
- default:
+ default: {
+ if (isCommutableVPERMV3Instruction(MI.getOpcode())) {
+ unsigned Opc = getCommutedVPERMV3Opcode(MI.getOpcode());
+ auto &WorkingMI = cloneIfNew(MI);
+ WorkingMI.setDesc(get(Opc));
+ return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
+ OpIdx1, OpIdx2);
+ }
+
const X86InstrFMA3Group *FMA3Group =
X86InstrFMA3Info::getFMA3Group(MI.getOpcode());
if (FMA3Group) {
@@ -3870,6 +3964,7 @@ MachineInstr *X86InstrInfo::commuteInstr
return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
}
+ }
}
bool X86InstrInfo::findFMA3CommutedOpIndices(
@@ -4041,12 +4136,26 @@ bool X86InstrInfo::findCommutedOpIndices
// Handle masked instructions since we need to skip over the mask input
// and the preserved input.
if (Desc.TSFlags & X86II::EVEX_K) {
+ // First assume that the first input is the mask operand and skip past it.
unsigned CommutableOpIdx1 = Desc.getNumDefs() + 1;
- // If there is no preserved input we only need to skip 1 operand.
- if (MI.getDesc().getOperandConstraint(Desc.getNumDefs(),
- MCOI::TIED_TO) != -1)
- ++CommutableOpIdx1;
- unsigned CommutableOpIdx2 = CommutableOpIdx1 + 1;
+ unsigned CommutableOpIdx2 = Desc.getNumDefs() + 2;
+ // Check if the first input is tied. If there isn't one then we only
+ // need to skip the mask operand which we did above.
+ if ((MI.getDesc().getOperandConstraint(Desc.getNumDefs(),
+ MCOI::TIED_TO) != -1)) {
+ // If this is zero masking instruction with a tied operand, we need to
+ // move the first index back to the first input since this must
+ // be a 3 input instruction and we want the first two non-mask inputs.
+ // Otherwise this is a 2 input instruction with a preserved input and
+ // mask, so we need to move the indices to skip one more input.
+ if (Desc.TSFlags & X86II::EVEX_Z)
+ --CommutableOpIdx1;
+ else {
+ ++CommutableOpIdx1;
+ ++CommutableOpIdx2;
+ }
+ }
+
if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2,
CommutableOpIdx1, CommutableOpIdx2))
return false;
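To make the index bookkeeping concrete, a sketch of the three EVEX_K operand layouts the code above distinguishes (operand 0 is the destination; a tied input, when present, immediately follows the defs):

  no tied input:               dst, mask, src1, src2                  -> commute src1/src2
  tied, zero masking (3 src):  dst, src1 (tied), mask, src2, src3     -> commute src1/src2
  tied, merge masking (2 src): dst, passthru (tied), mask, src1, src2 -> commute src1/src2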
Modified: llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll?rev=287621&r1=287620&r2=287621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll Mon Nov 21 22:57:34 2016
@@ -369,8 +369,8 @@ define i16 @test16(i1 *%addr, i16 %a) {
; KNL-NEXT: vmovdqa32 %zmm0, %zmm1 {%k2} {z}
; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,3,4,5,6,7,8,9,16,11,12,13,14,15]
-; KNL-NEXT: vpermt2d %zmm0, %zmm2, %zmm1
-; KNL-NEXT: vpslld $31, %zmm1, %zmm0
+; KNL-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
+; KNL-NEXT: vpslld $31, %zmm2, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: retq
@@ -384,8 +384,8 @@ define i16 @test16(i1 *%addr, i16 %a) {
; SKX-NEXT: vpmovm2d %k1, %zmm0
; SKX-NEXT: vpmovm2d %k0, %zmm1
; SKX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,3,4,5,6,7,8,9,16,11,12,13,14,15]
-; SKX-NEXT: vpermt2d %zmm1, %zmm2, %zmm0
-; SKX-NEXT: vpmovd2m %zmm0, %k0
+; SKX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
+; SKX-NEXT: vpmovd2m %zmm2, %k0
; SKX-NEXT: kmovw %k0, %eax
; SKX-NEXT: retq
%x = load i1 , i1 * %addr, align 128
@@ -406,8 +406,8 @@ define i8 @test17(i1 *%addr, i8 %a) {
; KNL-NEXT: vmovdqa64 %zmm0, %zmm1 {%k2} {z}
; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,3,8,5,6,7]
-; KNL-NEXT: vpermt2q %zmm0, %zmm2, %zmm1
-; KNL-NEXT: vpsllq $63, %zmm1, %zmm0
+; KNL-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
+; KNL-NEXT: vpsllq $63, %zmm2, %zmm0
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: retq
@@ -421,8 +421,8 @@ define i8 @test17(i1 *%addr, i8 %a) {
; SKX-NEXT: vpmovm2q %k1, %zmm0
; SKX-NEXT: vpmovm2q %k0, %zmm1
; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,3,8,5,6,7]
-; SKX-NEXT: vpermt2q %zmm1, %zmm2, %zmm0
-; SKX-NEXT: vpmovq2m %zmm0, %k0
+; SKX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
+; SKX-NEXT: vpmovq2m %zmm2, %k0
; SKX-NEXT: kmovb %k0, %eax
; SKX-NEXT: retq
%x = load i1 , i1 * %addr, align 128
@@ -1217,8 +1217,8 @@ define i32 @test_insertelement_v32i1(i32
; SKX-NEXT: vpmovm2w %k1, %zmm0
; SKX-NEXT: vpmovm2w %k0, %zmm1
; SKX-NEXT: vmovdqu16 {{.*#+}} zmm2 = [0,1,2,3,32,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31]
-; SKX-NEXT: vpermt2w %zmm1, %zmm2, %zmm0
-; SKX-NEXT: vpmovw2m %zmm0, %k0
+; SKX-NEXT: vpermi2w %zmm1, %zmm0, %zmm2
+; SKX-NEXT: vpmovw2m %zmm2, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: retq
%cmp_res_i1 = icmp ult i32 %a, %b
@@ -1249,14 +1249,14 @@ define i8 @test_iinsertelement_v4i1(i32
; KNL-NEXT: kmovw %eax, %k2
; KNL-NEXT: vmovdqa64 %zmm1, %zmm3 {%k2} {z}
; KNL-NEXT: vmovdqa64 {{.*#+}} zmm4 = [0,8,2,3,4,5,6,7]
-; KNL-NEXT: vpermt2q %zmm2, %zmm4, %zmm3
-; KNL-NEXT: vpsllq $63, %zmm3, %zmm2
+; KNL-NEXT: vpermi2q %zmm2, %zmm3, %zmm4
+; KNL-NEXT: vpsllq $63, %zmm4, %zmm2
; KNL-NEXT: vptestmq %zmm2, %zmm2, %k2
; KNL-NEXT: vmovdqa64 %zmm1, %zmm2 {%k2} {z}
; KNL-NEXT: vmovdqa64 %zmm1, %zmm3 {%k1} {z}
; KNL-NEXT: vmovdqa64 {{.*#+}} zmm4 = [0,1,8,3,4,5,6,7]
-; KNL-NEXT: vpermt2q %zmm3, %zmm4, %zmm2
-; KNL-NEXT: vpsllq $63, %zmm2, %zmm2
+; KNL-NEXT: vpermi2q %zmm3, %zmm2, %zmm4
+; KNL-NEXT: vpsllq $63, %zmm4, %zmm2
; KNL-NEXT: vptestmq %zmm2, %zmm2, %k1
; KNL-NEXT: vmovdqa64 %zmm1, %zmm2 {%k1} {z}
; KNL-NEXT: vpextrd $3, %xmm0, %eax
@@ -1264,8 +1264,8 @@ define i8 @test_iinsertelement_v4i1(i32
; KNL-NEXT: kmovw %eax, %k1
; KNL-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z}
; KNL-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,1,2,8,4,5,6,7]
-; KNL-NEXT: vpermt2q %zmm0, %zmm1, %zmm2
-; KNL-NEXT: vpsllq $63, %zmm2, %zmm0
+; KNL-NEXT: vpermi2q %zmm0, %zmm2, %zmm1
+; KNL-NEXT: vpsllq $63, %zmm1, %zmm0
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: retq
@@ -1310,8 +1310,8 @@ define i8 @test_iinsertelement_v2i1(i32
; KNL-NEXT: vmovdqa64 %zmm0, %zmm1 {%k2} {z}
; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
-; KNL-NEXT: vpermt2q %zmm0, %zmm2, %zmm1
-; KNL-NEXT: vpsllq $63, %zmm1, %zmm0
+; KNL-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
+; KNL-NEXT: vpsllq $63, %zmm2, %zmm0
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: retq
Modified: llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll?rev=287621&r1=287620&r2=287621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll Mon Nov 21 22:57:34 2016
@@ -643,8 +643,8 @@ define <8 x i1> @test18(i8 %a, i16 %y) {
; KNL-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1} {z}
; KNL-NEXT: vmovdqa64 %zmm0, %zmm2 {%k2} {z}
; KNL-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,2,3,4,5,8,7]
-; KNL-NEXT: vpermt2q %zmm2, %zmm3, %zmm1
-; KNL-NEXT: vpsllq $63, %zmm1, %zmm1
+; KNL-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
+; KNL-NEXT: vpsllq $63, %zmm3, %zmm1
; KNL-NEXT: vptestmq %zmm1, %zmm1, %k1
; KNL-NEXT: kshiftlw $1, %k1, %k1
; KNL-NEXT: kshiftrw $1, %k1, %k1
@@ -665,8 +665,8 @@ define <8 x i1> @test18(i8 %a, i16 %y) {
; SKX-NEXT: vpmovm2q %k0, %zmm0
; SKX-NEXT: vpmovm2q %k1, %zmm1
; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,3,4,5,8,7]
-; SKX-NEXT: vpermt2q %zmm1, %zmm2, %zmm0
-; SKX-NEXT: vpmovq2m %zmm0, %k0
+; SKX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
+; SKX-NEXT: vpmovq2m %zmm2, %k0
; SKX-NEXT: kshiftlb $1, %k0, %k0
; SKX-NEXT: kshiftrb $1, %k0, %k0
; SKX-NEXT: kshiftlb $7, %k2, %k1
Modified: llvm/trunk/test/CodeGen/X86/avx512-vpermv3-commute.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-vpermv3-commute.ll?rev=287621&r1=287620&r2=287621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-vpermv3-commute.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-vpermv3-commute.ll Mon Nov 21 22:57:34 2016
@@ -8,8 +8,7 @@ declare <16 x i32> @llvm.x86.avx512.mask
define <16 x i32>@test_int_x86_avx512_mask_vpermi2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2p) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_d_512:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpermi2d (%rdi), %zmm0, %zmm1
-; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
+; CHECK-NEXT: vpermt2d (%rdi), %zmm1, %zmm0
; CHECK-NEXT: retq
%x2 = load <16 x i32>, <16 x i32>* %x2p
%res = call <16 x i32> @llvm.x86.avx512.mask.vpermi2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
@@ -21,8 +20,7 @@ declare <8 x double> @llvm.x86.avx512.ma
define <8 x double>@test_int_x86_avx512_mask_vpermi2var_pd_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_pd_512:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpermi2pd %zmm2, %zmm0, %zmm1
-; CHECK-NEXT: vmovapd %zmm1, %zmm0
+; CHECK-NEXT: vpermt2pd %zmm2, %zmm1, %zmm0
; CHECK-NEXT: retq
%res = call <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 -1)
ret <8 x double> %res
@@ -33,8 +31,7 @@ declare <16 x float> @llvm.x86.avx512.ma
define <16 x float>@test_int_x86_avx512_mask_vpermi2var_ps_512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_512:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpermi2ps %zmm2, %zmm0, %zmm1
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: vpermt2ps %zmm2, %zmm1, %zmm0
; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.vpermi2var.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 -1)
ret <16 x float> %res
@@ -45,8 +42,7 @@ declare <8 x i64> @llvm.x86.avx512.mask.
define <8 x i64>@test_int_x86_avx512_mask_vpermi2var_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_q_512:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpermi2q %zmm2, %zmm0, %zmm1
-; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
+; CHECK-NEXT: vpermt2q %zmm2, %zmm1, %zmm0
; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.vpermi2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
ret <8 x i64> %res
@@ -58,8 +54,7 @@ define <16 x i32>@test_int_x86_avx512_ma
; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_d_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1
-; CHECK-NEXT: vpermt2d (%rdi), %zmm0, %zmm1 {%k1} {z}
-; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
+; CHECK-NEXT: vpermi2d (%rdi), %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%x2 = load <16 x i32>, <16 x i32>* %x2p
%res = call <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
@@ -72,8 +67,7 @@ define <8 x double>@test_int_x86_avx512_
; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_pd_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1
-; CHECK-NEXT: vpermt2pd (%rdi){1to8}, %zmm0, %zmm1 {%k1} {z}
-; CHECK-NEXT: vmovapd %zmm1, %zmm0
+; CHECK-NEXT: vpermi2pd (%rdi){1to8}, %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%x2s = load double, double* %x2ptr
%x2ins = insertelement <8 x double> undef, double %x2s, i32 0
@@ -88,8 +82,7 @@ define <16 x float>@test_int_x86_avx512_
; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_ps_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vpermt2ps %zmm2, %zmm0, %zmm1 {%k1} {z}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: vpermi2ps %zmm2, %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3)
ret <16 x float> %res
@@ -102,8 +95,7 @@ define <8 x i64>@test_int_x86_avx512_mas
; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_q_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1
-; CHECK-NEXT: vpermt2q %zmm2, %zmm0, %zmm1 {%k1} {z}
-; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
+; CHECK-NEXT: vpermi2q %zmm2, %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
ret <8 x i64> %res
@@ -114,8 +106,7 @@ declare <16 x i32> @llvm.x86.avx512.mask
define <16 x i32>@test_int_x86_avx512_mask_vpermt2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermt2var_d_512:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpermt2d %zmm2, %zmm0, %zmm1
-; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
+; CHECK-NEXT: vpermi2d %zmm2, %zmm1, %zmm0
; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
ret <16 x i32> %res
@@ -126,8 +117,7 @@ declare <4 x i32> @llvm.x86.avx512.mask.
define <4 x i32>@test_int_x86_avx512_mask_vpermt2var_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermt2var_d_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpermt2d %xmm2, %xmm0, %xmm1
-; CHECK-NEXT: vmovdqa64 %xmm1, %xmm0
+; CHECK-NEXT: vpermi2d %xmm2, %xmm1, %xmm0
; CHECK-NEXT: retq
%res = call <4 x i32> @llvm.x86.avx512.mask.vpermt2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
ret <4 x i32> %res
@@ -139,8 +129,7 @@ define <4 x i32>@test_int_x86_avx512_mas
; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_d_128:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1
-; CHECK-NEXT: vpermt2d %xmm2, %xmm0, %xmm1 {%k1} {z}
-; CHECK-NEXT: vmovdqa64 %xmm1, %xmm0
+; CHECK-NEXT: vpermi2d %xmm2, %xmm1, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
%res = call <4 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
ret <4 x i32> %res
@@ -150,8 +139,7 @@ define <4 x i32>@test_int_x86_avx512_mas
; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_d_128_broadcast:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1
-; CHECK-NEXT: vpermt2d (%rdi){1to4}, %xmm0, %xmm1 {%k1} {z}
-; CHECK-NEXT: vmovdqa64 %xmm1, %xmm0
+; CHECK-NEXT: vpermi2d (%rdi){1to4}, %xmm1, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
%x2s = load i32, i32* %x2ptr
%x2ins = insertelement <4 x i32> undef, i32 %x2s, i32 0
@@ -165,8 +153,7 @@ declare <8 x i32> @llvm.x86.avx512.mask.
define <8 x i32>@test_int_x86_avx512_mask_vpermt2var_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermt2var_d_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpermt2d %ymm2, %ymm0, %ymm1
-; CHECK-NEXT: vmovdqa64 %ymm1, %ymm0
+; CHECK-NEXT: vpermi2d %ymm2, %ymm1, %ymm0
; CHECK-NEXT: retq
%res = call <8 x i32> @llvm.x86.avx512.mask.vpermt2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
ret <8 x i32> %res
@@ -178,8 +165,7 @@ define <8 x i32>@test_int_x86_avx512_mas
; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_d_256:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1
-; CHECK-NEXT: vpermt2d %ymm2, %ymm0, %ymm1 {%k1} {z}
-; CHECK-NEXT: vmovdqa64 %ymm1, %ymm0
+; CHECK-NEXT: vpermi2d %ymm2, %ymm1, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
%res = call <8 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
ret <8 x i32> %res
@@ -190,8 +176,7 @@ declare <2 x double> @llvm.x86.avx512.ma
define <2 x double>@test_int_x86_avx512_mask_vpermi2var_pd_128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_pd_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpermi2pd %xmm2, %xmm0, %xmm1
-; CHECK-NEXT: vmovapd %xmm1, %xmm0
+; CHECK-NEXT: vpermt2pd %xmm2, %xmm1, %xmm0
; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.avx512.mask.vpermi2var.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 -1)
ret <2 x double> %res
@@ -202,8 +187,7 @@ declare <4 x double> @llvm.x86.avx512.ma
define <4 x double>@test_int_x86_avx512_mask_vpermi2var_pd_256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_pd_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpermi2pd %ymm2, %ymm0, %ymm1
-; CHECK-NEXT: vmovapd %ymm1, %ymm0
+; CHECK-NEXT: vpermt2pd %ymm2, %ymm1, %ymm0
; CHECK-NEXT: retq
%res = call <4 x double> @llvm.x86.avx512.mask.vpermi2var.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 -1)
ret <4 x double> %res
@@ -214,8 +198,7 @@ declare <4 x float> @llvm.x86.avx512.mas
define <4 x float>@test_int_x86_avx512_mask_vpermi2var_ps_128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpermi2ps %xmm2, %xmm0, %xmm1
-; CHECK-NEXT: vmovaps %xmm1, %xmm0
+; CHECK-NEXT: vpermt2ps %xmm2, %xmm1, %xmm0
; CHECK-NEXT: retq
%res = call <4 x float> @llvm.x86.avx512.mask.vpermi2var.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 -1)
ret <4 x float> %res
@@ -226,8 +209,7 @@ declare <8 x float> @llvm.x86.avx512.mas
define <8 x float>@test_int_x86_avx512_mask_vpermi2var_ps_256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpermi2ps %ymm2, %ymm0, %ymm1
-; CHECK-NEXT: vmovaps %ymm1, %ymm0
+; CHECK-NEXT: vpermt2ps %ymm2, %ymm1, %ymm0
; CHECK-NEXT: retq
%res = call <8 x float> @llvm.x86.avx512.mask.vpermi2var.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 -1)
ret <8 x float> %res
@@ -236,8 +218,7 @@ define <8 x float>@test_int_x86_avx512_m
define <8 x float>@test_int_x86_avx512_mask_vpermi2var_ps_256_load(<8 x float> %x0, <8 x i32> %x1, <8 x float>* %x2p) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_256_load:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpermi2ps (%rdi), %ymm0, %ymm1
-; CHECK-NEXT: vmovaps %ymm1, %ymm0
+; CHECK-NEXT: vpermt2ps (%rdi), %ymm1, %ymm0
; CHECK-NEXT: retq
%x2 = load <8 x float>, <8 x float>* %x2p
%res = call <8 x float> @llvm.x86.avx512.mask.vpermi2var.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 -1)
@@ -247,8 +228,7 @@ define <8 x float>@test_int_x86_avx512_m
define <8 x float>@test_int_x86_avx512_mask_vpermi2var_ps_256_broadcast(<8 x float> %x0, <8 x i32> %x1, float* %x2ptr) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_256_broadcast:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpermi2ps (%rdi){1to8}, %ymm0, %ymm1
-; CHECK-NEXT: vmovaps %ymm1, %ymm0
+; CHECK-NEXT: vpermt2ps (%rdi){1to8}, %ymm1, %ymm0
; CHECK-NEXT: retq
%x2s = load float, float* %x2ptr
%x2ins = insertelement <8 x float> undef, float %x2s, i32 0
@@ -262,8 +242,7 @@ declare <16 x i8> @llvm.x86.avx512.mask.
define <16 x i8>@test_int_x86_avx512_mask_vpermi2var_qi_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_qi_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpermi2b %xmm2, %xmm0, %xmm1
-; CHECK-NEXT: vmovdqa64 %xmm1, %xmm0
+; CHECK-NEXT: vpermt2b %xmm2, %xmm1, %xmm0
; CHECK-NEXT: retq
%res = call <16 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 -1)
ret <16 x i8> %res
@@ -274,8 +253,7 @@ declare <32 x i8> @llvm.x86.avx512.mask.
define <32 x i8>@test_int_x86_avx512_mask_vpermi2var_qi_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_qi_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpermi2b %ymm2, %ymm0, %ymm1
-; CHECK-NEXT: vmovdqa64 %ymm1, %ymm0
+; CHECK-NEXT: vpermt2b %ymm2, %ymm1, %ymm0
; CHECK-NEXT: retq
%res = call <32 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
ret <32 x i8> %res
@@ -286,8 +264,7 @@ declare <16 x i8> @llvm.x86.avx512.mask.
define <16 x i8>@test_int_x86_avx512_mask_vpermt2var_qi_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermt2var_qi_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpermt2b %xmm2, %xmm0, %xmm1
-; CHECK-NEXT: vmovdqa64 %xmm1, %xmm0
+; CHECK-NEXT: vpermi2b %xmm2, %xmm1, %xmm0
; CHECK-NEXT: retq
%res = call <16 x i8> @llvm.x86.avx512.mask.vpermt2var.qi.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 -1)
ret <16 x i8> %res
@@ -296,8 +273,7 @@ define <16 x i8>@test_int_x86_avx512_mas
define <16 x i8>@test_int_x86_avx512_mask_vpermt2var_qi_128_load(<16 x i8> %x0, <16 x i8> %x1, <16 x i8>* %x2p) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermt2var_qi_128_load:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpermt2b (%rdi), %xmm0, %xmm1
-; CHECK-NEXT: vmovdqa64 %xmm1, %xmm0
+; CHECK-NEXT: vpermi2b (%rdi), %xmm1, %xmm0
; CHECK-NEXT: retq
%x2 = load <16 x i8>, <16 x i8>* %x2p
%res = call <16 x i8> @llvm.x86.avx512.mask.vpermt2var.qi.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 -1)
@@ -309,8 +285,7 @@ declare <32 x i8> @llvm.x86.avx512.mask.
define <32 x i8>@test_int_x86_avx512_mask_vpermt2var_qi_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermt2var_qi_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpermt2b %ymm2, %ymm0, %ymm1
-; CHECK-NEXT: vmovdqa64 %ymm1, %ymm0
+; CHECK-NEXT: vpermi2b %ymm2, %ymm1, %ymm0
; CHECK-NEXT: retq
%res = call <32 x i8> @llvm.x86.avx512.mask.vpermt2var.qi.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
ret <32 x i8> %res
@@ -322,8 +297,7 @@ define <16 x i8>@test_int_x86_avx512_mas
; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_qi_128:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vpermt2b %xmm2, %xmm0, %xmm1 {%k1} {z}
-; CHECK-NEXT: vmovdqa64 %xmm1, %xmm0
+; CHECK-NEXT: vpermi2b %xmm2, %xmm1, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
%res = call <16 x i8> @llvm.x86.avx512.maskz.vpermt2var.qi.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3)
ret <16 x i8> %res
@@ -333,8 +307,7 @@ define <16 x i8>@test_int_x86_avx512_mas
; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_qi_128_load:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1
-; CHECK-NEXT: vpermt2b (%rdi), %xmm0, %xmm1 {%k1} {z}
-; CHECK-NEXT: vmovdqa64 %xmm1, %xmm0
+; CHECK-NEXT: vpermi2b (%rdi), %xmm1, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
%x2 = load <16 x i8>, <16 x i8>* %x2p
%res = call <16 x i8> @llvm.x86.avx512.maskz.vpermt2var.qi.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3)
@@ -347,8 +320,7 @@ define <32 x i8>@test_int_x86_avx512_mas
; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_qi_256:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpermt2b %ymm2, %ymm0, %ymm1 {%k1} {z}
-; CHECK-NEXT: vmovdqa64 %ymm1, %ymm0
+; CHECK-NEXT: vpermi2b %ymm2, %ymm1, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
%res = call <32 x i8> @llvm.x86.avx512.maskz.vpermt2var.qi.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
ret <32 x i8> %res
@@ -358,8 +330,7 @@ define <32 x i8>@test_int_x86_avx512_mas
; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_qi_256_load:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovd %esi, %k1
-; CHECK-NEXT: vpermt2b (%rdi), %ymm0, %ymm1 {%k1} {z}
-; CHECK-NEXT: vmovdqa64 %ymm1, %ymm0
+; CHECK-NEXT: vpermi2b (%rdi), %ymm1, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
%x2 = load <32 x i8>, <32 x i8>* %x2p
%res = call <32 x i8> @llvm.x86.avx512.maskz.vpermt2var.qi.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
Modified: llvm/trunk/test/CodeGen/X86/avx512vbmi-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512vbmi-intrinsics.ll?rev=287621&r1=287620&r2=287621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vbmi-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512vbmi-intrinsics.ll Mon Nov 21 22:57:34 2016
@@ -90,8 +90,7 @@ define <64 x i8>@test_int_x86_avx512_mas
; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_qi_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovq %rdi, %k1
-; CHECK-NEXT: vpermt2b %zmm2, %zmm0, %zmm1 {%k1} {z}
-; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
+; CHECK-NEXT: vpermi2b %zmm2, %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%res = call <64 x i8> @llvm.x86.avx512.maskz.vpermt2var.qi.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
ret <64 x i8> %res
Modified: llvm/trunk/test/CodeGen/X86/avx512vbmivl-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512vbmivl-intrinsics.ll?rev=287621&r1=287620&r2=287621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vbmivl-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512vbmivl-intrinsics.ll Mon Nov 21 22:57:34 2016
@@ -175,8 +175,7 @@ define <16 x i8>@test_int_x86_avx512_mas
; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_qi_128:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vpermt2b %xmm2, %xmm0, %xmm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x7d,0xca]
-; CHECK-NEXT: vmovdqa64 %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc1]
+; CHECK-NEXT: vpermi2b %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x89,0x75,0xc2]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i8> @llvm.x86.avx512.maskz.vpermt2var.qi.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3)
ret <16 x i8> %res
@@ -188,8 +187,7 @@ define <32 x i8>@test_int_x86_avx512_mas
; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_qi_256:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
-; CHECK-NEXT: vpermt2b %ymm2, %ymm0, %ymm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x7d,0xca]
-; CHECK-NEXT: vmovdqa64 %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xc1]
+; CHECK-NEXT: vpermi2b %ymm2, %ymm1, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0xa9,0x75,0xc2]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call <32 x i8> @llvm.x86.avx512.maskz.vpermt2var.qi.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
ret <32 x i8> %res
Modified: llvm/trunk/test/CodeGen/X86/merge-consecutive-loads-512.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/merge-consecutive-loads-512.ll?rev=287621&r1=287620&r2=287621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/merge-consecutive-loads-512.ll (original)
+++ llvm/trunk/test/CodeGen/X86/merge-consecutive-loads-512.ll Mon Nov 21 22:57:34 2016
@@ -138,19 +138,19 @@ define <8 x double> @merge_8f64_f64_12zz
define <8 x double> @merge_8f64_f64_1u3u5zu8(double* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_8f64_f64_1u3u5zu8:
; ALL: # BB#0:
-; ALL-NEXT: vmovupd 8(%rdi), %zmm0
-; ALL-NEXT: vpxord %zmm1, %zmm1, %zmm1
-; ALL-NEXT: vmovapd {{.*#+}} zmm2 = <0,u,2,u,4,13,u,7>
-; ALL-NEXT: vpermt2pd %zmm1, %zmm2, %zmm0
+; ALL-NEXT: vmovupd 8(%rdi), %zmm1
+; ALL-NEXT: vpxord %zmm2, %zmm2, %zmm2
+; ALL-NEXT: vmovapd {{.*#+}} zmm0 = <0,u,2,u,4,13,u,7>
+; ALL-NEXT: vpermi2pd %zmm2, %zmm1, %zmm0
; ALL-NEXT: retq
;
; X32-AVX512F-LABEL: merge_8f64_f64_1u3u5zu8:
; X32-AVX512F: # BB#0:
; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX512F-NEXT: vmovupd 8(%eax), %zmm0
-; X32-AVX512F-NEXT: vpxord %zmm1, %zmm1, %zmm1
-; X32-AVX512F-NEXT: vmovapd {{.*#+}} zmm2 = <0,0,u,u,2,0,u,u,4,0,13,0,u,u,7,0>
-; X32-AVX512F-NEXT: vpermt2pd %zmm1, %zmm2, %zmm0
+; X32-AVX512F-NEXT: vmovupd 8(%eax), %zmm1
+; X32-AVX512F-NEXT: vpxord %zmm2, %zmm2, %zmm2
+; X32-AVX512F-NEXT: vmovapd {{.*#+}} zmm0 = <0,0,u,u,2,0,u,u,4,0,13,0,u,u,7,0>
+; X32-AVX512F-NEXT: vpermi2pd %zmm2, %zmm1, %zmm0
; X32-AVX512F-NEXT: retl
%ptr0 = getelementptr inbounds double, double* %ptr, i64 1
%ptr2 = getelementptr inbounds double, double* %ptr, i64 3
@@ -225,19 +225,19 @@ define <8 x i64> @merge_8i64_i64_56zz9uz
define <8 x i64> @merge_8i64_i64_1u3u5zu8(i64* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_8i64_i64_1u3u5zu8:
; ALL: # BB#0:
-; ALL-NEXT: vmovdqu64 8(%rdi), %zmm0
-; ALL-NEXT: vpxord %zmm1, %zmm1, %zmm1
-; ALL-NEXT: vmovdqa64 {{.*#+}} zmm2 = <0,u,2,u,4,13,u,7>
-; ALL-NEXT: vpermt2q %zmm1, %zmm2, %zmm0
+; ALL-NEXT: vmovdqu64 8(%rdi), %zmm1
+; ALL-NEXT: vpxord %zmm2, %zmm2, %zmm2
+; ALL-NEXT: vmovdqa64 {{.*#+}} zmm0 = <0,u,2,u,4,13,u,7>
+; ALL-NEXT: vpermi2q %zmm2, %zmm1, %zmm0
; ALL-NEXT: retq
;
; X32-AVX512F-LABEL: merge_8i64_i64_1u3u5zu8:
; X32-AVX512F: # BB#0:
; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX512F-NEXT: vmovdqu64 8(%eax), %zmm0
-; X32-AVX512F-NEXT: vpxord %zmm1, %zmm1, %zmm1
-; X32-AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = <0,0,u,u,2,0,u,u,4,0,13,0,u,u,7,0>
-; X32-AVX512F-NEXT: vpermt2q %zmm1, %zmm2, %zmm0
+; X32-AVX512F-NEXT: vmovdqu64 8(%eax), %zmm1
+; X32-AVX512F-NEXT: vpxord %zmm2, %zmm2, %zmm2
+; X32-AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm0 = <0,0,u,u,2,0,u,u,4,0,13,0,u,u,7,0>
+; X32-AVX512F-NEXT: vpermi2q %zmm2, %zmm1, %zmm0
; X32-AVX512F-NEXT: retl
%ptr0 = getelementptr inbounds i64, i64* %ptr, i64 1
%ptr2 = getelementptr inbounds i64, i64* %ptr, i64 3
@@ -334,19 +334,19 @@ define <16 x float> @merge_16f32_f32_0uu
define <16 x float> @merge_16f32_f32_0uu3zzuuuuuzCuEF(float* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_16f32_f32_0uu3zzuuuuuzCuEF:
; ALL: # BB#0:
-; ALL-NEXT: vmovups (%rdi), %zmm0
-; ALL-NEXT: vpxord %zmm1, %zmm1, %zmm1
-; ALL-NEXT: vmovaps {{.*#+}} zmm2 = <0,u,u,3,20,21,u,u,u,u,u,u,12,29,14,15>
-; ALL-NEXT: vpermt2ps %zmm1, %zmm2, %zmm0
+; ALL-NEXT: vmovups (%rdi), %zmm1
+; ALL-NEXT: vpxord %zmm2, %zmm2, %zmm2
+; ALL-NEXT: vmovaps {{.*#+}} zmm0 = <0,u,u,3,20,21,u,u,u,u,u,u,12,29,14,15>
+; ALL-NEXT: vpermi2ps %zmm2, %zmm1, %zmm0
; ALL-NEXT: retq
;
; X32-AVX512F-LABEL: merge_16f32_f32_0uu3zzuuuuuzCuEF:
; X32-AVX512F: # BB#0:
; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX512F-NEXT: vmovups (%eax), %zmm0
-; X32-AVX512F-NEXT: vpxord %zmm1, %zmm1, %zmm1
-; X32-AVX512F-NEXT: vmovaps {{.*#+}} zmm2 = <0,u,u,3,20,21,u,u,u,u,u,u,12,29,14,15>
-; X32-AVX512F-NEXT: vpermt2ps %zmm1, %zmm2, %zmm0
+; X32-AVX512F-NEXT: vmovups (%eax), %zmm1
+; X32-AVX512F-NEXT: vpxord %zmm2, %zmm2, %zmm2
+; X32-AVX512F-NEXT: vmovaps {{.*#+}} zmm0 = <0,u,u,3,20,21,u,u,u,u,u,u,12,29,14,15>
+; X32-AVX512F-NEXT: vpermi2ps %zmm2, %zmm1, %zmm0
; X32-AVX512F-NEXT: retl
%ptr0 = getelementptr inbounds float, float* %ptr, i64 0
%ptr3 = getelementptr inbounds float, float* %ptr, i64 3
@@ -448,19 +448,19 @@ define <16 x i32> @merge_16i32_i32_0uu3u
define <16 x i32> @merge_16i32_i32_0uu3zzuuuuuzCuEF(i32* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_16i32_i32_0uu3zzuuuuuzCuEF:
; ALL: # BB#0:
-; ALL-NEXT: vmovdqu32 (%rdi), %zmm0
-; ALL-NEXT: vpxord %zmm1, %zmm1, %zmm1
-; ALL-NEXT: vmovdqa32 {{.*#+}} zmm2 = <0,u,u,3,20,21,u,u,u,u,u,u,12,29,14,15>
-; ALL-NEXT: vpermt2d %zmm1, %zmm2, %zmm0
+; ALL-NEXT: vmovdqu32 (%rdi), %zmm1
+; ALL-NEXT: vpxord %zmm2, %zmm2, %zmm2
+; ALL-NEXT: vmovdqa32 {{.*#+}} zmm0 = <0,u,u,3,20,21,u,u,u,u,u,u,12,29,14,15>
+; ALL-NEXT: vpermi2d %zmm2, %zmm1, %zmm0
; ALL-NEXT: retq
;
; X32-AVX512F-LABEL: merge_16i32_i32_0uu3zzuuuuuzCuEF:
; X32-AVX512F: # BB#0:
; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX512F-NEXT: vmovdqu32 (%eax), %zmm0
-; X32-AVX512F-NEXT: vpxord %zmm1, %zmm1, %zmm1
-; X32-AVX512F-NEXT: vmovdqa32 {{.*#+}} zmm2 = <0,u,u,3,20,21,u,u,u,u,u,u,12,29,14,15>
-; X32-AVX512F-NEXT: vpermt2d %zmm1, %zmm2, %zmm0
+; X32-AVX512F-NEXT: vmovdqu32 (%eax), %zmm1
+; X32-AVX512F-NEXT: vpxord %zmm2, %zmm2, %zmm2
+; X32-AVX512F-NEXT: vmovdqa32 {{.*#+}} zmm0 = <0,u,u,3,20,21,u,u,u,u,u,u,12,29,14,15>
+; X32-AVX512F-NEXT: vpermi2d %zmm2, %zmm1, %zmm0
; X32-AVX512F-NEXT: retl
%ptr0 = getelementptr inbounds i32, i32* %ptr, i64 0
%ptr3 = getelementptr inbounds i32, i32* %ptr, i64 3
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v16.ll?rev=287621&r1=287620&r2=287621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v16.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v16.ll Mon Nov 21 22:57:34 2016
@@ -877,8 +877,8 @@ define <16 x i16> @shuffle_v16i16_16_16_
; AVX512VL-LABEL: shuffle_v16i16_16_16_16_16_04_05_06_07_24_24_24_24_12_13_14_15:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm2 = [0,0,0,0,20,21,22,23,8,8,8,8,28,29,30,31]
-; AVX512VL-NEXT: vpermt2w %ymm0, %ymm2, %ymm1
-; AVX512VL-NEXT: vmovdqa64 %ymm1, %ymm0
+; AVX512VL-NEXT: vpermi2w %ymm0, %ymm1, %ymm2
+; AVX512VL-NEXT: vmovdqa64 %ymm2, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 16, i32 16, i32 16, i32 16, i32 4, i32 5, i32 6, i32 7, i32 24, i32 24, i32 24, i32 24, i32 12, i32 13, i32 14, i32 15>
ret <16 x i16> %shuffle
@@ -910,8 +910,8 @@ define <16 x i16> @shuffle_v16i16_19_18_
; AVX512VL-LABEL: shuffle_v16i16_19_18_17_16_07_06_05_04_27_26_25_24_15_14_13_12:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm2 = [3,2,1,0,23,22,21,20,11,10,9,8,31,30,29,28]
-; AVX512VL-NEXT: vpermt2w %ymm0, %ymm2, %ymm1
-; AVX512VL-NEXT: vmovdqa64 %ymm1, %ymm0
+; AVX512VL-NEXT: vpermi2w %ymm0, %ymm1, %ymm2
+; AVX512VL-NEXT: vmovdqa64 %ymm2, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 19, i32 18, i32 17, i32 16, i32 7, i32 6, i32 5, i32 4, i32 27, i32 26, i32 25, i32 24, i32 15, i32 14, i32 13, i32 12>
ret <16 x i16> %shuffle
@@ -941,8 +941,8 @@ define <16 x i16> @shuffle_v16i16_19_18_
; AVX512VL-LABEL: shuffle_v16i16_19_18_17_16_03_02_01_00_27_26_25_24_11_10_09_08:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm2 = [3,2,1,0,19,18,17,16,11,10,9,8,27,26,25,24]
-; AVX512VL-NEXT: vpermt2w %ymm0, %ymm2, %ymm1
-; AVX512VL-NEXT: vmovdqa64 %ymm1, %ymm0
+; AVX512VL-NEXT: vpermi2w %ymm0, %ymm1, %ymm2
+; AVX512VL-NEXT: vmovdqa64 %ymm2, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 19, i32 18, i32 17, i32 16, i32 3, i32 2, i32 1, i32 0, i32 27, i32 26, i32 25, i32 24, i32 11, i32 10, i32 9, i32 8>
ret <16 x i16> %shuffle
@@ -3279,8 +3279,8 @@ define <16 x i16> @shuffle_v16i16_16_00_
; AVX512VL-LABEL: shuffle_v16i16_16_00_17_01_18_02_19_11_24_08_25_09_26_10_27_11:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm2 = [0,16,1,17,2,18,3,27,8,24,9,25,10,26,11,27]
-; AVX512VL-NEXT: vpermt2w %ymm0, %ymm2, %ymm1
-; AVX512VL-NEXT: vmovdqa64 %ymm1, %ymm0
+; AVX512VL-NEXT: vpermi2w %ymm0, %ymm1, %ymm2
+; AVX512VL-NEXT: vmovdqa64 %ymm2, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 16, i32 0, i32 17, i32 1, i32 18, i32 2, i32 19, i32 11, i32 24, i32 8, i32 25, i32 9, i32 26, i32 10, i32 27, i32 11>
ret <16 x i16> %shuffle
@@ -3313,8 +3313,8 @@ define <16 x i16> @shuffle_v16i16_20_04_
; AVX512VL-LABEL: shuffle_v16i16_20_04_21_05_22_06_23_15_28_12_29_13_30_14_31_15:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm2 = [4,20,5,21,6,22,7,31,12,28,13,29,14,30,15,31]
-; AVX512VL-NEXT: vpermt2w %ymm0, %ymm2, %ymm1
-; AVX512VL-NEXT: vmovdqa64 %ymm1, %ymm0
+; AVX512VL-NEXT: vpermi2w %ymm0, %ymm1, %ymm2
+; AVX512VL-NEXT: vmovdqa64 %ymm2, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 20, i32 4, i32 21, i32 5, i32 22, i32 6, i32 23, i32 15, i32 28, i32 12, i32 29, i32 13, i32 30, i32 14, i32 31, i32 15>
ret <16 x i16> %shuffle
@@ -3476,8 +3476,8 @@ define <16 x i16> @shuffle_v16i16_uu_uu_
; AVX512VL-LABEL: shuffle_v16i16_uu_uu_uu_uu_20_21_22_11_uu_uu_uu_uu_28_29_30_11:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm2 = <u,u,u,u,4,5,6,27,u,u,u,u,12,13,14,27>
-; AVX512VL-NEXT: vpermt2w %ymm0, %ymm2, %ymm1
-; AVX512VL-NEXT: vmovdqa64 %ymm1, %ymm0
+; AVX512VL-NEXT: vpermi2w %ymm0, %ymm1, %ymm2
+; AVX512VL-NEXT: vmovdqa64 %ymm2, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 20, i32 21, i32 22, i32 11, i32 undef, i32 undef, i32 undef, i32 undef, i32 28, i32 29, i32 30, i32 11>
ret <16 x i16> %shuffle
@@ -3504,8 +3504,8 @@ define <16 x i16> @shuffle_v16i16_20_21_
; AVX512VL-LABEL: shuffle_v16i16_20_21_22_03_uu_uu_uu_uu_28_29_30_11_uu_uu_uu_uu:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm2 = <4,5,6,19,u,u,u,u,12,13,14,27,u,u,u,u>
-; AVX512VL-NEXT: vpermt2w %ymm0, %ymm2, %ymm1
-; AVX512VL-NEXT: vmovdqa64 %ymm1, %ymm0
+; AVX512VL-NEXT: vpermi2w %ymm0, %ymm1, %ymm2
+; AVX512VL-NEXT: vmovdqa64 %ymm2, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 20, i32 21, i32 22, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 28, i32 29, i32 30, i32 11, i32 undef, i32 undef, i32 undef, i32 undef>
ret <16 x i16> %shuffle
@@ -3628,8 +3628,8 @@ define <16 x i16> @shuffle_v16i16_uu_uu_
; AVX512VL-LABEL: shuffle_v16i16_uu_uu_04_uu_16_18_20_uu_uu_uu_12_uu_24_26_28_uu:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm2 = <u,u,20,u,0,2,4,u,u,u,28,u,8,10,12,u>
-; AVX512VL-NEXT: vpermt2w %ymm0, %ymm2, %ymm1
-; AVX512VL-NEXT: vmovdqa64 %ymm1, %ymm0
+; AVX512VL-NEXT: vpermi2w %ymm0, %ymm1, %ymm2
+; AVX512VL-NEXT: vmovdqa64 %ymm2, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 4, i32 undef, i32 16, i32 18, i32 20, i32 undef, i32 undef, i32 undef, i32 12, i32 undef, i32 24, i32 26, i32 28, i32 undef>
ret <16 x i16> %shuffle
@@ -3774,8 +3774,8 @@ define <16 x i16> @shuffle_v16i16_19_20_
; AVX512VL-LABEL: shuffle_v16i16_19_20_21_22_23_00_01_10_27_28_29_30_31_08_09_10:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm2 = [3,4,5,6,7,16,17,26,11,12,13,14,15,24,25,26]
-; AVX512VL-NEXT: vpermt2w %ymm0, %ymm2, %ymm1
-; AVX512VL-NEXT: vmovdqa64 %ymm1, %ymm0
+; AVX512VL-NEXT: vpermi2w %ymm0, %ymm1, %ymm2
+; AVX512VL-NEXT: vmovdqa64 %ymm2, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 19, i32 20, i32 21, i32 22, i32 23, i32 0, i32 1, i32 10, i32 27, i32 28, i32 29, i32 30, i32 31, i32 8, i32 9, i32 10>
ret <16 x i16> %shuffle
@@ -3938,8 +3938,8 @@ define <16 x i16> @shuffle_v16i16_05_06_
; AVX512VL-LABEL: shuffle_v16i16_05_06_07_16_17_18_19_28_13_14_15_24_25_26_27_28:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm2 = [21,22,23,0,1,2,3,12,29,30,31,8,9,10,11,12]
-; AVX512VL-NEXT: vpermt2w %ymm0, %ymm2, %ymm1
-; AVX512VL-NEXT: vmovdqa64 %ymm1, %ymm0
+; AVX512VL-NEXT: vpermi2w %ymm0, %ymm1, %ymm2
+; AVX512VL-NEXT: vmovdqa64 %ymm2, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 28, i32 13, i32 14, i32 15, i32 24, i32 25, i32 26, i32 27, i32 28>
ret <16 x i16> %shuffle
@@ -3986,8 +3986,8 @@ define <16 x i16> @shuffle_v16i16_23_uu_
; AVX512VL-LABEL: shuffle_v16i16_23_uu_03_uu_20_20_05_uu_31_uu_11_uu_28_28_13_uu:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm2 = <7,u,19,u,4,4,21,u,15,u,27,u,12,12,29,u>
-; AVX512VL-NEXT: vpermt2w %ymm0, %ymm2, %ymm1
-; AVX512VL-NEXT: vmovdqa64 %ymm1, %ymm0
+; AVX512VL-NEXT: vpermi2w %ymm0, %ymm1, %ymm2
+; AVX512VL-NEXT: vmovdqa64 %ymm2, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 23, i32 undef, i32 3, i32 undef, i32 20, i32 20, i32 5, i32 undef, i32 31, i32 undef, i32 11, i32 undef, i32 28, i32 28, i32 13, i32 undef>
ret <16 x i16> %shuffle
@@ -4167,8 +4167,8 @@ define <16 x i16> @shuffle_v16i16_02_18_
; AVX512VL-LABEL: shuffle_v16i16_02_18_03_19_10_26_11_27_00_16_01_17_08_24_09_25:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm2 = [2,18,3,19,0,16,1,17,10,26,11,27,8,24,9,25]
-; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
-; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
+; AVX512VL-NEXT: vpermi2w %ymm1, %ymm0, %ymm2
+; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm2[0,2,1,3]
; AVX512VL-NEXT: retq
%1 = shufflevector <16 x i16> %a0, <16 x i16> %a1, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 0, i32 16, i32 1, i32 17, i32 10, i32 26, i32 11, i32 27, i32 8, i32 24, i32 9, i32 25>
%2 = bitcast <16 x i16> %1 to <4 x i64>
@@ -4257,8 +4257,8 @@ define <16 x i16> @PR24935(<16 x i16> %a
; AVX512VL-LABEL: PR24935:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm2 = [11,10,17,13,10,7,27,0,17,25,0,12,29,20,16,8]
-; AVX512VL-NEXT: vpermt2w %ymm0, %ymm2, %ymm1
-; AVX512VL-NEXT: vmovdqa64 %ymm1, %ymm0
+; AVX512VL-NEXT: vpermi2w %ymm0, %ymm1, %ymm2
+; AVX512VL-NEXT: vmovdqa64 %ymm2, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 27, i32 26, i32 1, i32 29, i32 26, i32 23, i32 11, i32 16, i32 1, i32 9, i32 16, i32 28, i32 13, i32 4, i32 0, i32 24>
ret <16 x i16> %shuffle
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v8.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v8.ll?rev=287621&r1=287620&r2=287621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v8.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v8.ll Mon Nov 21 22:57:34 2016
@@ -312,10 +312,9 @@ define <8 x float> @shuffle_v8f32_08991a
;
; AVX512VL-LABEL: shuffle_v8f32_08991abb:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,1,3]
-; AVX512VL-NEXT: vmovaps {{.*#+}} ymm2 = [8,0,1,1,10,2,3,3]
-; AVX512VL-NEXT: vpermt2ps %ymm0, %ymm2, %ymm1
-; AVX512VL-NEXT: vmovaps %ymm1, %ymm0
+; AVX512VL-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[0,1,1,3]
+; AVX512VL-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,1,1,10,2,3,3]
+; AVX512VL-NEXT: vpermi2ps %ymm2, %ymm1, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 9, i32 9, i32 1, i32 10, i32 11, i32 11>
ret <8 x float> %shuffle
@@ -675,8 +674,8 @@ define <8 x float> @shuffle_v8f32_c348cd
; AVX512VL-LABEL: shuffle_v8f32_c348cda0:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vmovaps {{.*#+}} ymm2 = [4,11,12,0,4,5,2,8]
-; AVX512VL-NEXT: vpermt2ps %ymm0, %ymm2, %ymm1
-; AVX512VL-NEXT: vmovaps %ymm1, %ymm0
+; AVX512VL-NEXT: vpermi2ps %ymm0, %ymm1, %ymm2
+; AVX512VL-NEXT: vmovaps %ymm2, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 12, i32 3, i32 4, i32 8, i32 12, i32 13, i32 10, i32 0>
ret <8 x float> %shuffle
@@ -1316,9 +1315,9 @@ define <8 x i32> @shuffle_v8i32_08192a3b
;
; AVX512VL-LABEL: shuffle_v8i32_08192a3b:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; AVX512VL-NEXT: vmovdqa32 {{.*#+}} ymm2 = [0,8,2,9,4,10,6,11]
-; AVX512VL-NEXT: vpermt2d %ymm1, %ymm2, %ymm0
+; AVX512VL-NEXT: vpmovzxdq {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
+; AVX512VL-NEXT: vmovdqa32 {{.*#+}} ymm0 = [0,8,2,9,4,10,6,11]
+; AVX512VL-NEXT: vpermi2d %ymm1, %ymm2, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
ret <8 x i32> %shuffle
@@ -1345,10 +1344,9 @@ define <8 x i32> @shuffle_v8i32_08991abb
;
; AVX512VL-LABEL: shuffle_v8i32_08991abb:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
-; AVX512VL-NEXT: vmovdqa32 {{.*#+}} ymm2 = [8,0,1,1,10,2,3,3]
-; AVX512VL-NEXT: vpermt2d %ymm0, %ymm2, %ymm1
-; AVX512VL-NEXT: vmovdqa64 %ymm1, %ymm0
+; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero
+; AVX512VL-NEXT: vmovdqa32 {{.*#+}} ymm0 = [8,0,1,1,10,2,3,3]
+; AVX512VL-NEXT: vpermi2d %ymm2, %ymm1, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 9, i32 9, i32 1, i32 10, i32 11, i32 11>
ret <8 x i32> %shuffle
@@ -1992,8 +1990,8 @@ define <8 x i32> @shuffle_v8i32_6caa87e5
; AVX512VL-LABEL: shuffle_v8i32_6caa87e5:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vmovdqa32 {{.*#+}} ymm2 = [14,4,2,2,0,15,6,13]
-; AVX512VL-NEXT: vpermt2d %ymm0, %ymm2, %ymm1
-; AVX512VL-NEXT: vmovdqa64 %ymm1, %ymm0
+; AVX512VL-NEXT: vpermi2d %ymm0, %ymm1, %ymm2
+; AVX512VL-NEXT: vmovdqa64 %ymm2, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 6, i32 12, i32 10, i32 10, i32 8, i32 7, i32 14, i32 5>
ret <8 x i32> %shuffle
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v8.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v8.ll?rev=287621&r1=287620&r2=287621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v8.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v8.ll Mon Nov 21 22:57:34 2016
@@ -262,15 +262,15 @@ define <8 x double> @shuffle_v8f64_8823c
; AVX512F-LABEL: shuffle_v8f64_8823cc67:
; AVX512F: # BB#0:
; AVX512F-NEXT: vmovapd {{.*#+}} zmm2 = [0,0,10,11,4,4,14,15]
-; AVX512F-NEXT: vpermt2pd %zmm0, %zmm2, %zmm1
-; AVX512F-NEXT: vmovapd %zmm1, %zmm0
+; AVX512F-NEXT: vpermi2pd %zmm0, %zmm1, %zmm2
+; AVX512F-NEXT: vmovapd %zmm2, %zmm0
; AVX512F-NEXT: retq
;
; AVX512F-32-LABEL: shuffle_v8f64_8823cc67:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: vmovapd {{.*#+}} zmm2 = [0,0,0,0,10,0,11,0,4,0,4,0,14,0,15,0]
-; AVX512F-32-NEXT: vpermt2pd %zmm0, %zmm2, %zmm1
-; AVX512F-32-NEXT: vmovapd %zmm1, %zmm0
+; AVX512F-32-NEXT: vpermi2pd %zmm0, %zmm1, %zmm2
+; AVX512F-32-NEXT: vmovapd %zmm2, %zmm0
; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 8, i32 8, i32 2, i32 3, i32 12, i32 12, i32 6, i32 7>
ret <8 x double> %shuffle
@@ -281,15 +281,15 @@ define <8 x double> @shuffle_v8f64_9832d
; AVX512F-LABEL: shuffle_v8f64_9832dc76:
; AVX512F: # BB#0:
; AVX512F-NEXT: vmovapd {{.*#+}} zmm2 = [1,0,11,10,5,4,15,14]
-; AVX512F-NEXT: vpermt2pd %zmm0, %zmm2, %zmm1
-; AVX512F-NEXT: vmovapd %zmm1, %zmm0
+; AVX512F-NEXT: vpermi2pd %zmm0, %zmm1, %zmm2
+; AVX512F-NEXT: vmovapd %zmm2, %zmm0
; AVX512F-NEXT: retq
;
; AVX512F-32-LABEL: shuffle_v8f64_9832dc76:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: vmovapd {{.*#+}} zmm2 = [1,0,0,0,11,0,10,0,5,0,4,0,15,0,14,0]
-; AVX512F-32-NEXT: vpermt2pd %zmm0, %zmm2, %zmm1
-; AVX512F-32-NEXT: vmovapd %zmm1, %zmm0
+; AVX512F-32-NEXT: vpermi2pd %zmm0, %zmm1, %zmm2
+; AVX512F-32-NEXT: vmovapd %zmm2, %zmm0
; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 9, i32 8, i32 3, i32 2, i32 13, i32 12, i32 7, i32 6>
ret <8 x double> %shuffle
@@ -300,15 +300,15 @@ define <8 x double> @shuffle_v8f64_9810d
; AVX512F-LABEL: shuffle_v8f64_9810dc54:
; AVX512F: # BB#0:
; AVX512F-NEXT: vmovapd {{.*#+}} zmm2 = [1,0,9,8,5,4,13,12]
-; AVX512F-NEXT: vpermt2pd %zmm0, %zmm2, %zmm1
-; AVX512F-NEXT: vmovapd %zmm1, %zmm0
+; AVX512F-NEXT: vpermi2pd %zmm0, %zmm1, %zmm2
+; AVX512F-NEXT: vmovapd %zmm2, %zmm0
; AVX512F-NEXT: retq
;
; AVX512F-32-LABEL: shuffle_v8f64_9810dc54:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: vmovapd {{.*#+}} zmm2 = [1,0,0,0,9,0,8,0,5,0,4,0,13,0,12,0]
-; AVX512F-32-NEXT: vpermt2pd %zmm0, %zmm2, %zmm1
-; AVX512F-32-NEXT: vmovapd %zmm1, %zmm0
+; AVX512F-32-NEXT: vpermi2pd %zmm0, %zmm1, %zmm2
+; AVX512F-32-NEXT: vmovapd %zmm2, %zmm0
; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 9, i32 8, i32 1, i32 0, i32 13, i32 12, i32 5, i32 4>
ret <8 x double> %shuffle
@@ -370,15 +370,15 @@ define <8 x double> @shuffle_v8f64_08991
; AVX512F-LABEL: shuffle_v8f64_08991abb:
; AVX512F: # BB#0:
; AVX512F-NEXT: vmovapd {{.*#+}} zmm2 = [8,0,1,1,9,2,3,3]
-; AVX512F-NEXT: vpermt2pd %zmm0, %zmm2, %zmm1
-; AVX512F-NEXT: vmovapd %zmm1, %zmm0
+; AVX512F-NEXT: vpermi2pd %zmm0, %zmm1, %zmm2
+; AVX512F-NEXT: vmovapd %zmm2, %zmm0
; AVX512F-NEXT: retq
;
; AVX512F-32-LABEL: shuffle_v8f64_08991abb:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: vmovapd {{.*#+}} zmm2 = [8,0,0,0,1,0,1,0,9,0,2,0,3,0,3,0]
-; AVX512F-32-NEXT: vpermt2pd %zmm0, %zmm2, %zmm1
-; AVX512F-32-NEXT: vmovapd %zmm1, %zmm0
+; AVX512F-32-NEXT: vpermi2pd %zmm0, %zmm1, %zmm2
+; AVX512F-32-NEXT: vmovapd %zmm2, %zmm0
; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 8, i32 9, i32 9, i32 1, i32 10, i32 11, i32 11>
ret <8 x double> %shuffle
@@ -406,15 +406,15 @@ define <8 x double> @shuffle_v8f64_09ab1
; AVX512F-LABEL: shuffle_v8f64_09ab1def:
; AVX512F: # BB#0:
; AVX512F-NEXT: vmovapd {{.*#+}} zmm2 = [8,1,2,3,9,5,6,7]
-; AVX512F-NEXT: vpermt2pd %zmm0, %zmm2, %zmm1
-; AVX512F-NEXT: vmovapd %zmm1, %zmm0
+; AVX512F-NEXT: vpermi2pd %zmm0, %zmm1, %zmm2
+; AVX512F-NEXT: vmovapd %zmm2, %zmm0
; AVX512F-NEXT: retq
;
; AVX512F-32-LABEL: shuffle_v8f64_09ab1def:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: vmovapd {{.*#+}} zmm2 = [8,0,1,0,2,0,3,0,9,0,5,0,6,0,7,0]
-; AVX512F-32-NEXT: vpermt2pd %zmm0, %zmm2, %zmm1
-; AVX512F-32-NEXT: vmovapd %zmm1, %zmm0
+; AVX512F-32-NEXT: vpermi2pd %zmm0, %zmm1, %zmm2
+; AVX512F-32-NEXT: vmovapd %zmm2, %zmm0
; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 1, i32 13, i32 14, i32 15>
ret <8 x double> %shuffle
@@ -927,15 +927,15 @@ define <8 x double> @shuffle_v8f64_c348c
; AVX512F-LABEL: shuffle_v8f64_c348cda0:
; AVX512F: # BB#0:
; AVX512F-NEXT: vmovapd {{.*#+}} zmm2 = [4,11,12,0,4,5,2,8]
-; AVX512F-NEXT: vpermt2pd %zmm0, %zmm2, %zmm1
-; AVX512F-NEXT: vmovapd %zmm1, %zmm0
+; AVX512F-NEXT: vpermi2pd %zmm0, %zmm1, %zmm2
+; AVX512F-NEXT: vmovapd %zmm2, %zmm0
; AVX512F-NEXT: retq
;
; AVX512F-32-LABEL: shuffle_v8f64_c348cda0:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: vmovapd {{.*#+}} zmm2 = [4,0,11,0,12,0,0,0,4,0,5,0,2,0,8,0]
-; AVX512F-32-NEXT: vpermt2pd %zmm0, %zmm2, %zmm1
-; AVX512F-32-NEXT: vmovapd %zmm1, %zmm0
+; AVX512F-32-NEXT: vpermi2pd %zmm0, %zmm1, %zmm2
+; AVX512F-32-NEXT: vmovapd %zmm2, %zmm0
; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 12, i32 3, i32 4, i32 8, i32 12, i32 13, i32 10, i32 0>
ret <8 x double> %shuffle
@@ -1180,15 +1180,15 @@ define <8 x i64> @shuffle_v8i64_81a3c5e7
; AVX512F-LABEL: shuffle_v8i64_81a3c5e7:
; AVX512F: # BB#0:
; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,9,2,11,4,13,6,15]
-; AVX512F-NEXT: vpermt2q %zmm0, %zmm2, %zmm1
-; AVX512F-NEXT: vmovdqa64 %zmm1, %zmm0
+; AVX512F-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
+; AVX512F-NEXT: vmovdqa64 %zmm2, %zmm0
; AVX512F-NEXT: retq
;
; AVX512F-32-LABEL: shuffle_v8i64_81a3c5e7:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,0,9,0,2,0,11,0,4,0,13,0,6,0,15,0]
-; AVX512F-32-NEXT: vpermt2q %zmm0, %zmm2, %zmm1
-; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm0
+; AVX512F-32-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
+; AVX512F-32-NEXT: vmovdqa64 %zmm2, %zmm0
; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
ret <8 x i64> %shuffle
@@ -1233,15 +1233,15 @@ define <8 x i64> @shuffle_v8i64_8823cc67
; AVX512F-LABEL: shuffle_v8i64_8823cc67:
; AVX512F: # BB#0:
; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,0,10,11,4,4,14,15]
-; AVX512F-NEXT: vpermt2q %zmm0, %zmm2, %zmm1
-; AVX512F-NEXT: vmovdqa64 %zmm1, %zmm0
+; AVX512F-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
+; AVX512F-NEXT: vmovdqa64 %zmm2, %zmm0
; AVX512F-NEXT: retq
;
; AVX512F-32-LABEL: shuffle_v8i64_8823cc67:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,0,0,0,10,0,11,0,4,0,4,0,14,0,15,0]
-; AVX512F-32-NEXT: vpermt2q %zmm0, %zmm2, %zmm1
-; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm0
+; AVX512F-32-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
+; AVX512F-32-NEXT: vmovdqa64 %zmm2, %zmm0
; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 8, i32 8, i32 2, i32 3, i32 12, i32 12, i32 6, i32 7>
ret <8 x i64> %shuffle
@@ -1252,15 +1252,15 @@ define <8 x i64> @shuffle_v8i64_9832dc76
; AVX512F-LABEL: shuffle_v8i64_9832dc76:
; AVX512F: # BB#0:
; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [1,0,11,10,5,4,15,14]
-; AVX512F-NEXT: vpermt2q %zmm0, %zmm2, %zmm1
-; AVX512F-NEXT: vmovdqa64 %zmm1, %zmm0
+; AVX512F-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
+; AVX512F-NEXT: vmovdqa64 %zmm2, %zmm0
; AVX512F-NEXT: retq
;
; AVX512F-32-LABEL: shuffle_v8i64_9832dc76:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm2 = [1,0,0,0,11,0,10,0,5,0,4,0,15,0,14,0]
-; AVX512F-32-NEXT: vpermt2q %zmm0, %zmm2, %zmm1
-; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm0
+; AVX512F-32-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
+; AVX512F-32-NEXT: vmovdqa64 %zmm2, %zmm0
; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 9, i32 8, i32 3, i32 2, i32 13, i32 12, i32 7, i32 6>
ret <8 x i64> %shuffle
@@ -1271,15 +1271,15 @@ define <8 x i64> @shuffle_v8i64_9810dc54
; AVX512F-LABEL: shuffle_v8i64_9810dc54:
; AVX512F: # BB#0:
; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [1,0,9,8,5,4,13,12]
-; AVX512F-NEXT: vpermt2q %zmm0, %zmm2, %zmm1
-; AVX512F-NEXT: vmovdqa64 %zmm1, %zmm0
+; AVX512F-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
+; AVX512F-NEXT: vmovdqa64 %zmm2, %zmm0
; AVX512F-NEXT: retq
;
; AVX512F-32-LABEL: shuffle_v8i64_9810dc54:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm2 = [1,0,0,0,9,0,8,0,5,0,4,0,13,0,12,0]
-; AVX512F-32-NEXT: vpermt2q %zmm0, %zmm2, %zmm1
-; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm0
+; AVX512F-32-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
+; AVX512F-32-NEXT: vmovdqa64 %zmm2, %zmm0
; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 9, i32 8, i32 1, i32 0, i32 13, i32 12, i32 5, i32 4>
ret <8 x i64> %shuffle
@@ -1341,15 +1341,15 @@ define <8 x i64> @shuffle_v8i64_08991abb
; AVX512F-LABEL: shuffle_v8i64_08991abb:
; AVX512F: # BB#0:
; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [8,0,1,1,9,2,3,3]
-; AVX512F-NEXT: vpermt2q %zmm0, %zmm2, %zmm1
-; AVX512F-NEXT: vmovdqa64 %zmm1, %zmm0
+; AVX512F-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
+; AVX512F-NEXT: vmovdqa64 %zmm2, %zmm0
; AVX512F-NEXT: retq
;
; AVX512F-32-LABEL: shuffle_v8i64_08991abb:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm2 = [8,0,0,0,1,0,1,0,9,0,2,0,3,0,3,0]
-; AVX512F-32-NEXT: vpermt2q %zmm0, %zmm2, %zmm1
-; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm0
+; AVX512F-32-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
+; AVX512F-32-NEXT: vmovdqa64 %zmm2, %zmm0
; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 8, i32 9, i32 9, i32 1, i32 10, i32 11, i32 11>
ret <8 x i64> %shuffle
@@ -1377,15 +1377,15 @@ define <8 x i64> @shuffle_v8i64_09ab1def
; AVX512F-LABEL: shuffle_v8i64_09ab1def:
; AVX512F: # BB#0:
; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [8,1,2,3,9,5,6,7]
-; AVX512F-NEXT: vpermt2q %zmm0, %zmm2, %zmm1
-; AVX512F-NEXT: vmovdqa64 %zmm1, %zmm0
+; AVX512F-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
+; AVX512F-NEXT: vmovdqa64 %zmm2, %zmm0
; AVX512F-NEXT: retq
;
; AVX512F-32-LABEL: shuffle_v8i64_09ab1def:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm2 = [8,0,1,0,2,0,3,0,9,0,5,0,6,0,7,0]
-; AVX512F-32-NEXT: vpermt2q %zmm0, %zmm2, %zmm1
-; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm0
+; AVX512F-32-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
+; AVX512F-32-NEXT: vmovdqa64 %zmm2, %zmm0
; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 1, i32 13, i32 14, i32 15>
ret <8 x i64> %shuffle
@@ -1914,15 +1914,15 @@ define <8 x i64> @shuffle_v8i64_6caa87e5
; AVX512F-LABEL: shuffle_v8i64_6caa87e5:
; AVX512F: # BB#0:
; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [14,4,2,2,0,15,6,13]
-; AVX512F-NEXT: vpermt2q %zmm0, %zmm2, %zmm1
-; AVX512F-NEXT: vmovdqa64 %zmm1, %zmm0
+; AVX512F-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
+; AVX512F-NEXT: vmovdqa64 %zmm2, %zmm0
; AVX512F-NEXT: retq
;
; AVX512F-32-LABEL: shuffle_v8i64_6caa87e5:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm2 = [14,0,4,0,2,0,2,0,0,0,15,0,6,0,13,0]
-; AVX512F-32-NEXT: vpermt2q %zmm0, %zmm2, %zmm1
-; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm0
+; AVX512F-32-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
+; AVX512F-32-NEXT: vmovdqa64 %zmm2, %zmm0
; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 6, i32 12, i32 10, i32 10, i32 8, i32 7, i32 14, i32 5>
ret <8 x i64> %shuffle
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll?rev=287621&r1=287620&r2=287621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll Mon Nov 21 22:57:34 2016
@@ -123,18 +123,18 @@ define <8 x double> @combine_vpermt2var_
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: kmovd %eax, %k1
; X32-NEXT: vmovapd {{.*#+}} zmm2 = [7,0,6,0,5,0,4,0,3,0,2,0,1,0,0,0]
-; X32-NEXT: vpermt2pd %zmm1, %zmm2, %zmm0 {%k1} {z}
-; X32-NEXT: vmovapd {{.*#+}} zmm1 = [7,0,14,0,5,0,12,0,3,0,10,0,1,0,8,0]
-; X32-NEXT: vpermt2pd %zmm0, %zmm1, %zmm0 {%k1} {z}
+; X32-NEXT: vpermi2pd %zmm1, %zmm0, %zmm2 {%k1} {z}
+; X32-NEXT: vmovapd {{.*#+}} zmm0 = [7,0,14,0,5,0,12,0,3,0,10,0,1,0,8,0]
+; X32-NEXT: vpermi2pd %zmm2, %zmm2, %zmm0 {%k1} {z}
; X32-NEXT: retl
;
; X64-LABEL: combine_vpermt2var_8f64_identity_mask:
; X64: # BB#0:
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vmovapd {{.*#+}} zmm2 = [7,6,5,4,3,2,1,0]
-; X64-NEXT: vpermt2pd %zmm1, %zmm2, %zmm0 {%k1} {z}
-; X64-NEXT: vmovapd {{.*#+}} zmm1 = [7,14,5,12,3,10,1,8]
-; X64-NEXT: vpermt2pd %zmm0, %zmm1, %zmm0 {%k1} {z}
+; X64-NEXT: vpermi2pd %zmm1, %zmm0, %zmm2 {%k1} {z}
+; X64-NEXT: vmovapd {{.*#+}} zmm0 = [7,14,5,12,3,10,1,8]
+; X64-NEXT: vpermi2pd %zmm2, %zmm2, %zmm0 {%k1} {z}
; X64-NEXT: retq
%res0 = call <8 x double> @llvm.x86.avx512.maskz.vpermt2var.pd.512(<8 x i64> <i64 7, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>, <8 x double> %x0, <8 x double> %x1, i8 %m)
%res1 = call <8 x double> @llvm.x86.avx512.maskz.vpermt2var.pd.512(<8 x i64> <i64 7, i64 14, i64 5, i64 12, i64 3, i64 10, i64 1, i64 8>, <8 x double> %res0, <8 x double> %res0, i8 %m)
@@ -190,9 +190,9 @@ define <8 x i64> @combine_vpermt2var_8i6
; X32-LABEL: combine_vpermt2var_8i64_identity:
; X32: # BB#0:
; X32-NEXT: vmovdqa64 {{.*#+}} zmm2 = <u,u,6,0,5,0,4,0,3,0,2,0,1,0,0,0>
-; X32-NEXT: vpermt2q %zmm1, %zmm2, %zmm0
-; X32-NEXT: vmovdqa64 {{.*#+}} zmm1 = <u,u,14,0,5,0,12,0,3,0,10,0,1,0,8,0>
-; X32-NEXT: vpermt2q %zmm0, %zmm1, %zmm0
+; X32-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
+; X32-NEXT: vmovdqa64 {{.*#+}} zmm0 = <u,u,14,0,5,0,12,0,3,0,10,0,1,0,8,0>
+; X32-NEXT: vpermi2q %zmm2, %zmm2, %zmm0
; X32-NEXT: retl
;
; X64-LABEL: combine_vpermt2var_8i64_identity:
@@ -208,18 +208,18 @@ define <8 x i64> @combine_vpermt2var_8i6
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: kmovd %eax, %k1
; X32-NEXT: vmovdqa64 {{.*#+}} zmm2 = [7,0,6,0,5,0,4,0,3,0,2,0,1,0,0,0]
-; X32-NEXT: vpermt2q %zmm1, %zmm2, %zmm0 {%k1} {z}
-; X32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [7,0,14,0,5,0,12,0,3,0,10,0,1,0,8,0]
-; X32-NEXT: vpermt2q %zmm0, %zmm1, %zmm0 {%k1} {z}
+; X32-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 {%k1} {z}
+; X32-NEXT: vmovdqa64 {{.*#+}} zmm0 = [7,0,14,0,5,0,12,0,3,0,10,0,1,0,8,0]
+; X32-NEXT: vpermi2q %zmm2, %zmm2, %zmm0 {%k1} {z}
; X32-NEXT: retl
;
; X64-LABEL: combine_vpermt2var_8i64_identity_mask:
; X64: # BB#0:
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vmovdqa64 {{.*#+}} zmm2 = [7,6,5,4,3,2,1,0]
-; X64-NEXT: vpermt2q %zmm1, %zmm2, %zmm0 {%k1} {z}
-; X64-NEXT: vmovdqa64 {{.*#+}} zmm1 = [7,14,5,12,3,10,1,8]
-; X64-NEXT: vpermt2q %zmm0, %zmm1, %zmm0 {%k1} {z}
+; X64-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 {%k1} {z}
+; X64-NEXT: vmovdqa64 {{.*#+}} zmm0 = [7,14,5,12,3,10,1,8]
+; X64-NEXT: vpermi2q %zmm2, %zmm2, %zmm0 {%k1} {z}
; X64-NEXT: retq
%res0 = call <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64> <i64 7, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>, <8 x i64> %x0, <8 x i64> %x1, i8 %m)
%res1 = call <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64> <i64 7, i64 14, i64 5, i64 12, i64 3, i64 10, i64 1, i64 8>, <8 x i64> %res0, <8 x i64> %res0, i8 %m)
@@ -243,18 +243,18 @@ define <16 x float> @combine_vpermt2var_
; X32: # BB#0:
; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X32-NEXT: vmovaps {{.*#+}} zmm2 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
-; X32-NEXT: vpermt2ps %zmm1, %zmm2, %zmm0 {%k1} {z}
-; X32-NEXT: vmovaps {{.*#+}} zmm1 = [15,30,13,28,11,26,9,24,7,22,5,20,3,18,1,16]
-; X32-NEXT: vpermt2ps %zmm0, %zmm1, %zmm0 {%k1} {z}
+; X32-NEXT: vpermi2ps %zmm1, %zmm0, %zmm2 {%k1} {z}
+; X32-NEXT: vmovaps {{.*#+}} zmm0 = [15,30,13,28,11,26,9,24,7,22,5,20,3,18,1,16]
+; X32-NEXT: vpermi2ps %zmm2, %zmm2, %zmm0 {%k1} {z}
; X32-NEXT: retl
;
; X64-LABEL: combine_vpermt2var_16f32_identity_mask:
; X64: # BB#0:
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vmovaps {{.*#+}} zmm2 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
-; X64-NEXT: vpermt2ps %zmm1, %zmm2, %zmm0 {%k1} {z}
-; X64-NEXT: vmovaps {{.*#+}} zmm1 = [15,30,13,28,11,26,9,24,7,22,5,20,3,18,1,16]
-; X64-NEXT: vpermt2ps %zmm0, %zmm1, %zmm0 {%k1} {z}
+; X64-NEXT: vpermi2ps %zmm1, %zmm0, %zmm2 {%k1} {z}
+; X64-NEXT: vmovaps {{.*#+}} zmm0 = [15,30,13,28,11,26,9,24,7,22,5,20,3,18,1,16]
+; X64-NEXT: vpermi2ps %zmm2, %zmm2, %zmm0 {%k1} {z}
; X64-NEXT: retq
%res0 = call <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>, <16 x float> %x0, <16 x float> %x1, i16 %m)
%res1 = call <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32> <i32 15, i32 30, i32 13, i32 28, i32 11, i32 26, i32 9, i32 24, i32 7, i32 22, i32 5, i32 20, i32 3, i32 18, i32 1, i32 16>, <16 x float> %res0, <16 x float> %res0, i16 %m)
@@ -280,17 +280,17 @@ define <16 x float> @combine_vpermt2var_
; X32-LABEL: combine_vpermt2var_16f32_vmovddup_load:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: vmovaps (%eax), %zmm1
-; X32-NEXT: vmovaps {{.*#+}} zmm2 = [0,1,0,1,4,5,4,5,8,9,8,9,12,13,12,13]
-; X32-NEXT: vpermt2ps %zmm0, %zmm2, %zmm1
+; X32-NEXT: vmovaps (%eax), %zmm2
+; X32-NEXT: vmovaps {{.*#+}} zmm1 = [0,1,0,1,4,5,4,5,8,9,8,9,12,13,12,13]
+; X32-NEXT: vpermi2ps %zmm0, %zmm2, %zmm1
; X32-NEXT: vmovaps %zmm1, %zmm0
; X32-NEXT: retl
;
; X64-LABEL: combine_vpermt2var_16f32_vmovddup_load:
; X64: # BB#0:
-; X64-NEXT: vmovaps (%rdi), %zmm1
-; X64-NEXT: vmovaps {{.*#+}} zmm2 = [0,1,0,1,4,5,4,5,8,9,8,9,12,13,12,13]
-; X64-NEXT: vpermt2ps %zmm0, %zmm2, %zmm1
+; X64-NEXT: vmovaps (%rdi), %zmm2
+; X64-NEXT: vmovaps {{.*#+}} zmm1 = [0,1,0,1,4,5,4,5,8,9,8,9,12,13,12,13]
+; X64-NEXT: vpermi2ps %zmm0, %zmm2, %zmm1
; X64-NEXT: vmovaps %zmm1, %zmm0
; X64-NEXT: retq
%x0 = load <16 x float>, <16 x float> *%p0
@@ -319,18 +319,18 @@ define <16 x float> @combine_vpermt2var_
; X32: # BB#0:
; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: vmovaps (%eax), %zmm1
-; X32-NEXT: vmovaps {{.*#+}} zmm2 = [0,1,0,1,4,5,4,5,8,9,8,9,12,13,12,13]
-; X32-NEXT: vpermt2ps %zmm0, %zmm2, %zmm1 {%k1} {z}
+; X32-NEXT: vmovaps (%eax), %zmm2
+; X32-NEXT: vmovaps {{.*#+}} zmm1 = [0,1,0,1,4,5,4,5,8,9,8,9,12,13,12,13]
+; X32-NEXT: vpermi2ps %zmm0, %zmm2, %zmm1 {%k1} {z}
; X32-NEXT: vmovaps %zmm1, %zmm0
; X32-NEXT: retl
;
; X64-LABEL: combine_vpermt2var_16f32_vmovddup_mask_load:
; X64: # BB#0:
; X64-NEXT: kmovw %esi, %k1
-; X64-NEXT: vmovaps (%rdi), %zmm1
-; X64-NEXT: vmovaps {{.*#+}} zmm2 = [0,1,0,1,4,5,4,5,8,9,8,9,12,13,12,13]
-; X64-NEXT: vpermt2ps %zmm0, %zmm2, %zmm1 {%k1} {z}
+; X64-NEXT: vmovaps (%rdi), %zmm2
+; X64-NEXT: vmovaps {{.*#+}} zmm1 = [0,1,0,1,4,5,4,5,8,9,8,9,12,13,12,13]
+; X64-NEXT: vpermi2ps %zmm0, %zmm2, %zmm1 {%k1} {z}
; X64-NEXT: vmovaps %zmm1, %zmm0
; X64-NEXT: retq
%x0 = load <16 x float>, <16 x float> *%p0
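In the two vmovddup_load hunks above, the loaded table operand is the one that does not move: the load now lands in zmm2 and the index constant in zmm1, so the vpermi2ps form applies at the same instruction count. A reduced sketch of the unmasked case (function and value names are illustrative; the intrinsic signature matches the calls in this file):

  declare <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32>, <16 x float>, <16 x float>, i16)

  define <16 x float> @demo_load_commute(<16 x float>* %p, <16 x float> %y) {
    ; Expected X64 output per the checks above (modulo register allocation):
    ;   vmovaps (%rdi), %zmm2
    ;   vmovaps {{.*#+}} zmm1 = [0,1,0,1,4,5,4,5,8,9,8,9,12,13,12,13]
    ;   vpermi2ps %zmm0, %zmm2, %zmm1
    ;   vmovaps %zmm1, %zmm0
    %x = load <16 x float>, <16 x float>* %p
    %r = call <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5, i32 8, i32 9, i32 8, i32 9, i32 12, i32 13, i32 12, i32 13>, <16 x float> %x, <16 x float> %y, i16 -1)
    ret <16 x float> %r
  }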
@@ -521,18 +521,18 @@ define <16 x i32> @combine_vpermt2var_16
; X32: # BB#0:
; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X32-NEXT: vmovdqa32 {{.*#+}} zmm2 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
-; X32-NEXT: vpermt2d %zmm1, %zmm2, %zmm0 {%k1} {z}
-; X32-NEXT: vmovdqa32 {{.*#+}} zmm1 = [15,30,13,28,11,26,9,24,7,22,5,20,3,18,1,16]
-; X32-NEXT: vpermt2d %zmm0, %zmm1, %zmm0 {%k1} {z}
+; X32-NEXT: vpermi2d %zmm1, %zmm0, %zmm2 {%k1} {z}
+; X32-NEXT: vmovdqa32 {{.*#+}} zmm0 = [15,30,13,28,11,26,9,24,7,22,5,20,3,18,1,16]
+; X32-NEXT: vpermi2d %zmm2, %zmm2, %zmm0 {%k1} {z}
; X32-NEXT: retl
;
; X64-LABEL: combine_vpermt2var_16i32_identity_mask:
; X64: # BB#0:
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vmovdqa32 {{.*#+}} zmm2 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
-; X64-NEXT: vpermt2d %zmm1, %zmm2, %zmm0 {%k1} {z}
-; X64-NEXT: vmovdqa32 {{.*#+}} zmm1 = [15,30,13,28,11,26,9,24,7,22,5,20,3,18,1,16]
-; X64-NEXT: vpermt2d %zmm0, %zmm1, %zmm0 {%k1} {z}
+; X64-NEXT: vpermi2d %zmm1, %zmm0, %zmm2 {%k1} {z}
+; X64-NEXT: vmovdqa32 {{.*#+}} zmm0 = [15,30,13,28,11,26,9,24,7,22,5,20,3,18,1,16]
+; X64-NEXT: vpermi2d %zmm2, %zmm2, %zmm0 {%k1} {z}
; X64-NEXT: retq
%res0 = call <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>, <16 x i32> %x0, <16 x i32> %x1, i16 %m)
%res1 = call <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32> <i32 15, i32 30, i32 13, i32 28, i32 11, i32 26, i32 9, i32 24, i32 7, i32 22, i32 5, i32 20, i32 3, i32 18, i32 1, i32 16>, <16 x i32> %res0, <16 x i32> %res0, i16 %m)
@@ -556,18 +556,18 @@ define <32 x i16> @combine_vpermt2var_32
; X32: # BB#0:
; X32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; X32-NEXT: vmovdqu16 {{.*#+}} zmm2 = [31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
-; X32-NEXT: vpermt2w %zmm1, %zmm2, %zmm0 {%k1} {z}
-; X32-NEXT: vmovdqu16 {{.*#+}} zmm1 = [63,30,61,28,59,26,57,24,55,22,53,20,51,18,49,16,47,46,13,44,11,42,9,40,7,38,5,36,3,34,1,32]
-; X32-NEXT: vpermt2w %zmm0, %zmm1, %zmm0 {%k1} {z}
+; X32-NEXT: vpermi2w %zmm1, %zmm0, %zmm2 {%k1} {z}
+; X32-NEXT: vmovdqu16 {{.*#+}} zmm0 = [63,30,61,28,59,26,57,24,55,22,53,20,51,18,49,16,47,46,13,44,11,42,9,40,7,38,5,36,3,34,1,32]
+; X32-NEXT: vpermi2w %zmm2, %zmm2, %zmm0 {%k1} {z}
; X32-NEXT: retl
;
; X64-LABEL: combine_vpermt2var_32i16_identity_mask:
; X64: # BB#0:
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vmovdqu16 {{.*#+}} zmm2 = [31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
-; X64-NEXT: vpermt2w %zmm1, %zmm2, %zmm0 {%k1} {z}
-; X64-NEXT: vmovdqu16 {{.*#+}} zmm1 = [63,30,61,28,59,26,57,24,55,22,53,20,51,18,49,16,47,46,13,44,11,42,9,40,7,38,5,36,3,34,1,32]
-; X64-NEXT: vpermt2w %zmm0, %zmm1, %zmm0 {%k1} {z}
+; X64-NEXT: vpermi2w %zmm1, %zmm0, %zmm2 {%k1} {z}
+; X64-NEXT: vmovdqu16 {{.*#+}} zmm0 = [63,30,61,28,59,26,57,24,55,22,53,20,51,18,49,16,47,46,13,44,11,42,9,40,7,38,5,36,3,34,1,32]
+; X64-NEXT: vpermi2w %zmm2, %zmm2, %zmm0 {%k1} {z}
; X64-NEXT: retq
%res0 = call <32 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.512(<32 x i16> <i16 31, i16 30, i16 29, i16 28, i16 27, i16 26, i16 25, i16 24, i16 23, i16 22, i16 21, i16 20, i16 19, i16 18, i16 17, i16 16, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, <32 x i16> %x0, <32 x i16> %x1, i32 %m)
%res1 = call <32 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.512(<32 x i16> <i16 63, i16 30, i16 61, i16 28, i16 59, i16 26, i16 57, i16 24, i16 55, i16 22, i16 53, i16 20, i16 51, i16 18, i16 49, i16 16, i16 47, i16 46, i16 13, i16 44, i16 11, i16 42, i16 9, i16 40, i16 7, i16 38, i16 5, i16 36, i16 3, i16 34, i16 1, i16 32>, <32 x i16> %res0, <32 x i16> %res0, i32 %m)
@@ -938,9 +938,9 @@ define <8 x i64> @combine_vpermt2var_8i6
; X32-LABEL: combine_vpermt2var_8i64_as_vpermq:
; X32: # BB#0:
; X32-NEXT: vmovdqa64 {{.*#+}} zmm2 = [3,0,2,0,1,0,0,0,7,0,6,0,5,0,4,0]
-; X32-NEXT: vpermt2q %zmm1, %zmm2, %zmm0
-; X32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [12,0,5,0,14,0,7,0,8,0,1,0,10,0,3,0]
-; X32-NEXT: vpermt2q %zmm0, %zmm1, %zmm0
+; X32-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
+; X32-NEXT: vmovdqa64 {{.*#+}} zmm0 = [12,0,5,0,14,0,7,0,8,0,1,0,10,0,3,0]
+; X32-NEXT: vpermi2q %zmm2, %zmm2, %zmm0
; X32-NEXT: retl
;
; X64-LABEL: combine_vpermt2var_8i64_as_vpermq:
@@ -1008,15 +1008,15 @@ define <8 x double> @combine_vpermi2var_
; X32-LABEL: combine_vpermi2var_vpermt2var_8f64_as_vperm2:
; X32: # BB#0:
; X32-NEXT: vmovapd {{.*#+}} zmm2 = [4,0,14,0,3,0,12,0,7,0,8,0,0,0,15,0]
-; X32-NEXT: vpermt2pd %zmm0, %zmm2, %zmm1
-; X32-NEXT: vmovapd %zmm1, %zmm0
+; X32-NEXT: vpermi2pd %zmm0, %zmm1, %zmm2
+; X32-NEXT: vmovapd %zmm2, %zmm0
; X32-NEXT: retl
;
; X64-LABEL: combine_vpermi2var_vpermt2var_8f64_as_vperm2:
; X64: # BB#0:
; X64-NEXT: vmovapd {{.*#+}} zmm2 = [4,14,3,12,7,8,0,15]
-; X64-NEXT: vpermt2pd %zmm0, %zmm2, %zmm1
-; X64-NEXT: vmovapd %zmm1, %zmm0
+; X64-NEXT: vpermi2pd %zmm0, %zmm1, %zmm2
+; X64-NEXT: vmovapd %zmm2, %zmm0
; X64-NEXT: retq
%res0 = call <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double> %x0, <8 x i64> <i64 15, i64 0, i64 8, i64 7, i64 12, i64 6, i64 11, i64 4>, <8 x double> %x1, i8 -1)
%res1 = call <8 x double> @llvm.x86.avx512.maskz.vpermt2var.pd.512(<8 x i64> <i64 12, i64 5, i64 14, i64 7, i64 8, i64 1, i64 10, i64 3>, <8 x double> %res0, <8 x double> %res0, i8 -1)
@@ -1044,15 +1044,15 @@ define <32 x i16> @combine_vpermt2var_vp
; X32-LABEL: combine_vpermt2var_vpermi2var_32i16_as_permw:
; X32: # BB#0:
; X32-NEXT: vmovdqu16 {{.*#+}} zmm2 = [17,39,19,38,21,37,23,36,25,35,27,34,29,33,31,32,1,47,3,46,5,45,7,44,9,43,11,42,13,41,15,40]
-; X32-NEXT: vpermt2w %zmm0, %zmm2, %zmm1
-; X32-NEXT: vmovdqa64 %zmm1, %zmm0
+; X32-NEXT: vpermi2w %zmm0, %zmm1, %zmm2
+; X32-NEXT: vmovdqa64 %zmm2, %zmm0
; X32-NEXT: retl
;
; X64-LABEL: combine_vpermt2var_vpermi2var_32i16_as_permw:
; X64: # BB#0:
; X64-NEXT: vmovdqu16 {{.*#+}} zmm2 = [17,39,19,38,21,37,23,36,25,35,27,34,29,33,31,32,1,47,3,46,5,45,7,44,9,43,11,42,13,41,15,40]
-; X64-NEXT: vpermt2w %zmm0, %zmm2, %zmm1
-; X64-NEXT: vmovdqa64 %zmm1, %zmm0
+; X64-NEXT: vpermi2w %zmm0, %zmm1, %zmm2
+; X64-NEXT: vmovdqa64 %zmm2, %zmm0
; X64-NEXT: retq
%res0 = call <32 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.512(<32 x i16> <i16 0, i16 63, i16 1, i16 61, i16 2, i16 59, i16 3, i16 57, i16 4, i16 55, i16 5, i16 53, i16 6, i16 51, i16 7, i16 49, i16 8, i16 47, i16 9, i16 45, i16 10, i16 43, i16 11, i16 41, i16 12, i16 39, i16 13, i16 37, i16 14, i16 35, i16 15, i16 33>, <32 x i16> %x0, <32 x i16> %x1, i32 -1)
%res1 = call <32 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.512(<32 x i16> %res0, <32 x i16> <i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0, i16 31, i16 30, i16 29, i16 28, i16 27, i16 26, i16 25, i16 24, i16 23, i16 22, i16 21, i16 20, i16 19, i16 18, i16 17, i16 16>, <32 x i16> %res0, i32 -1)
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512bwvl.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512bwvl.ll?rev=287621&r1=287620&r2=287621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512bwvl.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512bwvl.ll Mon Nov 21 22:57:34 2016
@@ -23,18 +23,18 @@ define <16 x i16> @combine_vpermt2var_16
; X32: # BB#0:
; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X32-NEXT: vmovdqu16 {{.*#+}} ymm2 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
-; X32-NEXT: vpermt2w %ymm1, %ymm2, %ymm0 {%k1} {z}
-; X32-NEXT: vmovdqu16 {{.*#+}} ymm1 = [15,30,13,28,11,26,9,24,7,22,5,20,3,18,1,16]
-; X32-NEXT: vpermt2w %ymm0, %ymm1, %ymm0 {%k1} {z}
+; X32-NEXT: vpermi2w %ymm1, %ymm0, %ymm2 {%k1} {z}
+; X32-NEXT: vmovdqu16 {{.*#+}} ymm0 = [15,30,13,28,11,26,9,24,7,22,5,20,3,18,1,16]
+; X32-NEXT: vpermi2w %ymm2, %ymm2, %ymm0 {%k1} {z}
; X32-NEXT: retl
;
; X64-LABEL: combine_vpermt2var_16i16_identity_mask:
; X64: # BB#0:
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vmovdqu16 {{.*#+}} ymm2 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
-; X64-NEXT: vpermt2w %ymm1, %ymm2, %ymm0 {%k1} {z}
-; X64-NEXT: vmovdqu16 {{.*#+}} ymm1 = [15,30,13,28,11,26,9,24,7,22,5,20,3,18,1,16]
-; X64-NEXT: vpermt2w %ymm0, %ymm1, %ymm0 {%k1} {z}
+; X64-NEXT: vpermi2w %ymm1, %ymm0, %ymm2 {%k1} {z}
+; X64-NEXT: vmovdqu16 {{.*#+}} ymm0 = [15,30,13,28,11,26,9,24,7,22,5,20,3,18,1,16]
+; X64-NEXT: vpermi2w %ymm2, %ymm2, %ymm0 {%k1} {z}
; X64-NEXT: retq
%res0 = call <16 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.256(<16 x i16> <i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, <16 x i16> %x0, <16 x i16> %x1, i16 %m)
%res1 = call <16 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.256(<16 x i16> <i16 15, i16 30, i16 13, i16 28, i16 11, i16 26, i16 9, i16 24, i16 7, i16 22, i16 5, i16 20, i16 3, i16 18, i16 1, i16 16>, <16 x i16> %res0, <16 x i16> %res0, i16 %m)
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512vbmi.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512vbmi.ll?rev=287621&r1=287620&r2=287621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512vbmi.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512vbmi.ll Mon Nov 21 22:57:34 2016
@@ -38,18 +38,18 @@ define <16 x i8> @combine_vpermt2var_16i
; X32: # BB#0:
; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X32-NEXT: vmovdqu8 {{.*#+}} xmm2 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
-; X32-NEXT: vpermt2b %xmm1, %xmm2, %xmm0 {%k1} {z}
-; X32-NEXT: vmovdqu8 {{.*#+}} xmm1 = [15,30,13,28,11,26,9,24,7,22,5,20,3,18,1,16]
-; X32-NEXT: vpermt2b %xmm0, %xmm1, %xmm0 {%k1} {z}
+; X32-NEXT: vpermi2b %xmm1, %xmm0, %xmm2 {%k1} {z}
+; X32-NEXT: vmovdqu8 {{.*#+}} xmm0 = [15,30,13,28,11,26,9,24,7,22,5,20,3,18,1,16]
+; X32-NEXT: vpermi2b %xmm2, %xmm2, %xmm0 {%k1} {z}
; X32-NEXT: retl
;
; X64-LABEL: combine_vpermt2var_16i8_identity_mask:
; X64: # BB#0:
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vmovdqu8 {{.*#+}} xmm2 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
-; X64-NEXT: vpermt2b %xmm1, %xmm2, %xmm0 {%k1} {z}
-; X64-NEXT: vmovdqu8 {{.*#+}} xmm1 = [15,30,13,28,11,26,9,24,7,22,5,20,3,18,1,16]
-; X64-NEXT: vpermt2b %xmm0, %xmm1, %xmm0 {%k1} {z}
+; X64-NEXT: vpermi2b %xmm1, %xmm0, %xmm2 {%k1} {z}
+; X64-NEXT: vmovdqu8 {{.*#+}} xmm0 = [15,30,13,28,11,26,9,24,7,22,5,20,3,18,1,16]
+; X64-NEXT: vpermi2b %xmm2, %xmm2, %xmm0 {%k1} {z}
; X64-NEXT: retq
%res0 = call <16 x i8> @llvm.x86.avx512.maskz.vpermt2var.qi.128(<16 x i8> <i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, <16 x i8> %x0, <16 x i8> %x1, i16 %m)
%res1 = call <16 x i8> @llvm.x86.avx512.maskz.vpermt2var.qi.128(<16 x i8> <i8 15, i8 30, i8 13, i8 28, i8 11, i8 26, i8 9, i8 24, i8 7, i8 22, i8 5, i8 20, i8 3, i8 18, i8 1, i8 16>, <16 x i8> %res0, <16 x i8> %res0, i16 %m)
@@ -109,8 +109,7 @@ define <16 x i8> @combine_vpermt2var_vpe
; X32-NEXT: vmovdqu8 {{.*#+}} xmm2 = [0,31,2,29,4,27,6,25,8,23,10,21,12,19,14,17]
; X32-NEXT: vpermi2b %xmm1, %xmm0, %xmm2
; X32-NEXT: vmovdqu8 {{.*#+}} xmm0 = [0,17,2,18,4,19,6,21,8,23,10,25,12,27,14,29]
-; X32-NEXT: vpermt2b %xmm2, %xmm0, %xmm2
-; X32-NEXT: vmovdqa64 %xmm2, %xmm0
+; X32-NEXT: vpermi2b %xmm2, %xmm2, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: combine_vpermt2var_vpermi2var_16i8_as_vperm2:
@@ -118,8 +117,7 @@ define <16 x i8> @combine_vpermt2var_vpe
; X64-NEXT: vmovdqu8 {{.*#+}} xmm2 = [0,31,2,29,4,27,6,25,8,23,10,21,12,19,14,17]
; X64-NEXT: vpermi2b %xmm1, %xmm0, %xmm2
; X64-NEXT: vmovdqu8 {{.*#+}} xmm0 = [0,17,2,18,4,19,6,21,8,23,10,25,12,27,14,29]
-; X64-NEXT: vpermt2b %xmm2, %xmm0, %xmm2
-; X64-NEXT: vmovdqa64 %xmm2, %xmm0
+; X64-NEXT: vpermi2b %xmm2, %xmm2, %xmm0
; X64-NEXT: retq
%res0 = call <16 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.128(<16 x i8> %x0, <16 x i8> <i8 0, i8 31, i8 2, i8 29, i8 4, i8 27, i8 6, i8 25, i8 8, i8 23, i8 10, i8 21, i8 12, i8 19, i8 14, i8 17>, <16 x i8> %x1, i16 -1)
%res1 = call <16 x i8> @llvm.x86.avx512.maskz.vpermt2var.qi.128(<16 x i8> <i8 0, i8 17, i8 2, i8 18, i8 4, i8 19, i8 6, i8 21, i8 8, i8 23, i8 10, i8 25, i8 12, i8 27, i8 14, i8 29>, <16 x i8> %res0, <16 x i8> %res0, i16 -1)
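This vbmi hunk is one that actually drops an instruction: both table operands of the second permute are %res0, so the commuted vpermi2b computes the result directly in xmm0 and the trailing vmovdqa64 copy disappears. A hedged sketch of the operand correspondence behind that rewrite (function name illustrative; intrinsic signatures match the calls above):

  declare <16 x i8> @llvm.x86.avx512.maskz.vpermt2var.qi.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
  declare <16 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)

  define <16 x i8> @demo_equiv(<16 x i8> %tbl0, <16 x i8> %tbl1, <16 x i8> %idx) {
    ; t-form takes (index, table0, table1); i-form takes (table0, index, table1).
    ; With an all-ones mask both calls select the same bytes from the 32-byte
    ; concatenation {tbl1:tbl0}, so %t and %i hold identical values.
    %t = call <16 x i8> @llvm.x86.avx512.maskz.vpermt2var.qi.128(<16 x i8> %idx, <16 x i8> %tbl0, <16 x i8> %tbl1, i16 -1)
    %i = call <16 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.128(<16 x i8> %tbl0, <16 x i8> %idx, <16 x i8> %tbl1, i16 -1)
    ret <16 x i8> %i
  }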
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-v1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-v1.ll?rev=287621&r1=287620&r2=287621&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-v1.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-v1.ll Mon Nov 21 22:57:34 2016
@@ -105,8 +105,8 @@ define <16 x i1> @shuf16i1_3_6_22_12_3_7
; AVX512F-NEXT: vmovdqa32 %zmm0, %zmm1 {%k2} {z}
; AVX512F-NEXT: vmovdqa32 %zmm0, %zmm2 {%k1} {z}
; AVX512F-NEXT: vmovdqa32 {{.*#+}} zmm3 = [3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0]
-; AVX512F-NEXT: vpermt2d %zmm1, %zmm3, %zmm2
-; AVX512F-NEXT: vpslld $31, %zmm2, %zmm1
+; AVX512F-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
+; AVX512F-NEXT: vpslld $31, %zmm3, %zmm1
; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k1
; AVX512F-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
@@ -119,8 +119,8 @@ define <16 x i1> @shuf16i1_3_6_22_12_3_7
; VL_BW_DQ-NEXT: vpmovm2d %k1, %zmm0
; VL_BW_DQ-NEXT: vpmovm2d %k0, %zmm1
; VL_BW_DQ-NEXT: vmovdqa32 {{.*#+}} zmm2 = [3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0]
-; VL_BW_DQ-NEXT: vpermt2d %zmm0, %zmm2, %zmm1
-; VL_BW_DQ-NEXT: vpmovd2m %zmm1, %k0
+; VL_BW_DQ-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
+; VL_BW_DQ-NEXT: vpmovd2m %zmm2, %k0
; VL_BW_DQ-NEXT: vpmovm2b %k0, %xmm0
; VL_BW_DQ-NEXT: retq
%a2 = icmp eq <16 x i32> %a, %a1
@@ -189,8 +189,8 @@ define i8 @shuf8i1_10_2_9_u_3_u_2_u(i8 %
; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: vpxord %zmm1, %zmm1, %zmm1
; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = <8,2,10,u,3,u,2,u>
-; AVX512F-NEXT: vpermt2q %zmm1, %zmm2, %zmm0
-; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0
+; AVX512F-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
+; AVX512F-NEXT: vpsllq $63, %zmm2, %zmm0
; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: retq
@@ -201,8 +201,8 @@ define i8 @shuf8i1_10_2_9_u_3_u_2_u(i8 %
; VL_BW_DQ-NEXT: vpmovm2q %k0, %zmm0
; VL_BW_DQ-NEXT: vpxord %zmm1, %zmm1, %zmm1
; VL_BW_DQ-NEXT: vmovdqa64 {{.*#+}} zmm2 = <8,2,10,u,3,u,2,u>
-; VL_BW_DQ-NEXT: vpermt2q %zmm1, %zmm2, %zmm0
-; VL_BW_DQ-NEXT: vpmovq2m %zmm0, %k0
+; VL_BW_DQ-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
+; VL_BW_DQ-NEXT: vpmovq2m %zmm2, %k0
; VL_BW_DQ-NEXT: kmovb %k0, %eax
; VL_BW_DQ-NEXT: retq
%b = bitcast i8 %a to <8 x i1>
@@ -245,8 +245,8 @@ define i8 @shuf8i1_9_6_1_0_3_7_7_0(i8 %a
; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: vpxord %zmm1, %zmm1, %zmm1
; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [8,6,1,0,3,7,7,0]
-; AVX512F-NEXT: vpermt2q %zmm1, %zmm2, %zmm0
-; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0
+; AVX512F-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
+; AVX512F-NEXT: vpsllq $63, %zmm2, %zmm0
; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: retq
@@ -257,8 +257,8 @@ define i8 @shuf8i1_9_6_1_0_3_7_7_0(i8 %a
; VL_BW_DQ-NEXT: vpmovm2q %k0, %zmm0
; VL_BW_DQ-NEXT: vpxord %zmm1, %zmm1, %zmm1
; VL_BW_DQ-NEXT: vmovdqa64 {{.*#+}} zmm2 = [8,6,1,0,3,7,7,0]
-; VL_BW_DQ-NEXT: vpermt2q %zmm1, %zmm2, %zmm0
-; VL_BW_DQ-NEXT: vpmovq2m %zmm0, %k0
+; VL_BW_DQ-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
+; VL_BW_DQ-NEXT: vpmovq2m %zmm2, %k0
; VL_BW_DQ-NEXT: kmovb %k0, %eax
; VL_BW_DQ-NEXT: retq
%b = bitcast i8 %a to <8 x i1>
@@ -307,8 +307,8 @@ define i8 @shuf8i1__9_6_1_10_3_7_7_1(i8
; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm1 {%k2} {z}
; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [9,6,1,0,3,7,7,1]
-; AVX512F-NEXT: vpermt2q %zmm0, %zmm2, %zmm1
-; AVX512F-NEXT: vpsllq $63, %zmm1, %zmm0
+; AVX512F-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
+; AVX512F-NEXT: vpsllq $63, %zmm2, %zmm0
; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: retq
@@ -321,8 +321,8 @@ define i8 @shuf8i1__9_6_1_10_3_7_7_1(i8
; VL_BW_DQ-NEXT: vpmovm2q %k1, %zmm0
; VL_BW_DQ-NEXT: vpmovm2q %k0, %zmm1
; VL_BW_DQ-NEXT: vmovdqa64 {{.*#+}} zmm2 = [9,6,1,0,3,7,7,1]
-; VL_BW_DQ-NEXT: vpermt2q %zmm1, %zmm2, %zmm0
-; VL_BW_DQ-NEXT: vpmovq2m %zmm0, %k0
+; VL_BW_DQ-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
+; VL_BW_DQ-NEXT: vpmovq2m %zmm2, %k0
; VL_BW_DQ-NEXT: kmovb %k0, %eax
; VL_BW_DQ-NEXT: retq
%b = bitcast i8 %a to <8 x i1>
@@ -340,8 +340,8 @@ define i8 @shuf8i1_9_6_1_10_3_7_7_0_all_
; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1} {z}
; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [9,1,2,3,4,5,6,7]
-; AVX512F-NEXT: vpermt2q %zmm1, %zmm2, %zmm0
-; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0
+; AVX512F-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
+; AVX512F-NEXT: vpsllq $63, %zmm2, %zmm0
; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: retq