[llvm] r277327 - [AVX-512] Fix duplicate column in AVX512 execution dependency table that was preventing VMOVDQU32/VMOVDQA32 from being recognized. Fix a bug in the code that stops the execution dependency fix from turning operations on 32-bit integer element types into operations on 64-bit integer element types.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 1 00:55:34 PDT 2016
Author: ctopper
Date: Mon Aug 1 02:55:33 2016
New Revision: 277327
URL: http://llvm.org/viewvc/llvm-project?rev=277327&view=rev
Log:
[AVX-512] Fix duplicate column in AVX512 execution dependency table that was preventing VMOVDQU32/VMOVDQA32 from being recognized. Fix a bug in the code that stops the execution dependency fix from turning operations on 32-bit integer element types into operations on 64-bit integer element types.
Modified:
llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
llvm/trunk/test/CodeGen/X86/avx512-arith.ll
llvm/trunk/test/CodeGen/X86/avx512-bugfix-25270.ll
llvm/trunk/test/CodeGen/X86/avx512-calling-conv.ll
llvm/trunk/test/CodeGen/X86/avx512-cvt.ll
llvm/trunk/test/CodeGen/X86/avx512-extract-subvector.ll
llvm/trunk/test/CodeGen/X86/avx512-logic.ll
llvm/trunk/test/CodeGen/X86/avx512-mov.ll
llvm/trunk/test/CodeGen/X86/avx512-vec-cmp.ll
llvm/trunk/test/CodeGen/X86/avx512vl-mov.ll
llvm/trunk/test/CodeGen/X86/fma_patterns.ll
llvm/trunk/test/CodeGen/X86/fma_patterns_wide.ll
llvm/trunk/test/CodeGen/X86/merge-consecutive-loads-512.ll
llvm/trunk/test/CodeGen/X86/nontemporal-loads.ll
llvm/trunk/test/CodeGen/X86/vector-lzcnt-128.ll
llvm/trunk/test/CodeGen/X86/vector-lzcnt-256.ll
llvm/trunk/test/CodeGen/X86/vector-tzcnt-128.ll
llvm/trunk/test/CodeGen/X86/vector-tzcnt-256.ll
Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=277327&r1=277326&r2=277327&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Mon Aug 1 02:55:33 2016
@@ -7318,22 +7318,22 @@ static const uint16_t ReplaceableInstrsA
static const uint16_t ReplaceableInstrsAVX512[][4] = {
// Two integer columns for 64-bit and 32-bit elements.
- //PackedSingle PackedDouble PackedInt PackedInt
- { X86::VMOVAPSZ128mr, X86::VMOVAPDZ128mr, X86::VMOVDQA64Z128mr, X86::VMOVDQA64Z128mr },
- { X86::VMOVAPSZ128rm, X86::VMOVAPDZ128rm, X86::VMOVDQA64Z128rm, X86::VMOVDQA64Z128rm },
- { X86::VMOVAPSZ128rr, X86::VMOVAPDZ128rr, X86::VMOVDQA64Z128rr, X86::VMOVDQA64Z128rr },
- { X86::VMOVUPSZ128mr, X86::VMOVUPDZ128mr, X86::VMOVDQU64Z128mr, X86::VMOVDQU64Z128mr },
- { X86::VMOVUPSZ128rm, X86::VMOVUPDZ128rm, X86::VMOVDQU64Z128rm, X86::VMOVDQU64Z128rm },
- { X86::VMOVAPSZ256mr, X86::VMOVAPDZ256mr, X86::VMOVDQA64Z256mr, X86::VMOVDQA64Z256mr },
- { X86::VMOVAPSZ256rm, X86::VMOVAPDZ256rm, X86::VMOVDQA64Z256rm, X86::VMOVDQA64Z256rm },
- { X86::VMOVAPSZ256rr, X86::VMOVAPDZ256rr, X86::VMOVDQA64Z256rr, X86::VMOVDQA64Z256rr },
- { X86::VMOVUPSZ256mr, X86::VMOVUPDZ256mr, X86::VMOVDQU64Z256mr, X86::VMOVDQU64Z256mr },
- { X86::VMOVUPSZ256rm, X86::VMOVUPDZ256rm, X86::VMOVDQU64Z256rm, X86::VMOVDQU64Z256rm },
- { X86::VMOVAPSZmr, X86::VMOVAPDZmr, X86::VMOVDQA64Zmr, X86::VMOVDQA64Zmr },
- { X86::VMOVAPSZrm, X86::VMOVAPDZrm, X86::VMOVDQA64Zrm, X86::VMOVDQA64Zrm },
- { X86::VMOVAPSZrr, X86::VMOVAPDZrr, X86::VMOVDQA64Zrr, X86::VMOVDQA64Zrr },
- { X86::VMOVUPSZmr, X86::VMOVUPDZmr, X86::VMOVDQU64Zmr, X86::VMOVDQU64Zmr },
- { X86::VMOVUPSZrm, X86::VMOVUPDZrm, X86::VMOVDQU64Zrm, X86::VMOVDQU64Zrm },
+ //PackedSingle PackedDouble PackedInt PackedInt
+ { X86::VMOVAPSZ128mr, X86::VMOVAPDZ128mr, X86::VMOVDQA64Z128mr, X86::VMOVDQA32Z128mr },
+ { X86::VMOVAPSZ128rm, X86::VMOVAPDZ128rm, X86::VMOVDQA64Z128rm, X86::VMOVDQA32Z128rm },
+ { X86::VMOVAPSZ128rr, X86::VMOVAPDZ128rr, X86::VMOVDQA64Z128rr, X86::VMOVDQA32Z128rr },
+ { X86::VMOVUPSZ128mr, X86::VMOVUPDZ128mr, X86::VMOVDQU64Z128mr, X86::VMOVDQU32Z128mr },
+ { X86::VMOVUPSZ128rm, X86::VMOVUPDZ128rm, X86::VMOVDQU64Z128rm, X86::VMOVDQU32Z128rm },
+ { X86::VMOVAPSZ256mr, X86::VMOVAPDZ256mr, X86::VMOVDQA64Z256mr, X86::VMOVDQA32Z256mr },
+ { X86::VMOVAPSZ256rm, X86::VMOVAPDZ256rm, X86::VMOVDQA64Z256rm, X86::VMOVDQA32Z256rm },
+ { X86::VMOVAPSZ256rr, X86::VMOVAPDZ256rr, X86::VMOVDQA64Z256rr, X86::VMOVDQA32Z256rr },
+ { X86::VMOVUPSZ256mr, X86::VMOVUPDZ256mr, X86::VMOVDQU64Z256mr, X86::VMOVDQU32Z256mr },
+ { X86::VMOVUPSZ256rm, X86::VMOVUPDZ256rm, X86::VMOVDQU64Z256rm, X86::VMOVDQU32Z256rm },
+ { X86::VMOVAPSZmr, X86::VMOVAPDZmr, X86::VMOVDQA64Zmr, X86::VMOVDQA32Zmr },
+ { X86::VMOVAPSZrm, X86::VMOVAPDZrm, X86::VMOVDQA64Zrm, X86::VMOVDQA32Zrm },
+ { X86::VMOVAPSZrr, X86::VMOVAPDZrr, X86::VMOVDQA64Zrr, X86::VMOVDQA32Zrr },
+ { X86::VMOVUPSZmr, X86::VMOVUPDZmr, X86::VMOVDQU64Zmr, X86::VMOVDQU32Zmr },
+ { X86::VMOVUPSZrm, X86::VMOVUPDZrm, X86::VMOVDQU64Zrm, X86::VMOVDQU32Zrm },
};
static const uint16_t ReplaceableInstrsAVX512DQ[][4] = {
@@ -7427,14 +7427,14 @@ void X86InstrInfo::setExecutionDomain(Ma
assert(Subtarget.hasAVX512() && "Requires AVX-512");
table = lookupAVX512(MI.getOpcode(), dom);
// Don't change integer Q instructions to D instructions.
- if (table && dom == 3 && table[3] == MI.getOpcode())
+ if (table && Domain == 3 && table[3] == MI.getOpcode())
Domain = 4;
}
if (!table) { // try the AVX512DQ table
assert((Subtarget.hasDQI() || Domain >=3) && "Requires AVX-512DQ");
table = lookupAVX512DQ(MI.getOpcode(), dom);
// Don't change integer Q instructions to D instructions.
- if (table && dom == 3 && table[3] == MI.getOpcode())
+ if (table && Domain == 3 && table[3] == MI.getOpcode())
Domain = 4;
}
assert(table && "Cannot change domain");
Modified: llvm/trunk/test/CodeGen/X86/avx512-arith.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-arith.ll?rev=277327&r1=277326&r2=277327&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-arith.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-arith.ll Mon Aug 1 02:55:33 2016
@@ -603,10 +603,30 @@ define <8 x i64> @orq_broadcast(<8 x i64
}
define <16 x i32> @andd512fold(<16 x i32> %y, <16 x i32>* %x) {
-; CHECK-LABEL: andd512fold:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpandd (%rdi), %zmm0, %zmm0
-; CHECK-NEXT: retq
+; AVX512F-LABEL: andd512fold:
+; AVX512F: ## BB#0: ## %entry
+; AVX512F-NEXT: vpandd (%rdi), %zmm0, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: andd512fold:
+; AVX512VL: ## BB#0: ## %entry
+; AVX512VL-NEXT: vpandd (%rdi), %zmm0, %zmm0
+; AVX512VL-NEXT: retq
+;
+; AVX512BW-LABEL: andd512fold:
+; AVX512BW: ## BB#0: ## %entry
+; AVX512BW-NEXT: vpandd (%rdi), %zmm0, %zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512DQ-LABEL: andd512fold:
+; AVX512DQ: ## BB#0: ## %entry
+; AVX512DQ-NEXT: vandps (%rdi), %zmm0, %zmm0
+; AVX512DQ-NEXT: retq
+;
+; SKX-LABEL: andd512fold:
+; SKX: ## BB#0: ## %entry
+; SKX-NEXT: vandps (%rdi), %zmm0, %zmm0
+; SKX-NEXT: retq
entry:
%a = load <16 x i32>, <16 x i32>* %x, align 4
%b = and <16 x i32> %y, %a
Modified: llvm/trunk/test/CodeGen/X86/avx512-bugfix-25270.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-bugfix-25270.ll?rev=277327&r1=277326&r2=277327&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-bugfix-25270.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-bugfix-25270.ll Mon Aug 1 02:55:33 2016
@@ -9,8 +9,8 @@ define void @bar__512(<16 x i32>* %var)
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: subq $112, %rsp
; CHECK-NEXT: movq %rdi, %rbx
-; CHECK-NEXT: vmovdqu32 (%rbx), %zmm0
-; CHECK-NEXT: vmovdqu64 %zmm0, (%rsp) ## 64-byte Spill
+; CHECK-NEXT: vmovups (%rbx), %zmm0
+; CHECK-NEXT: vmovups %zmm0, (%rsp) ## 64-byte Spill
; CHECK-NEXT: vpbroadcastd {{.*}}(%rip), %zmm1
; CHECK-NEXT: vmovdqa32 %zmm1, (%rbx)
; CHECK-NEXT: callq _Print__512
Modified: llvm/trunk/test/CodeGen/X86/avx512-calling-conv.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-calling-conv.ll?rev=277327&r1=277326&r2=277327&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-calling-conv.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-calling-conv.ll Mon Aug 1 02:55:33 2016
@@ -4,15 +4,10 @@
; RUN: llc < %s -mtriple=i686-apple-darwin9 -mcpu=knl | FileCheck %s --check-prefix=KNL_X32
define <16 x i1> @test1() {
-; KNL-LABEL: test1:
-; KNL: ## BB#0:
-; KNL-NEXT: vxorps %xmm0, %xmm0, %xmm0
-; KNL-NEXT: retq
-;
-; SKX-LABEL: test1:
-; SKX: ## BB#0:
-; SKX-NEXT: vpxord %xmm0, %xmm0, %xmm0
-; SKX-NEXT: retq
+; ALL_X64-LABEL: test1:
+; ALL_X64: ## BB#0:
+; ALL_X64-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; ALL_X64-NEXT: retq
;
; KNL_X32-LABEL: test1:
; KNL_X32: ## BB#0:
Modified: llvm/trunk/test/CodeGen/X86/avx512-cvt.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-cvt.ll?rev=277327&r1=277326&r2=277327&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-cvt.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-cvt.ll Mon Aug 1 02:55:33 2016
@@ -761,7 +761,7 @@ define <16 x double> @sitofp_16i1_double
;
; SKX-LABEL: sitofp_16i1_double:
; SKX: ## BB#0:
-; SKX-NEXT: vpxord %zmm2, %zmm2, %zmm2
+; SKX-NEXT: vxorpd %zmm2, %zmm2, %zmm2
; SKX-NEXT: vcmpltpd %zmm1, %zmm2, %k0
; SKX-NEXT: vcmpltpd %zmm0, %zmm2, %k1
; SKX-NEXT: vpmovm2d %k1, %ymm0
@@ -787,7 +787,7 @@ define <8 x double> @sitofp_8i1_double(<
;
; SKX-LABEL: sitofp_8i1_double:
; SKX: ## BB#0:
-; SKX-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; SKX-NEXT: vxorpd %zmm1, %zmm1, %zmm1
; SKX-NEXT: vcmpltpd %zmm0, %zmm1, %k0
; SKX-NEXT: vpmovm2d %k0, %ymm0
; SKX-NEXT: vcvtdq2pd %ymm0, %zmm0
@@ -811,7 +811,7 @@ define <8 x float> @sitofp_8i1_float(<8
;
; SKX-LABEL: sitofp_8i1_float:
; SKX: ## BB#0:
-; SKX-NEXT: vpxord %ymm1, %ymm1, %ymm1
+; SKX-NEXT: vxorps %ymm1, %ymm1, %ymm1
; SKX-NEXT: vcmpltps %ymm0, %ymm1, %k0
; SKX-NEXT: vpmovm2d %k0, %ymm0
; SKX-NEXT: vcvtdq2ps %ymm0, %ymm0
@@ -831,7 +831,7 @@ define <4 x float> @sitofp_4i1_float(<4
;
; SKX-LABEL: sitofp_4i1_float:
; SKX: ## BB#0:
-; SKX-NEXT: vpxord %xmm1, %xmm1, %xmm1
+; SKX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; SKX-NEXT: vcmpltps %xmm0, %xmm1, %k0
; SKX-NEXT: vpmovm2d %k0, %xmm0
; SKX-NEXT: vcvtdq2ps %xmm0, %xmm0
@@ -854,7 +854,7 @@ define <4 x double> @sitofp_4i1_double(<
;
; SKX-LABEL: sitofp_4i1_double:
; SKX: ## BB#0:
-; SKX-NEXT: vpxord %ymm1, %ymm1, %ymm1
+; SKX-NEXT: vxorpd %ymm1, %ymm1, %ymm1
; SKX-NEXT: vcmpltpd %ymm0, %ymm1, %k0
; SKX-NEXT: vpmovm2d %k0, %xmm0
; SKX-NEXT: vcvtdq2pd %xmm0, %ymm0
@@ -890,7 +890,7 @@ define <2 x float> @sitofp_2i1_float(<2
;
; SKX-LABEL: sitofp_2i1_float:
; SKX: ## BB#0:
-; SKX-NEXT: vpxord %xmm1, %xmm1, %xmm1
+; SKX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; SKX-NEXT: vcmpltps %xmm0, %xmm1, %k0
; SKX-NEXT: vpmovm2d %k0, %xmm0
; SKX-NEXT: vcvtdq2ps %xmm0, %xmm0
@@ -911,7 +911,7 @@ define <2 x double> @sitofp_2i1_double(<
;
; SKX-LABEL: sitofp_2i1_double:
; SKX: ## BB#0:
-; SKX-NEXT: vpxord %xmm1, %xmm1, %xmm1
+; SKX-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; SKX-NEXT: vcmpltpd %xmm0, %xmm1, %k0
; SKX-NEXT: vpmovm2q %k0, %xmm0
; SKX-NEXT: vcvtqq2pd %xmm0, %xmm0
Modified: llvm/trunk/test/CodeGen/X86/avx512-extract-subvector.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-extract-subvector.ll?rev=277327&r1=277326&r2=277327&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-extract-subvector.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-extract-subvector.ll Mon Aug 1 02:55:33 2016
@@ -156,7 +156,7 @@ entry:
define void @extract_subvector256_v2i64_store_lo(i64* nocapture %addr, <4 x i64> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector256_v2i64_store_lo:
; SKX: ## BB#0: ## %entry
-; SKX-NEXT: vmovdqu64 %xmm0, (%rdi)
+; SKX-NEXT: vmovups %xmm0, (%rdi)
; SKX-NEXT: retq
entry:
%0 = shufflevector <4 x i64> %a, <4 x i64> undef, <2 x i32> <i32 0, i32 1>
@@ -168,7 +168,7 @@ entry:
define void @extract_subvector256_v4i32_store_lo(i32* nocapture %addr, <8 x i32> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector256_v4i32_store_lo:
; SKX: ## BB#0: ## %entry
-; SKX-NEXT: vmovdqu32 %xmm0, (%rdi)
+; SKX-NEXT: vmovups %xmm0, (%rdi)
; SKX-NEXT: retq
entry:
%0 = shufflevector <8 x i32> %a, <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -180,7 +180,7 @@ entry:
define void @extract_subvector256_v8i16_store_lo(i16* nocapture %addr, <16 x i16> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector256_v8i16_store_lo:
; SKX: ## BB#0: ## %entry
-; SKX-NEXT: vmovdqu32 %xmm0, (%rdi)
+; SKX-NEXT: vmovups %xmm0, (%rdi)
; SKX-NEXT: retq
entry:
%0 = shufflevector <16 x i16> %a, <16 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -192,7 +192,7 @@ entry:
define void @extract_subvector256_v16i8_store_lo(i8* nocapture %addr, <32 x i8> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector256_v16i8_store_lo:
; SKX: ## BB#0: ## %entry
-; SKX-NEXT: vmovdqu32 %xmm0, (%rdi)
+; SKX-NEXT: vmovups %xmm0, (%rdi)
; SKX-NEXT: retq
entry:
%0 = shufflevector <32 x i8> %a, <32 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -228,7 +228,7 @@ entry:
define void @extract_subvector512_v2i64_store_lo(i64* nocapture %addr, <8 x i64> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector512_v2i64_store_lo:
; SKX: ## BB#0: ## %entry
-; SKX-NEXT: vmovdqu64 %xmm0, (%rdi)
+; SKX-NEXT: vmovups %xmm0, (%rdi)
; SKX-NEXT: retq
entry:
%0 = shufflevector <8 x i64> %a, <8 x i64> undef, <2 x i32> <i32 0, i32 1>
@@ -240,7 +240,7 @@ entry:
define void @extract_subvector512_v4i32_store_lo(i32* nocapture %addr, <16 x i32> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector512_v4i32_store_lo:
; SKX: ## BB#0: ## %entry
-; SKX-NEXT: vmovdqu32 %xmm0, (%rdi)
+; SKX-NEXT: vmovups %xmm0, (%rdi)
; SKX-NEXT: retq
entry:
%0 = shufflevector <16 x i32> %a, <16 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -252,7 +252,7 @@ entry:
define void @extract_subvector512_v8i16_store_lo(i16* nocapture %addr, <32 x i16> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector512_v8i16_store_lo:
; SKX: ## BB#0: ## %entry
-; SKX-NEXT: vmovdqu32 %xmm0, (%rdi)
+; SKX-NEXT: vmovups %xmm0, (%rdi)
; SKX-NEXT: retq
entry:
%0 = shufflevector <32 x i16> %a, <32 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -264,7 +264,7 @@ entry:
define void @extract_subvector512_v16i8_store_lo(i8* nocapture %addr, <64 x i8> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector512_v16i8_store_lo:
; SKX: ## BB#0: ## %entry
-; SKX-NEXT: vmovdqu32 %xmm0, (%rdi)
+; SKX-NEXT: vmovups %xmm0, (%rdi)
; SKX-NEXT: retq
entry:
%0 = shufflevector <64 x i8> %a, <64 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -300,7 +300,7 @@ entry:
define void @extract_subvector512_v4i64_store_lo(i64* nocapture %addr, <8 x i64> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector512_v4i64_store_lo:
; SKX: ## BB#0: ## %entry
-; SKX-NEXT: vmovdqu64 %ymm0, (%rdi)
+; SKX-NEXT: vmovups %ymm0, (%rdi)
; SKX-NEXT: retq
entry:
%0 = shufflevector <8 x i64> %a, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -312,7 +312,7 @@ entry:
define void @extract_subvector512_v8i32_store_lo(i32* nocapture %addr, <16 x i32> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector512_v8i32_store_lo:
; SKX: ## BB#0: ## %entry
-; SKX-NEXT: vmovdqu32 %ymm0, (%rdi)
+; SKX-NEXT: vmovups %ymm0, (%rdi)
; SKX-NEXT: retq
entry:
%0 = shufflevector <16 x i32> %a, <16 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -324,7 +324,7 @@ entry:
define void @extract_subvector512_v16i16_store_lo(i16* nocapture %addr, <32 x i16> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector512_v16i16_store_lo:
; SKX: ## BB#0: ## %entry
-; SKX-NEXT: vmovdqu32 %ymm0, (%rdi)
+; SKX-NEXT: vmovups %ymm0, (%rdi)
; SKX-NEXT: retq
entry:
%0 = shufflevector <32 x i16> %a, <32 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -336,7 +336,7 @@ entry:
define void @extract_subvector512_v32i8_store_lo(i8* nocapture %addr, <64 x i8> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector512_v32i8_store_lo:
; SKX: ## BB#0: ## %entry
-; SKX-NEXT: vmovdqu32 %ymm0, (%rdi)
+; SKX-NEXT: vmovups %ymm0, (%rdi)
; SKX-NEXT: retq
entry:
%0 = shufflevector <64 x i8> %a, <64 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
Modified: llvm/trunk/test/CodeGen/X86/avx512-logic.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-logic.ll?rev=277327&r1=277326&r2=277327&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-logic.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-logic.ll Mon Aug 1 02:55:33 2016
@@ -125,10 +125,15 @@ define <8 x i64> @orq_broadcast(<8 x i64
}
define <16 x i32> @andd512fold(<16 x i32> %y, <16 x i32>* %x) {
-; ALL-LABEL: andd512fold:
-; ALL: ## BB#0: ## %entry
-; ALL-NEXT: vpandd (%rdi), %zmm0, %zmm0
-; ALL-NEXT: retq
+; KNL-LABEL: andd512fold:
+; KNL: ## BB#0: ## %entry
+; KNL-NEXT: vpandd (%rdi), %zmm0, %zmm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: andd512fold:
+; SKX: ## BB#0: ## %entry
+; SKX-NEXT: vandps (%rdi), %zmm0, %zmm0
+; SKX-NEXT: retq
entry:
%a = load <16 x i32>, <16 x i32>* %x, align 4
%b = and <16 x i32> %y, %a
Modified: llvm/trunk/test/CodeGen/X86/avx512-mov.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-mov.ll?rev=277327&r1=277326&r2=277327&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-mov.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-mov.ll Mon Aug 1 02:55:33 2016
@@ -151,7 +151,7 @@ define <4 x i32> @test15(i32* %x) {
define <16 x i32> @test16(i8 * %addr) {
; CHECK-LABEL: test16:
; CHECK: ## BB#0:
-; CHECK-NEXT: vmovdqu32 (%rdi), %zmm0 ## encoding: [0x62,0xf1,0x7e,0x48,0x6f,0x07]
+; CHECK-NEXT: vmovups (%rdi), %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <16 x i32>*
%res = load <16 x i32>, <16 x i32>* %vaddr, align 1
@@ -161,7 +161,7 @@ define <16 x i32> @test16(i8 * %addr) {
define <16 x i32> @test17(i8 * %addr) {
; CHECK-LABEL: test17:
; CHECK: ## BB#0:
-; CHECK-NEXT: vmovdqa32 (%rdi), %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0x6f,0x07]
+; CHECK-NEXT: vmovaps (%rdi), %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <16 x i32>*
%res = load <16 x i32>, <16 x i32>* %vaddr, align 64
@@ -171,7 +171,7 @@ define <16 x i32> @test17(i8 * %addr) {
define void @test18(i8 * %addr, <8 x i64> %data) {
; CHECK-LABEL: test18:
; CHECK: ## BB#0:
-; CHECK-NEXT: vmovdqa64 %zmm0, (%rdi) ## encoding: [0x62,0xf1,0xfd,0x48,0x7f,0x07]
+; CHECK-NEXT: vmovaps %zmm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x48,0x29,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <8 x i64>*
store <8 x i64>%data, <8 x i64>* %vaddr, align 64
@@ -181,7 +181,7 @@ define void @test18(i8 * %addr, <8 x i64
define void @test19(i8 * %addr, <16 x i32> %data) {
; CHECK-LABEL: test19:
; CHECK: ## BB#0:
-; CHECK-NEXT: vmovdqu32 %zmm0, (%rdi) ## encoding: [0x62,0xf1,0x7e,0x48,0x7f,0x07]
+; CHECK-NEXT: vmovups %zmm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <16 x i32>*
store <16 x i32>%data, <16 x i32>* %vaddr, align 1
@@ -191,7 +191,7 @@ define void @test19(i8 * %addr, <16 x i3
define void @test20(i8 * %addr, <16 x i32> %data) {
; CHECK-LABEL: test20:
; CHECK: ## BB#0:
-; CHECK-NEXT: vmovdqa32 %zmm0, (%rdi) ## encoding: [0x62,0xf1,0x7d,0x48,0x7f,0x07]
+; CHECK-NEXT: vmovaps %zmm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x48,0x29,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <16 x i32>*
store <16 x i32>%data, <16 x i32>* %vaddr, align 64
@@ -201,7 +201,7 @@ define void @test20(i8 * %addr, <16 x i3
define <8 x i64> @test21(i8 * %addr) {
; CHECK-LABEL: test21:
; CHECK: ## BB#0:
-; CHECK-NEXT: vmovdqa64 (%rdi), %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0x07]
+; CHECK-NEXT: vmovaps (%rdi), %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <8 x i64>*
%res = load <8 x i64>, <8 x i64>* %vaddr, align 64
@@ -211,7 +211,7 @@ define <8 x i64> @test21(i8 * %addr) {
define void @test22(i8 * %addr, <8 x i64> %data) {
; CHECK-LABEL: test22:
; CHECK: ## BB#0:
-; CHECK-NEXT: vmovdqu64 %zmm0, (%rdi) ## encoding: [0x62,0xf1,0xfe,0x48,0x7f,0x07]
+; CHECK-NEXT: vmovups %zmm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <8 x i64>*
store <8 x i64>%data, <8 x i64>* %vaddr, align 1
@@ -221,7 +221,7 @@ define void @test22(i8 * %addr, <8 x i64
define <8 x i64> @test23(i8 * %addr) {
; CHECK-LABEL: test23:
; CHECK: ## BB#0:
-; CHECK-NEXT: vmovdqu64 (%rdi), %zmm0 ## encoding: [0x62,0xf1,0xfe,0x48,0x6f,0x07]
+; CHECK-NEXT: vmovups (%rdi), %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <8 x i64>*
%res = load <8 x i64>, <8 x i64>* %vaddr, align 1
Modified: llvm/trunk/test/CodeGen/X86/avx512-vec-cmp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-vec-cmp.ll?rev=277327&r1=277326&r2=277327&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-vec-cmp.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-vec-cmp.ll Mon Aug 1 02:55:33 2016
@@ -79,7 +79,7 @@ define <4 x float> @test7(<4 x float> %a
;
; SKX-LABEL: test7:
; SKX: ## BB#0:
-; SKX-NEXT: vpxord %xmm2, %xmm2, %xmm2
+; SKX-NEXT: vxorps %xmm2, %xmm2, %xmm2
; SKX-NEXT: vcmpltps %xmm2, %xmm0, %k1
; SKX-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1}
; SKX-NEXT: retq
@@ -99,7 +99,7 @@ define <2 x double> @test8(<2 x double>
;
; SKX-LABEL: test8:
; SKX: ## BB#0:
-; SKX-NEXT: vpxord %xmm2, %xmm2, %xmm2
+; SKX-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; SKX-NEXT: vcmpltpd %xmm2, %xmm0, %k1
; SKX-NEXT: vblendmpd %xmm0, %xmm1, %xmm0 {%k1}
; SKX-NEXT: retq
Modified: llvm/trunk/test/CodeGen/X86/avx512vl-mov.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512vl-mov.ll?rev=277327&r1=277326&r2=277327&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vl-mov.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512vl-mov.ll Mon Aug 1 02:55:33 2016
@@ -4,7 +4,7 @@
define <8 x i32> @test_256_1(i8 * %addr) {
; CHECK-LABEL: test_256_1:
; CHECK: ## BB#0:
-; CHECK-NEXT: vmovdqu32 (%rdi), %ymm0 ## encoding: [0x62,0xf1,0x7e,0x28,0x6f,0x07]
+; CHECK-NEXT: vmovups (%rdi), %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x10,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <8 x i32>*
%res = load <8 x i32>, <8 x i32>* %vaddr, align 1
@@ -14,7 +14,7 @@ define <8 x i32> @test_256_1(i8 * %addr)
define <8 x i32> @test_256_2(i8 * %addr) {
; CHECK-LABEL: test_256_2:
; CHECK: ## BB#0:
-; CHECK-NEXT: vmovdqa32 (%rdi), %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x6f,0x07]
+; CHECK-NEXT: vmovaps (%rdi), %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <8 x i32>*
%res = load <8 x i32>, <8 x i32>* %vaddr, align 32
@@ -24,7 +24,7 @@ define <8 x i32> @test_256_2(i8 * %addr)
define void @test_256_3(i8 * %addr, <4 x i64> %data) {
; CHECK-LABEL: test_256_3:
; CHECK: ## BB#0:
-; CHECK-NEXT: vmovdqa64 %ymm0, (%rdi) ## encoding: [0x62,0xf1,0xfd,0x28,0x7f,0x07]
+; CHECK-NEXT: vmovaps %ymm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x28,0x29,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <4 x i64>*
store <4 x i64>%data, <4 x i64>* %vaddr, align 32
@@ -34,7 +34,7 @@ define void @test_256_3(i8 * %addr, <4 x
define void @test_256_4(i8 * %addr, <8 x i32> %data) {
; CHECK-LABEL: test_256_4:
; CHECK: ## BB#0:
-; CHECK-NEXT: vmovdqu32 %ymm0, (%rdi) ## encoding: [0x62,0xf1,0x7e,0x28,0x7f,0x07]
+; CHECK-NEXT: vmovups %ymm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x28,0x11,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <8 x i32>*
store <8 x i32>%data, <8 x i32>* %vaddr, align 1
@@ -44,7 +44,7 @@ define void @test_256_4(i8 * %addr, <8 x
define void @test_256_5(i8 * %addr, <8 x i32> %data) {
; CHECK-LABEL: test_256_5:
; CHECK: ## BB#0:
-; CHECK-NEXT: vmovdqa32 %ymm0, (%rdi) ## encoding: [0x62,0xf1,0x7d,0x28,0x7f,0x07]
+; CHECK-NEXT: vmovaps %ymm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x28,0x29,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <8 x i32>*
store <8 x i32>%data, <8 x i32>* %vaddr, align 32
@@ -54,7 +54,7 @@ define void @test_256_5(i8 * %addr, <8 x
define <4 x i64> @test_256_6(i8 * %addr) {
; CHECK-LABEL: test_256_6:
; CHECK: ## BB#0:
-; CHECK-NEXT: vmovdqa64 (%rdi), %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0x07]
+; CHECK-NEXT: vmovaps (%rdi), %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <4 x i64>*
%res = load <4 x i64>, <4 x i64>* %vaddr, align 32
@@ -64,7 +64,7 @@ define <4 x i64> @test_256_6(i8 * %addr
define void @test_256_7(i8 * %addr, <4 x i64> %data) {
; CHECK-LABEL: test_256_7:
; CHECK: ## BB#0:
-; CHECK-NEXT: vmovdqu64 %ymm0, (%rdi) ## encoding: [0x62,0xf1,0xfe,0x28,0x7f,0x07]
+; CHECK-NEXT: vmovups %ymm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x28,0x11,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <4 x i64>*
store <4 x i64>%data, <4 x i64>* %vaddr, align 1
@@ -74,7 +74,7 @@ define void @test_256_7(i8 * %addr, <4 x
define <4 x i64> @test_256_8(i8 * %addr) {
; CHECK-LABEL: test_256_8:
; CHECK: ## BB#0:
-; CHECK-NEXT: vmovdqu64 (%rdi), %ymm0 ## encoding: [0x62,0xf1,0xfe,0x28,0x6f,0x07]
+; CHECK-NEXT: vmovups (%rdi), %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x10,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <4 x i64>*
%res = load <4 x i64>, <4 x i64>* %vaddr, align 1
@@ -392,7 +392,7 @@ define <4 x double> @test_256_32(i8 * %a
define <4 x i32> @test_128_1(i8 * %addr) {
; CHECK-LABEL: test_128_1:
; CHECK: ## BB#0:
-; CHECK-NEXT: vmovdqu32 (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7e,0x08,0x6f,0x07]
+; CHECK-NEXT: vmovups (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x10,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <4 x i32>*
%res = load <4 x i32>, <4 x i32>* %vaddr, align 1
@@ -402,7 +402,7 @@ define <4 x i32> @test_128_1(i8 * %addr)
define <4 x i32> @test_128_2(i8 * %addr) {
; CHECK-LABEL: test_128_2:
; CHECK: ## BB#0:
-; CHECK-NEXT: vmovdqa32 (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6f,0x07]
+; CHECK-NEXT: vmovaps (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <4 x i32>*
%res = load <4 x i32>, <4 x i32>* %vaddr, align 16
@@ -412,7 +412,7 @@ define <4 x i32> @test_128_2(i8 * %addr)
define void @test_128_3(i8 * %addr, <2 x i64> %data) {
; CHECK-LABEL: test_128_3:
; CHECK: ## BB#0:
-; CHECK-NEXT: vmovdqa64 %xmm0, (%rdi) ## encoding: [0x62,0xf1,0xfd,0x08,0x7f,0x07]
+; CHECK-NEXT: vmovaps %xmm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x08,0x29,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <2 x i64>*
store <2 x i64>%data, <2 x i64>* %vaddr, align 16
@@ -422,7 +422,7 @@ define void @test_128_3(i8 * %addr, <2 x
define void @test_128_4(i8 * %addr, <4 x i32> %data) {
; CHECK-LABEL: test_128_4:
; CHECK: ## BB#0:
-; CHECK-NEXT: vmovdqu32 %xmm0, (%rdi) ## encoding: [0x62,0xf1,0x7e,0x08,0x7f,0x07]
+; CHECK-NEXT: vmovups %xmm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x08,0x11,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <4 x i32>*
store <4 x i32>%data, <4 x i32>* %vaddr, align 1
@@ -432,7 +432,7 @@ define void @test_128_4(i8 * %addr, <4 x
define void @test_128_5(i8 * %addr, <4 x i32> %data) {
; CHECK-LABEL: test_128_5:
; CHECK: ## BB#0:
-; CHECK-NEXT: vmovdqa32 %xmm0, (%rdi) ## encoding: [0x62,0xf1,0x7d,0x08,0x7f,0x07]
+; CHECK-NEXT: vmovaps %xmm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x08,0x29,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <4 x i32>*
store <4 x i32>%data, <4 x i32>* %vaddr, align 16
@@ -442,7 +442,7 @@ define void @test_128_5(i8 * %addr, <4 x
define <2 x i64> @test_128_6(i8 * %addr) {
; CHECK-LABEL: test_128_6:
; CHECK: ## BB#0:
-; CHECK-NEXT: vmovdqa64 (%rdi), %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0x07]
+; CHECK-NEXT: vmovaps (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <2 x i64>*
%res = load <2 x i64>, <2 x i64>* %vaddr, align 16
@@ -452,7 +452,7 @@ define <2 x i64> @test_128_6(i8 * %addr
define void @test_128_7(i8 * %addr, <2 x i64> %data) {
; CHECK-LABEL: test_128_7:
; CHECK: ## BB#0:
-; CHECK-NEXT: vmovdqu64 %xmm0, (%rdi) ## encoding: [0x62,0xf1,0xfe,0x08,0x7f,0x07]
+; CHECK-NEXT: vmovups %xmm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x08,0x11,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <2 x i64>*
store <2 x i64>%data, <2 x i64>* %vaddr, align 1
@@ -462,7 +462,7 @@ define void @test_128_7(i8 * %addr, <2 x
define <2 x i64> @test_128_8(i8 * %addr) {
; CHECK-LABEL: test_128_8:
; CHECK: ## BB#0:
-; CHECK-NEXT: vmovdqu64 (%rdi), %xmm0 ## encoding: [0x62,0xf1,0xfe,0x08,0x6f,0x07]
+; CHECK-NEXT: vmovups (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x10,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <2 x i64>*
%res = load <2 x i64>, <2 x i64>* %vaddr, align 1
Modified: llvm/trunk/test/CodeGen/X86/fma_patterns.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fma_patterns.ll?rev=277327&r1=277326&r2=277327&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fma_patterns.ll (original)
+++ llvm/trunk/test/CodeGen/X86/fma_patterns.ll Mon Aug 1 02:55:33 2016
@@ -1131,7 +1131,7 @@ define <4 x float> @test_v4f32_fneg_fmul
;
; AVX512-LABEL: test_v4f32_fneg_fmul:
; AVX512: # BB#0:
-; AVX512-NEXT: vpxord %xmm2, %xmm2, %xmm2
+; AVX512-NEXT: vxorps %xmm2, %xmm2, %xmm2
; AVX512-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0
; AVX512-NEXT: retq
%m = fmul nsz <4 x float> %x, %y
@@ -1154,7 +1154,7 @@ define <4 x double> @test_v4f64_fneg_fmu
;
; AVX512-LABEL: test_v4f64_fneg_fmul:
; AVX512: # BB#0:
-; AVX512-NEXT: vpxord %ymm2, %ymm2, %ymm2
+; AVX512-NEXT: vxorpd %ymm2, %ymm2, %ymm2
; AVX512-NEXT: vfnmsub213pd %ymm2, %ymm1, %ymm0
; AVX512-NEXT: retq
%m = fmul nsz <4 x double> %x, %y
Modified: llvm/trunk/test/CodeGen/X86/fma_patterns_wide.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fma_patterns_wide.ll?rev=277327&r1=277326&r2=277327&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fma_patterns_wide.ll (original)
+++ llvm/trunk/test/CodeGen/X86/fma_patterns_wide.ll Mon Aug 1 02:55:33 2016
@@ -749,7 +749,7 @@ define <16 x float> @test_v16f32_fneg_fm
;
; AVX512-LABEL: test_v16f32_fneg_fmul:
; AVX512: # BB#0:
-; AVX512-NEXT: vpxord %zmm2, %zmm2, %zmm2
+; AVX512-NEXT: vxorps %zmm2, %zmm2, %zmm2
; AVX512-NEXT: vfnmsub213ps %zmm2, %zmm1, %zmm0
; AVX512-NEXT: retq
%m = fmul nsz <16 x float> %x, %y
@@ -774,7 +774,7 @@ define <8 x double> @test_v8f64_fneg_fmu
;
; AVX512-LABEL: test_v8f64_fneg_fmul:
; AVX512: # BB#0:
-; AVX512-NEXT: vpxord %zmm2, %zmm2, %zmm2
+; AVX512-NEXT: vxorpd %zmm2, %zmm2, %zmm2
; AVX512-NEXT: vfnmsub213pd %zmm2, %zmm1, %zmm0
; AVX512-NEXT: retq
%m = fmul nsz <8 x double> %x, %y
Modified: llvm/trunk/test/CodeGen/X86/merge-consecutive-loads-512.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/merge-consecutive-loads-512.ll?rev=277327&r1=277326&r2=277327&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/merge-consecutive-loads-512.ll (original)
+++ llvm/trunk/test/CodeGen/X86/merge-consecutive-loads-512.ll Mon Aug 1 02:55:33 2016
@@ -419,13 +419,13 @@ define <16 x i32> @merge_16i32_i32_23u5u
define <16 x i32> @merge_16i32_i32_0uu3uuuuuuuuCuEF(i32* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_16i32_i32_0uu3uuuuuuuuCuEF:
; ALL: # BB#0:
-; ALL-NEXT: vmovdqu32 (%rdi), %zmm0
+; ALL-NEXT: vmovups (%rdi), %zmm0
; ALL-NEXT: retq
;
; X32-AVX512F-LABEL: merge_16i32_i32_0uu3uuuuuuuuCuEF:
; X32-AVX512F: # BB#0:
; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX512F-NEXT: vmovdqu32 (%eax), %zmm0
+; X32-AVX512F-NEXT: vmovups (%eax), %zmm0
; X32-AVX512F-NEXT: retl
%ptr0 = getelementptr inbounds i32, i32* %ptr, i64 0
%ptr3 = getelementptr inbounds i32, i32* %ptr, i64 3
Modified: llvm/trunk/test/CodeGen/X86/nontemporal-loads.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/nontemporal-loads.ll?rev=277327&r1=277326&r2=277327&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/nontemporal-loads.ll (original)
+++ llvm/trunk/test/CodeGen/X86/nontemporal-loads.ll Mon Aug 1 02:55:33 2016
@@ -59,7 +59,7 @@ define <4 x i32> @test_v4i32(<4 x i32>*
;
; AVX512VL-LABEL: test_v4i32:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqa32 (%rdi), %xmm0
+; AVX512VL-NEXT: vmovaps (%rdi), %xmm0
; AVX512VL-NEXT: retq
%1 = load <4 x i32>, <4 x i32>* %src, align 16, !nontemporal !1
ret <4 x i32> %1
@@ -229,7 +229,7 @@ define <8 x i32> @test_v8i32(<8 x i32>*
;
; AVX512VL-LABEL: test_v8i32:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqa32 (%rdi), %ymm0
+; AVX512VL-NEXT: vmovaps (%rdi), %ymm0
; AVX512VL-NEXT: retq
%1 = load <8 x i32>, <8 x i32>* %src, align 32, !nontemporal !1
ret <8 x i32> %1
@@ -1165,20 +1165,10 @@ define <4 x i32> @test_unaligned_v4i32(<
; AVX-NEXT: vmovups (%rdi), %xmm0
; AVX-NEXT: retq
;
-; AVX512F-LABEL: test_unaligned_v4i32:
-; AVX512F: # BB#0:
-; AVX512F-NEXT: vmovups (%rdi), %xmm0
-; AVX512F-NEXT: retq
-;
-; AVX512BW-LABEL: test_unaligned_v4i32:
-; AVX512BW: # BB#0:
-; AVX512BW-NEXT: vmovups (%rdi), %xmm0
-; AVX512BW-NEXT: retq
-;
-; AVX512VL-LABEL: test_unaligned_v4i32:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqu32 (%rdi), %xmm0
-; AVX512VL-NEXT: retq
+; AVX512-LABEL: test_unaligned_v4i32:
+; AVX512: # BB#0:
+; AVX512-NEXT: vmovups (%rdi), %xmm0
+; AVX512-NEXT: retq
%1 = load <4 x i32>, <4 x i32>* %src, align 1, !nontemporal !1
ret <4 x i32> %1
}
@@ -1213,20 +1203,10 @@ define <2 x i64> @test_unaligned_v2i64(<
; AVX-NEXT: vmovups (%rdi), %xmm0
; AVX-NEXT: retq
;
-; AVX512F-LABEL: test_unaligned_v2i64:
-; AVX512F: # BB#0:
-; AVX512F-NEXT: vmovups (%rdi), %xmm0
-; AVX512F-NEXT: retq
-;
-; AVX512BW-LABEL: test_unaligned_v2i64:
-; AVX512BW: # BB#0:
-; AVX512BW-NEXT: vmovups (%rdi), %xmm0
-; AVX512BW-NEXT: retq
-;
-; AVX512VL-LABEL: test_unaligned_v2i64:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqu64 (%rdi), %xmm0
-; AVX512VL-NEXT: retq
+; AVX512-LABEL: test_unaligned_v2i64:
+; AVX512: # BB#0:
+; AVX512-NEXT: vmovups (%rdi), %xmm0
+; AVX512-NEXT: retq
%1 = load <2 x i64>, <2 x i64>* %src, align 1, !nontemporal !1
ret <2 x i64> %1
}
@@ -1242,20 +1222,10 @@ define <8 x i16> @test_unaligned_v8i16(<
; AVX-NEXT: vmovups (%rdi), %xmm0
; AVX-NEXT: retq
;
-; AVX512F-LABEL: test_unaligned_v8i16:
-; AVX512F: # BB#0:
-; AVX512F-NEXT: vmovups (%rdi), %xmm0
-; AVX512F-NEXT: retq
-;
-; AVX512BW-LABEL: test_unaligned_v8i16:
-; AVX512BW: # BB#0:
-; AVX512BW-NEXT: vmovups (%rdi), %xmm0
-; AVX512BW-NEXT: retq
-;
-; AVX512VL-LABEL: test_unaligned_v8i16:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqu64 (%rdi), %xmm0
-; AVX512VL-NEXT: retq
+; AVX512-LABEL: test_unaligned_v8i16:
+; AVX512: # BB#0:
+; AVX512-NEXT: vmovups (%rdi), %xmm0
+; AVX512-NEXT: retq
%1 = load <8 x i16>, <8 x i16>* %src, align 1, !nontemporal !1
ret <8 x i16> %1
}
@@ -1271,20 +1241,10 @@ define <16 x i8> @test_unaligned_v16i8(<
; AVX-NEXT: vmovups (%rdi), %xmm0
; AVX-NEXT: retq
;
-; AVX512F-LABEL: test_unaligned_v16i8:
-; AVX512F: # BB#0:
-; AVX512F-NEXT: vmovups (%rdi), %xmm0
-; AVX512F-NEXT: retq
-;
-; AVX512BW-LABEL: test_unaligned_v16i8:
-; AVX512BW: # BB#0:
-; AVX512BW-NEXT: vmovups (%rdi), %xmm0
-; AVX512BW-NEXT: retq
-;
-; AVX512VL-LABEL: test_unaligned_v16i8:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqu64 (%rdi), %xmm0
-; AVX512VL-NEXT: retq
+; AVX512-LABEL: test_unaligned_v16i8:
+; AVX512: # BB#0:
+; AVX512-NEXT: vmovups (%rdi), %xmm0
+; AVX512-NEXT: retq
%1 = load <16 x i8>, <16 x i8>* %src, align 1, !nontemporal !1
ret <16 x i8> %1
}
@@ -1323,20 +1283,10 @@ define <8 x i32> @test_unaligned_v8i32(<
; AVX-NEXT: vmovups (%rdi), %ymm0
; AVX-NEXT: retq
;
-; AVX512F-LABEL: test_unaligned_v8i32:
-; AVX512F: # BB#0:
-; AVX512F-NEXT: vmovups (%rdi), %ymm0
-; AVX512F-NEXT: retq
-;
-; AVX512BW-LABEL: test_unaligned_v8i32:
-; AVX512BW: # BB#0:
-; AVX512BW-NEXT: vmovups (%rdi), %ymm0
-; AVX512BW-NEXT: retq
-;
-; AVX512VL-LABEL: test_unaligned_v8i32:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqu32 (%rdi), %ymm0
-; AVX512VL-NEXT: retq
+; AVX512-LABEL: test_unaligned_v8i32:
+; AVX512: # BB#0:
+; AVX512-NEXT: vmovups (%rdi), %ymm0
+; AVX512-NEXT: retq
%1 = load <8 x i32>, <8 x i32>* %src, align 1, !nontemporal !1
ret <8 x i32> %1
}
@@ -1373,20 +1323,10 @@ define <4 x i64> @test_unaligned_v4i64(<
; AVX-NEXT: vmovups (%rdi), %ymm0
; AVX-NEXT: retq
;
-; AVX512F-LABEL: test_unaligned_v4i64:
-; AVX512F: # BB#0:
-; AVX512F-NEXT: vmovups (%rdi), %ymm0
-; AVX512F-NEXT: retq
-;
-; AVX512BW-LABEL: test_unaligned_v4i64:
-; AVX512BW: # BB#0:
-; AVX512BW-NEXT: vmovups (%rdi), %ymm0
-; AVX512BW-NEXT: retq
-;
-; AVX512VL-LABEL: test_unaligned_v4i64:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqu64 (%rdi), %ymm0
-; AVX512VL-NEXT: retq
+; AVX512-LABEL: test_unaligned_v4i64:
+; AVX512: # BB#0:
+; AVX512-NEXT: vmovups (%rdi), %ymm0
+; AVX512-NEXT: retq
%1 = load <4 x i64>, <4 x i64>* %src, align 1, !nontemporal !1
ret <4 x i64> %1
}
@@ -1403,20 +1343,10 @@ define <16 x i16> @test_unaligned_v16i16
; AVX-NEXT: vmovups (%rdi), %ymm0
; AVX-NEXT: retq
;
-; AVX512F-LABEL: test_unaligned_v16i16:
-; AVX512F: # BB#0:
-; AVX512F-NEXT: vmovups (%rdi), %ymm0
-; AVX512F-NEXT: retq
-;
-; AVX512BW-LABEL: test_unaligned_v16i16:
-; AVX512BW: # BB#0:
-; AVX512BW-NEXT: vmovups (%rdi), %ymm0
-; AVX512BW-NEXT: retq
-;
-; AVX512VL-LABEL: test_unaligned_v16i16:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqu64 (%rdi), %ymm0
-; AVX512VL-NEXT: retq
+; AVX512-LABEL: test_unaligned_v16i16:
+; AVX512: # BB#0:
+; AVX512-NEXT: vmovups (%rdi), %ymm0
+; AVX512-NEXT: retq
%1 = load <16 x i16>, <16 x i16>* %src, align 1, !nontemporal !1
ret <16 x i16> %1
}
@@ -1433,20 +1363,10 @@ define <32 x i8> @test_unaligned_v32i8(<
; AVX-NEXT: vmovups (%rdi), %ymm0
; AVX-NEXT: retq
;
-; AVX512F-LABEL: test_unaligned_v32i8:
-; AVX512F: # BB#0:
-; AVX512F-NEXT: vmovups (%rdi), %ymm0
-; AVX512F-NEXT: retq
-;
-; AVX512BW-LABEL: test_unaligned_v32i8:
-; AVX512BW: # BB#0:
-; AVX512BW-NEXT: vmovups (%rdi), %ymm0
-; AVX512BW-NEXT: retq
-;
-; AVX512VL-LABEL: test_unaligned_v32i8:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqu64 (%rdi), %ymm0
-; AVX512VL-NEXT: retq
+; AVX512-LABEL: test_unaligned_v32i8:
+; AVX512: # BB#0:
+; AVX512-NEXT: vmovups (%rdi), %ymm0
+; AVX512-NEXT: retq
%1 = load <32 x i8>, <32 x i8>* %src, align 1, !nontemporal !1
ret <32 x i8> %1
}
@@ -1493,7 +1413,7 @@ define <16 x i32> @test_unaligned_v16i32
;
; AVX512-LABEL: test_unaligned_v16i32:
; AVX512: # BB#0:
-; AVX512-NEXT: vmovdqu32 (%rdi), %zmm0
+; AVX512-NEXT: vmovups (%rdi), %zmm0
; AVX512-NEXT: retq
%1 = load <16 x i32>, <16 x i32>* %src, align 1, !nontemporal !1
ret <16 x i32> %1
@@ -1539,7 +1459,7 @@ define <8 x i64> @test_unaligned_v8i64(<
;
; AVX512-LABEL: test_unaligned_v8i64:
; AVX512: # BB#0:
-; AVX512-NEXT: vmovdqu64 (%rdi), %zmm0
+; AVX512-NEXT: vmovups (%rdi), %zmm0
; AVX512-NEXT: retq
%1 = load <8 x i64>, <8 x i64>* %src, align 1, !nontemporal !1
ret <8 x i64> %1
@@ -1573,8 +1493,8 @@ define <32 x i16> @test_unaligned_v32i16
;
; AVX512VL-LABEL: test_unaligned_v32i16:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqu64 (%rdi), %ymm0
-; AVX512VL-NEXT: vmovdqu64 32(%rdi), %ymm1
+; AVX512VL-NEXT: vmovups (%rdi), %ymm0
+; AVX512VL-NEXT: vmovups 32(%rdi), %ymm1
; AVX512VL-NEXT: retq
%1 = load <32 x i16>, <32 x i16>* %src, align 1, !nontemporal !1
ret <32 x i16> %1
@@ -1608,8 +1528,8 @@ define <64 x i8> @test_unaligned_v64i8(<
;
; AVX512VL-LABEL: test_unaligned_v64i8:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqu64 (%rdi), %ymm0
-; AVX512VL-NEXT: vmovdqu64 32(%rdi), %ymm1
+; AVX512VL-NEXT: vmovups (%rdi), %ymm0
+; AVX512VL-NEXT: vmovups 32(%rdi), %ymm1
; AVX512VL-NEXT: retq
%1 = load <64 x i8>, <64 x i8>* %src, align 1, !nontemporal !1
ret <64 x i8> %1
Modified: llvm/trunk/test/CodeGen/X86/vector-lzcnt-128.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-lzcnt-128.ll?rev=277327&r1=277326&r2=277327&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-lzcnt-128.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-lzcnt-128.ll Mon Aug 1 02:55:33 2016
@@ -1632,15 +1632,10 @@ define <4 x i32> @foldv4i32() nounwind {
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [23,0,32,24]
; AVX-NEXT: retq
;
-; AVX512VLCD-LABEL: foldv4i32:
-; AVX512VLCD: ## BB#0:
-; AVX512VLCD-NEXT: vmovdqa32 {{.*#+}} xmm0 = [23,0,32,24]
-; AVX512VLCD-NEXT: retq
-;
-; AVX512CD-LABEL: foldv4i32:
-; AVX512CD: ## BB#0:
-; AVX512CD-NEXT: vmovaps {{.*#+}} xmm0 = [23,0,32,24]
-; AVX512CD-NEXT: retq
+; AVX512-LABEL: foldv4i32:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [23,0,32,24]
+; AVX512-NEXT: retq
;
; X32-SSE-LABEL: foldv4i32:
; X32-SSE: # BB#0:
@@ -1661,15 +1656,10 @@ define <4 x i32> @foldv4i32u() nounwind
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [23,0,32,24]
; AVX-NEXT: retq
;
-; AVX512VLCD-LABEL: foldv4i32u:
-; AVX512VLCD: ## BB#0:
-; AVX512VLCD-NEXT: vmovdqa32 {{.*#+}} xmm0 = [23,0,32,24]
-; AVX512VLCD-NEXT: retq
-;
-; AVX512CD-LABEL: foldv4i32u:
-; AVX512CD: ## BB#0:
-; AVX512CD-NEXT: vmovaps {{.*#+}} xmm0 = [23,0,32,24]
-; AVX512CD-NEXT: retq
+; AVX512-LABEL: foldv4i32u:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [23,0,32,24]
+; AVX512-NEXT: retq
;
; X32-SSE-LABEL: foldv4i32u:
; X32-SSE: # BB#0:
@@ -1690,15 +1680,10 @@ define <8 x i16> @foldv8i16() nounwind {
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [7,0,16,8,16,13,11,9]
; AVX-NEXT: retq
;
-; AVX512VLCD-LABEL: foldv8i16:
-; AVX512VLCD: ## BB#0:
-; AVX512VLCD-NEXT: vmovdqa64 {{.*#+}} xmm0 = [7,0,16,8,16,13,11,9]
-; AVX512VLCD-NEXT: retq
-;
-; AVX512CD-LABEL: foldv8i16:
-; AVX512CD: ## BB#0:
-; AVX512CD-NEXT: vmovaps {{.*#+}} xmm0 = [7,0,16,8,16,13,11,9]
-; AVX512CD-NEXT: retq
+; AVX512-LABEL: foldv8i16:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [7,0,16,8,16,13,11,9]
+; AVX512-NEXT: retq
;
; X32-SSE-LABEL: foldv8i16:
; X32-SSE: # BB#0:
@@ -1719,15 +1704,10 @@ define <8 x i16> @foldv8i16u() nounwind
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [7,0,16,8,16,13,11,9]
; AVX-NEXT: retq
;
-; AVX512VLCD-LABEL: foldv8i16u:
-; AVX512VLCD: ## BB#0:
-; AVX512VLCD-NEXT: vmovdqa64 {{.*#+}} xmm0 = [7,0,16,8,16,13,11,9]
-; AVX512VLCD-NEXT: retq
-;
-; AVX512CD-LABEL: foldv8i16u:
-; AVX512CD: ## BB#0:
-; AVX512CD-NEXT: vmovaps {{.*#+}} xmm0 = [7,0,16,8,16,13,11,9]
-; AVX512CD-NEXT: retq
+; AVX512-LABEL: foldv8i16u:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [7,0,16,8,16,13,11,9]
+; AVX512-NEXT: retq
;
; X32-SSE-LABEL: foldv8i16u:
; X32-SSE: # BB#0:
@@ -1748,15 +1728,10 @@ define <16 x i8> @foldv16i8() nounwind {
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2]
; AVX-NEXT: retq
;
-; AVX512VLCD-LABEL: foldv16i8:
-; AVX512VLCD: ## BB#0:
-; AVX512VLCD-NEXT: vmovdqa64 {{.*#+}} xmm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2]
-; AVX512VLCD-NEXT: retq
-;
-; AVX512CD-LABEL: foldv16i8:
-; AVX512CD: ## BB#0:
-; AVX512CD-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2]
-; AVX512CD-NEXT: retq
+; AVX512-LABEL: foldv16i8:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2]
+; AVX512-NEXT: retq
;
; X32-SSE-LABEL: foldv16i8:
; X32-SSE: # BB#0:
@@ -1777,15 +1752,10 @@ define <16 x i8> @foldv16i8u() nounwind
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2]
; AVX-NEXT: retq
;
-; AVX512VLCD-LABEL: foldv16i8u:
-; AVX512VLCD: ## BB#0:
-; AVX512VLCD-NEXT: vmovdqa64 {{.*#+}} xmm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2]
-; AVX512VLCD-NEXT: retq
-;
-; AVX512CD-LABEL: foldv16i8u:
-; AVX512CD: ## BB#0:
-; AVX512CD-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2]
-; AVX512CD-NEXT: retq
+; AVX512-LABEL: foldv16i8u:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2]
+; AVX512-NEXT: retq
;
; X32-SSE-LABEL: foldv16i8u:
; X32-SSE: # BB#0:
Modified: llvm/trunk/test/CodeGen/X86/vector-lzcnt-256.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-lzcnt-256.ll?rev=277327&r1=277326&r2=277327&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-lzcnt-256.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-lzcnt-256.ll Mon Aug 1 02:55:33 2016
@@ -596,15 +596,10 @@ define <4 x i64> @foldv4i64() nounwind {
; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [55,0,64,56]
; AVX-NEXT: retq
;
-; AVX512VLCD-LABEL: foldv4i64:
-; AVX512VLCD: ## BB#0:
-; AVX512VLCD-NEXT: vmovdqa64 {{.*#+}} ymm0 = [55,0,64,56]
-; AVX512VLCD-NEXT: retq
-;
-; AVX512CD-LABEL: foldv4i64:
-; AVX512CD: ## BB#0:
-; AVX512CD-NEXT: vmovaps {{.*#+}} ymm0 = [55,0,64,56]
-; AVX512CD-NEXT: retq
+; AVX512-LABEL: foldv4i64:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vmovaps {{.*#+}} ymm0 = [55,0,64,56]
+; AVX512-NEXT: retq
%out = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> <i64 256, i64 -1, i64 0, i64 255>, i1 0)
ret <4 x i64> %out
}
@@ -615,15 +610,10 @@ define <4 x i64> @foldv4i64u() nounwind
; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [55,0,64,56]
; AVX-NEXT: retq
;
-; AVX512VLCD-LABEL: foldv4i64u:
-; AVX512VLCD: ## BB#0:
-; AVX512VLCD-NEXT: vmovdqa64 {{.*#+}} ymm0 = [55,0,64,56]
-; AVX512VLCD-NEXT: retq
-;
-; AVX512CD-LABEL: foldv4i64u:
-; AVX512CD: ## BB#0:
-; AVX512CD-NEXT: vmovaps {{.*#+}} ymm0 = [55,0,64,56]
-; AVX512CD-NEXT: retq
+; AVX512-LABEL: foldv4i64u:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vmovaps {{.*#+}} ymm0 = [55,0,64,56]
+; AVX512-NEXT: retq
%out = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> <i64 256, i64 -1, i64 0, i64 255>, i1 -1)
ret <4 x i64> %out
}
@@ -634,15 +624,10 @@ define <8 x i32> @foldv8i32() nounwind {
; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [23,0,32,24,0,29,27,25]
; AVX-NEXT: retq
;
-; AVX512VLCD-LABEL: foldv8i32:
-; AVX512VLCD: ## BB#0:
-; AVX512VLCD-NEXT: vmovdqa32 {{.*#+}} ymm0 = [23,0,32,24,0,29,27,25]
-; AVX512VLCD-NEXT: retq
-;
-; AVX512CD-LABEL: foldv8i32:
-; AVX512CD: ## BB#0:
-; AVX512CD-NEXT: vmovaps {{.*#+}} ymm0 = [23,0,32,24,0,29,27,25]
-; AVX512CD-NEXT: retq
+; AVX512-LABEL: foldv8i32:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vmovaps {{.*#+}} ymm0 = [23,0,32,24,0,29,27,25]
+; AVX512-NEXT: retq
%out = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> <i32 256, i32 -1, i32 0, i32 255, i32 -65536, i32 7, i32 24, i32 88>, i1 0)
ret <8 x i32> %out
}
@@ -653,15 +638,10 @@ define <8 x i32> @foldv8i32u() nounwind
; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [23,0,32,24,0,29,27,25]
; AVX-NEXT: retq
;
-; AVX512VLCD-LABEL: foldv8i32u:
-; AVX512VLCD: ## BB#0:
-; AVX512VLCD-NEXT: vmovdqa32 {{.*#+}} ymm0 = [23,0,32,24,0,29,27,25]
-; AVX512VLCD-NEXT: retq
-;
-; AVX512CD-LABEL: foldv8i32u:
-; AVX512CD: ## BB#0:
-; AVX512CD-NEXT: vmovaps {{.*#+}} ymm0 = [23,0,32,24,0,29,27,25]
-; AVX512CD-NEXT: retq
+; AVX512-LABEL: foldv8i32u:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vmovaps {{.*#+}} ymm0 = [23,0,32,24,0,29,27,25]
+; AVX512-NEXT: retq
%out = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> <i32 256, i32 -1, i32 0, i32 255, i32 -65536, i32 7, i32 24, i32 88>, i1 -1)
ret <8 x i32> %out
}
@@ -672,15 +652,10 @@ define <16 x i16> @foldv16i16() nounwind
; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [7,0,16,8,16,13,11,9,0,8,15,14,13,12,11,10]
; AVX-NEXT: retq
;
-; AVX512VLCD-LABEL: foldv16i16:
-; AVX512VLCD: ## BB#0:
-; AVX512VLCD-NEXT: vmovdqa64 {{.*#+}} ymm0 = [7,0,16,8,16,13,11,9,0,8,15,14,13,12,11,10]
-; AVX512VLCD-NEXT: retq
-;
-; AVX512CD-LABEL: foldv16i16:
-; AVX512CD: ## BB#0:
-; AVX512CD-NEXT: vmovaps {{.*#+}} ymm0 = [7,0,16,8,16,13,11,9,0,8,15,14,13,12,11,10]
-; AVX512CD-NEXT: retq
+; AVX512-LABEL: foldv16i16:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vmovaps {{.*#+}} ymm0 = [7,0,16,8,16,13,11,9,0,8,15,14,13,12,11,10]
+; AVX512-NEXT: retq
%out = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> <i16 256, i16 -1, i16 0, i16 255, i16 -65536, i16 7, i16 24, i16 88, i16 -2, i16 254, i16 1, i16 2, i16 4, i16 8, i16 16, i16 32>, i1 0)
ret <16 x i16> %out
}
@@ -691,15 +666,10 @@ define <16 x i16> @foldv16i16u() nounwin
; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [7,0,16,8,16,13,11,9,0,8,15,14,13,12,11,10]
; AVX-NEXT: retq
;
-; AVX512VLCD-LABEL: foldv16i16u:
-; AVX512VLCD: ## BB#0:
-; AVX512VLCD-NEXT: vmovdqa64 {{.*#+}} ymm0 = [7,0,16,8,16,13,11,9,0,8,15,14,13,12,11,10]
-; AVX512VLCD-NEXT: retq
-;
-; AVX512CD-LABEL: foldv16i16u:
-; AVX512CD: ## BB#0:
-; AVX512CD-NEXT: vmovaps {{.*#+}} ymm0 = [7,0,16,8,16,13,11,9,0,8,15,14,13,12,11,10]
-; AVX512CD-NEXT: retq
+; AVX512-LABEL: foldv16i16u:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vmovaps {{.*#+}} ymm0 = [7,0,16,8,16,13,11,9,0,8,15,14,13,12,11,10]
+; AVX512-NEXT: retq
%out = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> <i16 256, i16 -1, i16 0, i16 255, i16 -65536, i16 7, i16 24, i16 88, i16 -2, i16 254, i16 1, i16 2, i16 4, i16 8, i16 16, i16 32>, i1 -1)
ret <16 x i16> %out
}
@@ -710,15 +680,10 @@ define <32 x i8> @foldv32i8() nounwind {
; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2,1,0,8,8,0,0,0,0,0,0,0,0,6,5,5,1]
; AVX-NEXT: retq
;
-; AVX512VLCD-LABEL: foldv32i8:
-; AVX512VLCD: ## BB#0:
-; AVX512VLCD-NEXT: vmovdqa64 {{.*#+}} ymm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2,1,0,8,8,0,0,0,0,0,0,0,0,6,5,5,1]
-; AVX512VLCD-NEXT: retq
-;
-; AVX512CD-LABEL: foldv32i8:
-; AVX512CD: ## BB#0:
-; AVX512CD-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2,1,0,8,8,0,0,0,0,0,0,0,0,6,5,5,1]
-; AVX512CD-NEXT: retq
+; AVX512-LABEL: foldv32i8:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2,1,0,8,8,0,0,0,0,0,0,0,0,6,5,5,1]
+; AVX512-NEXT: retq
%out = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> <i8 256, i8 -1, i8 0, i8 255, i8 -65536, i8 7, i8 24, i8 88, i8 -2, i8 254, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 128, i8 256, i8 -256, i8 -128, i8 -64, i8 -32, i8 -16, i8 -8, i8 -4, i8 -2, i8 -1, i8 3, i8 5, i8 7, i8 127>, i1 0)
ret <32 x i8> %out
}
@@ -729,15 +694,10 @@ define <32 x i8> @foldv32i8u() nounwind
; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2,1,0,8,8,0,0,0,0,0,0,0,0,6,5,5,1]
; AVX-NEXT: retq
;
-; AVX512VLCD-LABEL: foldv32i8u:
-; AVX512VLCD: ## BB#0:
-; AVX512VLCD-NEXT: vmovdqa64 {{.*#+}} ymm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2,1,0,8,8,0,0,0,0,0,0,0,0,6,5,5,1]
-; AVX512VLCD-NEXT: retq
-;
-; AVX512CD-LABEL: foldv32i8u:
-; AVX512CD: ## BB#0:
-; AVX512CD-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2,1,0,8,8,0,0,0,0,0,0,0,0,6,5,5,1]
-; AVX512CD-NEXT: retq
+; AVX512-LABEL: foldv32i8u:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2,1,0,8,8,0,0,0,0,0,0,0,0,6,5,5,1]
+; AVX512-NEXT: retq
%out = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> <i8 256, i8 -1, i8 0, i8 255, i8 -65536, i8 7, i8 24, i8 88, i8 -2, i8 254, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 128, i8 256, i8 -256, i8 -128, i8 -64, i8 -32, i8 -16, i8 -8, i8 -4, i8 -2, i8 -1, i8 3, i8 5, i8 7, i8 127>, i1 -1)
ret <32 x i8> %out
}
Modified: llvm/trunk/test/CodeGen/X86/vector-tzcnt-128.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-tzcnt-128.ll?rev=277327&r1=277326&r2=277327&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-tzcnt-128.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-tzcnt-128.ll Mon Aug 1 02:55:33 2016
@@ -1435,25 +1435,10 @@ define <4 x i32> @foldv4i32() nounwind {
; SSE-NEXT: movaps {{.*#+}} xmm0 = [8,0,32,0]
; SSE-NEXT: retq
;
-; AVX1-LABEL: foldv4i32:
-; AVX1: # BB#0:
-; AVX1-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,32,0]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: foldv4i32:
-; AVX2: # BB#0:
-; AVX2-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,32,0]
-; AVX2-NEXT: retq
-;
-; AVX512CDVL-LABEL: foldv4i32:
-; AVX512CDVL: # BB#0:
-; AVX512CDVL-NEXT: vmovdqa32 {{.*#+}} xmm0 = [8,0,32,0]
-; AVX512CDVL-NEXT: retq
-;
-; AVX512CD-LABEL: foldv4i32:
-; AVX512CD: # BB#0:
-; AVX512CD-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,32,0]
-; AVX512CD-NEXT: retq
+; AVX-LABEL: foldv4i32:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,32,0]
+; AVX-NEXT: retq
;
; X32-SSE-LABEL: foldv4i32:
; X32-SSE: # BB#0:
@@ -1469,25 +1454,10 @@ define <4 x i32> @foldv4i32u() nounwind
; SSE-NEXT: movaps {{.*#+}} xmm0 = [8,0,32,0]
; SSE-NEXT: retq
;
-; AVX1-LABEL: foldv4i32u:
-; AVX1: # BB#0:
-; AVX1-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,32,0]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: foldv4i32u:
-; AVX2: # BB#0:
-; AVX2-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,32,0]
-; AVX2-NEXT: retq
-;
-; AVX512CDVL-LABEL: foldv4i32u:
-; AVX512CDVL: # BB#0:
-; AVX512CDVL-NEXT: vmovdqa32 {{.*#+}} xmm0 = [8,0,32,0]
-; AVX512CDVL-NEXT: retq
-;
-; AVX512CD-LABEL: foldv4i32u:
-; AVX512CD: # BB#0:
-; AVX512CD-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,32,0]
-; AVX512CD-NEXT: retq
+; AVX-LABEL: foldv4i32u:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,32,0]
+; AVX-NEXT: retq
;
; X32-SSE-LABEL: foldv4i32u:
; X32-SSE: # BB#0:
@@ -1503,25 +1473,10 @@ define <8 x i16> @foldv8i16() nounwind {
; SSE-NEXT: movaps {{.*#+}} xmm0 = [8,0,16,0,16,0,3,3]
; SSE-NEXT: retq
;
-; AVX1-LABEL: foldv8i16:
-; AVX1: # BB#0:
-; AVX1-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,16,0,16,0,3,3]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: foldv8i16:
-; AVX2: # BB#0:
-; AVX2-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,16,0,16,0,3,3]
-; AVX2-NEXT: retq
-;
-; AVX512CDVL-LABEL: foldv8i16:
-; AVX512CDVL: # BB#0:
-; AVX512CDVL-NEXT: vmovdqa64 {{.*#+}} xmm0 = [8,0,16,0,16,0,3,3]
-; AVX512CDVL-NEXT: retq
-;
-; AVX512CD-LABEL: foldv8i16:
-; AVX512CD: # BB#0:
-; AVX512CD-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,16,0,16,0,3,3]
-; AVX512CD-NEXT: retq
+; AVX-LABEL: foldv8i16:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,16,0,16,0,3,3]
+; AVX-NEXT: retq
;
; X32-SSE-LABEL: foldv8i16:
; X32-SSE: # BB#0:
@@ -1537,25 +1492,10 @@ define <8 x i16> @foldv8i16u() nounwind
; SSE-NEXT: movaps {{.*#+}} xmm0 = [8,0,16,0,16,0,3,3]
; SSE-NEXT: retq
;
-; AVX1-LABEL: foldv8i16u:
-; AVX1: # BB#0:
-; AVX1-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,16,0,16,0,3,3]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: foldv8i16u:
-; AVX2: # BB#0:
-; AVX2-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,16,0,16,0,3,3]
-; AVX2-NEXT: retq
-;
-; AVX512CDVL-LABEL: foldv8i16u:
-; AVX512CDVL: # BB#0:
-; AVX512CDVL-NEXT: vmovdqa64 {{.*#+}} xmm0 = [8,0,16,0,16,0,3,3]
-; AVX512CDVL-NEXT: retq
-;
-; AVX512CD-LABEL: foldv8i16u:
-; AVX512CD: # BB#0:
-; AVX512CD-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,16,0,16,0,3,3]
-; AVX512CD-NEXT: retq
+; AVX-LABEL: foldv8i16u:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,16,0,16,0,3,3]
+; AVX-NEXT: retq
;
; X32-SSE-LABEL: foldv8i16u:
; X32-SSE: # BB#0:
@@ -1571,25 +1511,10 @@ define <16 x i8> @foldv16i8() nounwind {
; SSE-NEXT: movaps {{.*#+}} xmm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5]
; SSE-NEXT: retq
;
-; AVX1-LABEL: foldv16i8:
-; AVX1: # BB#0:
-; AVX1-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: foldv16i8:
-; AVX2: # BB#0:
-; AVX2-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5]
-; AVX2-NEXT: retq
-;
-; AVX512CDVL-LABEL: foldv16i8:
-; AVX512CDVL: # BB#0:
-; AVX512CDVL-NEXT: vmovdqa64 {{.*#+}} xmm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5]
-; AVX512CDVL-NEXT: retq
-;
-; AVX512CD-LABEL: foldv16i8:
-; AVX512CD: # BB#0:
-; AVX512CD-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5]
-; AVX512CD-NEXT: retq
+; AVX-LABEL: foldv16i8:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5]
+; AVX-NEXT: retq
;
; X32-SSE-LABEL: foldv16i8:
; X32-SSE: # BB#0:
@@ -1605,25 +1530,10 @@ define <16 x i8> @foldv16i8u() nounwind
; SSE-NEXT: movaps {{.*#+}} xmm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5]
; SSE-NEXT: retq
;
-; AVX1-LABEL: foldv16i8u:
-; AVX1: # BB#0:
-; AVX1-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: foldv16i8u:
-; AVX2: # BB#0:
-; AVX2-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5]
-; AVX2-NEXT: retq
-;
-; AVX512CDVL-LABEL: foldv16i8u:
-; AVX512CDVL: # BB#0:
-; AVX512CDVL-NEXT: vmovdqa64 {{.*#+}} xmm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5]
-; AVX512CDVL-NEXT: retq
-;
-; AVX512CD-LABEL: foldv16i8u:
-; AVX512CD: # BB#0:
-; AVX512CD-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5]
-; AVX512CD-NEXT: retq
+; AVX-LABEL: foldv16i8u:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5]
+; AVX-NEXT: retq
;
; X32-SSE-LABEL: foldv16i8u:
; X32-SSE: # BB#0:
Modified: llvm/trunk/test/CodeGen/X86/vector-tzcnt-256.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-tzcnt-256.ll?rev=277327&r1=277326&r2=277327&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-tzcnt-256.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-tzcnt-256.ll Mon Aug 1 02:55:33 2016
@@ -713,193 +713,73 @@ define <32 x i8> @testv32i8u(<32 x i8> %
}
define <4 x i64> @foldv4i64() nounwind {
-; AVX1-LABEL: foldv4i64:
-; AVX1: # BB#0:
-; AVX1-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,64,0]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: foldv4i64:
-; AVX2: # BB#0:
-; AVX2-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,64,0]
-; AVX2-NEXT: retq
-;
-; AVX512CDVL-LABEL: foldv4i64:
-; AVX512CDVL: # BB#0:
-; AVX512CDVL-NEXT: vmovdqa64 {{.*#+}} ymm0 = [8,0,64,0]
-; AVX512CDVL-NEXT: retq
-;
-; AVX512CD-LABEL: foldv4i64:
-; AVX512CD: # BB#0:
-; AVX512CD-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,64,0]
-; AVX512CD-NEXT: retq
+; ALL-LABEL: foldv4i64:
+; ALL: # BB#0:
+; ALL-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,64,0]
+; ALL-NEXT: retq
%out = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> <i64 256, i64 -1, i64 0, i64 255>, i1 0)
ret <4 x i64> %out
}
define <4 x i64> @foldv4i64u() nounwind {
-; AVX1-LABEL: foldv4i64u:
-; AVX1: # BB#0:
-; AVX1-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,64,0]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: foldv4i64u:
-; AVX2: # BB#0:
-; AVX2-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,64,0]
-; AVX2-NEXT: retq
-;
-; AVX512CDVL-LABEL: foldv4i64u:
-; AVX512CDVL: # BB#0:
-; AVX512CDVL-NEXT: vmovdqa64 {{.*#+}} ymm0 = [8,0,64,0]
-; AVX512CDVL-NEXT: retq
-;
-; AVX512CD-LABEL: foldv4i64u:
-; AVX512CD: # BB#0:
-; AVX512CD-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,64,0]
-; AVX512CD-NEXT: retq
+; ALL-LABEL: foldv4i64u:
+; ALL: # BB#0:
+; ALL-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,64,0]
+; ALL-NEXT: retq
%out = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> <i64 256, i64 -1, i64 0, i64 255>, i1 -1)
ret <4 x i64> %out
}
define <8 x i32> @foldv8i32() nounwind {
-; AVX1-LABEL: foldv8i32:
-; AVX1: # BB#0:
-; AVX1-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,32,0,16,0,3,3]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: foldv8i32:
-; AVX2: # BB#0:
-; AVX2-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,32,0,16,0,3,3]
-; AVX2-NEXT: retq
-;
-; AVX512CDVL-LABEL: foldv8i32:
-; AVX512CDVL: # BB#0:
-; AVX512CDVL-NEXT: vmovdqa32 {{.*#+}} ymm0 = [8,0,32,0,16,0,3,3]
-; AVX512CDVL-NEXT: retq
-;
-; AVX512CD-LABEL: foldv8i32:
-; AVX512CD: # BB#0:
-; AVX512CD-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,32,0,16,0,3,3]
-; AVX512CD-NEXT: retq
+; ALL-LABEL: foldv8i32:
+; ALL: # BB#0:
+; ALL-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,32,0,16,0,3,3]
+; ALL-NEXT: retq
%out = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> <i32 256, i32 -1, i32 0, i32 255, i32 -65536, i32 7, i32 24, i32 88>, i1 0)
ret <8 x i32> %out
}
define <8 x i32> @foldv8i32u() nounwind {
-; AVX1-LABEL: foldv8i32u:
-; AVX1: # BB#0:
-; AVX1-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,32,0,16,0,3,3]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: foldv8i32u:
-; AVX2: # BB#0:
-; AVX2-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,32,0,16,0,3,3]
-; AVX2-NEXT: retq
-;
-; AVX512CDVL-LABEL: foldv8i32u:
-; AVX512CDVL: # BB#0:
-; AVX512CDVL-NEXT: vmovdqa32 {{.*#+}} ymm0 = [8,0,32,0,16,0,3,3]
-; AVX512CDVL-NEXT: retq
-;
-; AVX512CD-LABEL: foldv8i32u:
-; AVX512CD: # BB#0:
-; AVX512CD-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,32,0,16,0,3,3]
-; AVX512CD-NEXT: retq
+; ALL-LABEL: foldv8i32u:
+; ALL: # BB#0:
+; ALL-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,32,0,16,0,3,3]
+; ALL-NEXT: retq
%out = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> <i32 256, i32 -1, i32 0, i32 255, i32 -65536, i32 7, i32 24, i32 88>, i1 -1)
ret <8 x i32> %out
}
define <16 x i16> @foldv16i16() nounwind {
-; AVX1-LABEL: foldv16i16:
-; AVX1: # BB#0:
-; AVX1-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,16,0,16,0,3,3,1,1,0,1,2,3,4,5]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: foldv16i16:
-; AVX2: # BB#0:
-; AVX2-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,16,0,16,0,3,3,1,1,0,1,2,3,4,5]
-; AVX2-NEXT: retq
-;
-; AVX512CDVL-LABEL: foldv16i16:
-; AVX512CDVL: # BB#0:
-; AVX512CDVL-NEXT: vmovdqa64 {{.*#+}} ymm0 = [8,0,16,0,16,0,3,3,1,1,0,1,2,3,4,5]
-; AVX512CDVL-NEXT: retq
-;
-; AVX512CD-LABEL: foldv16i16:
-; AVX512CD: # BB#0:
-; AVX512CD-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,16,0,16,0,3,3,1,1,0,1,2,3,4,5]
-; AVX512CD-NEXT: retq
+; ALL-LABEL: foldv16i16:
+; ALL: # BB#0:
+; ALL-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,16,0,16,0,3,3,1,1,0,1,2,3,4,5]
+; ALL-NEXT: retq
%out = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> <i16 256, i16 -1, i16 0, i16 255, i16 -65536, i16 7, i16 24, i16 88, i16 -2, i16 254, i16 1, i16 2, i16 4, i16 8, i16 16, i16 32>, i1 0)
ret <16 x i16> %out
}
define <16 x i16> @foldv16i16u() nounwind {
-; AVX1-LABEL: foldv16i16u:
-; AVX1: # BB#0:
-; AVX1-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,16,0,16,0,3,3,1,1,0,1,2,3,4,5]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: foldv16i16u:
-; AVX2: # BB#0:
-; AVX2-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,16,0,16,0,3,3,1,1,0,1,2,3,4,5]
-; AVX2-NEXT: retq
-;
-; AVX512CDVL-LABEL: foldv16i16u:
-; AVX512CDVL: # BB#0:
-; AVX512CDVL-NEXT: vmovdqa64 {{.*#+}} ymm0 = [8,0,16,0,16,0,3,3,1,1,0,1,2,3,4,5]
-; AVX512CDVL-NEXT: retq
-;
-; AVX512CD-LABEL: foldv16i16u:
-; AVX512CD: # BB#0:
-; AVX512CD-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,16,0,16,0,3,3,1,1,0,1,2,3,4,5]
-; AVX512CD-NEXT: retq
+; ALL-LABEL: foldv16i16u:
+; ALL: # BB#0:
+; ALL-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,16,0,16,0,3,3,1,1,0,1,2,3,4,5]
+; ALL-NEXT: retq
%out = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> <i16 256, i16 -1, i16 0, i16 255, i16 -65536, i16 7, i16 24, i16 88, i16 -2, i16 254, i16 1, i16 2, i16 4, i16 8, i16 16, i16 32>, i1 -1)
ret <16 x i16> %out
}
define <32 x i8> @foldv32i8() nounwind {
-; AVX1-LABEL: foldv32i8:
-; AVX1: # BB#0:
-; AVX1-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5,6,7,8,8,7,6,5,4,3,2,1,0,0,0,0,0]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: foldv32i8:
-; AVX2: # BB#0:
-; AVX2-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5,6,7,8,8,7,6,5,4,3,2,1,0,0,0,0,0]
-; AVX2-NEXT: retq
-;
-; AVX512CDVL-LABEL: foldv32i8:
-; AVX512CDVL: # BB#0:
-; AVX512CDVL-NEXT: vmovdqa64 {{.*#+}} ymm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5,6,7,8,8,7,6,5,4,3,2,1,0,0,0,0,0]
-; AVX512CDVL-NEXT: retq
-;
-; AVX512CD-LABEL: foldv32i8:
-; AVX512CD: # BB#0:
-; AVX512CD-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5,6,7,8,8,7,6,5,4,3,2,1,0,0,0,0,0]
-; AVX512CD-NEXT: retq
+; ALL-LABEL: foldv32i8:
+; ALL: # BB#0:
+; ALL-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5,6,7,8,8,7,6,5,4,3,2,1,0,0,0,0,0]
+; ALL-NEXT: retq
%out = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> <i8 256, i8 -1, i8 0, i8 255, i8 -65536, i8 7, i8 24, i8 88, i8 -2, i8 254, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 128, i8 256, i8 -256, i8 -128, i8 -64, i8 -32, i8 -16, i8 -8, i8 -4, i8 -2, i8 -1, i8 3, i8 5, i8 7, i8 127>, i1 0)
ret <32 x i8> %out
}
define <32 x i8> @foldv32i8u() nounwind {
-; AVX1-LABEL: foldv32i8u:
-; AVX1: # BB#0:
-; AVX1-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5,6,7,8,8,7,6,5,4,3,2,1,0,0,0,0,0]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: foldv32i8u:
-; AVX2: # BB#0:
-; AVX2-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5,6,7,8,8,7,6,5,4,3,2,1,0,0,0,0,0]
-; AVX2-NEXT: retq
-;
-; AVX512CDVL-LABEL: foldv32i8u:
-; AVX512CDVL: # BB#0:
-; AVX512CDVL-NEXT: vmovdqa64 {{.*#+}} ymm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5,6,7,8,8,7,6,5,4,3,2,1,0,0,0,0,0]
-; AVX512CDVL-NEXT: retq
-;
-; AVX512CD-LABEL: foldv32i8u:
-; AVX512CD: # BB#0:
-; AVX512CD-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5,6,7,8,8,7,6,5,4,3,2,1,0,0,0,0,0]
-; AVX512CD-NEXT: retq
+; ALL-LABEL: foldv32i8u:
+; ALL: # BB#0:
+; ALL-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5,6,7,8,8,7,6,5,4,3,2,1,0,0,0,0,0]
+; ALL-NEXT: retq
%out = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> <i8 256, i8 -1, i8 0, i8 255, i8 -65536, i8 7, i8 24, i8 88, i8 -2, i8 254, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 128, i8 256, i8 -256, i8 -128, i8 -64, i8 -32, i8 -16, i8 -8, i8 -4, i8 -2, i8 -1, i8 3, i8 5, i8 7, i8 127>, i1 -1)
ret <32 x i8> %out
}
More information about the llvm-commits
mailing list